79 files changed, 43799 insertions, 0 deletions
diff --git a/tesseract/src/textord/alignedblob.cpp b/tesseract/src/textord/alignedblob.cpp
new file mode 100644
index 00000000..4c17584b
--- /dev/null
+++ b/tesseract/src/textord/alignedblob.cpp
@@ -0,0 +1,535 @@
+///////////////////////////////////////////////////////////////////////
+// File:        alignedblob.cpp
+// Description: Subclass of BBGrid to find vertically aligned blobs.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "alignedblob.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+INT_VAR(textord_debug_tabfind, 0, "Debug tab finding");
+INT_VAR(textord_debug_bugs, 0, "Turn on output related to bugs in tab finding");
+static INT_VAR(textord_testregion_left, -1, "Left edge of debug reporting rectangle in Leptonica coords (bottom=0/top=height), with horizontal lines x/y-flipped");
+static INT_VAR(textord_testregion_top, INT32_MAX, "Top edge of debug reporting rectangle in Leptonica coords (bottom=0/top=height), with horizontal lines x/y-flipped");
+static INT_VAR(textord_testregion_right, INT32_MAX, "Right edge of debug rectangle in Leptonica coords (bottom=0/top=height), with horizontal lines x/y-flipped");
+static INT_VAR(textord_testregion_bottom, -1, "Bottom edge of debug rectangle in Leptonica coords (bottom=0/top=height), with horizontal lines x/y-flipped");
+BOOL_VAR(textord_debug_printable, false, "Make debug windows printable");
+
+// Fraction of resolution (1/32) used as alignment tolerance for aligned tabs.
+const double kAlignedFraction = 0.03125;
+// Multiple of resolution used as alignment tolerance for ragged tabs.
+const double kRaggedFraction = 2.5;
+// Fraction of blob height used as a minimum gutter gap for aligned tabs.
+const double kAlignedGapFraction = 0.75;
+// Fraction of blob height used as a minimum gutter gap for ragged tabs.
+const double kRaggedGapFraction = 1.0;
+// Minimum alignment tolerance in pixels for line finding (width if larger).
+const int kVLineAlignment = 3;
+// Constant number of pixels used as the minimum gutter for line finding.
+const int kVLineGutter = 1;
+// Constant number of pixels used as the max vertical gap for line finding.
+const int kVLineSearchSize = 150;
+// Min number of points to accept for a ragged tab stop.
+const int kMinRaggedTabs = 5;
+// Min number of points to accept for an aligned tab stop.
+const int kMinAlignedTabs = 4;
+// Minimum length, in pixels, of a vertical line.
+const int kVLineMinLength = 300;
+// Minimum gradient for a vertical tab vector. Used to prune away junk
+// tab vectors with what would be a ridiculously large skew angle.
+// Value corresponds to tan(90 - max allowed skew angle), about 14 degrees.
+const double kMinTabGradient = 4.0;
+// Tolerance to skew on top of current estimate of skew. Divide x or y length
+// by kMaxSkewFactor to get the y or x skew distance.
+// If the angle is small, the angle in degrees is roughly 60/kMaxSkewFactor.
+const int kMaxSkewFactor = 15;
+
+// Builds the search parameters used when looking for aligned or ragged tabs.
+// vertical_x and vertical_y give the current estimate of the true vertical
+// (up) direction in the image, and height is the height of the starter blob.
+// height * v_gap_multiple is the largest vertical gap tolerated before the
+// line is considered ended, and min_gutter_width is a lower bound on the
+// gutter. resolution is the original image resolution, and align0 selects
+// the type of tab stop to be found.
+AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y,
+                                     int height, int v_gap_multiple,
+                                     int min_gutter_width,
+                                     int resolution, TabAlignment align0)
+  : right_tab(align0 == TA_RIGHT_RAGGED || align0 == TA_RIGHT_ALIGNED),
+    ragged(align0 == TA_LEFT_RAGGED || align0 == TA_RIGHT_RAGGED),
+    alignment(align0),
+    confirmed_type(TT_CONFIRMED),
+    max_v_gap(height * v_gap_multiple),
+    min_length(0) {
+  // Both tolerances scale with the image resolution: a tight one for
+  // precisely aligned edges and a loose one for the ragged side.
+  const int tight_tol = static_cast<int>(resolution * kAlignedFraction + 0.5);
+  const int loose_tol = static_cast<int>(resolution * kRaggedFraction + 0.5);
+  // Start from the aligned settings; ragged tabs relax them below.
+  gutter_fraction = kAlignedGapFraction;
+  l_align_tolerance = tight_tol;
+  r_align_tolerance = tight_tol;
+  min_points = kMinAlignedTabs;
+  if (ragged) {
+    // A ragged edge needs a bigger gutter and more points. Only one side's
+    // tolerance is loosened: the left for a right-ragged tab, otherwise
+    // the right.
+    gutter_fraction = kRaggedGapFraction;
+    min_points = kMinRaggedTabs;
+    if (alignment == TA_RIGHT_RAGGED)
+      l_align_tolerance = loose_tol;
+    else
+      r_align_tolerance = loose_tol;
+  }
+  // The gutter is a fraction of the blob height, but never narrower than
+  // min_gutter_width.
+  min_gutter = std::max(static_cast<int>(height * gutter_fraction + 0.5),
+                        min_gutter_width);
+  // Fit the vertical vector into an ICOORD, which is 16 bit.
+  set_vertical(vertical_x, vertical_y);
+}
+
+// Builds the search parameters used when looking for vertical lines.
+// vertical_x and vertical_y give the current estimate of the true vertical
+// (up) direction in the image, and width is the width of the starter blob.
+AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y,
+                                     int width)
+  : gutter_fraction(0.0),
+    right_tab(false),
+    ragged(false),
+    alignment(TA_SEPARATOR),
+    confirmed_type(TT_VLINE),
+    max_v_gap(kVLineSearchSize),
+    min_gutter(kVLineGutter),
+    // Both sides share one tolerance: the blob width, but never less
+    // than kVLineAlignment pixels.
+    l_align_tolerance(std::max(kVLineAlignment, width)),
+    r_align_tolerance(std::max(kVLineAlignment, width)),
+    min_points(1),
+    min_length(kVLineMinLength) {
+  // Fit the vertical vector into an ICOORD, which is 16 bit.
+  set_vertical(vertical_x, vertical_y);
+}
+
+// Stores (vertical_x, vertical_y) in the 16-bit ICOORD member vertical.
+// When vertical_y exceeds INT16_MAX both components are divided by the same
+// factor so that y fits; otherwise they are stored unchanged.
+void AlignedBlobParams::set_vertical(int vertical_x, int vertical_y) {
+  const int divisor = vertical_y > INT16_MAX ? vertical_y / INT16_MAX + 1 : 1;
+  vertical.set_x(vertical_x / divisor);
+  vertical.set_y(vertical_y / divisor);
+}
+
+// Constructs the grid by forwarding all three arguments to BlobGrid.
+AlignedBlob::AlignedBlob(int gridsize,
+                         const ICOORD& bleft, const ICOORD& tright)
+  : BlobGrid(gridsize, bleft, tright) {
+}
+
+// Returns true only when the debug level (textord_debug_tabfind) is at least
+// detail_level and (x, y) lies inside the textord_testregion_* rectangle.
+bool AlignedBlob::WithinTestRegion(int detail_level, int x, int y) {
+  // Reject early if debugging is not detailed enough or x is out of range.
+  if (textord_debug_tabfind < detail_level) return false;
+  if (x < textord_testregion_left || x > textord_testregion_right) return false;
+  return y >= textord_testregion_bottom && y <= textord_testregion_top;
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Draws the tab markings of every BLOBNBOX in this grid: a vertical line at
+// the left and/or right box edge, coloured by the tab type of that side.
+// A window named window_name is made if tab_win is null; tab_win is returned.
+ScrollView* AlignedBlob::DisplayTabs(const char* window_name,
+                                     ScrollView* tab_win) {
+  if (tab_win == nullptr)
+    tab_win = MakeWindow(0, 50, window_name);
+  // Visit every blob in the grid.
+  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this);
+  gsearch.StartFullSearch();
+  for (BLOBNBOX* bbox = gsearch.NextFullSearch(); bbox != nullptr;
+       bbox = gsearch.NextFullSearch()) {
+    const TBOX& box = bbox->bounding_box();
+    const int top_y = box.top();
+    const int bottom_y = box.bottom();
+    // Left edge: blue = maybe aligned, yellow = maybe ragged,
+    // green = confirmed, grey = any other type except TT_NONE.
+    const TabType left_type = bbox->left_tab_type();
+    if (left_type != TT_NONE) {
+      switch (left_type) {
+        case TT_MAYBE_ALIGNED: tab_win->Pen(ScrollView::BLUE); break;
+        case TT_MAYBE_RAGGED: tab_win->Pen(ScrollView::YELLOW); break;
+        case TT_CONFIRMED: tab_win->Pen(ScrollView::GREEN); break;
+        default: tab_win->Pen(ScrollView::GREY); break;
+      }
+      tab_win->Line(box.left(), top_y, box.left(), bottom_y);
+    }
+    // Right edge: magenta = maybe aligned, orange = maybe ragged,
+    // red = confirmed, grey = any other type except TT_NONE.
+    const TabType right_type = bbox->right_tab_type();
+    if (right_type != TT_NONE) {
+      switch (right_type) {
+        case TT_MAYBE_ALIGNED: tab_win->Pen(ScrollView::MAGENTA); break;
+        case TT_MAYBE_RAGGED: tab_win->Pen(ScrollView::ORANGE); break;
+        case TT_CONFIRMED: tab_win->Pen(ScrollView::RED); break;
+        default: tab_win->Pen(ScrollView::GREY); break;
+      }
+      tab_win->Line(box.right(), top_y, box.right(), bottom_y);
+    }
+  }
+  tab_win->Update();
+  return tab_win;
+}
+
+#endif // !GRAPHICS_DISABLED
+
+// Helper: adds up line_crossings() over every blob in the list and returns
+// true if the total is at least 2.
+static bool AtLeast2LineCrossings(BLOBNBOX_CLIST* blobs) {
+  int crossing_sum = 0;
+  BLOBNBOX_C_IT blob_it(blobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    crossing_sum += blob_it.data()->line_crossings();
+  }
+  return !(crossing_sum < 2);
+}
+
+// Destructor.
+// Defined out of line here so that the compiler can emit a single vtable
+// for the class instead of weak vtables in every compilation unit.
+AlignedBlob::~AlignedBlob() = default;
+
+// Finds a vector corresponding to a set of vertically aligned blob edges
+// running through the given box. The type of vector returned and the
+// search parameters are determined by the AlignedBlobParams. Blobs on an
+// accepted vector have their tab type set to align_params.confirmed_type.
+// vertical_x and y are updated with an estimate of the real vertical
+// direction (skew finding). Returns nullptr if no vector can be produced.
+TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params,
+                                              BLOBNBOX* bbox,
+                                              int* vertical_x,
+                                              int* vertical_y) {
+  int ext_start_y, ext_end_y;
+  BLOBNBOX_CLIST good_points;
+  // Search up and then down from the starting bbox.
+  TBOX box = bbox->bounding_box();
+  bool debug = WithinTestRegion(2, box.left(), box.bottom());
+  int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y);
+  pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y);
+  BLOBNBOX_C_IT it(&good_points);
+  // With no points collected there are no end blobs to read below.
+  if (it.empty())
+    return nullptr;
+  it.move_to_last();
+  box = it.data()->bounding_box();
+  int end_y = box.top();
+  int end_x = align_params.right_tab ? box.right() : box.left();
+  it.move_to_first();
+  box = it.data()->bounding_box();
+  int start_x = align_params.right_tab ? box.right() : box.left();
+  int start_y = box.bottom();
+  // Acceptable tab vectors must have a minimum number of points,
+  // have a minimum acceptable length, and have a minimum gradient.
+  // The gradient corresponds to the skew angle.
+  // Ragged tabs don't need to satisfy the gradient condition, as they
+  // will always end up parallel to the vertical direction.
+  bool at_least_2_crossings = AtLeast2LineCrossings(&good_points);
+  if ((pt_count >= align_params.min_points &&
+      end_y - start_y >= align_params.min_length &&
+      (align_params.ragged ||
+          end_y - start_y >= abs(end_x - start_x) * kMinTabGradient)) ||
+      at_least_2_crossings) {
+    int confirmed_points = 0;
+    // Count existing confirmed points to see if vector is acceptable.
+    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+      bbox = it.data();
+      TabType type = align_params.right_tab ? bbox->right_tab_type()
+                                            : bbox->left_tab_type();
+      if (type == align_params.confirmed_type)
+        ++confirmed_points;
+    }
+    // Ragged vectors are not allowed to use too many already used points.
+    if (!align_params.ragged ||
+        confirmed_points + confirmed_points < pt_count) {
+      const TBOX& box = bbox->bounding_box();
+      if (debug)
+        tprintf("Confirming tab vector of %d pts starting at %d,%d\n",
+                pt_count, box.left(), box.bottom());
+      // Flag all the aligned neighbours as confirmed.
+      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+        bbox = it.data();
+        if (align_params.right_tab)
+          bbox->set_right_tab_type(align_params.confirmed_type);
+        else
+          bbox->set_left_tab_type(align_params.confirmed_type);
+        if (debug)
+          bbox->bounding_box().print();
+      }
+      // Now make the vector and return it.
+      TabVector* result = TabVector::FitVector(align_params.alignment,
+                                               align_params.vertical,
+                                               ext_start_y, ext_end_y,
+                                               &good_points,
+                                               vertical_x, vertical_y);
+      // NOTE(review): FitVector is not visible here; guard in case it is null.
+      if (result != nullptr)
+        result->set_intersects_other_lines(at_least_2_crossings);
+      if (debug) {
+        tprintf("Box was %d, %d\n", box.left(), box.bottom());
+        if (result != nullptr)
+          result->Print("After fitting");
+      }
+      return result;
+    } else if (debug) {
+      tprintf("Ragged tab used too many used points: %d out of %d\n",
+              confirmed_points, pt_count);
+    }
+  } else if (debug) {
+    tprintf("Tab vector failed basic tests: pt count %d vs min %d, "
+            "length %d vs min %d, min grad %g\n",
+            pt_count, align_params.min_points, end_y - start_y,
+            align_params.min_length, abs(end_x - start_x) * kMinTabGradient);
+  }
+  return nullptr;
+}
+
+// Collects the run of blobs that are aligned with bbox along the given
+// vertical direction, walking down when top_to_bottom is true and up
+// otherwise. Qualifying blobs are added to good_points and the number
+// added is returned. For other parameters see FindAlignedBlob below.
+int AlignedBlob::AlignTabs(const AlignedBlobParams& params,
+                           bool top_to_bottom, BLOBNBOX* bbox,
+                           BLOBNBOX_CLIST* good_points, int* end_y) {
+  BLOBNBOX_C_IT points_it(good_points);
+  TBOX cur_box = bbox->bounding_box();
+  const bool debug = WithinTestRegion(2, cur_box.left(), cur_box.bottom());
+  if (debug) {
+    tprintf("Starting alignment run at blob:");
+    cur_box.print();
+  }
+  int x_start = params.right_tab ? cur_box.right() : cur_box.left();
+  int num_added = 0;
+  while (bbox != nullptr) {
+    // A blob qualifies if the relevant side is a tab candidate, or if we
+    // are working on a ragged tab.
+    const TabType type = params.right_tab ? bbox->right_tab_type()
+                                          : bbox->left_tab_type();
+    const bool qualifies =
+        params.ragged || (type != TT_NONE && type != TT_MAYBE_RAGGED);
+    // Do not add the blob again if it is already the list's current entry.
+    if (qualifies && (points_it.empty() || points_it.data() != bbox)) {
+      if (top_to_bottom)
+        points_it.add_before_then_move(bbox);
+      else
+        points_it.add_after_then_move(bbox);
+      ++num_added;
+    }
+    // Move on to the next aligned blob. FindAlignedBlob guarantees forward
+    // progress in the top_to_bottom direction, so it eventually returns
+    // nullptr and this loop terminates.
+    bbox = FindAlignedBlob(params, top_to_bottom, bbox, x_start, end_y);
+    if (bbox == nullptr)
+      break;
+    cur_box = bbox->bounding_box();
+    if (!params.ragged)
+      x_start = params.right_tab ? cur_box.right() : cur_box.left();
+  }
+  if (debug) {
+    tprintf("Alignment run ended with %d pts at blob:", num_added);
+    cur_box.print();
+  }
+  return num_added;
+}
+
+// Search vertically for a blob that is aligned with the input bbox.
+// The search parameters are determined by AlignedBlobParams.
+// top_to_bottom tells whether to search down or up from x_start.
+// Returns nullptr if nothing aligned was found in the search box or if a
+// blob was found in the gutter; in the gutter case bbox's own tab type may
+// be set to TT_DELETED. On a nullptr return, end_y is the edge of the
+// search box, or the leading edge of the gutter blob if one was found.
+BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p,
+                                       bool top_to_bottom, BLOBNBOX* bbox,
+                                       int x_start, int* end_y) {
+  TBOX box = bbox->bounding_box();
+  // If there are separator lines, get the column edges.
+  int left_column_edge = bbox->left_rule();
+  int right_column_edge = bbox->right_rule();
+  // start_y is used to guarantee that forward progress is made and the
+  // search does not go into an infinite loop. New blobs must extend the
+  // line beyond start_y.
+  int start_y = top_to_bottom ? box.bottom() : box.top();
+  if (WithinTestRegion(2, x_start, start_y)) {
+    tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n",
+            box.left(), box.top(), box.right(), box.bottom(),
+            left_column_edge, right_column_edge);
+  }
+  // Skew tolerance: the horizontal drift allowed over the max vertical gap.
+  int skew_tolerance = p.max_v_gap / kMaxSkewFactor;
+  // Calculate xmin and xmax of the search box so that it contains
+  // all possibly relevant boxes up to p.max_v_gap above or below according
+  // to top_to_bottom.
+  // Start with a notion of vertical with the current estimate.
+  int x2 = (p.max_v_gap * p.vertical.x() + p.vertical.y()/2) / p.vertical.y();
+  if (top_to_bottom) {
+    x2 = x_start - x2;
+    *end_y = start_y - p.max_v_gap;
+  } else {
+    x2 = x_start + x2;
+    *end_y = start_y + p.max_v_gap;
+  }
+  // Expand the box by an additional skew tolerance.
+  int xmin = std::min(x_start, x2) - skew_tolerance;
+  int xmax = std::max(x_start, x2) + skew_tolerance;
+  // Now add direction-specific tolerances.
+  if (p.right_tab) {
+    xmax += p.min_gutter;
+    xmin -= p.l_align_tolerance;
+  } else {
+    xmax += p.r_align_tolerance;
+    xmin -= p.min_gutter;
+  }
+  // Setup a vertical search for an aligned blob.
+  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(this);
+  if (WithinTestRegion(2, x_start, start_y))
+    tprintf("Starting %s %s search at %d-%d,%d, search_size=%d, gutter=%d\n",
+            p.ragged ? "Ragged" : "Aligned", p.right_tab ? "Right" : "Left",
+            xmin, xmax, start_y, p.max_v_gap, p.min_gutter);
+  vsearch.StartVerticalSearch(xmin, xmax, start_y);
+  // result stores the best real return value.
+  BLOBNBOX* result = nullptr;
+  // The backup_result is not a tab candidate and can be used if no
+  // real tab candidate result is found.
+  BLOBNBOX* backup_result = nullptr;
+  // neighbour is the blob that is currently being investigated.
+  BLOBNBOX* neighbour = nullptr;
+  while ((neighbour = vsearch.NextVerticalSearch(top_to_bottom)) != nullptr) {
+    if (neighbour == bbox)
+      continue;
+    TBOX nbox = neighbour->bounding_box();
+    int n_y = (nbox.top() + nbox.bottom()) / 2;
+    if ((!top_to_bottom && n_y > start_y + p.max_v_gap) ||
+        (top_to_bottom && n_y < start_y - p.max_v_gap)) {
+      if (WithinTestRegion(2, x_start, start_y))
+        tprintf("Neighbour too far at (%d,%d)->(%d,%d)\n",
+                nbox.left(), nbox.bottom(), nbox.right(), nbox.top());
+      break;  // Gone far enough.
+    }
+    // It is CRITICAL to ensure that forward progress is made, (strictly
+    // in/decreasing n_y) or the caller could loop infinitely, while
+    // waiting for a sequence of blobs in a line to end.
+    // NextVerticalSearch alone does not guarantee this, as there may be
+    // more than one blob in a grid cell. See comment in AlignTabs.
+    if ((n_y < start_y) != top_to_bottom || nbox.y_overlap(box))
+      continue;  // Only look in the required direction.
+    if (result != nullptr && result->bounding_box().y_gap(nbox) > gridsize())
+      return result;  // This result is clear.
+    if (backup_result != nullptr && p.ragged && result == nullptr &&
+        backup_result->bounding_box().y_gap(nbox) > gridsize())
+      return backup_result;  // This result is clear.
+
+    // If the neighbouring blob is the wrong side of a separator line, then it
+    // "doesn't exist" as far as we are concerned.
+    int x_at_n_y = x_start + (n_y - start_y) * p.vertical.x() / p.vertical.y();
+    if (x_at_n_y < neighbour->left_crossing_rule() ||
+        x_at_n_y > neighbour->right_crossing_rule())
+      continue;  // Separator line in the way.
+    int n_left = nbox.left();
+    int n_right = nbox.right();
+    int n_x = p.right_tab ? n_right : n_left;
+    if (WithinTestRegion(2, x_start, start_y))
+      tprintf("neighbour at (%d,%d)->(%d,%d), n_x=%d, n_y=%d, xatn=%d\n",
+              nbox.left(), nbox.bottom(), nbox.right(), nbox.top(),
+              n_x, n_y, x_at_n_y);
+    if (p.right_tab &&
+        n_left < x_at_n_y + p.min_gutter &&
+        n_right > x_at_n_y + p.r_align_tolerance &&
+        (p.ragged || n_left < x_at_n_y + p.gutter_fraction * nbox.height())) {
+      // In the gutter so end of line. A candidate bbox is marked deleted.
+      if (bbox->right_tab_type() >= TT_MAYBE_ALIGNED)
+        bbox->set_right_tab_type(TT_DELETED);
+      *end_y = top_to_bottom ? nbox.top() : nbox.bottom();
+      if (WithinTestRegion(2, x_start, start_y))
+        tprintf("gutter\n");
+      return nullptr;
+    }
+    if (!p.right_tab &&
+        n_left < x_at_n_y - p.l_align_tolerance &&
+        n_right > x_at_n_y - p.min_gutter &&
+        (p.ragged || n_right > x_at_n_y - p.gutter_fraction * nbox.height())) {
+      // In the gutter so end of line. A candidate bbox is marked deleted.
+      if (bbox->left_tab_type() >= TT_MAYBE_ALIGNED)
+        bbox->set_left_tab_type(TT_DELETED);
+      *end_y = top_to_bottom ? nbox.top() : nbox.bottom();
+      if (WithinTestRegion(2, x_start, start_y))
+        tprintf("gutter\n");
+      return nullptr;
+    }
+    if ((p.right_tab && neighbour->leader_on_right()) ||
+        (!p.right_tab && neighbour->leader_on_left()))
+      continue;  // Neighbours of leaders are not allowed to be used.
+    if (n_x <= x_at_n_y + p.r_align_tolerance &&
+        n_x >= x_at_n_y - p.l_align_tolerance) {
+      // Aligned so keep it. If it is a marked tab save it as result,
+      // otherwise keep it as backup_result to return in case of later failure.
+      if (WithinTestRegion(2, x_start, start_y))
+        tprintf("aligned, seeking%d, l=%d, r=%d\n",
+                p.right_tab, neighbour->left_tab_type(),
+                neighbour->right_tab_type());
+      TabType n_type = p.right_tab ? neighbour->right_tab_type()
+                                   : neighbour->left_tab_type();
+      if (n_type != TT_NONE && (p.ragged || n_type != TT_MAYBE_RAGGED)) {
+        if (result == nullptr) {
+          result = neighbour;
+        } else {
+          // Keep the closest neighbour by (squared) Euclidean distance.
+          // This prevents it from picking a tab blob in another column.
+          const TBOX& old_box = result->bounding_box();
+          int x_diff = p.right_tab ? old_box.right() : old_box.left();
+          x_diff -= x_at_n_y;
+          int y_diff = (old_box.top() + old_box.bottom()) / 2 - start_y;
+          int old_dist = x_diff * x_diff + y_diff * y_diff;
+          x_diff = n_x - x_at_n_y;
+          y_diff = n_y - start_y;
+          int new_dist = x_diff * x_diff + y_diff * y_diff;
+          if (new_dist < old_dist)
+            result = neighbour;
+        }
+      } else if (backup_result == nullptr) {
+        if (WithinTestRegion(2, x_start, start_y))
+          tprintf("Backup\n");
+        backup_result = neighbour;
+      } else {
+        TBOX backup_box = backup_result->bounding_box();
+        if ((p.right_tab && backup_box.right() < nbox.right()) ||
+            (!p.right_tab && backup_box.left() > nbox.left())) {
+          if (WithinTestRegion(2, x_start, start_y))
+            tprintf("Better backup\n");
+          backup_result = neighbour;
+        }
+      }
+    }
+  }
+  return result != nullptr ? result : backup_result;
+}
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/alignedblob.h b/tesseract/src/textord/alignedblob.h
new file mode 100644
index 00000000..e69b3354
--- /dev/null
+++ b/tesseract/src/textord/alignedblob.h
@@ -0,0 +1,124 @@
+///////////////////////////////////////////////////////////////////////
+// File:        alignedblob.h
+// Description: A class to find vertically aligned blobs in a BBGrid,
+//              and a struct to hold control parameters.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_ALIGNEDBLOB_H_
+#define TESSERACT_TEXTORD_ALIGNEDBLOB_H_
+
+#include "bbgrid.h"
+#include "blobbox.h"
+#include "tabvector.h"
+
+namespace tesseract {
+// Debug parameters defined in alignedblob.cpp; defaults here mirror those.
+extern INT_VAR_H(textord_debug_bugs, 0,
+                 "Turn on output related to bugs in tab finding");
+extern INT_VAR_H(textord_debug_tabfind, 0, "Debug tab finding");
+extern BOOL_VAR_H(textord_debug_printable, false,
+                  "Make debug windows printable");
+
+// Simple structure to hold the search parameters for AlignedBlob.
+// The members are mostly derived from constants, which are
+// conditioned on the alignment parameter.
+// For finding vertical lines, a different set of constants is
+// used, selected by the different constructor.
+struct AlignedBlobParams {
+  // Constructor to set the parameters for finding aligned and ragged tabs.
+  // Vertical_x and vertical_y are the current estimates of the true vertical
+  // direction (up) in the image. Height is the height of the starter blob.
+  // v_gap_multiple is the multiple of height that will be used as a limit
+  // on vertical gap before giving up and calling the line ended.
+  // resolution is the original image resolution, and alignment0 indicates
+  // the type of tab stop to be found.
+  AlignedBlobParams(int vertical_x, int vertical_y, int height,
+                    int v_gap_multiple, int min_gutter_width, int resolution,
+                    TabAlignment alignment0);
+  // Constructor to set the parameters for finding vertical lines.
+  // Vertical_x and vertical_y are the current estimates of the true vertical
+  // direction (up) in the image. Width is the width of the starter blob.
+  AlignedBlobParams(int vertical_x, int vertical_y, int width);
+
+  // Fit the vertical vector into an ICOORD, which is 16 bit.
+  void set_vertical(int vertical_x, int vertical_y);
+
+  double gutter_fraction;  // Multiple of height used for min_gutter.
+  bool right_tab;          // We are looking at right edges.
+  bool ragged;             // We are looking for a ragged (vs aligned) edge.
+  TabAlignment alignment;  // The type we are trying to produce.
+  TabType confirmed_type;  // Type to flag blobs if accepted.
+  int max_v_gap;           // Max vertical gap to be tolerated.
+  int min_gutter;          // Minimum gutter between columns.
+  // Tolerances allowed on horizontal alignment of aligned edges.
+  int l_align_tolerance;   // Tolerance to the left of the expected edge.
+  int r_align_tolerance;   // Tolerance to the right of the expected edge.
+  // Conditions for accepting a line.
+  int min_points;          // Minimum number of points to be OK.
+  int min_length;          // Min length of completed line.
+
+  ICOORD vertical;         // Current estimate of logical vertical.
+};
+
+// The AlignedBlob class contains code to find vertically aligned blobs.
+// This is factored out into a separate class, so it can be used by both
+// vertical line finding (LineFind) and tabstop finding (TabFind).
+class TESS_API AlignedBlob : public BlobGrid {
+ public:
+  AlignedBlob(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+  ~AlignedBlob() override;
+
+  // Return true if the given coordinates are within the test rectangle
+  // and the debug level is at least the given detail level.
+  static bool WithinTestRegion(int detail_level, int x, int y);
+
+  // Display the blobs' tab codes; not built if GRAPHICS_DISABLED is defined.
+  ScrollView* DisplayTabs(const char* window_name, ScrollView* tab_win);
+
+  // Finds a vector corresponding to a set of vertically aligned blob edges
+  // running through the given box. The type of vector returned and the
+  // search parameters are determined by the AlignedBlobParams.
+  // vertical_x and y are updated with an estimate of the real
+  // vertical direction. (skew finding.)
+  // Returns nullptr if no decent vector can be found.
+  TabVector* FindVerticalAlignment(AlignedBlobParams align_params,
+                                   BLOBNBOX* bbox,
+                                   int* vertical_x, int* vertical_y);
+
+ private:
+  // Find a set of blobs that are aligned in the given vertical
+  // direction with the given blob. Adds the aligned blobs to
+  // good_points and returns the number added.
+  // For other parameters see FindAlignedBlob below.
+  int AlignTabs(const AlignedBlobParams& params,
+                bool top_to_bottom, BLOBNBOX* bbox,
+                BLOBNBOX_CLIST* good_points, int* end_y);
+
+  // Search vertically for a blob that is aligned with the input bbox.
+  // The search parameters are determined by AlignedBlobParams.
+  // top_to_bottom tells whether to search down or up.
+  // The return value is nullptr if nothing was found in the search box
+  // or if a blob was found in the gutter. On a nullptr return, end_y
+  // is set to the edge of the search box or the leading edge of the
+  // gutter blob if one was found.
+  BLOBNBOX* FindAlignedBlob(const AlignedBlobParams& p,
+                            bool top_to_bottom, BLOBNBOX* bbox,
+                            int x_start, int* end_y);
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_TEXTORD_ALIGNEDBLOB_H_
diff --git a/tesseract/src/textord/baselinedetect.cpp b/tesseract/src/textord/baselinedetect.cpp
new file mode 100644
index 00000000..ef3b91c8
--- /dev/null
+++ b/tesseract/src/textord/baselinedetect.cpp
@@ -0,0 +1,869 @@
+///////////////////////////////////////////////////////////////////////
+// File:        baselinedetect.cpp
+// Description: Initial Baseline Determination.
+// Copyright 2012 Google Inc. All Rights Reserved.
+// Author:      rays@google.com (Ray Smith)
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#define _USE_MATH_DEFINES       // for M_PI
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "baselinedetect.h"
+
+#include <algorithm>
+#include <cfloat>               // for FLT_MAX
+#include <cmath>                // for M_PI
+#include "allheaders.h"
+#include "blobbox.h"
+#include "detlinefit.h"
+#include "drawtord.h"
+#include "helpers.h"
+#include "linlsq.h"
+#include "makerow.h"
+#include "textord.h"
+#include "tprintf.h"
+#include "underlin.h"
+
+// Maximum number of modes kept in displacement_modes_.
+const int kMaxDisplacementsModes = 3;
+// Number of points skipped at each end when retrying the initial fit.
+const int kNumSkipPoints = 3;
+// Max angle difference (radians) at which the independent baseline is kept.
+const double kMaxSkewDeviation = 1.0 / 64;
+// Fraction of line spacing used as the bucket size for blob displacements.
+const double kOffsetQuantizationFactor = 3.0 / 64;
+// Fraction of line spacing used as the half-range of a constrained fit.
+const double kFitHalfrangeFactor = 6.0 / 64;
+// Max fit error, as a fraction of line spacing, for a baseline to be good.
+const double kMaxBaselineError = 3.0 / 64;
+// Multiple of linespacing that sets max_blob_size in TO_BLOCK.
+// Copied from textord_excess_blobsize.
+const double kMaxBlobSizeMultiple = 1.3;
+// Min fraction of the non-trivial gaps between rows that must be close to
+// the model before the linespacing model is forced on all the lines.
+const double kMinFittingLinespacings = 0.25;
+// A y-coordinate within a textline that is to be debugged.
+//#define kDebugYCoord 1525
+
+namespace tesseract {
+
+BaselineRow::BaselineRow(double line_spacing, TO_ROW* to_row)
+    : blobs_(to_row->blob_list()), baseline_pt1_(0.0f, 0.0f),
+      baseline_pt2_(0.0f, 0.0f), baseline_error_(0.0), good_baseline_(false) {
+  // Every tolerance below is a fixed fraction of the line spacing estimate.
+  max_baseline_error_ = kMaxBaselineError * line_spacing;
+  fit_halfrange_ = kFitHalfrangeFactor * line_spacing;
+  disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing;
+  // The row's extent is the union of the bounding boxes of its blobs.
+  ComputeBoundingBox();
+}
+
+// Writes the straight baseline into row as a gradient and y-intercept.
+void BaselineRow::SetupOldLineParameters(TO_ROW* row) const {
+  // TODO(rays) get rid of this when m and c are no longer used.
+  const double slope = tan(BaselineAngle());
+  // Height of the baseline where it crosses the y-axis (x = 0).
+  const float y_intercept = StraightYAtX(0.0);
+  row->set_line(slope, y_intercept, baseline_error_);
+  row->set_parallel_line(slope, y_intercept, baseline_error_);
+}
+
+// Prints the baseline end points, angle, fit quality and bounding box.
+void BaselineRow::Print() const {
+  const auto& p1 = baseline_pt1_;
+  const auto& p2 = baseline_pt2_;
+  tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n",
+          p1.x(), p1.y(), p2.x(), p2.y(), BaselineAngle(), StraightYAtX(0.0));
+  tprintf("Quant factor=%g, error=%g, good=%d, box:",
+          disp_quant_factor_, baseline_error_, good_baseline_);
+  bounding_box_.print();
+}
+
+// Returns the skew angle (in radians) of the current baseline. A baseline
+// direction is only unique within a range of pi, so it is in [-pi/2, pi/2).
+double BaselineRow::BaselineAngle() const {
+  const FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_);
+  // Assuming angle() yields a value in [-pi, pi], adding 1.5*pi keeps the
+  // fmod argument positive, so the remainder is in [0, pi) before centring.
+  return fmod(baseline_dir.angle() + M_PI * 1.5, M_PI) - M_PI * 0.5;
+}
+
+// Returns the line spacing between this row and other, measured at the
+// horizontal centre of the x-range that the two bounding boxes share.
+double BaselineRow::SpaceBetween(const BaselineRow& other) const {
+  const int shared_left =
+      std::max(bounding_box_.left(), other.bounding_box_.left());
+  const int shared_right =
+      std::min(bounding_box_.right(), other.bounding_box_.right());
+  const float mid_x = (shared_left + shared_right) / 2.0f;
+  const float mid_y = (StraightYAtX(mid_x) + other.StraightYAtX(mid_x)) / 2.0f;
+  const FCOORD mid(mid_x, mid_y);
+  return PerpDistanceFromBaseline(mid) + other.PerpDistanceFromBaseline(mid);
+}
+
+// Returns the displacement of the baseline point at the horizontal centre
+// of the bounding box relative to direction, divided by direction's length.
+double BaselineRow::PerpDisp(const FCOORD& direction) const {
+  const float centre_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f;
+  FCOORD centre(centre_x, StraightYAtX(centre_x));
+  return direction * centre / direction.length();
+}
+
+// Evaluates the straight baseline through baseline_pt1_ and baseline_pt2_ at
+// the given x. A vertical baseline yields the mean y of the two end points.
+double BaselineRow::StraightYAtX(double x) const {
+  const double dx = baseline_pt2_.x() - baseline_pt1_.x();
+  if (dx != 0.0) {
+    const double dy = baseline_pt2_.y() - baseline_pt1_.y();
+    return baseline_pt1_.y() + (x - baseline_pt1_.x()) * dy / dx;
+  }
+  return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0;
+}
+
+// Fits a straight baseline to the blobs of the row and returns
+// good_baseline_, i.e. whether the fit is considered trustworthy.
+// If use_box_bottoms is false, each blob's baseline position is first
+// re-estimated by calling EstimateBaselinePosition() on the blob.
+bool BaselineRow::FitBaseline(bool use_box_bottoms) {
+  // Deterministic fitting is used wherever possible.
+  fitter_.Clear();
+  // Linear least squares is a backup if the DetLineFit produces a bad line.
+  LLSQ llsq;
+  BLOBNBOX_IT blob_it(blobs_);
+
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    if (!use_box_bottoms) blob->EstimateBaselinePosition();
+    const TBOX& box = blob->bounding_box();
+    int x_middle = (box.left() + box.right()) / 2;
+#ifdef kDebugYCoord
+    if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) {
+      tprintf("Box bottom = %d, baseline pos=%d for box at:",
+              box.bottom(), blob->baseline_position());
+      box.print();
+    }
+#endif
+    fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2);
+    llsq.add(x_middle, blob->baseline_position());
+  }
+  // Fit the line through all the points and take it as the first baseline.
+  ICOORD pt1, pt2;
+  baseline_error_ = fitter_.Fit(&pt1, &pt2);
+  baseline_pt1_ = pt1;
+  baseline_pt2_ = pt2;
+  if (baseline_error_ > max_baseline_error_ &&
+      fitter_.SufficientPointsForIndependentFit()) {
+    // The fit was bad but there were plenty of points, so try skipping
+    // the first and last few, and use the new line only if it at least
+    // halves the error of fit.
+    double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2);
+    if (error < baseline_error_ / 2.0) {
+      baseline_error_ = error;
+      baseline_pt1_ = pt1;
+      baseline_pt2_ = pt2;
+    }
+  }
+  int debug = 0;
+#ifdef kDebugYCoord
+  Print();
+  debug = bounding_box_.bottom() < kDebugYCoord &&
+      bounding_box_.top() > kDebugYCoord
+            ? 3 : 2;
+#endif
+  // Try a constrained fit in the direction of the last Fit() call above.
+  // NOTE(review): pt1/pt2 look overwritten by a rejected retry too - confirm.
+  FCOORD direction(pt2 - pt1);
+  double target_offset = direction * pt1;
+  good_baseline_ = false;
+  FitConstrainedIfBetter(debug, direction, 0.0, target_offset);
+  // Wild lines can be produced because DetLineFit allows vertical lines, but
+  // vertical text has been rotated so angles over pi/4 should be disallowed.
+  // Near vertical lines can still be produced by vertically aligned components
+  // on very short lines.
+  double angle = BaselineAngle();
+  if (fabs(angle) > M_PI * 0.25) {
+    // Use the llsq fit as a backup; such a baseline is never marked good.
+    baseline_pt1_ = llsq.mean_point();
+    baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m());
+    // TODO(rays) get rid of this when m and c are no longer used.
+    double m = llsq.m();
+    double c = llsq.c(m);
+    baseline_error_ = llsq.rms(m, c);
+    good_baseline_ = false;
+  }
+  return good_baseline_;
+}
+
+// Refits the baseline constrained to be parallel to direction, keeping the
+// refit only if FitConstrainedIfBetter decides to replace the old baseline.
+void BaselineRow::AdjustBaselineToParallel(int debug,
+                                           const FCOORD& direction) {
+  SetupBlobDisplacements(direction);
+  if (!displacement_modes_.empty()) {
+#ifdef kDebugYCoord
+    const bool row_has_debug_y = bounding_box_.bottom() < kDebugYCoord &&
+                                 bounding_box_.top() > kDebugYCoord;
+    if (row_has_debug_y && debug < 3) debug = 3;
+#endif
+    FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]);
+  }
+}
+
+// Snaps the baseline to the line spacing model (line_spacing, line_offset)
+// along direction if one of displacement_modes_ lies close enough to the
+// model and the current baseline is not already close to that mode.
+// Returns PerpDisp(direction) modulo line_spacing, or line_offset if empty.
+double BaselineRow::AdjustBaselineToGrid(int debug,
+                                         const FCOORD& direction,
+                                         double line_spacing,
+                                         double line_offset) {
+  if (blobs_->empty()) {
+    if (debug > 1) {
+      tprintf("Row empty at:");
+      bounding_box_.print();
+    }
+    return line_offset;
+  }
+  // Find the displacement_modes_ entry nearest to the grid.
+  double best_error = 0.0;
+  int best_index = -1;
+  for (int i = 0; i < displacement_modes_.size(); ++i) {
+    double blob_y = displacement_modes_[i];
+    double error = BaselineBlock::SpacingModelError(blob_y, line_spacing,
+                                                    line_offset);
+    if (debug > 1) {
+      tprintf("Mode at %g has error %g from model \n", blob_y, error);
+    }
+    if (best_index < 0 || error < best_error) {
+      best_error = error;
+      best_index = i;
+    }
+  }
+  // Only move the baseline if the chosen mode is close enough to the model.
+  double model_margin = max_baseline_error_ - best_error;
+  if (best_index >= 0 && model_margin > 0.0) {
+    // If the baseline is already near the mode, moving could only harm it.
+    double perp_disp = PerpDisp(direction);
+    double shift = displacement_modes_[best_index] - perp_disp;
+    if (fabs(shift) > max_baseline_error_) {
+      if (debug > 1) {
+        tprintf("Attempting linespacing model fit with mode %g to row at:",
+                displacement_modes_[best_index]);
+        bounding_box_.print();
+      }
+      FitConstrainedIfBetter(debug, direction, model_margin,
+                             displacement_modes_[best_index]);
+    } else if (debug > 1) {
+      tprintf("Linespacing model only moves current line by %g for row at:",
+              shift);
+      bounding_box_.print();
+    }
+  } else if (debug > 1) {
+    tprintf("Linespacing model not close enough to any mode for row at:");
+    bounding_box_.print();
+  }
+  return fmod(PerpDisp(direction), line_spacing);
+}
+
+// Sets up displacement_modes_ with the top few modes of the quantized
+// perpendicular displacement of each blob from the given direction vector.
+// displacement_modes_ is left empty if the row has no blobs.
+void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) {
+  // Displacement of each blob's baseline point, one entry per blob.
+  GenericVector<double> perp_blob_dists;
+  displacement_modes_.truncate(0);
+  // Gather the skew-corrected position of every blob.
+  double min_dist = FLT_MAX;
+  double max_dist = -FLT_MAX;
+  BLOBNBOX_IT blob_it(blobs_);
+#ifdef kDebugYCoord
+  bool debug = false;
+#endif
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    const TBOX& box = blob->bounding_box();
+#ifdef kDebugYCoord
+    if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) debug = true;
+#endif
+    FCOORD blob_pos((box.left() + box.right()) / 2.0f,
+                    blob->baseline_position());
+    double offset = direction * blob_pos;
+    perp_blob_dists.push_back(offset);
+#ifdef kDebugYCoord
+    if (debug) {
+      tprintf("Displacement %g for blob at:", offset);
+      box.print();
+    }
+#endif
+    UpdateRange(offset, &min_dist, &max_dist);
+  }
+  // With no blobs min/max are still +/-FLT_MAX, which cannot be quantized.
+  if (perp_blob_dists.empty()) return;
+  // Set up a histogram using disp_quant_factor_ as the bucket size.
+  STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_),
+                   IntCastRounded(max_dist / disp_quant_factor_) + 1);
+  for (int i = 0; i < perp_blob_dists.size(); ++i)
+    dist_stats.add(IntCastRounded(perp_blob_dists[i] / disp_quant_factor_), 1);
+  GenericVector<KDPairInc<float, int> > scaled_modes;
+  dist_stats.top_n_modes(kMaxDisplacementsModes, &scaled_modes);
+#ifdef kDebugYCoord
+  // key() is a mode in bucket units; scale it back by disp_quant_factor_.
+  for (int i = 0; debug && i < scaled_modes.size(); ++i) {
+    tprintf("Top mode = %g * %d\n",
+            scaled_modes[i].key() * disp_quant_factor_, scaled_modes[i].data());
+  }
+#endif
+  for (int i = 0; i < scaled_modes.size(); ++i)
+    displacement_modes_.push_back(disp_quant_factor_ * scaled_modes[i].key());
+}
+
+// Fits a line in the given direction to blobs that are close to the given
+// target_offset perpendicular displacement from the direction, and adopts
+// it as the baseline if it compares well enough with the existing fit.
+// debug > 1 prints the decision; debug > 2 is additionally passed on to
+// fitter_.ConstrainedFit.
+// cheat_allowance is subtracted from the new fit's error before comparing,
+// and any positive value waives the sufficient-points requirement for
+// marking the new fit good.
+// The new fit is adopted if its adjusted error is no worse than the old
+// error, or the old fit is not good and the new one is, or the angles clash.
+void BaselineRow::FitConstrainedIfBetter(int debug,
+                                         const FCOORD& direction,
+                                         double cheat_allowance,
+                                         double target_offset) {
+  double halfrange = fit_halfrange_ * direction.length();
+  double min_dist = target_offset - halfrange;
+  double max_dist = target_offset + halfrange;
+  ICOORD line_pt;
+  double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist,
+                                            debug > 2, &line_pt);
+  // Discount the error by cheat_allowance; the discounted value is stored.
+  new_error -= cheat_allowance;
+  double old_angle = BaselineAngle();
+  double new_angle = direction.angle();
+  if (debug > 1) {
+    tprintf("Constrained error = %g, original = %g",
+            new_error, baseline_error_);
+    tprintf(" angles = %g, %g, delta=%g vs threshold %g\n",
+            old_angle, new_angle,
+            new_angle - old_angle, kMaxSkewDeviation);
+  }
+  bool new_good_baseline = new_error <= max_baseline_error_ &&
+      (cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit());
+  // The new will replace the old if any are true:
+  // 1. the new error is no worse than the old
+  // 2. the old is NOT good, but the new is
+  // 3. the angles differ by more than kMaxSkewDeviation (assuming that the
+  //    new is a better guess at the angle.)
+  if (new_error <= baseline_error_ ||
+      (!good_baseline_ && new_good_baseline) ||
+      fabs(new_angle - old_angle) > kMaxSkewDeviation) {
+    baseline_error_ = new_error;
+    baseline_pt1_ = line_pt;
+    baseline_pt2_ = baseline_pt1_ + direction;
+    good_baseline_ = new_good_baseline;
+    if (debug > 1) {
+      tprintf("Replacing with constrained baseline, good = %d\n",
+              good_baseline_);
+    }
+  } else if (debug > 1) {
+    tprintf("Keeping old baseline\n");
+  }
+}
+
+// Returns the unsigned perpendicular distance from pt to the straight
+// baseline, or 0 (with a diagnostic) if the baseline has zero length.
+float BaselineRow::PerpDistanceFromBaseline(const FCOORD& pt) const {
+  FCOORD along(baseline_pt2_ - baseline_pt1_);
+  FCOORD to_point(pt - baseline_pt1_);
+  const float scaled_dist = along * to_point;
+  const float length_sq = along.sqlength();
+  if (length_sq != 0.0f) {
+    return std::sqrt(scaled_dist * scaled_dist / length_sq);
+  }
+  tprintf("unexpected baseline vector (0,0)\n");
+  return 0.0f;
+}
+
+// Recomputes bounding_box_ as the union of the boxes of all blobs in the row.
+void BaselineRow::ComputeBoundingBox() {
+  TBOX row_box;
+  BLOBNBOX_IT blob_it(blobs_);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward())
+    row_box += blob_it.data()->bounding_box();
+  // Publish the accumulated union only once the walk is complete.
+  bounding_box_ = row_box;
+}
+
+
+BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block)
+    : block_(block), debug_level_(debug_level), non_text_block_(non_text),
+      good_skew_angle_(false), skew_angle_(0.0),
+      line_spacing_(block->line_spacing), line_offset_(0.0), model_error_(0.0) {
+  TO_ROW_IT it(block_->get_rows());
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    TO_ROW* to_row = it.data();  // Its blobs are sorted by blob_x_order first.
+    to_row->blob_list()->sort(blob_x_order);
+    rows_.push_back(new BaselineRow(block_->line_spacing, to_row));
+  }
+}
+
+// Returns the absolute distance of perp_disp from the nearest line of the
+// model, whose lines lie at line_offset plus whole multiples of line_spacing.
+double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing,
+                                        double line_offset) {
+  const double lines_from_offset = (perp_disp - line_offset) / line_spacing;
+  // Snap to the nearest whole line index, then measure the residual.
+  const int nearest_line = IntCastRounded(lines_from_offset);
+  return fabs(perp_disp - (line_spacing * nearest_line + line_offset));
+}
+
+// Fits a straight baseline to every row and takes the block skew as the
+// circular median of the angles of the rows whose fit was good.
+// use_box_bottoms is forwarded to BaselineRow::FitBaseline for each row.
+// Returns true if at least one row contributed an angle; non-text blocks
+// are left untouched and always return false.
+bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) {
+  if (non_text_block_) {
+    return false;
+  }
+  const bool trace_rows = debug_level_ > 1;
+  GenericVector<double> good_angles;
+  for (int r = 0; r < rows_.size(); ++r) {
+    BaselineRow* row = rows_[r];
+    if (row->FitBaseline(use_box_bottoms)) {
+      good_angles.push_back(row->BaselineAngle());
+    }
+    if (trace_rows)
+      row->Print();
+  }
+  // With no usable rows the skew falls back to zero and is flagged bad.
+  good_skew_angle_ = !good_angles.empty();
+  skew_angle_ = good_skew_angle_
+                    ? MedianOfCircularValues(M_PI, &good_angles)
+                    : 0.0;
+  if (debug_level_ > 0) {
+    tprintf("Initial block skew angle = %g, good = %d\n",
+            skew_angle_, good_skew_angle_);
+  }
+  return good_skew_angle_;
+}
+
+// Refits every row parallel to the block skew (the stored skew if it was
+// judged good, otherwise default_block_skew), then, if a line spacing model
+// can be fitted, tries to snap the other rows onto that model.
+void BaselineBlock::ParallelizeBaselines(double default_block_skew) {
+  if (non_text_block_) return;
+  if (!good_skew_angle_) skew_angle_ = default_block_skew;
+  if (debug_level_ > 0)
+    tprintf("Adjusting block to skew angle %g\n", skew_angle_);
+  FCOORD direction(cos(skew_angle_), sin(skew_angle_));
+  const int num_rows = rows_.size();
+  for (int r = 0; r < num_rows; ++r) {
+    rows_[r]->AdjustBaselineToParallel(debug_level_, direction);
+    if (debug_level_ > 1) rows_[r]->Print();
+  }
+  // The model needs at least 3 rows and a line spacing that fits them.
+  if (num_rows < 3) return;
+  if (!ComputeLineSpacing()) return;
+  // Find the row whose position agrees best with the model; on a tie the
+  // earliest row wins.
+  int anchor_row = 0;
+  double anchor_error = 0.0;
+  for (int r = 0; r < num_rows; ++r) {
+    const double row_error = SpacingModelError(rows_[r]->PerpDisp(direction),
+                                               line_spacing_, line_offset_);
+    if (r == 0 || row_error < anchor_error) {
+      anchor_error = row_error;
+      anchor_row = r;
+    }
+  }
+  // Walk away from the anchor in both directions, each walk starting from
+  // the model offset and carrying forward the offset each row returns.
+  double offset = line_offset_;
+  for (int r = anchor_row + 1; r < num_rows; ++r) {
+    offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
+                                            line_spacing_, offset);
+  }
+  offset = line_offset_;
+  for (int r = anchor_row - 1; r >= 0; --r) {
+    offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
+                                            line_spacing_, offset);
+  }
+}
+
+// Copies the fitted line spacing model into the TO_BLOCK (only when the
+// spacing is positive) and the straight baselines into each of its TO_ROWs.
+void BaselineBlock::SetupBlockParameters() const {
+  if (line_spacing_ > 0.0) {
+    // line_size is only ever reduced: it is capped by the smaller of the
+    // block's previous line spacing and the newly fitted one.
+    const float new_spacing = static_cast<float>(line_spacing_);
+    const float spacing_cap = std::min(block_->line_spacing, new_spacing);
+    if (spacing_cap < block_->line_size) block_->line_size = spacing_cap;
+    block_->line_spacing = line_spacing_;
+    block_->baseline_offset = line_offset_;
+    block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple;
+  }
+  // rows_ is walked in step with the TO_ROW list it was built from.
+  TO_ROW_IT to_row_it(block_->get_rows());
+  for (int r = 0; r < rows_.size(); ++r, to_row_it.forward()) {
+    rows_[r]->SetupOldLineParameters(to_row_it.data());
+  }
+}
+
+// Clean-up that must run on the linear baselines before any splines are
+// fitted. Non-text blocks are skipped entirely. In order, this:
+//   1. runs vigorous_noise_removal on the block, if remove_noise is set;
+//   2. separates out underlines, using the block skew as the gradient;
+//   3. pre-associates blob fragments.
+// page_tr is used to size a debug window.
+// TODO(rays/joeliu) This entire section of code is inherited from the past
+// and could be improved/eliminated.
+void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) {
+  if (!non_text_block_) {
+    if (remove_noise) vigorous_noise_removal(block_);
+    // Both helpers are given the fixed vector (1, 0) as their rotation.
+    FCOORD unit_rotation(1.0f, 0.0f);
+    double skew_gradient = tan(skew_angle_);
+    separate_underlines(block_, skew_gradient, unit_rotation, true);
+    pre_associate_blobs(page_tr, block_, unit_rotation, true);
+  }
+}
+
+// Fits splines to the textlines, or creates fake QSPLINES from the straight
+// baselines that are already on the TO_ROWs.
+// enable_splines selects real spline fitting via textord->make_spline_rows;
+// show_final_rows is forwarded to that call and is otherwise unused.
+// As a side-effect, computes the xheights of the rows and the block.
+// Although x-height estimation is conceptually separate, it is part of
+// detecting perspective distortion and therefore baseline fitting.
+void BaselineBlock::FitBaselineSplines(bool enable_splines,
+                                       bool show_final_rows,
+                                       Textord* textord) {
+  double gradient = tan(skew_angle_);
+  if (enable_splines) {
+    textord->make_spline_rows(block_, gradient, show_final_rows);
+  } else {
+    // Make a fake spline from each row's existing line: coefficients
+    // (0, m, c) between the left and right edges of the block.
+    const TBOX block_box = block_->block->pdblk.bounding_box();
+    TO_ROW_IT row_it(block_->get_rows());
+    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+      TO_ROW* row = row_it.data();
+      int32_t xstarts[2] = { block_box.left(), block_box.right() };
+      double coeffs[3] = { 0.0, row->line_m(), row->line_c() };
+      row->baseline = QSPLINE(1, xstarts, coeffs);
+      textord->compute_row_xheight(row, block_->block->classify_rotation(),
+                                   row->line_m(), block_->line_size);
+    }
+  }
+  textord->compute_block_xheight(block_, gradient);
+  block_->block->set_xheight(block_->xheight);
+  if (textord_restore_underlines) restore_underlined_blobs(block_);
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Draws the (straight) baselines, each row in the next colour of a cycle
+// from RED to MAGENTA, then the leftover blobs, underlines and mean lines.
+void BaselineBlock::DrawFinalRows(const ICOORD& page_tr) {
+  if (non_text_block_) return;
+  double gradient = tan(skew_angle_);
+  FCOORD rotation(1.0f, 0.0f);
+  int left_edge = block_->block->pdblk.bounding_box().left();
+  ScrollView* win = create_to_win(page_tr);
+  ScrollView::Color row_colour = ScrollView::RED;
+  TO_ROW_IT row_it = block_->get_rows();
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    plot_parallel_row(row_it.data(), gradient, left_edge, row_colour, rotation);
+    // Step to the next colour, wrapping back to RED once past MAGENTA.
+    row_colour = row_colour >= ScrollView::MAGENTA
+        ? ScrollView::RED : static_cast<ScrollView::Color>(row_colour + 1);
+  }
+  // Blobs still on the block's own list are the ones discarded as noise.
+  plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE);
+  plot_blob_list(win, &block_->underlines,
+                 ScrollView::YELLOW, ScrollView::CORAL);
+  if (block_->blobs.length() > 0)
+    tprintf("%d blobs discarded as noise\n", block_->blobs.length());
+  draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation);
+}
+
+#endif // !GRAPHICS_DISABLED
+
+// Plots the baseline spline of every row onto pix_in; no-op for non-text.
+void BaselineBlock::DrawPixSpline(Pix* pix_in) {
+  if (non_text_block_) return;
+  TO_ROW_IT it(block_->get_rows());
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
+    it.data()->baseline.plot(pix_in);
+}
+
+// Top-level line-spacing calculation. Takes the positions of the current
+// baselines of the block's rows along the block skew, estimates the line
+// spacing, and refines it by fitting a regression line to the baseline
+// positions as a function of their integer index.
+// Returns true if it seems that the model is a reasonable fit to the
+// observations; always false with fewer than 2 rows.
+bool BaselineBlock::ComputeLineSpacing() {
+  FCOORD direction(cos(skew_angle_), sin(skew_angle_));
+  GenericVector<double> row_positions;
+  ComputeBaselinePositions(direction, &row_positions);
+  const int num_positions = row_positions.size();
+  if (num_positions < 2) return false;
+  EstimateLineSpacing();
+  RefineLineSpacing(row_positions);
+  // Sanity-check the model: of the gaps between consecutive rows that are
+  // larger than the tolerance, count those within tolerance of the spacing.
+  const double tolerance = kMaxBaselineError * line_spacing_;
+  int non_trivial_gaps = 0;
+  int fitting_gaps = 0;
+  for (int i = 1; i < num_positions; ++i) {
+    const double row_gap = fabs(row_positions[i - 1] - row_positions[i]);
+    if (!(row_gap > tolerance)) continue;
+    ++non_trivial_gaps;
+    if (fabs(row_gap - line_spacing_) <= tolerance) ++fitting_gaps;
+  }
+  if (debug_level_ > 0) {
+    tprintf("Spacing %g, in %d rows, %d gaps fitted out of %d non-trivial\n",
+            line_spacing_, num_positions, fitting_gaps,
+            non_trivial_gaps);
+  }
+  return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings;
+}
+
+// Fills positions with the deskewed vertical position of each baseline in
+// the block, one entry per row, replacing any previous contents.
+// Each position is the signed perpendicular distance, from the line through
+// the origin parallel to direction, of the point on the row's baseline at
+// the horizontal centre of the row's bounding box.
+// The values are signed so that the side of that line on which a row sits
+// is preserved, hence "positions" rather than "distances".
+void BaselineBlock::ComputeBaselinePositions(const FCOORD& direction,
+                                             GenericVector<double>* positions) {
+  positions->clear();
+  for (int r = 0; r < rows_.size(); ++r) {
+    const TBOX& box = rows_[r]->bounding_box();
+    const float mid_x = (box.left() + box.right()) / 2.0f;
+    const float mid_y = static_cast<float>(rows_[r]->StraightYAtX(mid_x));
+    FCOORD mid_point(mid_x, mid_y);
+    // Stored via a float, so the pushed double carries float precision.
+    const float signed_offset = direction * mid_point;
+    positions->push_back(signed_offset);
+  }
+}
+
+// Estimates the line spacing of the block as the median of the spacings
+// between each textline and the first later textline that overlaps it
+// significantly in x. Rows whose baseline is steeper than pi/4 are ignored.
+// Leaves line_spacing_ unchanged if no pair of rows qualifies.
+void BaselineBlock::EstimateLineSpacing() {
+  const int num_rows = rows_.size();
+  GenericVector<float> spacings;
+  for (int r = 0; r < num_rows; ++r) {
+    BaselineRow* row = rows_[r];
+    // Exclude silly lines.
+    if (fabs(row->BaselineAngle()) > M_PI * 0.25) continue;
+    // Scan forward for the first row with a major x-overlap with this one.
+    const TBOX& row_box = row->bounding_box();
+    int r2 = r + 1;
+    while (r2 < num_rows &&
+           !row_box.major_x_overlap(rows_[r2]->bounding_box())) {
+      ++r2;
+    }
+    if (r2 >= num_rows) continue;
+    BaselineRow* partner = rows_[r2];
+    if (fabs(partner->BaselineAngle()) > M_PI * 0.25) continue;
+    // Stored as float, so the spacing is narrowed from SpaceBetween's double.
+    spacings.push_back(static_cast<float>(row->SpaceBetween(*partner)));
+  }
+  // Take the median, if there is anything to take it from.
+  if (spacings.empty()) return;
+  line_spacing_ = spacings[spacings.choose_nth_item(spacings.size() / 2)];
+  if (debug_level_ > 1)
+    tprintf("Estimate of linespacing = %g\n", line_spacing_);
+}
+
+// Refines the line spacing of the block by fitting the line spacing model
+// to the deskewed baseline positions, first with the current estimate and
+// then with two nearby spacings, and keeping the fit with the least error.
+// The members are only updated if the winning spacing is positive.
+void BaselineBlock::RefineLineSpacing(const GenericVector<double>& positions) {
+  double best_spacing, best_offset;
+  int index_range;
+  double best_error = FitLineSpacingModel(
+      positions, line_spacing_, &best_spacing, &best_offset, &index_range);
+  if (index_range > 1) {
+    // Try the hypotheses that there might be index_range +/- 1 line spaces:
+    // first one more (a smaller spacing), then one fewer (a larger spacing).
+    const double step = 1.0 / index_range;
+    const double trial_inputs[2] = {line_spacing_ / (1.0 + step),
+                                    line_spacing_ / (1.0 - step)};
+    for (int t = 0; t < 2; ++t) {
+      double trial_spacing, trial_offset;
+      const double trial_error = FitLineSpacingModel(
+          positions, trial_inputs[t], &trial_spacing, &trial_offset, nullptr);
+      if (trial_error < best_error) {
+        best_spacing = trial_spacing;
+        best_offset = trial_offset;
+        best_error = trial_error;
+      }
+    }
+  }
+  if (best_spacing > 0.0) {
+    line_spacing_ = best_spacing;
+    line_offset_ = best_offset;
+    model_error_ = best_error;
+    if (debug_level_ > 0)
+      tprintf("Final linespacing model = %g + offset %g, error %g\n",
+              line_spacing_, line_offset_, model_error_);
+  }
+}
+
+// Given an initial estimate of line spacing (m_in) and the positions of each
+// baseline, writes a refined line spacing to m_out, the matching offset to
+// c_out and, if index_delta is not nullptr, the range of line indices seen.
+// Returns the rms error of the regression; with m_in == 0 or fewer than 2
+// positions it returns 0 with m_out = m_in, c_out = 0 and index_delta = 0.
+double BaselineBlock::FitLineSpacingModel(
+    const GenericVector<double>& positions, double m_in,
+    double* m_out, double* c_out, int* index_delta) {
+  if (m_in == 0.0f || positions.size() < 2) {
+    *m_out = m_in;
+    *c_out = 0.0;
+    if (index_delta != nullptr) *index_delta = 0;
+    return 0.0;
+  }
+  GenericVector<double> offsets;
+  // Get the offset (remainder) of each position modulo the input spacing.
+  for (int i = 0; i < positions.size(); ++i)
+    offsets.push_back(fmod(positions[i], m_in));
+  // The circular median of the remainders anchors the line numbering.
+  double median_offset = MedianOfCircularValues(m_in, &offsets);
+  // Now fit a regression line of position against quantized line number.
+  LLSQ llsq;
+  int min_index = INT32_MAX;
+  int max_index = -INT32_MAX;
+  for (int i = 0; i < positions.size(); ++i) {
+    double y_pos = positions[i];
+    int row_index = IntCastRounded((y_pos - median_offset) / m_in);
+    UpdateRange(row_index, &min_index, &max_index);
+    llsq.add(row_index, y_pos);
+  }
+  // The gradient of that regression is the refined line spacing.
+  *m_out = llsq.m();
+  // NOTE(review): fmod below yields NaN if llsq.m() came out as 0 - confirm.
+  offsets.truncate(0);
+  for (int i = 0; i < positions.size(); ++i)
+    offsets.push_back(fmod(positions[i], *m_out));
+  // Use the median offset rather than the mean (the regression intercept).
+  if (debug_level_ > 2) {
+    for (int i = 0; i < offsets.size(); ++i)
+      tprintf("%d: %g\n", i, offsets[i]);
+  }
+  *c_out = MedianOfCircularValues(*m_out, &offsets);
+  if (debug_level_ > 1) {
+    tprintf("Median offset = %g, compared to mean of %g.\n",
+            *c_out, llsq.c(*m_out));
+  }
+  // Index_delta is the number of hypothesized line gaps present.
+  if (index_delta != nullptr)
+    *index_delta = max_index - min_index;
+  // Use the regression model's intercept to compute the error, as it may be
+  // a full line-spacing in disagreement with the median.
+  double rms_error = llsq.rms(*m_out, llsq.c(*m_out));
+  if (debug_level_ > 1) {
+    tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n",
+            m_in, median_offset, *m_out, *c_out, rms_error);
+  }
+  return rms_error;
+}
+
+BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew,
+                               TO_BLOCK_LIST* blocks)
+    : page_skew_(page_skew), debug_level_(debug_level) {
+  // Wrap every incoming TO_BLOCK in a BaselineBlock, text or not.
+  // Non-text blocks are expected to come out holding one empty word per
+  // incoming text line, which marks out the polygonal bounds of the block.
+  // They should not need baselines at all, but make_words currently crashes
+  // without a baseline and xheight, so they are kept and merely flagged for
+  // special treatment.
+  TO_BLOCK_IT block_it(blocks);
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+       block_it.forward()) {
+    TO_BLOCK* current = block_it.data();
+    POLY_BLOCK* poly = current->block->pdblk.poly_block();
+    const bool is_non_text = poly != nullptr && !poly->IsText();
+    blocks_.push_back(new BaselineBlock(debug_level_, is_non_text, current));
+  }
+}
+
+// Fits an initial straight baseline to every TO_ROW of every TO_BLOCK, then
+// pools the per-block skew angles into a page-wide default and uses it, with
+// the block-level linespacing, to smooth blocks/rows with too little data.
+void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) {
+  // Pass 1: collect the skew angle of each block that yields a good one.
+  GenericVector<double> good_angles;
+  for (int b = 0; b < blocks_.size(); ++b) {
+    if (debug_level_ > 0) {
+      tprintf("Fitting initial baselines...\n");
+    }
+    BaselineBlock* candidate = blocks_[b];
+    if (!candidate->FitBaselinesAndFindSkew(use_box_bottoms)) continue;
+    good_angles.push_back(candidate->skew_angle());
+  }
+  // The page skew is the fallback; the median of the block angles replaces
+  // it whenever at least one block produced a usable angle.
+  double default_block_skew = page_skew_.angle();
+  if (!good_angles.empty())
+    default_block_skew = MedianOfCircularValues(M_PI, &good_angles);
+  if (debug_level_ > 0) {
+    tprintf("Page skew angle = %g\n", default_block_skew);
+  }
+  // Pass 2: set bad lines in each block to the default skew, then force fit
+  // a linespacing model where it makes sense to do so.
+  for (int b = 0; b < blocks_.size(); ++b) {
+    blocks_[b]->ParallelizeBaselines(default_block_skew);
+    blocks_[b]->SetupBlockParameters();  // This replaced compute_row_stats.
+  }
+}
+
+// Runs the spline stage on every block: optional preparation for spline
+// fitting, the baseline spline fit itself (with its side-effects), and,
+// when graphics are compiled in, the optional display of the final rows.
+// NOTE that ComputeStraightBaselines must have been called first as this
+// sets up data in the TO_ROWs upon which this function depends.
+void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD& page_tr,
+                                                       bool enable_splines,
+                                                       bool remove_noise,
+                                                       bool show_final_rows,
+                                                       Textord* textord) {
+  for (int b = 0; b < blocks_.size(); ++b) {
+    BaselineBlock* current = blocks_[b];
+    if (enable_splines) {
+      current->PrepareForSplineFitting(page_tr, remove_noise);
+    }
+    current->FitBaselineSplines(enable_splines, show_final_rows, textord);
+#ifndef GRAPHICS_DISABLED
+    if (show_final_rows)
+      current->DrawFinalRows(page_tr);
+#endif
+  }
+}
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/baselinedetect.h b/tesseract/src/textord/baselinedetect.h
new file mode 100644
index 00000000..579558ed
--- /dev/null
+++ b/tesseract/src/textord/baselinedetect.h
@@ -0,0 +1,276 @@
+///////////////////////////////////////////////////////////////////////
+// File:        baselinedetect.h
+// Description: Initial Baseline Determination.
+// Copyright 2012 Google Inc. All Rights Reserved.
+// Author:      rays@google.com (Ray Smith)
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_BASELINEDETECT_H_
+#define TESSERACT_TEXTORD_BASELINEDETECT_H_
+
+#include "detlinefit.h"
+#include "points.h"
+#include "rect.h"
+
+#include "genericvector.h"
+
+struct Pix;
+
+namespace tesseract {
+
+class Textord;
+class BLOBNBOX_LIST;
+class TO_BLOCK;
+class TO_BLOCK_LIST;
+class TO_ROW;
+
+// Class to compute and hold baseline data for a TO_ROW.
+class BaselineRow {
+ public:
+  BaselineRow(double line_size, TO_ROW* to_row);
+
+  const TBOX& bounding_box() const {
+    return bounding_box_;
+  }
+  // Sets the TO_ROW with the output straight line.
+  void SetupOldLineParameters(TO_ROW* row) const;
+
+  // Outputs diagnostic information.
+  void Print() const;
+
+  // Returns the skew angle (in radians) of the current baseline in [-pi,pi].
+  double BaselineAngle() const;
+  // Computes and returns the linespacing at the middle of the overlap
+  // between this and other.
+  double SpaceBetween(const BaselineRow& other) const;
+  // Computes and returns the displacement of the center of the line
+  // perpendicular to the given direction.
+  double PerpDisp(const FCOORD& direction) const;
+  // Computes the y coordinate at the given x using the straight baseline
+  // defined by baseline_pt1_ and baseline_pt2_.
+  double StraightYAtX(double x) const;
+
+  // Fits a straight baseline to the points. Returns true if it had enough
+  // points to be reasonably sure of the fitted baseline.
+  // If use_box_bottoms is false, baseline positions are formed by
+  // considering the outlines of the blobs.
+  bool FitBaseline(bool use_box_bottoms);
+  // Modifies an existing result of FitBaseline to be parallel to the given
+  // vector if that produces a better result.
+  void AdjustBaselineToParallel(int debug, const FCOORD& direction);
+  // Modifies the baseline to snap to the textline grid if the existing
+  // result is not good enough.
+  double AdjustBaselineToGrid(int debug, const FCOORD& direction,
+                              double line_spacing, double line_offset);
+
+ private:
+  // Sets up displacement_modes_ with the top few modes of the perpendicular
+  // distance of each blob from the given direction vector, after rounding.
+  void SetupBlobDisplacements(const FCOORD& direction);
+
+  // Fits a line in the given direction to blobs that are close to the given
+  // target_offset perpendicular displacement from the direction. The fit
+  // error is allowed to be cheat_allowance worse than the existing fit, and
+  // will still be used.
+  // If cheat_allowance > 0, the new fit will be good and replace the current
+  // fit if it has better fit (with cheat) OR its error is below
+  // max_baseline_error_ and the old fit is marked bad.
+  // Otherwise the new fit will only replace the old if it is really better,
+  // or the old fit is marked bad and the new fit has sufficient points, as
+  // well as being within the max_baseline_error_.
+  void FitConstrainedIfBetter(int debug, const FCOORD& direction,
+                              double cheat_allowance,
+                              double target_offset);
+  // Returns the perpendicular distance of the point from the straight
+  // baseline.
+  float PerpDistanceFromBaseline(const FCOORD& pt) const;
+  // Computes the bounding box of the row.
+  void ComputeBoundingBox();
+
+  // The blobs of the row to which this BaselineRow adds extra information
+  // during baseline fitting. Note that blobs_ could easily come from either
+  // a TO_ROW or a ColPartition.
+  BLOBNBOX_LIST* blobs_;
+  // Bounding box of all the blobs.
+  TBOX bounding_box_;
+  // Fitter used to fit lines to the blobs.
+  DetLineFit fitter_;
+  // 2 points on the straight baseline.
+  FCOORD baseline_pt1_;
+  FCOORD baseline_pt2_;
+  // Set of modes of displacements. They indicate preferable baseline positions.
+  GenericVector<double> displacement_modes_;
+  // Quantization factor used for displacement_modes_.
+  double disp_quant_factor_;
+  // Half the acceptance range of blob displacements for computing the
+  // error during a constrained fit.
+  double fit_halfrange_;
+  // Max baseline error before a line is regarded as fitting badly.
+  double max_baseline_error_;
+  // The error of fit of the baseline.
+  double baseline_error_;
+  // True if this row seems to have a good baseline.
+  bool good_baseline_;
+};
+
+// Class to compute and hold baseline data for a TO_BLOCK.
+class BaselineBlock {
+ public:
+  BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block);
+
+  TO_BLOCK* block() const {
+    return block_;
+  }
+  double skew_angle() const {
+    return skew_angle_;
+  }
+
+  // Computes and returns the absolute error of the given perp_disp from the
+  // given linespacing model.
+  static double SpacingModelError(double perp_disp, double line_spacing,
+                                  double line_offset);
+
+  // Fits straight line baselines and computes the skew angle from the
+  // median angle. Returns true if a good angle is found.
+  // If use_box_bottoms is false, baseline positions are formed by
+  // considering the outlines of the blobs.
+  bool FitBaselinesAndFindSkew(bool use_box_bottoms);
+
+  // Refits the baseline to a constrained angle, using the stored block
+  // skew if good enough, otherwise the supplied default skew.
+  void ParallelizeBaselines(double default_block_skew);
+
+  // Sets the parameters in TO_BLOCK that are needed by subsequent processes.
+  void SetupBlockParameters() const;
+
+  // Processing that is required before fitting baseline splines, but requires
+  // linear baselines in order to be successful:
+  //   Removes noise if required
+  //   Separates out underlines
+  //   Pre-associates blob fragments.
+  // TODO(rays/joeliu) This entire section of code is inherited from the past
+  // and could be improved/eliminated.
+  // page_tr is used to size a debug window.
+  void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise);
+
+  // Fits splines to the textlines, or creates fake QSPLINES from the straight
+  // baselines that are already on the TO_ROWs.
+  // As a side-effect, computes the xheights of the rows and the block.
+  // Although x-height estimation is conceptually separate, it is part of
+  // detecting perspective distortion and therefore baseline fitting.
+  void FitBaselineSplines(bool enable_splines, bool show_final_rows,
+                          Textord* textord);
+
+  // Draws the (straight) baselines and final blobs colored according to
+  // what was discarded as noise and what is associated with each row.
+  void DrawFinalRows(const ICOORD& page_tr);
+
+  // Render the generated spline baselines for this block on pix_in.
+  void DrawPixSpline(Pix* pix_in);
+
+ private:
+  // Top-level line-spacing calculation. Computes an estimate of the line-
+  // spacing, using the current baselines in the TO_ROWS of the block, and
+  // then refines it by fitting a regression line to the baseline positions
+  // as a function of their integer index.
+  // Returns true if it seems that the model is a reasonable fit to the
+  // observations.
+  bool ComputeLineSpacing();
+
+  // Computes the deskewed vertical position of each baseline in the block and
+  // stores them in the given vector.
+  void ComputeBaselinePositions(const FCOORD& direction,
+                                GenericVector<double>* positions);
+
+  // Computes an estimate of the line spacing of the block from the median
+  // of the spacings between adjacent overlapping textlines.
+  void EstimateLineSpacing();
+
+  // Refines the line spacing of the block by fitting a regression
+  // line to the deskewed y-position of each baseline as a function of its
+  // estimated line index, allowing for a small error in the initial linespacing
+  // and choosing the best available model.
+  void RefineLineSpacing(const GenericVector<double>& positions);
+
+  // Given an initial estimate of line spacing (m_in) and the positions of each
+  // baseline, computes the line spacing of the block more accurately in m_out,
+  // and the corresponding intercept in c_out, and the number of spacings seen
+  // in index_delta (if not nullptr). Returns the rms error of fit to the model.
+  double FitLineSpacingModel(const GenericVector<double>& positions,
+                             double m_in, double* m_out, double* c_out,
+                             int* index_delta);
+
+
+  // The block to which this class adds extra information used during baseline
+  // calculation.
+  TO_BLOCK* block_;
+  // The rows in the block that we will be working with.
+  PointerVector<BaselineRow> rows_;
+  // Amount of debugging output to provide.
+  int debug_level_;
+  // True if the block is non-text (graphic).
+  bool non_text_block_;
+  // True if the block has at least one good enough baseline to compute the
+  // skew angle and therefore skew_angle_ is valid.
+  bool good_skew_angle_;
+  // Angle of skew in radians using the conventional anticlockwise from x-axis.
+  double skew_angle_;
+  // Current best estimate line spacing in pixels perpendicular to skew_angle_.
+  double line_spacing_;
+  // Offset for baseline positions, in pixels. Each baseline is at
+  // line_spacing_ * n + line_offset_ for integer n, which represents
+  // [textline] line number in a line numbering system that has line 0 on or
+  // at least near the x-axis. Not equal to the actual line number of a line
+  // within a block as most blocks are not near the x-axis.
+  double line_offset_;
+  // The error of the line spacing model.
+  double model_error_;
+};
+
+class BaselineDetect {
+ public:
+  BaselineDetect(int debug_level, const FCOORD& page_skew,
+                 TO_BLOCK_LIST* blocks);
+
+  ~BaselineDetect() = default;
+
+  // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
+  // block-wise and page-wise data to smooth small blocks/rows, and applies
+  // smoothing based on block/page-level skew and block-level linespacing.
+  void ComputeStraightBaselines(bool use_box_bottoms);
+
+  // Computes the baseline splines for each TO_ROW in each TO_BLOCK and
+  // other associated side-effects, including pre-associating blobs, computing
+  // x-heights and displaying debug information.
+  // NOTE that ComputeStraightBaselines must have been called first as this
+  // sets up data in the TO_ROWs upon which this function depends.
+  void ComputeBaselineSplinesAndXheights(const ICOORD& page_tr,
+                                         bool enable_splines,
+                                         bool remove_noise,
+                                         bool show_final_rows,
+                                         Textord* textord);
+
+ private:
+  // Page skew supplied to the constructor. Its angle is the default block
+  // skew used when no block has a good skew angle of its own.
+  FCOORD page_skew_;
+  // Amount of debug output to produce.
+  int debug_level_;
+  // The blocks that we are working with.
+  PointerVector<BaselineBlock> blocks_;
+};
+
+}  // namespace tesseract
+
+#endif  // TESSERACT_TEXTORD_BASELINEDETECT_H_
diff --git a/tesseract/src/textord/bbgrid.cpp b/tesseract/src/textord/bbgrid.cpp
new file mode 100644
index 00000000..6e3e3346
--- /dev/null
+++ b/tesseract/src/textord/bbgrid.cpp
@@ -0,0 +1,285 @@
+///////////////////////////////////////////////////////////////////////
+// File:        bbgrid.cpp
+// Description: Class to hold BLOBNBOXs in a grid for fast access
+//              to neighbours.
+// Author:      Ray Smith
+//
+// (C) Copyright 2007, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "bbgrid.h"
+#include "helpers.h"
+#include "ocrblock.h"
+
+namespace tesseract {
+
+///////////////////////////////////////////////////////////////////////
+// BBGrid IMPLEMENTATION.
+///////////////////////////////////////////////////////////////////////
+GridBase::GridBase(int gridsize, const ICOORD& bleft, const ICOORD& tright) {
+  Init(gridsize, bleft, tright);  // Construction is simply a first Init.
+}
+
+// Destructor.
+// Defined out of line in this file so that the compiler can emit a single
+// vtable here instead of weak vtables in every compilation unit.
+GridBase::~GridBase() = default;
+
+// (Re)Initializes the grid. gridsize is the size in pixels of each cell (0 is
+// treated as 1); bleft, tright are the bounding box of everything to go in it.
+void GridBase::Init(int gridsize, const ICOORD& bleft, const ICOORD& tright) {
+  gridsize_ = gridsize == 0 ? 1 : gridsize;
+  bleft_ = bleft;
+  tright_ = tright;
+  const int width_px = tright.x() - bleft.x();
+  const int height_px = tright.y() - bleft.y();
+  gridwidth_ = (width_px + gridsize_ - 1) / gridsize_;
+  gridheight_ = (height_px + gridsize_ - 1) / gridsize_;
+  gridbuckets_ = gridwidth_ * gridheight_;
+}
+
+// Converts image coords (x, y) to grid coords, clipped to lie within the grid.
+void GridBase::GridCoords(int x, int y, int* grid_x, int* grid_y) const {
+  *grid_x = (x - bleft_.x()) / gridsize_;
+  *grid_y = (y - bleft_.y()) / gridsize_;
+  ClipGridCoords(grid_x, grid_y);  // Points outside the box map to edge cells.
+}
+
+// Clips the given grid coords in place to [0, gridwidth_) x [0, gridheight_).
+void GridBase::ClipGridCoords(int* x, int* y) const {
+  *x = ClipToRange(*x, 0, gridwidth_ - 1);
+  *y = ClipToRange(*y, 0, gridheight_ - 1);
+}
+
+IntGrid::IntGrid()
+  : grid_(nullptr) {  // No cell storage exists until Init allocates it.
+}
+
+IntGrid::IntGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright)
+  : grid_(nullptr) {  // Must be null first: Init does delete [] on grid_.
+  Init(gridsize, bleft, tright);
+}
+
+IntGrid::~IntGrid() {
+  delete [] grid_;  // grid_ is nullptr if Init was never called; that is OK.
+}
+
+// (Re)Initializes the grid: sets up the geometry as GridBase::Init does, then
+// replaces any existing cell array with a new one in which every cell is 0.
+void IntGrid::Init(int gridsize, const ICOORD& bleft, const ICOORD& tright) {
+  delete [] grid_;  // Old cell contents are not kept across a re-Init.
+  GridBase::Init(gridsize, bleft, tright);
+  grid_ = new int[gridbuckets_];
+  Clear();
+}
+
+// Resets every cell of the grid to zero.
+void IntGrid::Clear() {
+  int* const cells_end = grid_ + gridbuckets_;
+  for (int* cell = grid_; cell < cells_end; ++cell)
+    *cell = 0;
+}
+
+// Rotate the grid by rotation, keeping cell contents.
+// rotation must be a multiple of 90 degrees.
+// NOTE: due to partial cells, cell coverage in the rotated grid will be
+// inexact. This is why there is no Rotate for the generic BBGrid.
+// TODO(rays) investigate fixing this inaccuracy by moving the origin after
+// rotation.
+void IntGrid::Rotate(const FCOORD& rotation) {
+  ASSERT_HOST(rotation.x() == 0.0f || rotation.y() == 0.0f);
+  ICOORD old_bleft(bleft());
+  // Save the old geometry now, as the Init call below overwrites it.
+  int old_width = gridwidth();
+  int old_height = gridheight();
+  TBOX box(bleft(), tright());
+  box.rotate(rotation);
+  int* old_grid = grid_;
+  grid_ = nullptr;  // Stops Init from deleting the old cells still in use.
+  Init(gridsize(), box.botleft(), box.topright());
+  // Iterate over the old grid, copying data to the rotated position in the new.
+  int oldi = 0;  // Linear index into old_grid, advanced in row-major order.
+  FCOORD x_step(rotation);
+  x_step *= gridsize();  // line_pos moves by this for each old column.
+  for (int oldy = 0; oldy < old_height; ++oldy) {
+    FCOORD line_pos(old_bleft.x(), old_bleft.y() + gridsize() * oldy);
+    line_pos.rotate(rotation);
+    for (int oldx = 0; oldx < old_width; ++oldx, line_pos += x_step, ++oldi) {
+      int grid_x, grid_y;
+      GridCoords(static_cast<int>(line_pos.x() + 0.5),
+                 static_cast<int>(line_pos.y() + 0.5),
+                 &grid_x, &grid_y);
+      grid_[grid_y * gridwidth() + grid_x] = old_grid[oldi];
+    }
+  }
+  delete [] old_grid;
+}
+
+// Returns a new IntGrid in which each cell whose own value is greater than 1
+// holds the sum of the 3x3 neighbourhood centred on it; all other cells are
+// left at zero. The returned grid must be deleted after use. Neighbours off
+// the edge are clipped onto the edge cells, which are thus double counted.
+IntGrid* IntGrid::NeighbourhoodSum() const {
+  auto* result = new IntGrid(gridsize(), bleft(), tright());
+  for (int y = 0; y < gridheight(); ++y) {
+    for (int x = 0; x < gridwidth(); ++x) {
+      if (GridCellValue(x, y) <= 1) continue;
+      int total = 0;
+      for (int dy = -1; dy <= 1; ++dy) {
+        for (int dx = -1; dx <= 1; ++dx) {
+          int nx = x + dx;
+          int ny = y + dy;
+          ClipGridCoords(&nx, &ny);
+          total += GridCellValue(nx, ny);
+        }
+      }
+      result->SetGridCell(x, y, total);
+    }
+  }
+  return result;
+}
+
+// Returns true if more than half the area of the rect is covered by grid
+// cells that are over the threshold. rect is in image coordinates.
+bool IntGrid::RectMostlyOverThreshold(const TBOX& rect, int threshold) const {
+  int min_x, min_y, max_x, max_y;
+  GridCoords(rect.left(), rect.bottom(), &min_x, &min_y);
+  GridCoords(rect.right(), rect.top(), &max_x, &max_y);
+  int total_area = 0;
+  for (int y = min_y; y <= max_y; ++y) {
+    for (int x = min_x; x <= max_x; ++x) {
+      if (GridCellValue(x, y) <= threshold) continue;
+      // Cell bounds in image coords (offset by bleft_) to match rect's space.
+      int left = bleft_.x() + x * gridsize_;
+      int bottom = bleft_.y() + y * gridsize_;
+      TBOX cell_box(left, bottom, left + gridsize_, bottom + gridsize_);
+      cell_box &= rect;  // This is in-place box intersection.
+      total_area += cell_box.area();
+    }
+  }
+  return total_area * 2 > rect.area();
+}
+
+// Returns true if at least one grid cell touched by the given rectangle
+// (in image coordinates) has the value zero.
+bool IntGrid::AnyZeroInRect(const TBOX& rect) const {
+  int first_x, first_y, last_x, last_y;
+  GridCoords(rect.left(), rect.bottom(), &first_x, &first_y);
+  GridCoords(rect.right(), rect.top(), &last_x, &last_y);
+  bool found_zero = false;
+  for (int y = first_y; y <= last_y && !found_zero; ++y) {
+    for (int x = first_x; x <= last_x && !found_zero; ++x)
+      found_zero = GridCellValue(x, y) == 0;
+  }
+  return found_zero;
+}
+
+// Returns a full-resolution 1 bpp pix in which each cell over the given
+// threshold is set as a square, unless one of its 4-neighbours is <= 0.
+// pixDestroy after use. NOTE(review): y origin omits bleft().y() - confirm.
+Pix* IntGrid::ThresholdToPix(int threshold) const {
+  Pix* result = pixCreate(tright().x() - bleft().x(),
+                          tright().y() - bleft().y(), 1);
+  const int side = gridsize();
+  for (int y = 0; y < gridheight(); ++y) {
+    for (int x = 0; x < gridwidth(); ++x) {
+      if (GridCellValue(x, y) <= threshold ||
+          GridCellValue(x - 1, y) <= 0 || GridCellValue(x + 1, y) <= 0 ||
+          GridCellValue(x, y - 1) <= 0 || GridCellValue(x, y + 1) <= 0)
+        continue;
+      pixRasterop(result, x * side, tright().y() - ((y + 1) * side),
+                  side, side, PIX_SET, nullptr, 0, 0);
+    }
+  }
+  return result;
+}
+
+// Creates a 1 bpp Pix with one pixel per grid cell covering box, padded by
+// one cell on every side. Outputs the grid coords of its bottom-left pixel.
+static Pix* GridReducedPix(const TBOX& box, int gridsize,
+                           ICOORD bleft, int* left, int* bottom) {
+  const int origin_x = bleft.x();
+  const int origin_y = bleft.y();
+  const int cell_left = (box.left() - origin_x) / gridsize - 1;
+  const int cell_bottom = (box.bottom() - origin_y) / gridsize - 1;
+  const int cell_right = (box.right() - origin_x) / gridsize + 1;
+  const int cell_top = (box.top() - origin_y) / gridsize + 1;
+  *left = cell_left;
+  *bottom = cell_bottom;
+  return pixCreate(cell_right - cell_left + 1, cell_top - cell_bottom + 1, 1);
+}
+
+// Returns a reduced Pix with one pixel per grid cell, in which a pixel is
+// set (black) if the given outline visits the corresponding grid cell and
+// clear otherwise. The grid coords of the bottom-left of the Pix, which
+// corresponds to (0, 0) on the Pix, are returned in *left and *bottom.
+// Note that the Pix is used upside-down, with (0, 0) being the bottom-left.
+// The Pix is newly created here; see the pixDestroy in the example below.
+Pix* TraceOutlineOnReducedPix(C_OUTLINE* outline, int gridsize,
+                              ICOORD bleft, int* left, int* bottom) {
+  Pix* reduced = GridReducedPix(outline->bounding_box(), gridsize, bleft,
+                                left, bottom);
+  const int words_per_line = pixGetWpl(reduced);
+  l_uint32* pixels = pixGetData(reduced);
+  const int num_steps = outline->pathlength();
+  ICOORD point = outline->start_pos();
+  for (int step = 0; step < num_steps; ++step) {
+    const int col = (point.x() - bleft.x()) / gridsize - *left;
+    const int row = (point.y() - bleft.y()) / gridsize - *bottom;
+    SET_DATA_BIT(pixels + row * words_per_line, col);
+    point += outline->step(step);
+  }
+  return reduced;
+}
+#if 0  // Example code shows how to use TraceOutlineOnReducedPix.
+  C_OUTLINE_IT ol_it(blob->cblob()->out_list());
+  int grid_left, grid_bottom;
+  Pix* pix = TraceOutlineOnReducedPix(ol_it.data(), gridsize_, bleft_,
+                                      &grid_left, &grid_bottom);
+  grid->InsertPixPtBBox(grid_left, grid_bottom, pix, blob);
+  pixDestroy(&pix);
+#endif
+
+// As TraceOutlineOnReducedPix above, but on a BLOCK with a non-null poly_block.
+Pix* TraceBlockOnReducedPix(BLOCK* block, int gridsize,
+                            ICOORD bleft, int* left, int* bottom) {
+  const TBOX& box = block->pdblk.bounding_box();
+  Pix* pix = GridReducedPix(box, gridsize, bleft, left, bottom);
+  int wpl = pixGetWpl(pix);
+  l_uint32* data = pixGetData(pix);
+  ICOORDELT_IT it(block->pdblk.poly_block()->points());
+  for (it.mark_cycle_pt(); !it.cycled_list();) {
+    ICOORD pos = *it.data();
+    it.forward();  // Advanced here, so the for statement has no increment.
+    ICOORD next_pos = *it.data();  // End point of the edge starting at pos.
+    ICOORD line_vector = next_pos - pos;
+    int major, minor;
+    ICOORD major_step, minor_step;
+    line_vector.setup_render(&major_step, &minor_step, &major, &minor);
+    int accumulator = major / 2;
+    while (pos != next_pos) {  // Walk the edge, setting the bit at each pos.
+      int grid_x = (pos.x() - bleft.x()) / gridsize - *left;
+      int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom;
+      SET_DATA_BIT(data + grid_y * wpl, grid_x);
+      pos += major_step;  // Always step along the major direction.
+      accumulator += minor;
+      if (accumulator >= major) {  // Minor step is due once this reaches major.
+        accumulator -= major;
+        pos += minor_step;
+      }
+    }
+  }
+  return pix;
+}
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/bbgrid.h b/tesseract/src/textord/bbgrid.h
new file mode 100644
index 00000000..5d75aa38
--- /dev/null
+++ b/tesseract/src/textord/bbgrid.h
@@ -0,0 +1,957 @@
+///////////////////////////////////////////////////////////////////////
+// File:        bbgrid.h
+// Description: Class to hold BLOBNBOXs in a grid for fast access
+//              to neighbours.
+// Author:      Ray Smith
+//
+// (C) Copyright 2007, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_BBGRID_H_
+#define TESSERACT_TEXTORD_BBGRID_H_
+
+#include <unordered_set>
+
+#include "clst.h"
+#include "coutln.h"
+#include "rect.h"
+#include "scrollview.h"
+
+#include "allheaders.h"
+
+class BLOCK;
+
+namespace tesseract {
+
+// Helper function to return a scaled Pix with one pixel per grid cell
+// (cells are gridsize image pixels square, measured from bleft), set (black)
+// where the given outline enters the cell and clear where it does not.
+// Also returns the grid coords of the bottom-left of the Pix, in *left
+// and *bottom, which corresponds to (0, 0) on the Pix.
+// Note that the Pix is used upside-down, with (0, 0) being the bottom-left.
+Pix* TraceOutlineOnReducedPix(C_OUTLINE* outline, int gridsize,
+                              ICOORD bleft, int* left, int* bottom);
+// As TraceOutlineOnReducedPix above, but traces the polygon of a BLOCK.
+Pix* TraceBlockOnReducedPix(BLOCK* block, int gridsize,
+                            ICOORD bleft, int* left, int* bottom);
+
+template<class BBC, class BBC_CLIST, class BBC_C_IT> class GridSearch;
+
+// The GridBase class is the base class for BBGrid and IntGrid.
+// It holds the geometry and scale of the grid.
+class TESS_API GridBase {
+ public:
+  GridBase() = default;  // Sizes and bucket count are zero until Init.
+  GridBase(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+  virtual ~GridBase();
+
+  // (Re)Initialize the grid. The gridsize is the size in pixels of each cell,
+  // and bleft, tright are the bounding box of everything to go in it.
+  void Init(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+
+  // Simple accessors.
+  int gridsize() const {
+    return gridsize_;
+  }
+  int gridwidth() const {
+    return gridwidth_;
+  }
+  int gridheight() const {
+    return gridheight_;
+  }
+  const ICOORD& bleft() const {
+    return bleft_;
+  }
+  const ICOORD& tright() const {
+    return tright_;
+  }
+  // Compute the given grid coordinates from image coords.
+  void GridCoords(int x, int y, int* grid_x, int* grid_y) const;
+
+  // Clip the given grid coordinates to fit within the grid.
+  void ClipGridCoords(int* x, int* y) const;
+
+ protected:
+  // TODO(rays) Make these private and migrate to the accessors in subclasses.
+  // The scalars are zero-initialized so that a default-constructed grid (no
+  // Init call yet) has zero buckets instead of indeterminate values.
+  int gridsize_ = 0;     // Pixel size of each grid cell.
+  int gridwidth_ = 0;    // Size of the grid in cells.
+  int gridheight_ = 0;
+  int gridbuckets_ = 0;  // Total cells in grid.
+  ICOORD bleft_;         // Pixel coords of bottom-left of grid.
+  ICOORD tright_;        // Pixel coords of top-right of grid.
+};
+
+// The IntGrid maintains a single int for each cell in a grid.
+class IntGrid : public GridBase {
+ public:
+  IntGrid();
+  IntGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+  ~IntGrid() override;
+
+  // (Re)Initialize the grid. The gridsize is the size in pixels of each cell,
+  // and bleft, tright are the bounding box of everything to go in it.
+  void Init(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+
+  // Clear all the ints in the grid to zero.
+  void Clear();
+
+  // Rotate the grid by rotation, keeping cell contents.
+  // rotation must be a multiple of 90 degrees.
+  // NOTE: due to partial cells, cell coverage in the rotated grid will be
+  // inexact. This is why there is no Rotate for the generic BBGrid.
+  void Rotate(const FCOORD& rotation);
+
+  // Returns a new IntGrid containing values equal to the sum of all the
+  // neighbouring cells. The returned grid must be deleted after use.
+  IntGrid* NeighbourhoodSum() const;
+
+  int GridCellValue(int grid_x, int grid_y) const {
+    ClipGridCoords(&grid_x, &grid_y);  // Reads clip out-of-range coords.
+    return grid_[grid_y * gridwidth_ + grid_x];
+  }
+  void SetGridCell(int grid_x, int grid_y, int value) {
+    ASSERT_HOST(grid_x >= 0 && grid_x < gridwidth());  // Writes do not clip.
+    ASSERT_HOST(grid_y >= 0 && grid_y < gridheight());
+    grid_[grid_y * gridwidth_ + grid_x] = value;
+  }
+  // Returns true if more than half the area of the rect is covered by grid
+  // cells that are over the threshold.
+  bool RectMostlyOverThreshold(const TBOX& rect, int threshold) const;
+
+  // Returns true if any cell value in the given rectangle is zero.
+  bool AnyZeroInRect(const TBOX& rect) const;
+
+  // Returns a full-resolution binary pix in which each cell over the given
+  // threshold is filled as a black square. pixDestroy after use.
+  Pix* ThresholdToPix(int threshold) const;
+
+ private:
+  int* grid_;  // Cell values; cell (x, y) is grid_[y * gridwidth_ + x].
+};
+
+// The BBGrid class holds C_LISTs of template classes BBC (bounding box class)
+// in a grid for fast neighbour access.
+// The BBC class must have a member const TBOX& bounding_box() const.
+// The BBC class must have been CLISTIZEH'ed elsewhere to make the
+// list class BBC_CLIST and the iterator BBC_C_IT.
+// Use of C_LISTs enables BBCs to exist in multiple cells simultaneously.
+// As a consequence, ownership of BBCs is assumed to be elsewhere and
+// persistent for at least the life of the BBGrid, or at least until Clear is
+// called which removes all references to inserted objects without actually
+// deleting them.
+// Most uses derive a class from a specific instantiation of BBGrid,
+// thereby making most of the ugly template notation go away.
+// The friend class GridSearch, with the same template arguments, is
+// used to search a grid efficiently in one of several search patterns.
+template<class BBC, class BBC_CLIST, class BBC_C_IT> class BBGrid
+  : public GridBase {
+  friend class GridSearch<BBC, BBC_CLIST, BBC_C_IT>;
+ public:
+  BBGrid();
+  BBGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+  ~BBGrid() override;
+
+  // (Re)Initialize the grid. The gridsize is the size in pixels of each cell,
+  // and bleft, tright are the bounding box of everything to go in it.
+  void Init(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+
+  // Empty all the lists but leave the grid itself intact.
+  void Clear();
+  // Call free_method on every element found by a full search. The cell lists
+  // are not emptied, so they still reference the elements afterwards.
+  void ClearGridData(void (*free_method)(BBC*));
+
+  // Insert a bbox into the appropriate place in the grid.
+  // If h_spread, then all cells covered horizontally by the box are
+  // used, otherwise, just the bottom-left. Similarly for v_spread.
+  // WARNING: InsertBBox may invalidate an active GridSearch. Call
+  // RepositionIterator() on any GridSearches that are active on this grid.
+  void InsertBBox(bool h_spread, bool v_spread, BBC* bbox);
+
+  // Using a pix from TraceOutlineOnReducedPix or TraceBlockOnReducedPix, in
+  // which each pixel corresponds to a grid cell, insert a bbox into every
+  // place in the grid where the corresponding pixel is 1. The Pix is handled
+  // upside-down to match the Tesseract coordinate system. (As created by
+  // TraceOutlineOnReducedPix or TraceBlockOnReducedPix.)
+  // (0, 0) in the pix corresponds to (left, bottom) in the
+  // grid (in grid coords), and the pix works up the grid from there.
+  // WARNING: InsertPixPtBBox may invalidate an active GridSearch. Call
+  // RepositionIterator() on any GridSearches that are active on this grid.
+  void InsertPixPtBBox(int left, int bottom, Pix* pix, BBC* bbox);
+
+  // Remove the bbox from the grid.
+  // WARNING: Any GridSearch operating on this grid could be invalidated!
+  // If a GridSearch is operating, call GridSearch::RemoveBBox() instead.
+  void RemoveBBox(BBC* bbox);
+
+  // Returns true if the given rectangle has no overlapping elements.
+  bool RectangleEmpty(const TBOX& rect);
+
+  // Returns an IntGrid showing the number of elements in each cell.
+  // Returned IntGrid must be deleted after use.
+  IntGrid* CountCellElements();
+
+  // Make a display window sized to the grid (needs !GRAPHICS_DISABLED).
+  ScrollView* MakeWindow(int x, int y, const char* window_name);
+
+  // Display the bounding boxes of the BLOBNBOXes in this grid.
+  // Only defined when GRAPHICS_DISABLED is not set, and BBC then also needs:
+  // ScrollView::Color BBC::BoxColor() const.
+  void DisplayBoxes(ScrollView* window);
+
+  // ASSERT_HOST that every cell contains no more than one copy of each entry.
+  void AssertNoDuplicates();
+
+  // Handle a click event in a display window.
+  virtual void HandleClick(int x, int y);
+
+ protected:
+  BBC_CLIST* grid_;  // Array of cell lists; cell (x, y) is at y*gridwidth_+x.
+
+ private:
+};
+
+// Hash functor for generic pointers: the address divided by sizeof(T).
+template<typename T> struct PtrHash {
+  size_t operator()(const T* ptr) const {
+    const auto address = reinterpret_cast<uintptr_t>(ptr);
+    return address / sizeof(T);
+  }
+};
+
+// The GridSearch class enables neighbourhood searching on a BBGrid.
+template<class BBC, class BBC_CLIST, class BBC_C_IT> class GridSearch {
+ public:
+  GridSearch(BBGrid<BBC, BBC_CLIST, BBC_C_IT>* grid)
+      : grid_(grid) {
+  }
+
+  // Get the grid x, y coords of the most recently returned BBC.
+  int GridX() const {
+    return x_;
+  }
+  int GridY() const {
+    return y_;
+  }
+
+  // Sets the search mode to return a box only once.
+  // Already-returned elements are remembered in the returns_ hash set, which
+  // NextRadSearch, NextSideSearch, NextVerticalSearch and NextRectSearch
+  // consult when the mode is on. NextFullSearch does not use it.
+  void SetUniqueMode(bool mode) {
+    unique_mode_ = mode;
+  }
+  // TODO(rays) Replace calls to ReturnedSeedElement with SetUniqueMode.
+  // Returns true if the current search cell (x_, y_) is the grid cell that
+  // contains the center of the bounding box of the most recently returned
+  // element. Apart from full search, all other searches return a box several
+  // times if the box is inserted with h_spread or v_spread, and this test
+  // picks out at most one of those occurrences.
+  // NOTE(review): an older comment said this depends on the bottom-left
+  // corner; the code below tests the box center.
+  bool ReturnedSeedElement() const {
+    TBOX box = previous_return_->bounding_box();  // Needs a prior return.
+    int x_center = (box.left()+box.right())/2;
+    int y_center = (box.top()+box.bottom())/2;
+    int grid_x, grid_y;
+    grid_->GridCoords(x_center, y_center, &grid_x, &grid_y);
+    return (x_ == grid_x) && (y_ == grid_y);
+  }
+
+  // Various searching iterations... Note that these iterations
+  // all share data members, so you can't run more than one iteration
+  // in parallel in a single GridSearch instance, but multiple instances
+  // can search the same BBGrid in parallel.
+  // Note that all the searches can return blobs that may not exactly
+  // match the search conditions, since they return everything in the
+  // covered grid cells. It is up to the caller to check for
+  // appropriateness.
+  // TODO(rays) NextRectSearch only returns valid elements. Make the other
+  // searches test before return also and remove the tests from code
+  // that uses GridSearch.
+
+  // Start a new full search. Will iterate all stored blobs, from the top.
+  // If the blobs have been inserted using InsertBBox, (not InsertPixPtBBox)
+  // then the full search guarantees to return each blob in the grid once.
+  // Other searches may return a blob more than once if they have been
+  // inserted using h_spread or v_spread.
+  void StartFullSearch();
+  // Return the next bbox in the search or nullptr if done.
+  BBC* NextFullSearch();
+
+  // Start a new radius search. Will search in a spiral up to a
+  // given maximum radius in grid cells from the given center in pixels.
+  void StartRadSearch(int x, int y, int max_radius);
+  // Return the next bbox in the radius search or nullptr if the
+  // maximum radius has been reached.
+  BBC* NextRadSearch();
+
+  // Start a new left or right-looking search. Will search to the side
+  // for a box that vertically overlaps the given vertical line segment.
+  // CAVEAT: This search returns all blobs from the cells to the side
+  // of the start, and somewhat below, since there is no guarantee
+  // that there may not be a taller object in a lower cell. The
+  // blobs returned will include all those that vertically overlap and
+  // are no more than twice as high, but may also include some that do
+  // not overlap and some that are more than twice as high.
+  void StartSideSearch(int x, int ymin, int ymax);
+  // Return the next bbox in the side search or nullptr if the
+  // edge has been reached. Searches left to right or right to left
+  // according to the flag.
+  BBC* NextSideSearch(bool right_to_left);
+
+  // Start a vertical-looking search. Will search up or down
+  // for a box that horizontally overlaps the given line segment.
+  void StartVerticalSearch(int xmin, int xmax, int y);
+  // Return the next bbox in the vertical search or nullptr if the
+  // edge has been reached. Searches top to bottom or bottom to top
+  // according to the flag.
+  BBC* NextVerticalSearch(bool top_to_bottom);
+
+  // Start a rectangular search. Will search for a box that overlaps the
+  // given rectangle.
+  void StartRectSearch(const TBOX& rect);
+  // Return the next bbox in the rectangular search or nullptr if complete.
+  BBC* NextRectSearch();
+
+  // Remove the last returned BBC. Will not invalidate this. May invalidate
+  // any other concurrent GridSearch on the same grid. If any others are
+  // in use, call RepositionIterator on those, to continue without harm.
+  void RemoveBBox();
+  void RepositionIterator();
+
+ private:
+  // Factored out helper to start a search.
+  void CommonStart(int x, int y);
+  // Factored out helper to complete a next search.
+  BBC* CommonNext();
+  // Factored out final return when search is exhausted.
+  BBC* CommonEnd();
+  // Factored out function to set the iterator to the current x_, y_
+  // grid coords and mark the cycle pt.
+  void SetIterator();
+
+ private:
+  // The grid we are searching.
+  BBGrid<BBC, BBC_CLIST, BBC_C_IT>* grid_ = nullptr;
+  // For executing a search. The different search algorithms use these in
+  // different ways, but most use x_origin_ and y_origin_ as the start position.
+  int x_origin_ = 0;
+  int y_origin_ = 0;  // Rect search: grid y of the bottom of the rectangle.
+  int max_radius_ = 0;  // Rect search: grid x of the right of the rectangle.
+  int radius_ = 0;
+  int rad_index_ = 0;
+  int rad_dir_ = 0;
+  TBOX rect_;  // Rect search: the rectangle being searched.
+  int x_ = 0; // The current location in grid coords, of the current search.
+  int y_ = 0;
+  bool unique_mode_ = false;
+  BBC* previous_return_ = nullptr; // Previous return from Next*.
+  BBC* next_return_ = nullptr; // Current value of it_.data() used for repositioning.
+  // An iterator over the list at (x_, y_) in the grid_.
+  BBC_C_IT it_;
+  // Set of unique returned elements used when unique_mode_ is true.
+  std::unordered_set<BBC*, PtrHash<BBC> > returns_;
+};
+
+// Comparison function ordering BBCs by the left edge of bounding_box();
+// ties are broken by the right edge, then the bottom, then the top.
+template<class BBC>
+int SortByBoxLeft(const void* void1, const void* void2) {
+  // Each argument points at a BBC pointer, so strip one level of indirection.
+  const BBC* first = *static_cast<const BBC* const*>(void1);
+  const BBC* second = *static_cast<const BBC* const*>(void2);
+  const auto& box1 = first->bounding_box();
+  const auto& box2 = second->bounding_box();
+  if (box1.left() != box2.left())
+    return box1.left() - box2.left();
+  if (box1.right() != box2.right())
+    return box1.right() - box2.right();
+  if (box1.bottom() != box2.bottom())
+    return box1.bottom() - box2.bottom();
+  return box1.top() - box2.top();
+}
+
+// Comparison function ordering BBCs right-to-left: larger right edge first,
+// then larger left edge first; remaining ties go by bottom, then top.
+template<class BBC>
+int SortRightToLeft(const void* void1, const void* void2) {
+  // Each argument points at a BBC pointer, so strip one level of indirection.
+  const BBC* first = *static_cast<const BBC* const*>(void1);
+  const BBC* second = *static_cast<const BBC* const*>(void2);
+  const auto& box1 = first->bounding_box();
+  const auto& box2 = second->bounding_box();
+  if (box2.right() != box1.right())
+    return box2.right() - box1.right();
+  if (box2.left() != box1.left())
+    return box2.left() - box1.left();
+  if (box1.bottom() != box2.bottom())
+    return box1.bottom() - box2.bottom();
+  return box1.top() - box2.top();
+}
+
+// Comparison function ordering BBCs by the bottom edge of bounding_box();
+// ties are broken by the top edge, then the left, then the right.
+template<class BBC>
+int SortByBoxBottom(const void* void1, const void* void2) {
+  // Each argument points at a BBC pointer, so strip one level of indirection.
+  const BBC* first = *static_cast<const BBC* const*>(void1);
+  const BBC* second = *static_cast<const BBC* const*>(void2);
+  const auto& box1 = first->bounding_box();
+  const auto& box2 = second->bounding_box();
+  if (box1.bottom() != box2.bottom())
+    return box1.bottom() - box2.bottom();
+  if (box1.top() != box2.top())
+    return box1.top() - box2.top();
+  if (box1.left() != box2.left())
+    return box1.left() - box2.left();
+  return box1.right() - box2.right();
+}
+
+///////////////////////////////////////////////////////////////////////
+// BBGrid IMPLEMENTATION.
+///////////////////////////////////////////////////////////////////////
+// Default constructor: no cell array is allocated until Init is called.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBGrid<BBC, BBC_CLIST, BBC_C_IT>::BBGrid() : grid_(nullptr) {}
+
+// Constructs the grid and allocates its cells right away by calling Init.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBGrid<BBC, BBC_CLIST, BBC_C_IT>::BBGrid(
+    int gridsize, const ICOORD& bleft, const ICOORD& tright) : grid_(nullptr) {
+  Init(gridsize, bleft, tright);
+}
+
+// Releases the array of cell lists that Init allocated (a no-op when Init
+// was never called and grid_ is still null).
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBGrid<BBC, BBC_CLIST, BBC_C_IT>::~BBGrid() { delete[] grid_; }
+
+// (Re)Initialize the grid. The gridsize is the size in pixels of each cell,
+// and bleft, tright are the bounding box of everything to go in it.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::Init(int gridsize,
+                                            const ICOORD& bleft,
+                                            const ICOORD& tright) {
+  GridBase::Init(gridsize, bleft, tright);  // Expected to set gridbuckets_.
+  delete [] grid_;  // Any cell lists from a previous Init are discarded.
+  grid_ = new BBC_CLIST[gridbuckets_];  // One freshly built list per cell.
+}
+
+// Empties every cell list with shallow_clear. The array of lists itself is
+// kept, so the grid can be refilled without another Init.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::Clear() {
+  for (int cell = 0; cell < gridbuckets_; ++cell)
+    grid_[cell].shallow_clear();
+}
+
+// Calls free_method on each element returned by a full search of the grid.
+// Elements are gathered into a temporary list first; cell lists are untouched.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::ClearGridData(
+    void (*free_method)(BBC*)) {
+  if (grid_ == nullptr) return;
+  BBC_CLIST collected;
+  BBC_C_IT collected_it(&collected);
+  GridSearch<BBC, BBC_CLIST, BBC_C_IT> full_search(this);
+  full_search.StartFullSearch();
+  for (BBC* element = full_search.NextFullSearch(); element != nullptr;
+       element = full_search.NextFullSearch()) {
+    collected_it.add_after_then_move(element);
+  }
+  for (collected_it.mark_cycle_pt(); !collected_it.cycled_list();
+       collected_it.forward())
+    free_method(collected_it.data());
+}
+
+// Adds bbox, via add_sorted with SortByBoxLeft, to the list of each grid
+// cell it covers. The covered range runs from the cell of the box's
+// bottom-left to the cell of its top-right; without h_spread (v_spread) only
+// the leftmost column (bottom row) of that range is used.
+// WARNING: InsertBBox may invalidate an active GridSearch. Call
+// RepositionIterator() on any GridSearches that are active on this grid.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::InsertBBox(bool h_spread, bool v_spread,
+                                                  BBC* bbox) {
+  const TBOX& box = bbox->bounding_box();
+  int first_x, first_y;
+  GridCoords(box.left(), box.bottom(), &first_x, &first_y);
+  int last_x, last_y;
+  GridCoords(box.right(), box.top(), &last_x, &last_y);
+  if (!h_spread) last_x = first_x;
+  if (!v_spread) last_y = first_y;
+  for (int y = first_y; y <= last_y; ++y) {
+    const int row_start = y * gridwidth_;
+    for (int x = first_x; x <= last_x; ++x) {
+      grid_[row_start + x].add_sorted(SortByBoxLeft<BBC>, true, bbox);
+    }
+  }
+}
+
+// Using a pix from TraceOutlineOnReducedPix or TraceBlockOnReducedPix, in
+// which each pixel corresponds to a grid cell, adds bbox to the list of
+// every grid cell whose pixel is set. Pix pixel (x, y) maps to grid cell
+// (left + x, bottom + y), so row 0 of the pix is the lowest grid row it
+// covers, matching the upside-down use of the Pix by the Trace functions.
+// No bounds checking is done here: the pix placed at (left, bottom) must lie
+// entirely inside the grid.
+// WARNING: InsertPixPtBBox may invalidate an active GridSearch. Call
+// RepositionIterator() on any GridSearches that are active on this grid.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::InsertPixPtBBox(int left, int bottom,
+                                                       Pix* pix, BBC* bbox) {
+  const int pix_width = pixGetWidth(pix);
+  const int pix_height = pixGetHeight(pix);
+  for (int row = 0; row < pix_height; ++row) {
+    l_uint32* row_data = pixGetData(pix) + row * pixGetWpl(pix);
+    const int row_start = (bottom + row) * gridwidth_ + left;
+    for (int col = 0; col < pix_width; ++col) {
+      if (!GET_DATA_BIT(row_data, col)) continue;
+      grid_[row_start + col].add_sorted(SortByBoxLeft<BBC>, true, bbox);
+    }
+  }
+}
+
+// Removes bbox from the list of every grid cell covered by its bounding box.
+// WARNING: Any GridSearch operating on this grid could be invalidated!
+// If a GridSearch is operating, call GridSearch::RemoveBBox() instead.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::RemoveBBox(BBC* bbox) {
+  const TBOX& box = bbox->bounding_box();
+  int first_x, first_y, last_x, last_y;
+  GridCoords(box.left(), box.bottom(), &first_x, &first_y);
+  GridCoords(box.right(), box.top(), &last_x, &last_y);
+  for (int y = first_y; y <= last_y; ++y) {
+    for (int x = first_x; x <= last_x; ++x) {
+      BBC_C_IT cell_it(&grid_[y * gridwidth_ + x]);
+      for (cell_it.mark_cycle_pt(); !cell_it.cycled_list();
+           cell_it.forward()) {
+        if (cell_it.data() != bbox) continue;
+        cell_it.extract();
+      }
+    }
+  }
+}
+
+// Returns true if a rect search finds no element overlapping the rectangle.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+bool BBGrid<BBC, BBC_CLIST, BBC_C_IT>::RectangleEmpty(const TBOX& rect) {
+  GridSearch<BBC, BBC_CLIST, BBC_C_IT> probe(this);
+  probe.StartRectSearch(rect);
+  return !probe.NextRectSearch();
+}
+
+// Builds a new IntGrid with this grid's geometry in which each cell holds the
+// length of the corresponding cell list. The caller must delete the result.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+IntGrid* BBGrid<BBC, BBC_CLIST, BBC_C_IT>::CountCellElements() {
+  auto* counts = new IntGrid(gridsize(), bleft(), tright());
+  int cell = 0;  // Runs through grid_ in the same row-by-row order as (x, y).
+  for (int y = 0; y < gridheight(); ++y) {
+    for (int x = 0; x < gridwidth(); ++x) {
+      counts->SetGridCell(x, y, grid_[cell++].length());
+    }
+  }
+  return counts;
+}
+
+
+// ScrollView event handler that forwards click events to the HandleClick
+// method of a grid of type G. The grid pointer is stored, not owned.
+template<class G> class TabEventHandler : public SVEventHandler {
+ public:
+  explicit TabEventHandler(G* grid) : grid_(grid) {}
+  void Notify(const SVEvent* sv_event) override {
+    if (sv_event->type != SVET_CLICK) return;
+    grid_->HandleClick(sv_event->x, sv_event->y);
+  }
+ private:
+  G* grid_;
+};
+
+#ifndef GRAPHICS_DISABLED
+
+// Make a window of an appropriate size to display things in the grid.
+// The window is positioned at the given x,y and sized from the pixel extent
+// of the grid; clicks in it are forwarded to HandleClick of this grid.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+ScrollView* BBGrid<BBC, BBC_CLIST, BBC_C_IT>::MakeWindow(
+    int x, int y, const char* window_name) {
+  const int width = tright_.x() - bleft_.x();
+  const int height = tright_.y() - bleft_.y();
+  auto* window = new ScrollView(window_name, x, y, width, height,
+                                width, height, true);
+  // The handler is heap-allocated and handed to the window; it is not
+  // deleted here.
+  window->AddEventHandler(
+      new TabEventHandler<BBGrid<BBC, BBC_CLIST, BBC_C_IT> >(this));
+  window->Pen(ScrollView::GREY);
+  window->Rectangle(0, 0, width, height);
+  return window;
+}
+
+// Draws the bounding box of every element returned by a full search of the
+// grid into tab_win, each with the pen colour given by the element's
+// BoxColor(), and then calls Update on the window.
+// Use of this function requires an additional member of the BBC class:
+// ScrollView::Color BBC::BoxColor() const.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::DisplayBoxes(ScrollView* tab_win) {
+  tab_win->Pen(ScrollView::BLUE);
+  tab_win->Brush(ScrollView::NONE);
+
+  // The blue pen set above is replaced by each element's own colour just
+  // before its rectangle is drawn.
+  GridSearch<BBC, BBC_CLIST, BBC_C_IT> full_search(this);
+  full_search.StartFullSearch();
+  for (BBC* element = full_search.NextFullSearch(); element != nullptr;
+       element = full_search.NextFullSearch()) {
+    const TBOX& box = element->bounding_box();
+    const ScrollView::Color element_color = element->BoxColor();
+    tab_win->Pen(element_color);
+    // Corners are passed as bottom-left (x, y) followed by top-right (x, y).
+    tab_win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
+  }
+
+  tab_win->Update();
+}
+
+#endif // !GRAPHICS_DISABLED
+
+// ASSERT_HOST that every cell contains no more than one copy of each entry.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::AssertNoDuplicates() {
+  // Process all grid cells, from the highest index down to zero.
+  for (int i = gridwidth_ * gridheight_ - 1; i >= 0; --i) {
+    // Iterate over all elements except the last.
+    for (BBC_C_IT it(&grid_[i]); !it.at_last(); it.forward()) {
+      BBC* ptr = it.data();
+      BBC_C_IT it2(it);  // Copy of it, so the scan starts just after ptr.
+      // No later element (until it2 wraps to the first) may equal ptr.
+      for (it2.forward(); !it2.at_first(); it2.forward()) {
+        ASSERT_HOST(it2.data() != ptr);
+      }
+    }
+  }
+}
+
+// Default handler for a click in a display window: only prints the position.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::HandleClick(int x, int y) {
+  tprintf("Click at (%d, %d)\n", x, y);
+}
+
+///////////////////////////////////////////////////////////////////////
+// GridSearch IMPLEMENTATION.
+///////////////////////////////////////////////////////////////////////
+
+// Start a new full search, from the grid's left edge (x) and top edge (y).
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::StartFullSearch() {
+  // Full search uses x_ and y_ as the current grid cell being searched;
+  // NextFullSearch then moves right along each row and down to the next.
+  CommonStart(grid_->bleft_.x(), grid_->tright_.y());
+}
+
+// Return the next bbox in the search or nullptr if done.
+// A box is only returned from the grid cell that contains the bottom-left of
+// its bounding box, so a box held in several cells is not returned from each.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::NextFullSearch() {
+  int x;
+  int y;
+  do {
+    while (it_.cycled_list()) {  // Current cell exhausted: step to the next.
+      ++x_;
+      if (x_ >= grid_->gridwidth_) {  // End of row: wrap to the row below.
+        --y_;
+        if (y_ < 0)  // Ran off the bottom row: the search is complete.
+          return CommonEnd();
+        x_ = 0;
+      }
+      SetIterator();
+    }
+    CommonNext();
+    TBOX box = previous_return_->bounding_box();
+    grid_->GridCoords(box.left(), box.bottom(), &x, &y);
+  } while (x != x_ || y != y_);  // Skip boxes whose bottom-left is elsewhere.
+  return previous_return_;
+}
+
+// Start a new radius search around the given center in pixels, reaching at
+// most max_radius grid cells away from it.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::StartRadSearch(int x, int y,
+                                                          int max_radius) {
+  // Begin in the last direction of ring zero, so that the first advance made
+  // by NextRadSearch rolls over into direction 0 of the ring of radius 1.
+  rad_dir_ = 3;
+  rad_index_ = 0;
+  radius_ = 0;
+  max_radius_ = max_radius;
+  CommonStart(x, y);
+}
+
+// Return the next bbox in the radius search or nullptr if the maximum radius
+// has been reached. In unique mode, already-returned boxes are skipped.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::NextRadSearch() {
+  do {
+    while (it_.cycled_list()) {  // Cell exhausted: move along the ring.
+      ++rad_index_;
+      if (rad_index_ >= radius_) {  // Side done: turn to the next direction.
+        ++rad_dir_;
+        rad_index_ = 0;
+        if (rad_dir_ >= 4) {  // All four sides done: grow the ring.
+          ++radius_;
+          if (radius_ > max_radius_)
+            return CommonEnd();
+          rad_dir_ = 0;
+        }
+      }
+      ICOORD offset = C_OUTLINE::chain_step(rad_dir_);
+      offset *= radius_ - rad_index_;
+      offset += C_OUTLINE::chain_step(rad_dir_ + 1) * rad_index_;
+      x_ = x_origin_ + offset.x();
+      y_ = y_origin_ + offset.y();
+      if (x_ >= 0 && x_ < grid_->gridwidth_ &&
+          y_ >= 0 && y_ < grid_->gridheight_)
+        SetIterator();  // Only for cells that are inside the grid.
+    }
+    CommonNext();
+  } while (unique_mode_ && returns_.find(previous_return_) != returns_.end());
+  if (unique_mode_)
+    returns_.insert(previous_return_);
+  return previous_return_;
+}
+
+// Start a new left or right-looking search. Will search to the side
+// for a box that vertically overlaps the given vertical line segment.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::StartSideSearch(int x,
+                                                           int ymin, int ymax) {
+  // Side search starts from (x, ymax) and keeps the depth of the vertical
+  // strip to scan in radius_: twice the segment height, rounded up to whole
+  // cells, so that overlapping objects of up to twice the size are found.
+  const int cell_size = grid_->gridsize_;
+  rad_index_ = 0;
+  radius_ = ((ymax - ymin) * 2 + cell_size - 1) / cell_size;
+  CommonStart(x, ymax);
+}
+
+// Return the next bbox in the side search or nullptr if the
+// edge has been reached. Searches left to right or right to left
+// according to the flag. In unique mode, already-returned boxes are skipped.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::NextSideSearch(bool right_to_left) {
+  do {
+    while (it_.cycled_list()) {  // Cell exhausted: go one cell further down.
+      ++rad_index_;
+      if (rad_index_ > radius_) {  // Column done: step sideways.
+        if (right_to_left)
+          --x_;
+        else
+          ++x_;
+        rad_index_ = 0;
+        if (x_ < 0 || x_ >= grid_->gridwidth_)
+          return CommonEnd();
+      }
+      y_ = y_origin_ - rad_index_;
+      if (y_ >= 0 && y_ < grid_->gridheight_)
+        SetIterator();  // Only for rows that are inside the grid.
+    }
+    CommonNext();
+  } while (unique_mode_ && returns_.find(previous_return_) != returns_.end());
+  if (unique_mode_)
+    returns_.insert(previous_return_);
+  return previous_return_;
+}
+
+// Start a vertical-looking search. Will search up or down
+// for a box that horizontally overlaps the given line segment.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::StartVerticalSearch(
+    int xmin, int xmax, int y) {
+  // Vertical search starts from (xmin, y) and keeps the width of the
+  // horizontal strip to scan, rounded up to whole cells, in radius_.
+  const int cell_size = grid_->gridsize_;
+  rad_index_ = 0;
+  radius_ = (xmax - xmin + cell_size - 1) / cell_size;
+  CommonStart(xmin, y);
+}
+
+// Return the next bbox in the vertical search or nullptr if the
+// edge has been reached. Searches top to bottom or bottom to top
+// according to the flag. In unique mode, already-returned boxes are skipped.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::NextVerticalSearch(
+    bool top_to_bottom) {
+  do {
+    while (it_.cycled_list()) {  // Cell exhausted: go one cell to the right.
+      ++rad_index_;
+      if (rad_index_ > radius_) {  // Row done: step up or down.
+        if (top_to_bottom)
+          --y_;
+        else
+          ++y_;
+        rad_index_ = 0;
+        if (y_ < 0 || y_ >= grid_->gridheight_)
+          return CommonEnd();
+      }
+      x_ = x_origin_ + rad_index_;
+      if (x_ >= 0 && x_ < grid_->gridwidth_)
+        SetIterator();  // Only for columns that are inside the grid.
+    }
+    CommonNext();
+  } while (unique_mode_ && returns_.find(previous_return_) != returns_.end());
+  if (unique_mode_)
+    returns_.insert(previous_return_);
+  return previous_return_;
+}
+
+// Start a rectangular search. Subsequent NextRectSearch calls return the
+// boxes that overlap the given rectangle.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::StartRectSearch(const TBOX& rect) {
+  // Begin at the top-left cell: CommonStart stores its grid coords in
+  // x_origin_/y_origin_ and positions x_/y_ there. The bottom-right cell
+  // then supplies the last column (max_radius_) and replaces y_origin_ with
+  // the last row, so the search proceeds left to right, top to bottom.
+  CommonStart(rect.left(), rect.top());
+  grid_->GridCoords(rect.right(), rect.bottom(), &max_radius_, &y_origin_);
+  rect_ = rect;
+}
+
+// Returns the next bbox that overlaps the search rectangle, or nullptr when
+// every grid cell of the rectangle has been visited.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::NextRectSearch() {
+  do {
+    while (it_.cycled_list()) {
+      if (++x_ > max_radius_) {
+        // Past the last column: go back to the first column of the next row.
+        x_ = x_origin_;
+        if (--y_ < y_origin_)
+          return CommonEnd();
+      }
+      SetIterator();
+    }
+    CommonNext();
+    // Skip boxes that miss the rectangle and, in unique mode, repeats.
+  } while (!rect_.overlap(previous_return_->bounding_box()) ||
+           (unique_mode_ && returns_.find(previous_return_) != returns_.end()));
+  if (unique_mode_) returns_.insert(previous_return_);
+  return previous_return_;
+}
+
+// Remove the last returned BBC. Will not invalidate this. May invalidate
+// any other concurrent GridSearch on the same grid. If any others are
+// in use, call RepositionIterator on those, to continue without harm.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::RemoveBBox() {
+  if (previous_return_ == nullptr) return;  // No current element to remove.
+  // Strip every copy of previous_return_ from the current list first, so
+  // that it_ stays valid when the grid removes it from the rest of its
+  // cells. If no copy is found here, it has been removed already.
+  BBC* last_kept = nullptr;    // Most recent element left on the list.
+  BBC* replacement = nullptr;  // Element that preceded the removed one.
+  it_.move_to_first();
+  it_.mark_cycle_pt();
+  while (!it_.cycled_list()) {
+    if (it_.data() != previous_return_) {
+      last_kept = it_.data();
+      it_.forward();
+      continue;
+    }
+    replacement = last_kept;
+    it_.extract();
+    it_.forward();
+    next_return_ = it_.cycled_list() ? nullptr : it_.data();
+  }
+  grid_->RemoveBBox(previous_return_);
+  previous_return_ = replacement;
+  RepositionIterator();
+}
+
+// Re-synchronizes it_ with previous_return_/next_return_ after a deletion.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::RepositionIterator() {
+  // Something was deleted, so we have little choice but to clear the
+  // returns list.
+  returns_.clear();
+  // Reset the iterator back to one past the previous return.
+  // If the previous_return_ is no longer in the list, then
+  // next_return_ serves as a backup.
+  it_.move_to_first();
+  // Special case: the first element was removed, so next_return_ is now at
+  // the head. The data is fine but the cycle point is not; re-mark it.
+  if (!it_.empty() && it_.data() == next_return_) {
+    it_.mark_cycle_pt();
+    return;
+  }
+  for (it_.mark_cycle_pt(); !it_.cycled_list(); it_.forward()) {
+    if (it_.data() == previous_return_ ||
+        it_.data_relative(1) == next_return_) {
+      CommonNext();
+      return;
+    }
+  }
+  // We ran off the end of the list. Move to a new cell next time.
+  previous_return_ = nullptr;
+  next_return_ = nullptr;
+}
+
+// Shared start-up for every search: begins at the cell containing (x, y).
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::CommonStart(int x, int y) {
+  returns_.clear();
+  previous_return_ = nullptr;
+  grid_->GridCoords(x, y, &x_origin_, &y_origin_);
+  x_ = x_origin_;
+  y_ = y_origin_;
+  SetIterator();
+  next_return_ = it_.empty() ? nullptr : it_.data();
+}
+
+// Shared tail of a next search: returns the current element and steps on.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::CommonNext() {
+  BBC* const returned = it_.data();
+  it_.forward();
+  next_return_ = it_.cycled_list() ? nullptr : it_.data();
+  return previous_return_ = returned;
+}
+
+// Shared exit for an exhausted search: clears the state, returns nullptr.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::CommonEnd() {
+  next_return_ = nullptr;
+  previous_return_ = nullptr;
+  return nullptr;
+}
+
+// Points it_ at the list of grid cell (x_, y_) and marks the cycle point.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::SetIterator() {
+  const auto cell_index = y_ * grid_->gridwidth_ + x_;  // Row-major index.
+  it_ = &grid_->grid_[cell_index];
+  it_.mark_cycle_pt();
+}
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_TEXTORD_BBGRID_H_
diff --git a/tesseract/src/textord/blkocc.cpp b/tesseract/src/textord/blkocc.cpp
new file mode 100644
index 00000000..f63b8ef9
--- /dev/null
+++ b/tesseract/src/textord/blkocc.cpp
@@ -0,0 +1,165 @@
+/*****************************************************************************
+ *
+ * File:         blkocc.cpp  (Formerly blockocc.c)
+ * Description:  Block Occupancy routines
+ * Author:       Chris Newton
+ *
+ * (c) Copyright 1991, Hewlett-Packard Company.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ ******************************************************************************/
+
+#include "blkocc.h"
+
+#include "drawtord.h"
+#include "errcode.h"
+
+#include <cctype>
+#include <cmath>
+
+#include "helpers.h"
+
+namespace tesseract {
+
+double_VAR(textord_underline_threshold, 0.5, "Fraction of width occupied");
+
+// Forward declarations of the file-local projection helpers defined below.
+static void horizontal_cblob_projection(C_BLOB *blob,   // blob to project
+                                        STATS *stats);  // output
+static void horizontal_coutline_projection(C_OUTLINE *outline,  // to project
+                                           STATS *stats);        // output
+
+/**
+ * test_underline
+ *
+ * Check to see if the blob is an underline (or an overline), judged from
+ * the horizontal projection of its outlines.
+ * Return true if it is.
+ */
+
+bool test_underline(                   //look for underlines
+        bool testing_on,   ///< drawing blob
+        C_BLOB* blob,      ///< blob to test
+        int16_t baseline,  ///< coords of baseline
+        int16_t xheight    ///< height of line
+) {
+  TBOX box = blob->bounding_box();
+  int16_t width = box.width();
+  STATS projection;
+  projection.set_range(box.bottom(), box.top() + 1);
+  if (testing_on) {
+    tprintf("Testing underline on blob at (%d,%d)->(%d,%d), base=%d\nOccs:",
+            box.left(), box.bottom(), box.right(), box.top(), baseline);
+  }
+  horizontal_cblob_projection(blob, &projection);
+
+  // Largest projection count among the blob rows below the baseline.
+  int32_t desc_occ = 0;
+  for (int16_t row = box.bottom(); row < baseline; row++) {
+    if (row > box.top()) continue;
+    if (projection.pile_count(row) > desc_occ) {
+      desc_occ = projection.pile_count(row);
+    }
+  }
+
+  // Largest count among the blob rows from the baseline up to the x-height.
+  int32_t x_occ = 0;
+  for (int16_t row = baseline; row <= baseline + xheight; row++) {
+    const bool inside_blob = row >= box.bottom() && row <= box.top();
+    if (inside_blob && projection.pile_count(row) > x_occ) {
+      x_occ = projection.pile_count(row);
+    }
+  }
+
+  // Largest count among the blob rows above the x-height.
+  int32_t asc_occ = 0;
+  for (int16_t row = baseline + xheight + 1; row <= box.top(); row++) {
+    if (row >= box.bottom() && projection.pile_count(row) > asc_occ) {
+      asc_occ = projection.pile_count(row);
+    }
+  }
+
+  if (testing_on) {
+    tprintf("%d %d %d\n", desc_occ, x_occ, asc_occ);
+  }
+  if (desc_occ == 0 && x_occ == 0 && asc_occ == 0) {
+    // All three regions are empty: report this whatever testing_on says.
+    tprintf("Bottom=%d, top=%d, base=%d, x=%d\n",
+            box.bottom(), box.top(), baseline, xheight);
+    projection.print();
+  }
+
+  // An underline (below) or overline (above) must be more than twice the
+  // x-height count and more than the threshold fraction of the blob width.
+  const double min_occ = width * textord_underline_threshold;
+  const int32_t twice_x_occ = x_occ + x_occ;
+  const bool underline = desc_occ > twice_x_occ && desc_occ > min_occ;
+  const bool overline = asc_occ > twice_x_occ && asc_occ > min_occ;
+  return underline || overline;
+}
+
+
+/**
+ * horizontal_cblob_projection
+ *
+ * Accumulate into the given STATS the horizontal projection of every
+ * outline on the cblob's outline list.
+ */
+
+static void horizontal_cblob_projection(               //project outlines
+                                 C_BLOB *blob,  ///< blob to project
+                                 STATS *stats   ///< output
+                                ) {
+  C_OUTLINE_IT outline_it = blob->out_list();
+  outline_it.mark_cycle_pt();
+  while (!outline_it.cycled_list()) {
+    horizontal_coutline_projection(outline_it.data(), stats);
+    outline_it.forward();
+  }
+}
+
+
+/**
+ * horizontal_coutline_projection
+ *
+ * Compute the horizontal projection of an outline and, recursively, of its
+ * child outlines, and add it to the given STATS.
+ */
+
+static void horizontal_coutline_projection(                     //project outlines
+                                    C_OUTLINE *outline,  ///< outline to project
+                                    STATS *stats         ///< output
+                                   ) {
+  // The path length is held as int32_t, so the step index must be just as
+  // wide: a 16-bit counter would wrap on an outline with more than
+  // INT16_MAX steps and then index the steps with a negative value.
+  const int32_t length = outline->pathlength();
+  ICOORD pos = outline->start_pos();
+  for (int32_t stepindex = 0; stepindex < length; stepindex++) {
+    ICOORD step = outline->step(stepindex);
+    // A step with positive y adds the current x to the current row; a step
+    // with negative y adds -x to the row below. Horizontal steps add nothing.
+    if (step.y() > 0) {
+      stats->add(pos.y(), pos.x());
+    } else if (step.y() < 0) {
+      stats->add(pos.y() - 1, -pos.x());
+    }
+    pos += step;
+  }
+
+  // Child outlines contribute to the same projection.
+  C_OUTLINE_IT out_it = outline->child();
+  for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
+    horizontal_coutline_projection(out_it.data(), stats);
+  }
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/blkocc.h b/tesseract/src/textord/blkocc.h
new file mode 100644
index 00000000..4dee5cd8
--- /dev/null
+++ b/tesseract/src/textord/blkocc.h
@@ -0,0 +1,253 @@
+/******************************************************************************
+ *
+ * File:         blkocc.h  (Formerly blockocc.h)
+ * Description:  Block Occupancy routines
+ * Author:       Chris Newton
+ *
+ * (c) Copyright 1991, Hewlett-Packard Company.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ ******************************************************************************/
+
+#ifndef           BLKOCC_H
+#define           BLKOCC_H
+
+#include          "params.h"
+#include          "elst.h"
+
+namespace tesseract {
+
+class C_BLOB;
+
+/***************************************************************************
+CLASS REGION_OCC
+
+  The class REGION_OCC defines a section of outline which exists entirely
+  within a single region. The only data held is the min and max x limits of
+  the outline within the region.
+
+  REGION_OCCs are held on lists, one list for each region.  The lists are
+  built in sorted order of min x. Overlapping REGION_OCCs are not permitted on
+  a single list. An overlapping region to be added causes the existing region
+  to be extended. This extension may result in the following REGION_OCC on the
+  list overlapping the amended one. In this case the amended REGION_OCC is
+  further extended to include the range of the following one, so that the
+  following one can be deleted.
+
+****************************************************************************/
+
+// A section of outline within one region: its x limits and crossing type.
+class REGION_OCC : public ELIST_LINK
+{
+  public:
+    float min_x;                 //Lowest x in region
+    float max_x;                 //Highest x in region
+    int16_t region_type;           //Type of crossing
+
+    // Default constructor, used only in COPIER etc. It does not set the
+    // members above.
+    REGION_OCC() = default;
+
+    // Constructs a region from its x limits and its type of crossing.
+    REGION_OCC(float min, float max, int16_t region)
+        : min_x(min),
+          max_x(max),
+          region_type(region) {}
+};
+
+ELISTIZEH (REGION_OCC)
+#define RANGE_IN_BAND(band_max, band_min, range_max, range_min) \
+(((range_min) >= (band_min)) && ((range_max) < (band_max)))
+/************************************************************************
+RANGE_IN_BAND is true when the whole range lies inside the band: range_min
+must be at least band_min and range_max strictly less than band_max.
+
+Adapted from the following procedure so that it can be used in the bands
+class in an include file...
+
+bool    range_in_band[
+              range within band?
+int16_t band_max,
+int16_t band_min,
+int16_t range_max,
+int16_t range_min]
+{
+  return (range_min >= band_min) && (range_max < band_max);
+}
+***********************************************************************/
+#define RANGE_OVERLAPS_BAND(band_max, band_min, range_max, range_min) \
+(((range_max) >= (band_min)) && ((range_min) < (band_max)))
+/************************************************************************
+RANGE_OVERLAPS_BAND is true when any part of the range is inside the band:
+range_max must be at least band_min and range_min strictly less than band_max.
+
+Adapted from the following procedure so that it can be used in the bands
+class in an include file...
+
+bool    range_overlaps_band[
+              range crosses band?
+int16_t band_max,
+int16_t band_min,
+int16_t range_max,
+int16_t range_min]
+{
+  return (range_max >= band_min) && (range_min < band_max);
+}
+***********************************************************************/
+/**********************************************************************
+  Bands
+  -----
+
+  BAND 4
+--------------------------------
+  BAND 3
+--------------------------------
+
+  BAND 2
+
+--------------------------------
+
+  BAND 1
+
+Band 0 is the dot band
+
+Each band has an error margin above and below. An outline is not considered to
+have significantly changed bands until it has moved out of the error margin.
+*************************************************************************/
+// A single band (see the band diagram above). Each of its two boundaries,
+// max and min, is stored as a nominal value plus an upper and a lower error
+// margin. The minimal limits are [max_min, min_max), the nominal limits are
+// [min, max) and the maximal limits are [min_min, max_max), as used by the
+// tests below.
+class BAND
+{
+  public:
+    int16_t max_max;               //upper max
+    int16_t max;                   //nominal max
+    int16_t min_max;               //lower max
+    int16_t max_min;               //upper min
+    int16_t min;                   //nominal min
+    int16_t min_min;               //lower min
+
+    BAND() = default; // constructor
+
+    // Initialise all six limits of the band.
+    void set(int16_t new_max_max,    // upper max
+             int16_t new_max,        // new nominal max
+             int16_t new_min_max,    // new lower max
+             int16_t new_max_min,    // new upper min
+             int16_t new_min,        // new nominal min
+             int16_t new_min_min) {  // new lower min
+      min_min = new_min_min;
+      min = new_min;
+      max_min = new_max_min;
+      min_max = new_min_max;
+      max = new_max;
+      max_max = new_max_max;
+    }
+
+    // The three point tests below treat each band as half open: the lower
+    // limit is included and the upper limit is excluded.
+
+    // Is y inside the minimal limits, i.e. max_min <= y < min_max?
+    bool in_minimal(float y) {
+      return max_min <= y && y < min_max;
+    }
+
+    // Is y inside the nominal limits, i.e. min <= y < max?
+    bool in_nominal(float y) {
+      return min <= y && y < max;
+    }
+
+    // Is y inside the maximal limits, i.e. min_min <= y < max_max?
+    bool in_maximal(float y) {
+      return min_min <= y && y < max_max;
+    }
+
+    // The range tests below accept their two limits in either order; the
+    // larger one is passed to the macro as range_max and the smaller one as
+    // range_min.
+
+    // Does the range between y1 and y2 overlap the minimal limits?
+    bool range_overlaps_minimal(float y1,    //one range limit
+                                float y2) {  //other range limit
+      return y1 > y2 ? RANGE_OVERLAPS_BAND (min_max, max_min, y1, y2)
+                     : RANGE_OVERLAPS_BAND (min_max, max_min, y2, y1);
+    }
+
+    // Does the range between y1 and y2 overlap the nominal limits?
+    bool range_overlaps_nominal(float y1,    //one range limit
+                                float y2) {  //other range limit
+      return y1 > y2 ? RANGE_OVERLAPS_BAND (max, min, y1, y2)
+                     : RANGE_OVERLAPS_BAND (max, min, y2, y1);
+    }
+
+    // Does the range between y1 and y2 overlap the maximal limits?
+    bool range_overlaps_maximal(float y1,    //one range limit
+                                float y2) {  //other range limit
+      return y1 > y2 ? RANGE_OVERLAPS_BAND (max_max, min_min, y1, y2)
+                     : RANGE_OVERLAPS_BAND (max_max, min_min, y2, y1);
+    }
+
+    // Does the range between y1 and y2 lie within the minimal limits?
+    bool range_in_minimal(float y1,    //one range limit
+                          float y2) {  //other range limit
+      return y1 > y2 ? RANGE_IN_BAND (min_max, max_min, y1, y2)
+                     : RANGE_IN_BAND (min_max, max_min, y2, y1);
+    }
+
+    // Does the range between y1 and y2 lie within the nominal limits?
+    bool range_in_nominal(float y1,    //one range limit
+                          float y2) {  //other range limit
+      return y1 > y2 ? RANGE_IN_BAND (max, min, y1, y2)
+                     : RANGE_IN_BAND (max, min, y2, y1);
+    }
+
+    // Does the range between y1 and y2 lie within the maximal limits?
+    bool range_in_maximal(float y1,    //one range limit
+                          float y2) {  //other range limit
+      return y1 > y2 ? RANGE_IN_BAND (max_max, min_min, y1, y2)
+                     : RANGE_IN_BAND (max_max, min_min, y2, y1);
+    }
+};
+
+/* Standard positions */
+
+#define MAX_NUM_BANDS 5
+#define UNDEFINED_BAND 99
+#define NO_LOWER_LIMIT -9999
+#define NO_UPPER_LIMIT 9999
+
+#define DOT_BAND 0  /* band 0 is the dot band */
+
+/* Special occupancy code emitted for the 0 region at the end of a word */
+
+#define END_OF_WERD_CODE 255
+
+extern BOOL_VAR_H (blockocc_show_result, false, "Show intermediate results");
+extern INT_VAR_H (blockocc_desc_height, 0,
+"Descender height after normalisation");
+extern INT_VAR_H (blockocc_asc_height, 255,
+"Ascender height after normalisation");
+extern INT_VAR_H (blockocc_band_count, 4, "Number of bands used");
+extern double_VAR_H (textord_underline_threshold, 0.5,  // as in blkocc.cpp
+"Fraction of width occupied");
+
+bool test_underline(                   //true if blob is an underline/overline
+        bool testing_on,  //true to print debug output
+        C_BLOB* blob,      //blob to test
+        int16_t baseline,    //coords of baseline
+        int16_t xheight      //height of line
+);
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/blobgrid.cpp b/tesseract/src/textord/blobgrid.cpp
new file mode 100644
index 00000000..53a1d7b3
--- /dev/null
+++ b/tesseract/src/textord/blobgrid.cpp
@@ -0,0 +1,45 @@
+///////////////////////////////////////////////////////////////////////
+// File:        blobgrid.cpp
+// Description: BBGrid of BLOBNBOX with useful BLOBNBOX-specific methods.
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: rays@google.com (Ray Smith)
+// Created:     Sat Jun 11 10:30:01 PST 2011
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "blobgrid.h"
+
+namespace tesseract {
+
+// Passes its arguments straight through to the BBGrid base class.
+BlobGrid::BlobGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright)
+  : BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>(gridsize, bleft, tright) {}
+
+// Destructor.
+// It is defined out of line here so that the compiler can emit a single
+// vtable instead of weak vtables in every compilation unit.
+BlobGrid::~BlobGrid() = default;
+
+// Inserts the blobs from the given list into the grid with x and y
+// spreading, skipping any blob that is joined to its predecessor. Nothing is
+// removed from the source list, so ownership remains with the source list.
+void BlobGrid::InsertBlobList(BLOBNBOX_LIST* blobs) {
+  BLOBNBOX_IT it(blobs);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* candidate = it.data();
+    if (candidate->joined_to_prev()) continue;
+    InsertBBox(true, true, candidate);
+  }
+}
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/blobgrid.h b/tesseract/src/textord/blobgrid.h
new file mode 100644
index 00000000..54b19aeb
--- /dev/null
+++ b/tesseract/src/textord/blobgrid.h
@@ -0,0 +1,46 @@
+///////////////////////////////////////////////////////////////////////
+// File:        blobgrid.h
+// Description: BBGrid of BLOBNBOX with useful BLOBNBOX-specific methods.
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: rays@google.com (Ray Smith)
+// Created:     Sat Jun 11 10:26:01 PST 2011
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+
+#ifndef TESSERACT_TEXTORD_BLOBGRID_H_
+#define TESSERACT_TEXTORD_BLOBGRID_H_
+
+#include "bbgrid.h"
+#include "blobbox.h"
+
+namespace tesseract {
+
+CLISTIZEH(BLOBNBOX)
+// Search type for a grid of BLOBNBOX held on BLOBNBOX_CLISTs.
+using BlobGridSearch = GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>;
+
+// A BBGrid of BLOBNBOX that can be filled directly from a BLOBNBOX_LIST.
+class TESS_API BlobGrid : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
+ public:
+  BlobGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+  ~BlobGrid() override;
+
+  // Inserts all the blobs from the given list, with x and y spreading, without
+  // removing them from the source list, so ownership remains with that list.
+  void InsertBlobList(BLOBNBOX_LIST* blobs);
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_TEXTORD_BLOBGRID_H_
diff --git a/tesseract/src/textord/ccnontextdetect.cpp b/tesseract/src/textord/ccnontextdetect.cpp
new file mode 100644
index 00000000..cfbbb95a
--- /dev/null
+++ b/tesseract/src/textord/ccnontextdetect.cpp
@@ -0,0 +1,323 @@
+///////////////////////////////////////////////////////////////////////
+// File:        ccnontextdetect.cpp
+// Description: Connected-Component-based photo (non-text) detection.
+// Author:      rays@google.com (Ray Smith)
+//
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "ccnontextdetect.h"
+#include "imagefind.h"
+#include "strokewidth.h"
+
+namespace tesseract {
+
+// Max number of neighbour small objects per squared gridsize before a grid
+// cell becomes image.
+const double kMaxSmallNeighboursPerPix = 1.0 / 32;
+// Max number of small blobs a large blob may overlap before it is rejected
+// and determined to be image.
+const int kMaxLargeOverlapsWithSmall = 3;
+// Max number of small blobs a medium blob may overlap before it is rejected
+// and determined to be image. Larger than for large blobs as medium blobs
+// may be complex Chinese characters. Very large Chinese characters are going
+// to overlap more medium blobs than small.
+const int kMaxMediumOverlapsWithSmall = 12;
+// Max number of normal blobs a large blob may overlap before it is rejected
+// and determined to be image. This is set higher to allow for drop caps, which
+// may overlap a lot of good text blobs.
+const int kMaxLargeOverlapsWithMedium = 12;
+// Multiplier of original noise_count used to test for the case of spreading
+// noise beyond where it should really be.
+const int kOriginalNoiseMultiple = 8;
+// Pixel padding for noise blobs when rendering on the image
+// mask to encourage them to join together. Make it too big and images
+// will fatten out too much and have to be clipped to text.
+const int kNoisePadding = 4;
+// Fraction of max_noise_count_ to be added to the noise count if there is
+// photo mask in the background.
+const double kPhotoOffsetFraction = 0.375;
+// Min ratio of perimeter^2 / (16 * area) for a "good" blob in estimating noise
+// density. Good blobs are supposed to be highly likely real text.
+// We consider a square to have unit ratio, where A=(p/4)^2, hence the factor
+// of 16. Digital circles are weird and have a minimum ratio of pi/64, not
+// the 1/(4pi) that you would expect.
+const double kMinGoodTextPARatio = 1.5;
+
+CCNonTextDetect::CCNonTextDetect(int gridsize,
+                             const ICOORD& bleft, const ICOORD& tright)
+  : BlobGrid(gridsize, bleft, tright),
+    max_noise_count_(static_cast<int>(kMaxSmallNeighboursPerPix *
+                                      gridsize * gridsize)),  // Scales with cell area.
+    noise_density_(nullptr) {  // Assigned later by ComputeNonTextMask().
+  // TODO(rays) break max_noise_count_ out into an area-proportional
+  // value, as now plus an additive constant for the number of text blobs
+  // in the 3x3 neighbourhood - maybe 9.
+}
+
+CCNonTextDetect::~CCNonTextDetect() {
+  delete noise_density_;  // nullptr unless ComputeNonTextMask() assigned it.
+}
+
+// Creates and returns a Pix with the same resolution as the original
+// in which 1 (black) pixels represent likely non text (photo, line drawing)
+// areas of the page, deleting from the blob_block the blobs that were
+// determined to be non-text.
+// The photo_map is used to bias the decision towards non-text, rather than
+// supplying a definite decision.
+// The blob_block is the usual result of connected component analysis,
+// holding the detected blobs.
+// The returned Pix should be PixDestroyed after use.
+Pix* CCNonTextDetect::ComputeNonTextMask(bool debug, Pix* photo_map,
+                                         TO_BLOCK* blob_block) {
+  // Insert the smallest blobs into the grid.
+  InsertBlobList(&blob_block->small_blobs);
+  InsertBlobList(&blob_block->noise_blobs);
+  // Add the medium blobs that don't have a good strokewidth neighbour.
+  // Those that do go into good_grid as an antidote to spreading beyond the
+  // real reaches of a noise region.
+  BlobGrid good_grid(gridsize(), bleft(), tright());
+  BLOBNBOX_IT blob_it(&blob_block->blobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0;
+    perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area();
+    if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio)
+      InsertBBox(true, true, blob);
+    else
+      good_grid.InsertBBox(true, true, blob);
+  }
+  // Free any density map left over from an earlier call so it is not leaked.
+  delete noise_density_;
+  noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid);
+  good_grid.Clear();  // Not needed any more.
+  Pix* pix = noise_density_->ThresholdToPix(max_noise_count_);
+  if (debug) pixWrite("junknoisemask.png", pix, IFF_PNG);
+  ScrollView* win = nullptr;
+  #ifndef GRAPHICS_DISABLED
+  if (debug) {
+    win = MakeWindow(0, 400, "Photo Mask Blobs");
+  }
+  #endif // !GRAPHICS_DISABLED
+  // Large and medium blobs are not text if they overlap with "a lot" of small
+  // blobs.
+  MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
+                            kMaxLargeOverlapsWithSmall,
+                            win, ScrollView::DARK_GREEN, pix);
+  MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall,
+                          win, ScrollView::WHITE, pix);
+  // Clear the grid of small blobs and insert the medium blobs.
+  Clear();
+  InsertBlobList(&blob_block->blobs);
+  MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
+                            kMaxLargeOverlapsWithMedium,
+                            win, ScrollView::DARK_GREEN, pix);
+  // Clear again before we start deleting the blobs in the grid.
+  Clear();
+  MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1,
+                            win, ScrollView::CORAL, pix);
+  MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1,
+                            win, ScrollView::GOLDENROD, pix);
+  MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1,
+                            win, ScrollView::WHITE, pix);
+  if (debug) {
+    #ifndef GRAPHICS_DISABLED
+    win->Update();
+    #endif // !GRAPHICS_DISABLED
+    pixWrite("junkccphotomask.png", pix, IFF_PNG);
+    #ifndef GRAPHICS_DISABLED
+    delete win->AwaitEvent(SVET_DESTROY);
+    delete win;
+    #endif // !GRAPHICS_DISABLED
+  }
+  return pix;
+}
+
+// Computes and returns the noise_density IntGrid, at the same gridsize as
+// this grid: each cell holds the number of small elements counted over the
+// 3x3 neighbourhood of that cell. good_grid holds the blobs that are
+// considered most likely good text, and this grid holds the small and medium
+// blobs that are more likely non-text.
+// The photo_map is only used to bias cells towards non-text, rather than
+// supplying a definite decision.
+IntGrid* CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix* photo_map,
+                                              BlobGrid* good_grid) {
+  IntGrid* orig_counts = CountCellElements();
+  IntGrid* density = orig_counts->NeighbourhoodSum();
+  IntGrid* text_counts = good_grid->CountCellElements();
+  // Raise the density in photo areas to bias the decision and minimize
+  // hallucinated text on images, but zero it where there are good blobs and
+  // the cell's own count is low, i.e. most of the sum came from neighbours.
+  const int photo_height = pixGetHeight(photo_map);
+  const int bias = IntCastRounded(max_noise_count_ * kPhotoOffsetFraction);
+  const int cell = gridsize();
+  for (int y = 0; y < gridheight(); ++y) {
+    for (int x = 0; x < gridwidth(); ++x) {
+      const int noise = density->GridCellValue(x, y);
+      if (noise > max_noise_count_) {
+        // Over threshold: only cells that also hold good text need attention.
+        if (text_counts->GridCellValue(x, y) <= 0) continue;
+        const int own_count = orig_counts->GridCellValue(x, y);
+        if (debug) {
+          tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n",
+                  x * cell, y * cell,
+                  density->GridCellValue(x, y),
+                  text_counts->GridCellValue(x, y), own_count, max_noise_count_);
+        }
+        if (own_count * kOriginalNoiseMultiple <= max_noise_count_) {
+          density->SetGridCell(x, y, 0);
+        }
+        continue;
+      }
+      // At or under threshold: skip unless the photo bias would push it over.
+      if (max_noise_count_ >= noise + bias) continue;
+      // Test for photo over the cell rectangle (top is numerically < bottom).
+      int left = x * cell;
+      int right = left + cell;
+      int bottom = photo_height - y * cell;
+      int top = bottom - cell;
+      if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right,
+                                      &bottom)) {
+        density->SetGridCell(x, y, noise + bias);
+      }
+    }
+  }
+  delete orig_counts;
+  delete text_counts;
+  return density;
+}
+
+// Helper to expand a box in one of the 4 directions by the given pad,
+// provided it does not expand into any cell with a zero noise density.
+// If that is not possible, try expanding all round by a small constant.
+static TBOX AttemptBoxExpansion(const TBOX& box, const IntGrid& noise_density,
+                                int pad) {
+  TBOX expanded_box(box);
+  expanded_box.set_right(box.right() + pad);  // Try growing to the right.
+  if (!noise_density.AnyZeroInRect(expanded_box))
+    return expanded_box;
+  expanded_box = box;
+  expanded_box.set_left(box.left() - pad);  // Try growing to the left.
+  if (!noise_density.AnyZeroInRect(expanded_box))
+    return expanded_box;
+  expanded_box = box;
+  expanded_box.set_top(box.top() + pad);  // Try growing upwards.
+  if (!noise_density.AnyZeroInRect(expanded_box))
+    return expanded_box;
+  expanded_box = box;
+  expanded_box.set_bottom(box.bottom() - pad);  // Downwards: bottom is low y.
+  if (!noise_density.AnyZeroInRect(expanded_box))
+    return expanded_box;
+  expanded_box = box;
+  expanded_box.pad(kNoisePadding, kNoisePadding);  // Small all-round pad.
+  if (!noise_density.AnyZeroInRect(expanded_box))
+    return expanded_box;
+  return box;  // No expansion avoids the zero cells; keep the original box.
+}
+
+// Tests each blob in the list to see if it is certain non-text using 2
+// conditions:
+// 1. blob overlaps a cell with high value in noise_density_ (previously set
+// by ComputeNoiseDensity).
+// OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
+// condition is disabled with max_blob_overlaps == -1.
+// If it does, the blob is declared non-text, and is used to mark up the
+// nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
+// neighbours reset, as they may now point to deleted data.
+// WARNING: The blobs in the list may be in the *this grid, but they are
+// not removed. If any deleted blobs might be in *this, then this must be
+// Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
+// If the win is not nullptr, deleted blobs are drawn on it in red, and kept
+// blobs are drawn on it in ok_color.
+void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
+                                                int max_blob_overlaps,
+                                                ScrollView* win,
+                                                ScrollView::Color ok_color,
+                                                Pix* nontext_mask) {
+  int imageheight = tright().y() - bleft().y();  // Vertical extent of the grid.
+  BLOBNBOX_IT blob_it(blobs);
+  BLOBNBOX_LIST dead_blobs;
+  BLOBNBOX_IT dead_it(&dead_blobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    TBOX box = blob->bounding_box();
+    if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) &&
+        (max_blob_overlaps < 0 ||
+            !BlobOverlapsTooMuch(blob, max_blob_overlaps))) {
+      blob->ClearNeighbours();
+      #ifndef GRAPHICS_DISABLED
+      if (win != nullptr)
+        blob->plot(win, ok_color, ok_color);
+      #endif // !GRAPHICS_DISABLED
+    } else {
+      if (noise_density_->AnyZeroInRect(box)) {
+        // There is a danger that the bounding box may overlap real text, so
+        // we need to render the outline.
+        Pix* blob_pix = blob->cblob()->render_outline();
+        pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
+                    box.width(), box.height(), PIX_SRC | PIX_DST,
+                    blob_pix, 0, 0);
+        pixDestroy(&blob_pix);
+      } else {
+        if (box.area() < gridsize() * gridsize()) {
+          // It is a really bad idea to make lots of small components in the
+          // photo mask, so try to join it to a bigger area by expanding the
+          // box in a way that does not touch any zero noise density cell.
+          box = AttemptBoxExpansion(box, *noise_density_, gridsize());
+        }
+        // All overlapped cells are non-zero, so just mark the rectangle.
+        pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
+                    box.width(), box.height(), PIX_SET, nullptr, 0, 0);
+      }
+      #ifndef GRAPHICS_DISABLED
+      if (win != nullptr)
+        blob->plot(win, ScrollView::RED, ScrollView::RED);
+      #endif // !GRAPHICS_DISABLED
+      // It is safe to delete the cblob now, as it isn't used by the grid
+      // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the
+      // dead_blobs list.
+      // TODO(rays) delete the delete when the BLOBNBOX destructor deletes
+      // the cblob.
+      delete blob->cblob();
+      dead_it.add_to_end(blob_it.extract());
+    }
+  }
+}
+
+// Returns true if the number of blobs in the current grid that have a major
+// overlap with the given blob exceeds max_overlaps.
+bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) {
+  // Set up a rectangle search of this grid over the blob's bounding box.
+  BlobGridSearch search(this);
+  const TBOX& blob_box = blob->bounding_box();
+  search.StartRectSearch(blob_box);
+  search.SetUniqueMode(true);
+  // With a negative limit the search is never advanced.
+  if (max_overlaps < 0) return false;
+  int num_overlaps = 0;
+  for (BLOBNBOX* other = search.NextRectSearch(); other != nullptr;
+       other = search.NextRectSearch()) {
+    if (!blob_box.major_overlap(other->bounding_box()))
+      continue;
+    // Stop searching as soon as the limit is exceeded.
+    if (++num_overlaps > max_overlaps)
+      return true;
+  }
+  return false;
+}
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/ccnontextdetect.h b/tesseract/src/textord/ccnontextdetect.h
new file mode 100644
index 00000000..e1f1ca2d
--- /dev/null
+++ b/tesseract/src/textord/ccnontextdetect.h
@@ -0,0 +1,87 @@
+///////////////////////////////////////////////////////////////////////
+// File:        ccnontextdetect.h
+// Description: Connected-Component-based non-text detection.
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: rays@google.com (Ray Smith)
+// Created:     Sat Jun 11 09:52:01 PST 2011
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_CCPHOTODETECT_H_
+#define TESSERACT_TEXTORD_CCPHOTODETECT_H_
+
+#include "blobgrid.h"
+#include "scrollview.h"
+
+namespace tesseract {
+
+// The CCNonTextDetect class contains grid-based operations on blobs to create
+// a full-resolution image mask analogous yet complementary to
+// pixGenHalftoneMask as it is better at line-drawings, graphs and charts.
+class CCNonTextDetect : public BlobGrid {
+ public:
+  CCNonTextDetect(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+  ~CCNonTextDetect() override;
+
+  // Creates and returns a Pix with the same resolution as the original
+  // in which 1 (black) pixels represent likely non text (photo, line drawing)
+  // areas of the page, deleting from the blob_block the blobs that were
+  // determined to be non-text.
+  // The photo_map (binary image mask) is used to bias the decision towards
+  // non-text, rather than supplying a definite decision.
+  // The blob_block is the usual result of connected component analysis,
+  // holding the detected blobs.
+  // The returned Pix should be PixDestroyed after use.
+  Pix* ComputeNonTextMask(bool debug, Pix* photo_map, TO_BLOCK* blob_block);
+
+ private:
+  // Computes and returns the noise_density IntGrid, at the same gridsize as
+  // this by summing the number of small elements in a 3x3 neighbourhood of
+  // each grid cell. good_grid is filled with blobs that are considered most
+  // likely good text, and this is filled with small and medium blobs that are
+  // more likely non-text.
+  // The photo_map is used to bias the decision towards non-text, rather than
+  // supplying a definite decision.
+  IntGrid* ComputeNoiseDensity(bool debug, Pix* photo_map, BlobGrid* good_grid);
+
+  // Tests each blob in the list to see if it is certain non-text using 2
+  // conditions: 1. blob overlaps a cell with high value in noise_density_
+  // (previously set by ComputeNoiseDensity).
+  // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
+  // condition is disabled with max_blob_overlaps == -1.
+  // If it does, the blob is declared non-text, and is used to mark up the
+  // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
+  // neighbours reset, as they may now point to deleted data.
+  // WARNING: The blobs in the list may be in the *this grid, but they are
+  // not removed. If any deleted blobs might be in *this, then this must be
+  // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
+  // If the win is not nullptr, deleted blobs are drawn on it in red, and kept
+  // blobs are drawn on it in ok_color.
+  void MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
+                                 int max_blob_overlaps,
+                                 ScrollView* win, ScrollView::Color ok_color,
+                                 Pix* nontext_mask);
+  // Returns true if the given blob overlaps more than max_overlaps blobs
+  // in the current grid.
+  bool BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps);
+
+  // Max entry in noise_density_ before the cell is declared noisy.
+  int max_noise_count_;
+  // Completed noise density map, which we keep around to use for secondary
+  // noise detection.
+  IntGrid* noise_density_;
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_TEXTORD_CCPHOTODETECT_H_
diff --git a/tesseract/src/textord/cjkpitch.cpp b/tesseract/src/textord/cjkpitch.cpp
new file mode 100644
index 00000000..3d547396
--- /dev/null
+++ b/tesseract/src/textord/cjkpitch.cpp
@@ -0,0 +1,1070 @@
+///////////////////////////////////////////////////////////////////////
+// File:        cjkpitch.cpp
+// Description: Code to determine fixed pitchness and the pitch if fixed,
+//              for CJK text.
+// Author:      takenaka@google.com (Hiroshi Takenaka)
+//
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "cjkpitch.h"
+#include "genericvector.h"
+#include "topitch.h"
+#include "tovars.h"
+
+#include <algorithm>    // for std::sort
+#include <vector>       // for std::vector
+
+namespace tesseract {
+
+static BOOL_VAR(textord_space_size_is_variable, false,
+                "If true, word delimiter spaces are assumed to have "
+                "variable width, even though characters have fixed pitch.");
+
+// Allow +/-10% error for character pitch / body size.
+static const float kFPTolerance = 0.1f;
+
+// Minimum ratio of "good" character pitch for a row to be considered
+// to be fixed-pitch.
+static const float kFixedPitchThreshold = 0.35f;
+
+// Rank (order) statistics over a small collection of float samples.
+class SimpleStats {
+ public:
+  SimpleStats() = default;
+  ~SimpleStats() = default;
+
+  // Drops every sample.
+  void Clear() {
+    finalized_ = false;
+    values_.clear();
+  }
+
+  // Appends a sample; the samples are re-sorted on the next rank query.
+  void Add(float value) {
+    finalized_ = false;
+    values_.push_back(value);
+  }
+
+  // Sorts the samples into ascending order so rank queries can index them.
+  void Finish() {
+    std::sort(values_.begin(), values_.end());
+    finalized_ = true;
+  }
+
+  // Returns the value at fraction frac (0 = smallest, 1 = largest) of the
+  // sorted samples, interpolating linearly; 0 when there are no samples.
+  float ile(double frac) {
+    if (!finalized_) Finish();
+    if (values_.empty()) return 0.0f;
+    if (frac >= 1.0) return values_.back();
+    if (frac <= 0.0 || values_.size() == 1) return values_[0];
+    const double position = (values_.size() - 1) * frac;
+    const int lower = static_cast<int>(position);
+    const float weight = position - lower;
+    return values_[lower] * (1.0f - weight) + values_[lower + 1] * weight;
+  }
+
+  float median() { return ile(0.5); }
+
+  // Smallest sample, or 0 when there are no samples.
+  float minimum() {
+    if (!finalized_) Finish();
+    return values_.empty() ? 0.0f : values_[0];
+  }
+
+  int size() const { return values_.size(); }
+
+ private:
+  bool finalized_ = false;  // True once values_ has been sorted by Finish().
+  std::vector<float> values_;
+};
+
+// statistics for a small collection of float pairs (x, y).
+// EstimateYFor(x, r) returns the estimated y at x, based on
+// existing samples between x*(1-r) ~ x*(1+r).
+class LocalCorrelation {
+ public:
+  struct float_pair {
+    float x, y;
+    int vote;
+  };
+
+  LocalCorrelation(): finalized_(false) { }
+  ~LocalCorrelation() { }
+
+  void Finish() {
+    values_.sort(float_pair_compare);
+    finalized_ = true;
+  }
+
+  // Discards all samples so that the object can be reused.
+  void Clear() {
+    values_.clear();
+    finalized_ = false;
+  }
+
+  void Add(float x, float y, int v) {
+    const float_pair value = {x, y, v};
+    values_.push_back(value);
+    finalized_ = false;
+  }
+
+  // Returns the vote-weighted average of x * (y_i / x_i) over the samples with
+  // x_i in [x*(1-r), x*(1+r)], or over all samples if none is in that range.
+  float EstimateYFor(float x, float r) {
+    ASSERT_HOST(finalized_);
+    int start = 0, end = values_.size();
+    // Because the number of samples is assumed to be small, just use
+    // linear search to find values within the range.
+    while (start < values_.size() && values_[start].x < x * (1.0 - r)) start++;
+    while (end - 1 >= 0 && values_[end - 1].x > x * (1.0 + r)) end--;
+
+    // Fall back to the global average if there are no data within r of x.
+    if (start >= end) {
+      start = 0;
+      end = values_.size();
+    }
+
+    // Compute weighted average of the values.
+    float rc = 0;
+    int vote = 0;
+    for (int i = start; i < end; i++) {
+      rc += values_[i].vote * x * values_[i].y / values_[i].x;
+      vote += values_[i].vote;
+    }
+    if (vote == 0) return 0.0f;  // No votes (or no samples): avoid 0/0.
+    return rc / vote;
+  }
+
+ private:
+  static int float_pair_compare(const void* a, const void* b) {
+    const auto* f_a = static_cast<const float_pair*>(a);
+    const auto* f_b = static_cast<const float_pair*>(b);
+    return (f_a->x > f_b->x) ? 1 : ((f_a->x < f_b->x) ? -1 : 0);
+  }
+
+  bool finalized_;
+  GenericVector<struct float_pair> values_;
+};
+
+// Class to represent a character on a fixed pitch row.  A FPChar may
+// consist of multiple blobs (BLOBNBOX's).
+class FPChar {
+ public:
+  enum Alignment {
+    ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD
+  };
+
+  FPChar(): box_(), real_body_(),
+            from_(nullptr), to_(nullptr), num_blobs_(0), max_gap_(0),
+            final_(false), alignment_(ALIGN_UNKNOWN),
+            merge_to_prev_(false), delete_flag_(false) {
+  }
+
+  // Initialize from blob.
+  void Init(BLOBNBOX *blob) {
+    box_ = blob->bounding_box();
+    real_body_ = box_;
+    from_ = to_ = blob;
+    num_blobs_ = 1;
+  }
+
+  // Merge this character with "next". The "next" character should
+  // consist of succeeding blobs on the same row.
+  void Merge(const FPChar &next) {
+    int gap = real_body_.x_gap(next.real_body_);
+    if (gap > max_gap_) max_gap_ = gap;
+
+    box_ += next.box_;
+    real_body_ += next.real_body_;
+    to_ = next.to_;
+    num_blobs_ += next.num_blobs_;
+  }
+
+  // Accessors.
+  const TBOX &box() const { return box_; }
+  void set_box(const TBOX &box) {
+    box_ = box;
+  }
+  const TBOX &real_body() const { return real_body_; }
+
+  bool is_final() const { return final_; }
+  void set_final(bool flag) {
+    final_ = flag;
+  }
+
+  const Alignment& alignment() const {
+    return alignment_;
+  }
+  void set_alignment(Alignment alignment) {
+    alignment_ = alignment;
+  }
+
+  bool merge_to_prev() const {
+    return merge_to_prev_;
+  }
+  void set_merge_to_prev(bool flag) {
+    merge_to_prev_ = flag;
+  }
+
+  bool delete_flag() const {
+    return delete_flag_;
+  }
+  void set_delete_flag(bool flag) {
+    delete_flag_ = flag;
+  }
+
+  int max_gap() const {
+    return max_gap_;
+  }
+
+  int num_blobs() const {
+    return num_blobs_;
+  }
+
+ private:
+  TBOX box_;  // Rectangle region considered to be occupied by this
+  // character.  It could be bigger than the bounding box.
+  TBOX real_body_;  // Real bounding box of this character.
+  BLOBNBOX *from_;  // The first blob of this character.
+  BLOBNBOX *to_;  // The last blob of this character.
+  int num_blobs_;  // Number of blobs that belong to this character.
+  int max_gap_;  // Maximum x gap between the blobs.
+
+  bool final_;  // True if alignment/fragmentation decision for this
+  // character is finalized.
+
+  Alignment alignment_;  // Alignment status.
+  bool merge_to_prev_;  // True if this is a fragmented blob that
+  // needs to be merged to the previous
+  // character.
+
+  bool delete_flag_;  // True if this character is merged to another
+  // one and needs to be deleted.
+};
+
+// Class to represent a fixed pitch row, as a linear collection of
+// FPChar's.
+class FPRow {
+ public:
+  FPRow() : all_pitches_(), all_gaps_(), good_pitches_(), good_gaps_(),
+            heights_(), characters_() {
+  }
+
+  ~FPRow() { }
+
+  // Initialize from TO_ROW.
+  void Init(TO_ROW *row);
+
+  // Estimate character pitch of this row, based on current alignment
+  // status of underlying FPChar's.  The argument pass1 can be set to
+  // true if the function is called after Pass1Analyze(), to eliminate
+  // some redundant computation.
+  void EstimatePitch(bool pass1);
+
+  // Check each character if it has good character pitches between its
+  // predecessor and its successor and set its alignment status.  If
+  // we already calculated the estimated pitch for this row, the value
+  // is used.  If we didn't, a character is considered to be good, if
+  // the pitches between its predecessor and its successor are almost
+  // equal.
+  void Pass1Analyze();
+
+  // Find characters that fit nicely into one imaginary body next to a
+  // character which is already finalized. Then mark them as character
+  // fragments.
+  bool Pass2Analyze();
+
+  // Merge FPChars marked as character fragments into one.
+  void MergeFragments();
+
+  // Finalize characters that are already large enough and cannot be
+  // merged with others any more.
+  void FinalizeLargeChars();
+
+  // Output pitch estimation results to attributes of TO_ROW.
+  void OutputEstimations();
+
+  void DebugOutputResult(int row_index);
+
+  int good_pitches() {
+    return good_pitches_.size();  // Number of good pitch samples collected.
+  }
+
+  float pitch() {
+    return pitch_;
+  }
+
+  float estimated_pitch() {
+    return estimated_pitch_;
+  }
+
+  void set_estimated_pitch(float v) {
+    estimated_pitch_ = v;
+  }
+
+  float height() {
+    return height_;
+  }
+
+  float height_pitch_ratio() {
+    if (good_pitches_.size() < 2) return -1.0;  // Too few good pitch samples.
+    return height_ / good_pitches_.median();
+  }
+
+  float gap() {
+    return gap_;
+  }
+
+  size_t num_chars() {
+    return characters_.size();
+  }
+  FPChar *character(int i) {
+    return &characters_[i];
+  }
+
+  const TBOX &box(int i) {
+    return characters_[i].box();
+  }
+
+  const TBOX &real_body(int i) {
+    return characters_[i].real_body();
+  }
+
+  bool is_box_modified(int i) {
+    return !(characters_[i].box() == characters_[i].real_body());
+  }
+
+  float center_x(int i) {
+    return (characters_[i].box().left() + characters_[i].box().right()) / 2.0;
+  }
+
+  bool is_final(int i) {
+    return characters_[i].is_final();
+  }
+
+  void finalize(int i) {
+    characters_[i].set_final(true);
+  }
+
+  bool is_good(int i) {
+    return characters_[i].alignment() == FPChar::ALIGN_GOOD;
+  }
+
+  void mark_good(int i) {
+    characters_[i].set_alignment(FPChar::ALIGN_GOOD);
+  }
+
+  void mark_bad(int i) {
+    characters_[i].set_alignment(FPChar::ALIGN_BAD);
+  }
+
+  void clear_alignment(int i) {
+    characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN);
+  }
+
+ private:
+  static float x_overlap_fraction(const TBOX& box1, const TBOX& box2) {
+    if (std::min(box1.width(), box2.width()) == 0) return 0.0;  // Zero-width box.
+    return -box1.x_gap(box2) / static_cast<float>(std::min(box1.width(), box2.width()));
+  }
+
+  static bool mostly_overlap(const TBOX& box1, const TBOX& box2) {
+    return x_overlap_fraction(box1, box2) > 0.9;
+  }
+
+  static bool significant_overlap(const TBOX& box1, const TBOX& box2) {
+    if (std::min(box1.width(), box2.width()) == 0) return false;
+    int overlap = -box1.x_gap(box2);
+    return overlap > 1 || x_overlap_fraction(box1, box2) > 0.1;
+  }
+
+  static float box_pitch(const TBOX& ref, const TBOX& box) {
+    return abs(ref.left() + ref.right() - box.left() - box.right()) / 2.0;  // |center distance| in x.
+  }
+
+  // Check if two neighboring characters satisfy the fixed pitch model.
+  static bool is_good_pitch(float pitch, const TBOX& box1, const TBOX& box2) {
+    // Character box shouldn't exceed pitch.
+    if (box1.width() >= pitch * (1.0 + kFPTolerance) ||
+        box2.width() >= pitch * (1.0 + kFPTolerance) ||
+        box1.height() >= pitch * (1.0 + kFPTolerance) ||
+        box2.height() >= pitch * (1.0 + kFPTolerance)) return false;
+
+    const float real_pitch = box_pitch(box1, box2);
+    if (fabs(real_pitch - pitch) < pitch * kFPTolerance) return true;
+
+    if (textord_space_size_is_variable) {
+      // Hangul characters usually have fixed pitch, but words are
+      // delimited by space which can be narrower than characters.
+      if (real_pitch > pitch && real_pitch < pitch * 2.0 &&
+          real_pitch - box1.x_gap(box2) < pitch) {
+        return true;
+      }
+    }
+    return false;
+  }
+
+  static bool is_interesting_blob(const BLOBNBOX *blob) {
+    return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER;
+  }
+
+  // Cleanup chars that are already merged to others.
+  void DeleteChars() {
+    int index = 0;
+    for (int i = 0; i < characters_.size(); ++i) {
+      if (!characters_[i].delete_flag()) {
+        if (index != i) characters_[index] = characters_[i];  // Compact kept chars to the front.
+        index++;
+      }
+    }
+    characters_.truncate(index);
+  }
+
+  float pitch_ = 0.0f; // Character pitch.
+  float estimated_pitch_ = 0.0f; // equal to pitch_ if pitch_ is considered
+  // to be good enough.
+  float height_ = 0.0f; // Character height.
+  float gap_ = 0.0f; // Minimum gap between characters.
+
+  // Pitches between any two successive characters.
+  SimpleStats all_pitches_;
+  // Gaps between any two successive characters.
+  SimpleStats all_gaps_;
+  // Pitches between any two successive characters that are consistent
+  // with the fixed pitch model.
+  SimpleStats good_pitches_;
+  // Gaps between any two successive characters that are consistent
+  // with the fixed pitch model.
+  SimpleStats good_gaps_;
+
+  SimpleStats heights_;  // Blob heights sampled in Init().
+
+  GenericVector<FPChar> characters_;
+  TO_ROW *real_row_ = nullptr; // Underlying TO_ROW for this row.
+};
+
+// Binds this FPRow to row, builds characters_ from the row's blobs and
+// computes the initial estimate of the character height.
+void FPRow::Init(TO_ROW *row) {
+  ASSERT_HOST(row != nullptr);
+  ASSERT_HOST(row->xheight > 0);
+  real_row_ = row;
+  real_row_->pitch_decision = PITCH_CORR_PROP;  // Default decision.
+
+  BLOBNBOX_IT blob_it = row->blob_list();
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX *blob = blob_it.data();
+    if (!is_interesting_blob(blob)) continue;
+    FPChar fp_char;
+    fp_char.Init(blob);
+    // Merge unconditionally if two blobs overlap.
+    const bool overlaps_last = !characters_.empty() &&
+        significant_overlap(fp_char.box(), characters_.back().box());
+    if (overlaps_last) {
+      characters_.back().Merge(fp_char);
+    } else {
+      characters_.push_back(fp_char);
+    }
+    // Blobs at least three times as wide as tall don't vote on height.
+    const TBOX bound = blob->bounding_box();
+    if (bound.height() * 3.0 > bound.width()) heights_.Add(bound.height());
+  }
+  heights_.Finish();
+  height_ = heights_.ile(0.875);
+}
+
+void FPRow::OutputEstimations() {  // Publishes this row's pitch/spacing results to real_row_.
+  if (good_pitches_.size() == 0) {
+    pitch_ = 0.0f;
+    real_row_->pitch_decision = PITCH_CORR_PROP;
+    return;
+  }
+
+  pitch_ = good_pitches_.median();
+  real_row_->fixed_pitch = pitch_;
+  // good_gaps_.ile(0.125) can be large if most characters on the row
+  // are skinny. Use pitch_ - height_ (clamped at zero) instead if it's
+  // smaller.
+  real_row_->kern_size = real_row_->pr_nonsp =
+          std::min(good_gaps_.ile(0.125), std::max(pitch_ - height_, 0.0f));
+  real_row_->body_size = pitch_ - real_row_->kern_size;
+
+  if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) {
+    // If too few pitches fit the fixed pitch model (fewer than
+    // kFixedPitchThreshold of all pitches), consider the line to be
+    // proportional. NOTE(review): the original comment cited 50% as the
+    // threshold; confirm against the value of kFixedPitchThreshold.
+    // Anyway we keep the estimated values (fixed_pitch, kern_size, etc.) in
+    // real_row_ as a partial result for use in the normalization process.
+    real_row_->pitch_decision = PITCH_CORR_PROP;
+    return;
+  } else if (good_pitches_.size() > all_pitches_.size() * 0.75) {
+    real_row_->pitch_decision = PITCH_DEF_FIXED;
+  } else {
+    real_row_->pitch_decision = PITCH_CORR_FIXED;
+  }
+
+  real_row_->space_size = real_row_->pr_space = pitch_;
+  // Set min_space to half of (pitch + smallest good gap) so that we can
+  // break CJK text at a half-width space after punctuation.
+  real_row_->min_space = (pitch_ + good_gaps_.minimum()) * 0.5;
+
+  // Don't consider a quarter space as a real space, because it's used
+  // for line justification in traditional Japanese books.
+  real_row_->max_nonspace = std::max(pitch_ * 0.25 + good_gaps_.minimum(),
+                                static_cast<double>(good_gaps_.ile(0.875)));
+
+  int space_threshold =
+          std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
+                   static_cast<int>(real_row_->xheight));
+
+  // Make max_nonspace larger than any intra-character gap so that
+  // make_prop_words() won't break a row at the middle of a character.
+  for (size_t i = 0; i < num_chars(); ++i) {
+    if (characters_[i].max_gap() > real_row_->max_nonspace) {
+      real_row_->max_nonspace = characters_[i].max_gap();
+    }
+  }
+  real_row_->space_threshold =
+          std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
+                   static_cast<int>(real_row_->xheight));
+  real_row_->used_dm_model = false;
+
+  // Setup char_cells.
+  ICOORDELT_IT cell_it = &real_row_->char_cells;
+  auto *cell = new ICOORDELT(real_body(0).left(), 0);
+  cell_it.add_after_then_move(cell);
+
+  int right = real_body(0).right();
+  for (size_t i = 1; i < num_chars(); ++i) {
+    // Put a word break if the gap between two characters is bigger than
+    // the local space_threshold (computed before max_nonspace was widened).
+    // Don't break if neither of the two characters is finalized, because
+    // they may still need to be merged into one character.
+    if ((is_final(i - 1) || is_final(i)) &&
+        real_body(i - 1).x_gap(real_body(i)) > space_threshold) {
+      cell = new ICOORDELT(right + 1, 0);
+      cell_it.add_after_then_move(cell);
+      while (right + pitch_ < box(i).left()) {
+        right += pitch_;
+        cell = new ICOORDELT(right + 1, 0);
+        cell_it.add_after_then_move(cell);
+      }
+      right = box(i).left();
+    }
+    cell = new ICOORDELT((right + real_body(i).left()) / 2, 0);
+    cell_it.add_after_then_move(cell);
+    right = real_body(i).right();
+  }
+
+  cell = new ICOORDELT(right + 1, 0);
+  cell_it.add_after_then_move(cell);
+
+  // TODO(takenaka): add code to store alignment/fragmentation
+  // information to blobs so that it can be reused later, e.g. in
+  // recognition phase.
+}
+
+void FPRow::EstimatePitch(bool pass1) {  // Recomputes pitch_, gap_ and height_ from characters_.
+  good_pitches_.Clear();
+  all_pitches_.Clear();
+  good_gaps_.Clear();
+  all_gaps_.Clear();
+  heights_.Clear();
+  if (num_chars() == 0) return;
+
+  int32_t cx0, cx1;
+  bool prev_was_good = is_good(0);
+  cx0 = center_x(0);
+
+  heights_.Add(box(0).height());
+  for (size_t i = 1; i < num_chars(); i++) {
+    cx1 = center_x(i);
+    int32_t pitch = cx1 - cx0;
+    int32_t gap = std::max(0, real_body(i - 1).x_gap(real_body(i)));
+
+    heights_.Add(box(i).height());
+    // Ignore the pair if the pitch is too small.  Wide pitches are kept
+    // because they may be the result of large tracking.
+    if (pitch > height_ * 0.5) {
+      all_pitches_.Add(pitch);
+      all_gaps_.Add(gap);
+      if (is_good(i)) {
+        // In pass1 (after Pass1Analyze()), all characters marked as
+        // "good" have a good consistent pitch with their previous
+        // characters.  However, that's not true in pass2, where a good
+        // character may have a good pitch only with its successor.
+        // So in pass2 we collect only pitch values between two good
+        // characters that are within tolerance of estimated_pitch_.
+        if (pass1 || (prev_was_good &&
+                      fabs(estimated_pitch_ - pitch) <
+                          kFPTolerance * estimated_pitch_)) {
+          good_pitches_.Add(pitch);
+          if (!is_box_modified(i - 1) && !is_box_modified(i)) {
+            good_gaps_.Add(gap);
+          }
+        }
+        prev_was_good = true;
+      } else {
+        prev_was_good = false;
+      }
+    }
+    cx0 = cx1;
+  }
+
+  good_pitches_.Finish();
+  all_pitches_.Finish();
+  good_gaps_.Finish();
+  all_gaps_.Finish();
+  heights_.Finish();
+
+  height_ = heights_.ile(0.875);
+  if (all_pitches_.size() == 0) {
+    pitch_ = 0.0f;
+    gap_ = 0.0f;
+  } else if (good_pitches_.size() < 2) {
+    // We don't have enough data to estimate the pitch of this row yet.
+    // Use median of all pitches as the initial guess.
+    pitch_ = all_pitches_.median();
+    ASSERT_HOST(pitch_ > 0.0f);
+    gap_ = all_gaps_.ile(0.125);
+  } else {
+    pitch_ = good_pitches_.median();
+    ASSERT_HOST(pitch_ > 0.0f);
+    gap_ = good_gaps_.ile(0.125);
+  }
+}
+
+// Prints the pitch decision and spacing values stored in real_row_,
+// followed by one line per character. Prints nothing for an empty row.
+void FPRow::DebugOutputResult(int row_index) {
+  if (num_chars() == 0) return;
+  tprintf("Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, "
+          "space_size=%f, space_threshold=%d, xheight=%f\n",
+          row_index, static_cast<int>(real_row_->pitch_decision),
+          real_row_->fixed_pitch, real_row_->max_nonspace,
+          real_row_->space_size, real_row_->space_threshold,
+          real_row_->xheight);
+  for (unsigned i = 0; i < num_chars(); i++) {
+    tprintf("Char %u: is_final=%d is_good=%d num_blobs=%d: ",
+            i, is_final(i), is_good(i), character(i)->num_blobs());
+    box(i).print();
+  }
+}
+
+// First-pass alignment marking. A character is marked good when the
+// pitches to its left and right neighbors are both acceptable; the first
+// and last characters then take over the alignment of their only neighbor.
+void FPRow::Pass1Analyze() {
+  if (num_chars() < 2) return;
+
+  for (size_t i = 2; i < num_chars(); i++) {
+    // With a pitch estimate, both adjacent pairs must agree with it.
+    // Without one, the measured pitch of the left pair is the reference
+    // that the right pair is tested against.
+    const bool both_sides_good = estimated_pitch_ > 0.0f
+        ? is_good_pitch(estimated_pitch_, box(i - 2), box(i - 1)) &&
+          is_good_pitch(estimated_pitch_, box(i - 1), box(i))
+        : is_good_pitch(box_pitch(box(i - 2), box(i - 1)), box(i - 1), box(i));
+    if (both_sides_good) mark_good(i - 1);
+  }
+
+  character(0)->set_alignment(character(1)->alignment());
+  character(num_chars() - 1)->set_alignment(
+      character(num_chars() - 2)->alignment());
+}
+
+bool FPRow::Pass2Analyze() {  // Returns true if any character changed alignment or awaits merging.
+  bool changed = false;
+  if (num_chars() <= 1 || estimated_pitch_ == 0.0f) {
+    return false;
+  }
+  for (size_t i = 0; i < num_chars(); i++) {
+    if (is_final(i)) continue;
+
+    FPChar::Alignment alignment = character(i)->alignment();
+    bool intersecting = false;
+    bool not_intersecting = false;
+
+    if (i < num_chars() - 1 && is_final(i + 1)) {
+      // Next character is already finalized. Estimate the imaginary
+      // body including this character based on the next character.
+      // Skip whitespace if necessary.
+      bool skipped_whitespaces = false;
+      float c1 = center_x(i + 1)  - 1.5 * estimated_pitch_;
+      while (c1 > box(i).right()) {
+        skipped_whitespaces = true;
+        c1 -= estimated_pitch_;
+      }
+      TBOX ibody(c1, box(i).bottom(), c1 + estimated_pitch_, box(i).top());
+
+      // Collect all characters that mostly fit in the region.
+      // Also, their union height shouldn't be too big.
+      int j = i;  // Signed on purpose: the leftward scan below can reach -1.
+      TBOX merged;
+      while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) &&
+             merged.bounding_union(box(j)).height() <
+             estimated_pitch_ * (1 + kFPTolerance)) {
+        merged += box(j);
+        j--;
+      }
+
+      if (j >= 0 && significant_overlap(ibody, box(j))) {
+        // character(j) lies on the character boundary and doesn't fit
+        // well into the imaginary body.
+        if (!is_final(j)) intersecting = true;
+      } else {
+        not_intersecting = true;
+        if (i - j > 0) {
+          // Merge character(j+1) ... character(i) because they fit
+          // into the body nicely.
+          if (i - j == 1) {
+            // Only one char in the imaginary body.
+            if (!skipped_whitespaces) mark_good(i);
+            // set ibody as bounding box of this character to get
+            // better pitch analysis result for halfwidth glyphs
+            // followed by a halfwidth space.
+            if (box(i).width() <= estimated_pitch_ * 0.5) {
+              ibody += box(i);
+              character(i)->set_box(ibody);
+            }
+            character(i)->set_merge_to_prev(false);
+            finalize(i);
+          } else {
+            for (int k = i; k > j + 1; k--) {
+              character(k)->set_merge_to_prev(true);
+            }
+          }
+        }
+      }
+    }
+    if (i > 0 && is_final(i - 1)) {
+      // Now we repeat everything from the opposite side.  Previous
+      // character is already finalized. Estimate the imaginary body
+      // including this character based on the previous character.
+      bool skipped_whitespaces = false;
+      float c1 = center_x(i - 1) + 1.5 * estimated_pitch_;
+      while (c1 < box(i).left()) {
+        skipped_whitespaces = true;
+        c1 += estimated_pitch_;
+      }
+      TBOX ibody(c1 - estimated_pitch_, box(i).bottom(), c1, box(i).top());
+
+      size_t j = i;
+      TBOX merged;
+      while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) &&
+             merged.bounding_union(box(j)).height() <
+             estimated_pitch_ * (1 + kFPTolerance)) {
+        merged += box(j);
+        j++;
+      }
+
+      if (j < num_chars() && significant_overlap(ibody, box(j))) {
+        if (!is_final(j)) intersecting = true;
+      } else {
+        not_intersecting = true;
+        if (j - i > 0) {
+          if (j - i == 1) {
+            if (!skipped_whitespaces) mark_good(i);
+            if (box(i).width() <= estimated_pitch_ * 0.5) {
+              ibody += box(i);
+              character(i)->set_box(ibody);
+            }
+            character(i)->set_merge_to_prev(false);
+            finalize(i);
+          } else {
+            for (size_t k = i + 1; k < j; k++) {
+              character(k)->set_merge_to_prev(true);
+            }
+          }
+        }
+      }
+    }
+
+    // This character doesn't fit well into the estimated imaginary
+    // bodies on any side that was examined. Mark it as bad.
+    if (intersecting && !not_intersecting) mark_bad(i);
+    if (character(i)->alignment() != alignment ||
+        character(i)->merge_to_prev()) {
+      changed = true;
+    }
+  }
+
+  return changed;
+}
+
+// Folds each merge_to_prev character into the last unflagged one, then compacts.
+void FPRow::MergeFragments() {
+  int target = 0;  // Index of the most recent char not flagged for merging.
+  for (size_t j = 0; j < num_chars(); ++j) {
+    if (!character(j)->merge_to_prev()) {
+      target = j;
+      continue;
+    }
+    character(target)->Merge(*character(j));
+    character(j)->set_delete_flag(true);
+    clear_alignment(target);
+    character(j-1)->set_merge_to_prev(false);
+  }
+  DeleteChars();
+}
+
+void FPRow::FinalizeLargeChars() {  // Finalizes chars that cannot be merged with a neighbor.
+  float row_pitch = estimated_pitch();
+  for (size_t i = 0; i < num_chars(); i++) {
+    if (is_final(i)) continue;
+
+    // Finalize if both neighbors are finalized. We have no other choice.
+    if (i > 0 && is_final(i - 1) && i < num_chars() - 1 && is_final(i + 1)) {
+      finalize(i);
+      continue;
+    }
+
+    float cx = center_x(i);
+    TBOX ibody(cx - 0.5 * row_pitch, 0, cx + 0.5 * row_pitch, 1);
+    if (i > 0) {
+      // The preceding character significantly intersects with the
+      // imaginary body of this character. Let Pass2Analyze() handle
+      // this case.
+      if (x_overlap_fraction(ibody, box(i - 1)) > 0.1) continue;
+      if (!is_final(i - 1)) {
+        TBOX merged = box(i);
+        merged += box(i - 1);
+        // If the union still fits within one pitch, this character cannot
+        // be finalized yet because it can be merged with the previous one.
+        // Again, let Pass2Analyze() handle this case.
+        if (merged.width() < row_pitch) continue;
+      }
+    }
+    if (i < num_chars() - 1) {
+      if (x_overlap_fraction(ibody, box(i + 1)) > 0.1) continue;
+      if (!is_final(i + 1)) {
+        TBOX merged = box(i);
+        merged += box(i + 1);
+        if (merged.width() < row_pitch) continue;
+      }
+    }
+    finalize(i);
+  }
+
+  // Update alignment decision.  We only consider finalized characters
+  // in pass2.  E.g. if a finalized character C has another finalized
+  // character L on its left and a not-finalized character R on its
+  // right, we mark C as good if the pitch between C and L is good,
+  // regardless of the pitch between C and R.
+  for (size_t i = 0; i < num_chars(); i++) {
+    if (!is_final(i)) continue;
+    bool good_pitch = false;
+    bool bad_pitch = false;
+    if (i > 0 && is_final(i - 1)) {
+      if (is_good_pitch(row_pitch, box(i - 1), box(i))) {
+        good_pitch = true;
+      } else {
+        bad_pitch = true;
+      }
+    }
+    if (i < num_chars() - 1 && is_final(i + 1)) {
+      if (is_good_pitch(row_pitch, box(i), box(i + 1))) {
+        good_pitch = true;
+      } else {
+        bad_pitch = true;
+      }
+    }
+    if (good_pitch && !bad_pitch) mark_good(i);
+    else if (!good_pitch && bad_pitch) mark_bad(i);
+  }
+}
+
+// Page-level driver that holds one FPRow per text row and forwards each
+// analysis step to all of them.
+class FPAnalyzer {
+ public:
+  FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks);
+  ~FPAnalyzer() = default;
+
+  void Pass1Analyze() {
+    for (FPRow &fp_row : rows_) fp_row.Pass1Analyze();
+  }
+
+  // Estimate character pitch for each row.  The argument pass1 can be
+  // set to true if the function is called after Pass1Analyze(), to
+  // eliminate some redundant computation.
+  void EstimatePitch(bool pass1);
+
+  // True only if rows_.size() exceeds the bad + tall row count by more than one.
+  bool maybe_fixed_pitch() {
+    return !rows_.empty() &&
+           rows_.size() > num_bad_rows_ + num_tall_rows_ + 1;
+  }
+
+  void MergeFragments() {
+    for (FPRow &fp_row : rows_) fp_row.MergeFragments();
+  }
+
+  void FinalizeLargeChars() {
+    for (FPRow &fp_row : rows_) fp_row.FinalizeLargeChars();
+  }
+
+  // Runs pass 2 on every row; true if any row reported a change.
+  bool Pass2Analyze() {
+    bool any_changed = false;
+    for (FPRow &fp_row : rows_) {
+      any_changed = fp_row.Pass2Analyze() || any_changed;
+    }
+    return any_changed;
+  }
+
+  void OutputEstimations() {
+    for (FPRow &fp_row : rows_) fp_row.OutputEstimations();
+    // Don't we need page-level estimation of gaps/spaces?
+  }
+
+  void DebugOutputResult() {
+    tprintf("FPAnalyzer: final result\n");
+    size_t row_index = 0;
+    for (FPRow &fp_row : rows_) fp_row.DebugOutputResult(row_index++);
+  }
+
+  size_t num_rows() { return rows_.size(); }
+
+  // Returns the upper limit for pass2 loop iteration.
+  unsigned max_iteration() {
+    // We're fixing at least one character per iteration. So basically
+    // we shouldn't require more than max_chars_per_row_ iterations.
+    return max_chars_per_row_ + 100;
+  }
+
+ private:
+  ICOORD page_tr_;
+  std::vector<FPRow> rows_;
+  unsigned num_tall_rows_;
+  unsigned num_bad_rows_;
+  // TODO: num_empty_rows_ is incremented, but never used otherwise.
+  unsigned num_empty_rows_;
+  unsigned max_chars_per_row_;
+};
+
+// Runs find_repeated_chars() on every block that has rows, then creates
+// one FPRow per row while counting near-empty rows and the longest row.
+FPAnalyzer::FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
+    : page_tr_(page_tr),
+      num_tall_rows_(0),
+      num_bad_rows_(0),
+      num_empty_rows_(0),
+      max_chars_per_row_(0) {
+  TO_BLOCK_IT block_it(port_blocks);
+
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+       block_it.forward()) {
+    TO_BLOCK *block = block_it.data();
+    if (block->get_rows()->empty()) continue;
+    ASSERT_HOST(block->xheight > 0);
+    find_repeated_chars(block, false);
+  }
+
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+       block_it.forward()) {
+    TO_ROW_IT row_it = block_it.data()->get_rows();
+    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+      FPRow fp_row;
+      fp_row.Init(row_it.data());
+      rows_.push_back(fp_row);
+      const size_t char_count = rows_.back().num_chars();
+      if (char_count <= 1) ++num_empty_rows_;
+      if (char_count > max_chars_per_row_) max_chars_per_row_ = char_count;
+    }
+  }
+}
+
+// Re-estimates every row's pitch, then sets each row's estimated pitch from
+// its own data or from the page-wide (height + gap) vs. pitch statistics.
+void FPAnalyzer::EstimatePitch(bool pass1) {
+  LocalCorrelation pitch_height_stats;
+
+  num_tall_rows_ = 0;
+  num_bad_rows_ = 0;
+  pitch_height_stats.Clear();
+  for (FPRow &fp_row : rows_) {
+    fp_row.EstimatePitch(pass1);
+    if (!fp_row.good_pitches()) {
+      ++num_bad_rows_;
+      continue;
+    }
+    pitch_height_stats.Add(fp_row.height() + fp_row.gap(),
+                           fp_row.pitch(), fp_row.good_pitches());
+    if (fp_row.height_pitch_ratio() > 1.1) ++num_tall_rows_;
+  }
+
+  pitch_height_stats.Finish();
+  for (FPRow &fp_row : rows_) {
+    if (fp_row.good_pitches() >= 5) {
+      // We have enough evidences. Just use the pitch estimation
+      // from this row.
+      fp_row.set_estimated_pitch(fp_row.pitch());
+      continue;
+    }
+    if (fp_row.num_chars() <= 1) continue;
+
+    const float page_pitch =
+        pitch_height_stats.EstimateYFor(fp_row.height() + fp_row.gap(), 0.1f);
+    // CJK characters are more likely to be fragmented than poorly
+    // chopped. So trust the page-level estimation of character
+    // pitch only if it's larger than row-level estimation or
+    // row-level estimation is too large (2x bigger than row height).
+    const bool use_page_pitch = page_pitch > fp_row.pitch() ||
+                                fp_row.pitch() > fp_row.height() * 2.0;
+    fp_row.set_estimated_pitch(use_page_pitch ? page_pitch : fp_row.pitch());
+  }
+}
+
+// Runs the whole CJK fixed pitch analysis on port_blocks and stores the results.
+void compute_fixed_pitch_cjk(ICOORD page_tr,
+                             TO_BLOCK_LIST *port_blocks) {
+  FPAnalyzer analyzer(page_tr, port_blocks);
+  if (analyzer.num_rows() == 0) return;
+
+  // Pass 1 runs twice: the second round benefits from the first estimates.
+  for (int round = 0; round < 2; ++round) {
+    analyzer.Pass1Analyze();
+    analyzer.EstimatePitch(true);
+  }
+
+  // Early exit if the page doesn't seem to contain fixed pitch rows.
+  if (!analyzer.maybe_fixed_pitch()) {
+    if (textord_debug_pitch_test) {
+      tprintf("Page doesn't seem to contain fixed pitch rows\n");
+    }
+    return;
+  }
+
+  unsigned iteration = 0;
+  for (;;) {
+    analyzer.MergeFragments();
+    analyzer.FinalizeLargeChars();
+    analyzer.EstimatePitch(false);
+    ++iteration;
+    if (!analyzer.Pass2Analyze() || iteration >= analyzer.max_iteration()) break;
+  }
+
+  if (textord_debug_pitch_test) {
+    tprintf("compute_fixed_pitch_cjk finished after %u iteration (limit=%u)\n",
+            iteration, analyzer.max_iteration());
+  }
+
+  analyzer.OutputEstimations();
+  if (textord_debug_pitch_test) analyzer.DebugOutputResult();
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/cjkpitch.h b/tesseract/src/textord/cjkpitch.h
new file mode 100644
index 00000000..d42ab79f
--- /dev/null
+++ b/tesseract/src/textord/cjkpitch.h
@@ -0,0 +1,75 @@
+///////////////////////////////////////////////////////////////////////
+// File:        cjkpitch.h
+// Description: Code to determine fixed pitchness and the pitch if fixed,
+//              for CJK text.
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: takenaka@google.com (Hiroshi Takenaka)
+// Created:     Mon Jun 27 12:48:35 JST 2011
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+#ifndef CJKPITCH_H_
+#define CJKPITCH_H_
+
+#include          "blobbox.h"
+
+namespace tesseract {
+
+// Function to test the "fixed-pitchness" of the input text and to estimate
+// character pitch parameters for it, based on a CJK fixed-pitch layout
+// model.
+//
+// This function assumes that a fixed-pitch CJK text has following
+// characteristics:
+//
+// - Most glyphs are designed to fit within the same sized square
+//   (imaginary body). Also they are aligned to the center of their
+//   imaginary bodies.
+// - The imaginary body is always a regular rectangle.
+// - There may be some extra space between character bodies
+//   (tracking).
+// - There may be some extra space after punctuation.
+// - The text is *not* space-delimited. Thus spaces are rare.
+// - A character may consist of multiple unconnected blobs.
+//
+// The function works in two passes.  On pass 1, it looks for "good"
+// blobs that have the same pitch on both sides and look like a
+// complete CJK character. It then estimates the character pitch for
+// every row, based on those good blobs. If we couldn't find enough
+// good blobs for a row, then the pitch is estimated from other rows
+// with similar character height instead.
+//
+// Pass 2 is an iterative process to fit the blobs into fixed-pitch
+// character cells. Once we have estimated the character pitch, blobs
+// that are almost as large as the pitch can be considered to be
+// complete characters. And once we know that some characters are
+// complete characters, we can estimate the region occupied by its
+// neighbors. And so on.
+//
+// We repeat the process until all ambiguities are resolved. Then make
+// the final decision about fixed-pitchness of each row and compute
+// pitch and spacing parameters.
+//
+// (If a row is considered to be proportional, pitch_decision for the
+// row is set to PITCH_CORR_PROP and the later phase
+// (i.e. Textord::to_spacing()) should determine its spacing
+// parameters)
+//
+// This function doesn't provide all information required by
+// fixed_pitch_words() and the rows need to be processed with
+// make_prop_words() even if they are fixed pitched.
+void compute_fixed_pitch_cjk(ICOORD page_tr,               // top right
+                             TO_BLOCK_LIST *port_blocks);  // input list
+
+} // namespace tesseract
+
+#endif  // CJKPITCH_H_
diff --git a/tesseract/src/textord/colfind.cpp b/tesseract/src/textord/colfind.cpp
new file mode 100644
index 00000000..e305a2c3
--- /dev/null
+++ b/tesseract/src/textord/colfind.cpp
@@ -0,0 +1,1642 @@
+///////////////////////////////////////////////////////////////////////
+// File:        colfind.cpp
+// Description: Class to hold BLOBNBOXs in a grid for fast access
+//              to neighbours.
+// Author:      Ray Smith
+//
+// (C) Copyright 2007, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "colfind.h"
+
+#include "ccnontextdetect.h"
+#include "colpartition.h"
+#include "colpartitionset.h"
+#ifndef DISABLED_LEGACY_ENGINE
+#include "equationdetectbase.h"
+#endif
+#include "linefind.h"
+#include "normalis.h"
+#include "strokewidth.h"
+#include "blobbox.h"
+#include "scrollview.h"
+#include "tablefind.h"
+#include "params.h"
+#include "workingpartset.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+// When assigning columns, the max number of misfit grid rows/ColPartitionSets
+// that can be ignored.
+const int kMaxIncompatibleColumnCount = 2;
+// Max fraction of mean_column_gap_ for the gap between two partitions within a
+// column to allow them to merge.
+const double kHorizontalGapMergeFraction = 0.5;
+// Minimum gutter width as a fraction of gridsize
+const double kMinGutterWidthGrid = 0.5;
+// Max multiple of a partition's median size as a distance threshold for
+// adding noise blobs.
+const double kMaxDistToPartSizeRatio = 1.5;
+
+#ifndef GRAPHICS_DISABLED
+static BOOL_VAR(textord_tabfind_show_initial_partitions,
+                false, "Show partition bounds");
+static BOOL_VAR(textord_tabfind_show_reject_blobs,
+                false, "Show blobs rejected as noise");
+static INT_VAR(textord_tabfind_show_partitions, 0,
+              "Show partition bounds, waiting if >1 (ScrollView)");
+static BOOL_VAR(textord_tabfind_show_columns, false, "Show column bounds (ScrollView)");
+static BOOL_VAR(textord_tabfind_show_blocks, false, "Show final block bounds (ScrollView)");
+#endif
+static BOOL_VAR(textord_tabfind_find_tables, true, "run table detection");
+
+#ifndef GRAPHICS_DISABLED
+ScrollView* ColumnFinder::blocks_win_ = nullptr;
+#endif
+
+// Gridsize is an estimate of the text size in the image. A suitable value
+// is in TO_BLOCK::line_size after find_components has been used to make
+// the blobs. bleft and tright are the bounds of the image (or rectangle)
+// being processed. vlines is a (possibly empty) list of TabVector and
+// vertical_x and y are the sum logical vertical vector produced by
+// LineFinder::FindVerticalLines. hlines is added to horizontal_lines_.
+ColumnFinder::ColumnFinder(int gridsize,
+                           const ICOORD& bleft, const ICOORD& tright,
+                           int resolution, bool cjk_script,
+                           double aligned_gap_fraction,
+                           TabVector_LIST* vlines, TabVector_LIST* hlines,
+                           int vertical_x, int vertical_y)
+  : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y,
+            resolution),
+    cjk_script_(cjk_script),
+    min_gutter_width_(static_cast<int>(kMinGutterWidthGrid * gridsize)),
+    mean_column_gap_(tright.x() - bleft.x()),  // Starts out as the full width.
+    tabfind_aligned_gap_fraction_(aligned_gap_fraction),
+    deskew_(0.0f, 0.0f),
+    reskew_(1.0f, 0.0f), rotation_(1.0f, 0.0f), rerotate_(1.0f, 0.0f),
+    text_rotation_(0.0f, 0.0f),
+    best_columns_(nullptr), stroke_width_(nullptr),
+    part_grid_(gridsize, bleft, tright), nontext_map_(nullptr),
+    projection_(resolution),
+    denorm_(nullptr), input_blobs_win_(nullptr), equation_detect_(nullptr) {
+  TabVector_IT h_it(&horizontal_lines_);
+  h_it.add_list_after(hlines);
+}
+
+// Destructor: frees the owned helpers, the denorm_ chain and leftover boxes.
+ColumnFinder::~ColumnFinder() {
+  column_sets_.delete_data_pointers();
+  delete [] best_columns_;
+  delete stroke_width_;
+  delete input_blobs_win_;
+  pixDestroy(&nontext_map_);
+  // Delete the denorm_ chain one element at a time, following the
+  // predecessor links until the end is reached.
+  for (DENORM* dead_denorm = denorm_; dead_denorm != nullptr;
+       dead_denorm = denorm_) {
+    denorm_ = const_cast<DENORM*>(dead_denorm->predecessor());
+    delete dead_denorm;
+  }
+
+  // The ColPartitions are destroyed automatically, but any boxes in
+  // the noise_parts_ list are owned and need to be deleted explicitly.
+  ColPartition_IT part_it(&noise_parts_);
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    part_it.data()->DeleteBoxes();
+  }
+  // Likewise any boxes in the good_parts_ list need to be deleted.
+  // These are just the image parts. Text parts have already given their
+  // boxes on to the TO_BLOCK, and have empty lists.
+  part_it.set_to_list(&good_parts_);
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    part_it.data()->DeleteBoxes();
+  }
+  // Also, any blobs on the image_bblobs_ list need to have their cblobs
+  // deleted. This only happens if there has been an early return from
+  // FindColumns, as in a normal return, the blobs go into the grid and
+  // end up in noise_parts_, good_parts_ or the output blocks.
+  BLOBNBOX_IT bb_it(&image_bblobs_);
+  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
+    delete bb_it.data()->cblob();
+  }
+}
+
+// Performs initial processing on the blobs in the input_block:
+// Sets up the part_grid_, stroke_width_ and nontext_map_.
+// Obvious noise blobs are filtered out and used to mark the nontext_map_.
+// Initial stroke-width analysis is used to get local text alignment
+// direction, so the textline projection_ map can be setup.
+// On return, IsVerticallyAlignedText may optionally be called to
+// determine the gross textline alignment of the page.
+void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode,
+                                       Pix* photo_mask_pix,
+                                       TO_BLOCK* input_block) {
+  part_grid_.Init(gridsize(), bleft(), tright());
+  delete stroke_width_;  // Replace any existing StrokeWidth.
+  stroke_width_ = new StrokeWidth(gridsize(), bleft(), tright());
+  min_gutter_width_ = static_cast<int>(kMinGutterWidthGrid * gridsize());
+  input_block->ReSetAndReFilterBlobs();
+  #ifndef GRAPHICS_DISABLED
+  if (textord_tabfind_show_blocks) {
+    // NOTE(review): a previously created window is not deleted here - confirm.
+    input_blobs_win_ = MakeWindow(0, 0, "Filtered Input Blobs");
+    input_block->plot_graded_blobs(input_blobs_win_);
+  }
+  #endif // !GRAPHICS_DISABLED
+  SetBlockRuleEdges(input_block);
+  pixDestroy(&nontext_map_);  // Discard any existing map; it is recomputed below.
+  // Run a preliminary strokewidth neighbour detection on the medium blobs.
+  stroke_width_->SetNeighboursOnMediumBlobs(input_block);
+  CCNonTextDetect nontext_detect(gridsize(), bleft(), tright());
+  // Remove obvious noise and make the initial non-text map.
+  nontext_map_ = nontext_detect.ComputeNonTextMask(textord_debug_tabfind,
+                                                   photo_mask_pix, input_block);
+  stroke_width_->FindTextlineDirectionAndFixBrokenCJK(pageseg_mode, cjk_script_,
+                                                      input_block);
+  // Clear the strokewidth grid ready for rotation or leader finding.
+  stroke_width_->Clear();
+}
+
+// Reports whether the text in block appears vertically aligned, delegating
+// the decision to stroke_width_->TestVerticalTextDirection, which is given
+// find_vertical_text_ratio and fills osd_blobs with blobs of moderate aspect
+// ratio in the most frequent writing direction, for orientation and script
+// detection to test the character orientation.
+// block is the single block for the whole page or rectangle to be OCRed.
+// Vertical alignment can mean a genuinely vertical writing direction (e.g.
+// Japanese), or horizontal text in an image that is not the right way up.
+bool ColumnFinder::IsVerticallyAlignedText(double find_vertical_text_ratio,
+                                           TO_BLOCK* block,
+                                           BLOBNBOX_CLIST* osd_blobs) {
+  const bool is_vertical = stroke_width_->TestVerticalTextDirection(
+      find_vertical_text_ratio, block, osd_blobs);
+  return is_vertical;
+}
+
+// Rotates the blobs and the TabVectors so that the gross writing direction
+// (text lines) is horizontal and lines are read down the page.
+// The rotation applied here is stored in rotation_ (inverse in rerotate_).
+// A second rotation, to be applied during recognition so that the rotated
+// blobs are upright for the classifier, is stored in text_rotation_.
+// Also creates denorm_, which must not exist yet, from rotation_.
+//
+// Arguments:
+//   vertical_text_lines true if the text lines are vertical.
+//   recognition_rotation [0..3] is the number of anti-clockwise 90 degree
+//   rotations from osd required for the text to be upright and readable.
+void ColumnFinder::CorrectOrientation(TO_BLOCK* block,
+                                      bool vertical_text_lines,
+                                      int recognition_rotation) {
+  const FCOORD quarter_turn_ccw(0.0f, 1.0f);
+  const FCOORD quarter_turn_cw(0.0f, -1.0f);
+  const FCOORD half_turn(-1.0f, 0.0f);
+  const FCOORD identity(1.0f, 0.0f);
+
+  // Page rotation that makes the text upright, per recognition_rotation.
+  switch (recognition_rotation) {
+    case 1:
+      rotation_ = quarter_turn_ccw;
+      break;
+    case 2:
+      rotation_ = half_turn;
+      break;
+    case 3:
+      rotation_ = quarter_turn_cw;
+      break;
+    default:
+      rotation_ = identity;
+  }
+  text_rotation_ = identity;
+  // An odd number of quarter turns (page at 90 or 270 degrees) means the
+  // true writing direction is the opposite of the one inferred from lines.
+  const bool odd_quarter_turns = (recognition_rotation & 1) != 0;
+  if (odd_quarter_turns) vertical_text_lines = !vertical_text_lines;
+  // For writing that is still vertical, the convention is to turn the page
+  // ccw 90 degrees so the text lines are horizontal, and to mark the blobs
+  // for a cw 90 degree turn at classification, keeping the text order right.
+  if (vertical_text_lines) {
+    rotation_.rotate(quarter_turn_ccw);
+    text_rotation_.rotate(quarter_turn_cw);
+  }
+  // rerotate_ is the inverse of rotation_.
+  rerotate_ = FCOORD(rotation_.x(), -rotation_.y());
+  const bool is_identity = rotation_.x() == 1.0f && rotation_.y() == 0.0f;
+  if (!is_identity) {
+    // Rotate every blob list and the tab vectors.
+    RotateBlobList(rotation_, &block->large_blobs);
+    RotateBlobList(rotation_, &block->blobs);
+    RotateBlobList(rotation_, &block->small_blobs);
+    RotateBlobList(rotation_, &block->noise_blobs);
+    TabFind::ResetForVerticalText(rotation_, rerotate_, &horizontal_lines_,
+                                  &min_gutter_width_);
+    part_grid_.Init(gridsize(), bleft(), tright());
+    // Rotation may move blobs between lists: reset and re-filter by size.
+    block->ReSetAndReFilterBlobs();
+    SetBlockRuleEdges(block);
+    stroke_width_->CorrectForRotation(rerotate_, &part_grid_);
+  }
+  if (textord_debug_tabfind) {
+    tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n",
+            vertical_text_lines, recognition_rotation,
+            rotation_.x(), rotation_.y(),
+            text_rotation_.x(), text_rotation_.y());
+  }
+  // Set up the denormalization for the page rotation.
+  ASSERT_HOST(denorm_ == nullptr);
+  denorm_ = new DENORM;
+  denorm_->SetupNormalization(nullptr, &rotation_, nullptr,
+                              0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
+}
+
+// Finds blocks of text, image, rule line, table etc, returning them in the
+// blocks and to_blocks
+// (Each TO_BLOCK points to the basic BLOCK and adds more information.)
+// Image blocks are generated by a combination of photo_mask_pix (which may
+// NOT be nullptr) and the rejected text found during preliminary textline
+// finding.
+// The input_block is the result of a call to find_components, and contains
+// the blobs found in the image or rectangle to be OCRed. These blobs will be
+// removed and placed in the output blocks, while unused ones will be deleted.
+// If single_column is true, the input is treated as single column, but
+// it is still divided into blocks of equal line spacing/text size.
+// scaled_color is scaled down by scaled_factor from the input color image,
+// and may be nullptr if the input was not color.
+// grey_pix is optional, but if present must match the photo_mask_pix in size,
+// and must be a *real* grey image instead of binary_pix * 255.
+// thresholds_pix is expected to be present iff grey_pix is present and
+// can be an integer factor reduction of the grey_pix. It represents the
+// thresholds that were used to create the binary_pix from the grey_pix.
+// If diacritic_blobs is non-null, then diacritics/noise blobs, that would
+// confuse layout analysis by causing textline overlap, are placed there,
+// with the expectation that they will be reassigned to words later and
+// noise/diacriticness determined via classification.
+// Returns -1 if the user hits the 'd' key in the blocks window (debug mode),
+// requesting a retry with more debug info; otherwise (or if empty) returns 0.
+int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color,
+                             int scaled_factor, TO_BLOCK* input_block,
+                             Pix* photo_mask_pix, Pix* thresholds_pix,
+                             Pix* grey_pix, DebugPixa* pixa_debug,
+                             BLOCK_LIST* blocks, BLOBNBOX_LIST* diacritic_blobs,
+                             TO_BLOCK_LIST* to_blocks) {
+  pixOr(photo_mask_pix, photo_mask_pix, nontext_map_);  // Mask |= nontext.
+  stroke_width_->FindLeaderPartitions(input_block, &part_grid_);
+  stroke_width_->RemoveLineResidue(&big_parts_);
+  FindInitialTabVectors(nullptr, min_gutter_width_, tabfind_aligned_gap_fraction_,
+                        input_block);
+  SetBlockRuleEdges(input_block);
+  stroke_width_->GradeBlobsIntoPartitions(
+      pageseg_mode, rerotate_, input_block, nontext_map_, denorm_, cjk_script_,
+      &projection_, diacritic_blobs, &part_grid_, &big_parts_);
+  if (!PSM_SPARSE(pageseg_mode)) {
+    ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
+                                   input_block, this, pixa_debug, &part_grid_,
+                                   &big_parts_);
+    ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_,
+                                             photo_mask_pix);
+    ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
+                                   input_block, this, pixa_debug, &part_grid_,
+                                   &big_parts_);
+  }
+  part_grid_.ReTypeBlobs(&image_bblobs_);
+  TidyBlobs(input_block);
+  Reset();
+  // TODO(rays) need to properly handle big_parts_.
+  ColPartition_IT p_it(&big_parts_);
+  for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward())
+    p_it.data()->DisownBoxesNoAssert();
+  big_parts_.clear();
+  delete stroke_width_;
+  stroke_width_ = nullptr;
+  // Compute the edge offsets whether or not there is a grey_pix. It is done
+  // here as the c_blobs haven't been touched by rotation or anything yet,
+  // so no denorm is required, yet the text has been separated from image, so
+  // no time is wasted running it on image blobs.
+  input_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
+
+  // A note about handling right-to-left scripts (Hebrew/Arabic):
+  // The columns must be reversed and come out in right-to-left instead of
+  // the normal left-to-right order. Because the left-to-right ordering
+  // is implicit in many data structures, it is simpler to fool the algorithms
+  // into thinking they are dealing with left-to-right text.
+  // To do this, we reflect the needed data in the y-axis and then reflect
+  // the blocks back after they have been created. This is a temporary
+  // arrangement that is confined to this function only, so the reflection
+  // is completely invisible in the output blocks.
+  // The only objects reflected are:
+  // The vertical separator lines that have already been found;
+  // The bounding boxes of all BLOBNBOXES on all lists on the input_block
+  // plus the image_bblobs. The outlines are not touched, since they are
+  // not looked at.
+  bool input_is_rtl = input_block->block->right_to_left();
+  if (input_is_rtl) {
+    // Reflect the vertical separator lines (member of TabFind).
+    ReflectInYAxis();
+    // Reflect the blob boxes.
+    ReflectForRtl(input_block, &image_bblobs_);
+    part_grid_.ReflectInYAxis();
+  }
+
+  if (!PSM_SPARSE(pageseg_mode)) {
+    if (!PSM_COL_FIND_ENABLED(pageseg_mode)) {
+      // No tab stops needed. Just the grid that FindTabVectors makes.
+      DontFindTabVectors(&image_bblobs_, input_block, &deskew_, &reskew_);
+    } else {
+      SetBlockRuleEdges(input_block);
+      // Find the tab stops, estimate skew, and deskew the tabs, blobs and
+      // part_grid_.
+      FindTabVectors(&horizontal_lines_, &image_bblobs_, input_block,
+                     min_gutter_width_, tabfind_aligned_gap_fraction_,
+                     &part_grid_, &deskew_, &reskew_);
+      // Add the deskew to the denorm_.
+      auto* new_denorm = new DENORM;
+      new_denorm->SetupNormalization(nullptr, &deskew_, denorm_,
+                                     0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
+      denorm_ = new_denorm;
+    }
+    SetBlockRuleEdges(input_block);
+    part_grid_.SetTabStops(this);
+
+    // Make the column_sets_.
+    if (!MakeColumns(false)) {
+      tprintf("Empty page!!\n");
+      part_grid_.DeleteParts();
+      return 0;  // This is an empty page.
+    }
+
+    // Refill the grid using rectangular spreading, and get the benefit
+    // of the completed tab vectors marking the rule edges of each blob.
+    Clear();
+    #ifndef GRAPHICS_DISABLED
+    if (textord_tabfind_show_reject_blobs) {
+      ScrollView* rej_win = MakeWindow(500, 300, "Rejected blobs");
+      input_block->plot_graded_blobs(rej_win);
+    }
+    #endif // !GRAPHICS_DISABLED
+    InsertBlobsToGrid(false, false, &image_bblobs_, this);
+    InsertBlobsToGrid(true, true, &input_block->blobs, this);
+
+    part_grid_.GridFindMargins(best_columns_);
+    // Split and merge the partitions by looking at local neighbours.
+    GridSplitPartitions();
+    // Resolve unknown partitions by adding to an existing partition, fixing
+    // the type, or declaring them noise.
+    part_grid_.GridFindMargins(best_columns_);
+    GridMergePartitions();
+    // Insert any unused noise blobs that are close enough to an appropriate
+    // partition.
+    InsertRemainingNoise(input_block);
+    // Add horizontal and vertical line separators as partitions.
+    GridInsertHLinePartitions();
+    GridInsertVLinePartitions();
+    // Recompute margins based on a local neighbourhood search.
+    part_grid_.GridFindMargins(best_columns_);
+    SetPartitionTypes();
+  }
+#ifndef GRAPHICS_DISABLED
+  if (textord_tabfind_show_initial_partitions) {
+    ScrollView* part_win = MakeWindow(100, 300, "InitialPartitions");
+    part_grid_.DisplayBoxes(part_win);
+    DisplayTabVectors(part_win);
+  }
+#endif
+  if (!PSM_SPARSE(pageseg_mode)) {
+  #ifndef DISABLED_LEGACY_ENGINE
+    if (equation_detect_) {
+      equation_detect_->FindEquationParts(&part_grid_, best_columns_);
+    }
+  #endif
+    if (textord_tabfind_find_tables) {
+      TableFinder table_finder;
+      table_finder.Init(gridsize(), bleft(), tright());
+      table_finder.set_resolution(resolution_);
+      table_finder.set_left_to_right_language(
+          !input_block->block->right_to_left());
+      // Copy cleaned partitions from part_grid_ to clean_part_grid_ and
+      // insert dot-like noise into period_grid_
+      table_finder.InsertCleanPartitions(&part_grid_, input_block);
+      // Get Table Regions
+      table_finder.LocateTables(&part_grid_, best_columns_, WidthCB(), reskew_);
+    }
+    GridRemoveUnderlinePartitions();
+    part_grid_.DeleteUnknownParts(input_block);
+
+    // Build the partitions into chains that belong in the same block and
+    // refine into one-to-one links, then smooth the types within each chain.
+    part_grid_.FindPartitionPartners();
+    part_grid_.FindFigureCaptions();
+    part_grid_.RefinePartitionPartners(true);
+    SmoothPartnerRuns();
+
+    #ifndef GRAPHICS_DISABLED
+    if (textord_tabfind_show_partitions) {
+      ScrollView* window = MakeWindow(400, 300, "Partitions");
+      if (window != nullptr) {
+        part_grid_.DisplayBoxes(window);
+        if (!textord_debug_printable)
+          DisplayTabVectors(window);
+        if (window != nullptr && textord_tabfind_show_partitions > 1) {
+          delete window->AwaitEvent(SVET_DESTROY);
+        }
+      }
+    }
+    #endif // !GRAPHICS_DISABLED
+    part_grid_.AssertNoDuplicates();
+  }
+  // Ownership of the ColPartitions moves from part_sets_ to part_grid_ here,
+  // and ownership of the BLOBNBOXes moves to the ColPartitions.
+  // (They were previously owned by the block or the image_bblobs list.)
+  ReleaseBlobsAndCleanupUnused(input_block);
+  // Ownership of the ColPartitions moves from part_grid_ to good_parts_ and
+  // noise_parts_ here. In text blocks, ownership of the BLOBNBOXes moves
+  // from the ColPartitions to the output TO_BLOCK. In non-text, the
+  // BLOBNBOXes stay with the ColPartitions and get deleted in the destructor.
+  if (PSM_SPARSE(pageseg_mode))
+    part_grid_.ExtractPartitionsAsBlocks(blocks, to_blocks);
+  else
+    TransformToBlocks(blocks, to_blocks);
+  if (textord_debug_tabfind) {
+    tprintf("Found %d blocks, %d to_blocks\n",
+            blocks->length(), to_blocks->length());
+  }
+
+#ifndef GRAPHICS_DISABLED
+  DisplayBlocks(blocks);
+#endif
+  RotateAndReskewBlocks(input_is_rtl, to_blocks);
+  int result = 0;
+  #ifndef GRAPHICS_DISABLED
+  if (blocks_win_ != nullptr) {
+    bool waiting = false;
+    do {
+      waiting = false;
+      SVEvent* event = blocks_win_->AwaitEvent(SVET_ANY);
+      if (event->type == SVET_INPUT && event->parameter != nullptr) {
+        if (*event->parameter == 'd')
+          result = -1;
+        else
+          blocks->clear();  // Any other input discards the blocks.
+      } else if (event->type == SVET_DESTROY) {
+        blocks_win_ = nullptr;  // Forget the window after SVET_DESTROY.
+      } else {
+        waiting = true;  // Ignore other events and keep waiting.
+      }
+      delete event;
+    } while (waiting);
+  }
+  #endif // !GRAPHICS_DISABLED
+  return result;
+}
+
+// Gets the deskew rotation and its inverse: reskew_, and reskew_ with -y.
+void ColumnFinder::GetDeskewVectors(FCOORD* deskew, FCOORD* reskew) {
+  *reskew = reskew_;
+  const FCOORD inverse(reskew_.x(), -reskew_.y());
+  *deskew = inverse;
+}
+
+#ifndef DISABLED_LEGACY_ENGINE
+void ColumnFinder::SetEquationDetect(EquationDetectBase* detect) {
+  this->equation_detect_ = detect;  // Consulted later by FindBlocks.
+}
+#endif
+
+//////////////// PRIVATE CODE /////////////////////////
+
+#ifndef GRAPHICS_DISABLED
+
+// If textord_tabfind_show_blocks, draws blob and block boxes in "Blocks".
+void ColumnFinder::DisplayBlocks(BLOCK_LIST* blocks) {
+  if (!textord_tabfind_show_blocks) return;
+  // Reuse the existing window if there is one, otherwise make it.
+  if (blocks_win_ != nullptr)
+    blocks_win_->Clear();
+  else
+    blocks_win_ = MakeWindow(700, 300, "Blocks");
+  DisplayBoxes(blocks_win_);
+  // Plot each output block, passing serial numbers from 1 upwards.
+  int next_serial = 1;
+  BLOCK_IT it(blocks);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    it.data()->pdblk.plot(blocks_win_, next_serial,
+                          textord_debug_printable ? ScrollView::BLUE
+                                                  : ScrollView::GREEN);
+    ++next_serial;
+  }
+  blocks_win_->Update();
+}
+
+// Draws the column edges from best_columns_ per grid row; sets is unused.
+void ColumnFinder::DisplayColumnBounds(PartSetVector* sets) {
+  ScrollView* window = MakeWindow(50, 300, "Columns");
+  DisplayBoxes(window);
+  window->Pen(textord_debug_printable ? ScrollView::BLUE : ScrollView::GREEN);
+  for (int row = 0; row < gridheight_; ++row) {
+    if (best_columns_[row] == nullptr) continue;
+    const auto row_start = row * gridsize_;
+    best_columns_[row]->DisplayColumnEdges(row_start, row_start + gridsize_,
+                                           window);
+  }
+}
+
+#endif // !GRAPHICS_DISABLED
+
+// Sets up column_sets_, then best_columns_ and mean_column_gap_ from them.
+// Returns false if the page is empty (no part sets or no column sets).
+bool ColumnFinder::MakeColumns(bool single_column) {
+  // part_sets is a temporary structure used during column creation: a
+  // vector of ColPartitionSets, representing ColPartitions found at
+  // horizontal slices through the page.
+  PartSetVector part_sets;
+  if (!single_column) {
+    if (!part_grid_.MakeColPartSets(&part_sets))
+      return false;  // Empty page.
+    ASSERT_HOST(part_grid_.gridheight() == gridheight_);
+    // Try using only the good parts first.
+    bool good_only = true;
+    do {
+      for (int i = 0; i < gridheight_; ++i) {
+        ColPartitionSet* line_set = part_sets.get(i);
+        if (line_set != nullptr && line_set->LegalColumnCandidate()) {
+          ColPartitionSet* column_candidate = line_set->Copy(good_only);
+          if (column_candidate != nullptr)
+            column_candidate->AddToColumnSetsIfUnique(&column_sets_, WidthCB());
+        }
+      }
+      good_only = !good_only;  // A second pass, if needed, is not good_only.
+    } while (column_sets_.empty() && !good_only);
+    if (textord_debug_tabfind)
+      PrintColumnCandidates("Column candidates");
+    // Improve the column candidates against themselves.
+    ImproveColumnCandidates(&column_sets_, &column_sets_);
+    if (textord_debug_tabfind)
+      PrintColumnCandidates("Improved columns");
+    // Improve the column candidates using the part_sets.
+    ImproveColumnCandidates(&part_sets, &column_sets_);
+  }
+  ColPartitionSet* single_column_set =
+      part_grid_.MakeSingleColumnSet(WidthCB());
+  if (single_column_set != nullptr) {
+    // Always add the single column set as a backup even if not in
+    // single column mode.
+    single_column_set->AddToColumnSetsIfUnique(&column_sets_, WidthCB());
+  }
+  if (textord_debug_tabfind)
+    PrintColumnCandidates("Final Columns");
+  bool has_columns = !column_sets_.empty();
+  if (has_columns) {
+    // Divide the page into sections of uniform column layout.
+    bool any_multi_column = AssignColumns(part_sets);
+#ifndef GRAPHICS_DISABLED
+    if (textord_tabfind_show_columns) {
+      DisplayColumnBounds(&part_sets);
+    }
+#endif
+    ComputeMeanColumnGap(any_multi_column);
+  }
+  for (int i = 0; i < part_sets.size(); ++i) {
+    ColPartitionSet* line_set = part_sets.get(i);
+    if (line_set != nullptr) {
+      line_set->RelinquishParts();
+      delete line_set;
+    }
+  }
+  return has_columns;
+}
+
+// Tries to improve column_sets by expanding each candidate's columns and
+// adding partitions taken from the partition sets in src_sets.
+// src_sets may be the same vector as column_sets, in which case the
+// candidates are used as the source for improving themselves.
+void ColumnFinder::ImproveColumnCandidates(PartSetVector* src_sets,
+                                           PartSetVector* column_sets) {
+  PartSetVector originals;
+  originals.move(column_sets);
+  if (src_sets == column_sets)
+    src_sets = &originals;
+  const int original_count = originals.size();
+  // The first pass copies with good_only true; a second pass with good_only
+  // false runs only if the first pass produced no candidates at all.
+  for (int pass = 0; pass < 2; ++pass) {
+    const bool good_only = (pass == 0);
+    for (int index = 0; index < original_count; ++index) {
+      ColPartitionSet* column_candidate = originals.get(index);
+      ASSERT_HOST(column_candidate != nullptr);
+      ColPartitionSet* better = column_candidate->Copy(good_only);
+      if (better == nullptr) continue;
+      better->ImproveColumnCandidate(WidthCB(), src_sets);
+      better->AddToColumnSetsIfUnique(column_sets, WidthCB());
+    }
+    if (!column_sets->empty()) break;
+  }
+  if (!column_sets->empty())
+    originals.delete_data_pointers();
+  else
+    column_sets->move(&originals);  // Nothing better: restore the originals.
+}
+
+// Prints the number of column candidates under the given title and, at
+// debug level 3 or more, the details of every candidate.
+void ColumnFinder::PrintColumnCandidates(const char* title) {
+  const int candidate_count = column_sets_.size();
+  tprintf("Found %d %s:\n", candidate_count, title);
+  if (textord_debug_tabfind < 3) return;
+  for (int index = 0; index < candidate_count; ++index) {
+    ColPartitionSet* candidate = column_sets_.get(index);
+    candidate->Print();
+  }
+}
+
+// Finds the optimal set of columns that cover the entire image with as
+// few changes in column partition as possible.
+// NOTE: this could be thought of as an optimization problem, but a simple
+// greedy algorithm is used instead. The algorithm repeatedly finds the modal
+// compatible column in an unassigned region and uses that with the extra
+// tweak of extending the modal region over small breaks in compatibility.
+// Where modal regions overlap, the boundary is chosen so as to minimize
+// the cost in terms of ColPartitions not fitting an approved column.
+// Returns true if any part of the page is multi-column.
+bool ColumnFinder::AssignColumns(const PartSetVector& part_sets) {
+  int set_count = part_sets.size();
+  ASSERT_HOST(set_count == gridheight());
+  // Allocate and init the best_columns_.
+  best_columns_ = new ColPartitionSet*[set_count];
+  for (int y = 0; y < set_count; ++y)
+    best_columns_[y] = nullptr;
+  int column_count = column_sets_.size();
+  // column_set_costs[part_sets index][column_sets_ index] is
+  // < INT32_MAX if the partition set is compatible with the column set,
+  // in which case its value is the cost for that set used in deciding
+  // which competing set to assign.
+  // any_columns_possible[part_sets index] is true if any of
+  // column_set_costs[part_sets index][*] is < INT32_MAX.
+  // assigned_costs[part_sets index] is set to the column_set_costs
+  // of the assigned column_sets_ index or INT32_MAX if none is set.
+  // On return the best_columns_ member is set.
+  bool* any_columns_possible = new bool[set_count];
+  int* assigned_costs = new int[set_count];
+  int** column_set_costs = new int*[set_count];
+  // Fill column_set_costs to indicate whether each part set is compatible
+  // with each column set.
+  for (int part_i = 0; part_i < set_count; ++part_i) {
+    ColPartitionSet* line_set = part_sets.get(part_i);
+    bool debug = line_set != nullptr &&
+                 WithinTestRegion(2, line_set->bounding_box().left(),
+                                  line_set->bounding_box().bottom());
+    column_set_costs[part_i] = new int[column_count];
+    any_columns_possible[part_i] = false;
+    assigned_costs[part_i] = INT32_MAX;
+    for (int col_i = 0; col_i < column_count; ++col_i) {
+      if (line_set != nullptr &&
+          column_sets_.get(col_i)->CompatibleColumns(debug, line_set,
+                                                     WidthCB())) {
+        column_set_costs[part_i][col_i] =
+            column_sets_.get(col_i)->UnmatchedWidth(line_set);
+        any_columns_possible[part_i] = true;
+      } else {
+        column_set_costs[part_i][col_i] = INT32_MAX;
+        if (debug)
+          tprintf("Set id %d did not match at y=%d, lineset =%p\n",
+                  col_i, part_i, line_set);
+      }
+    }
+  }
+  bool any_multi_column = false;
+  // Assign a column set to each vertical grid position.
+  // While there is an unassigned range, find its mode.
+  int start, end;
+  while (BiggestUnassignedRange(set_count, any_columns_possible,
+                                &start, &end)) {
+    if (textord_debug_tabfind >= 2)
+      tprintf("Biggest unassigned range = %d- %d\n", start, end);
+    // Find the modal column_set_id in the range.
+    int column_set_id = RangeModalColumnSet(column_set_costs,
+                                            assigned_costs, start, end);
+    if (textord_debug_tabfind >= 2) {
+      tprintf("Range modal column id = %d\n", column_set_id);
+      column_sets_.get(column_set_id)->Print();
+    }
+    // Now find the longest run of the column_set_id in the range.
+    ShrinkRangeToLongestRun(column_set_costs, assigned_costs,
+                            any_columns_possible,
+                            column_set_id, &start, &end);
+    if (textord_debug_tabfind >= 2)
+      tprintf("Shrunk range = %d- %d\n", start, end);
+    // Extend the start and end past the longest run, while there are
+    // only small gaps in compatibility that can be overcome by larger
+    // regions of compatibility beyond.
+    ExtendRangePastSmallGaps(column_set_costs, assigned_costs,
+                             any_columns_possible,
+                             column_set_id, -1, -1, &start);
+    --end;  // Make end inclusive while it is extended upwards.
+    ExtendRangePastSmallGaps(column_set_costs, assigned_costs,
+                             any_columns_possible,
+                             column_set_id, 1, set_count, &end);
+    ++end;  // Back to an exclusive end.
+    if (textord_debug_tabfind)
+      tprintf("Column id %d applies to range = %d - %d\n",
+              column_set_id, start, end);
+    // Assign the column to the range, which now may overlap with other ranges.
+    AssignColumnToRange(column_set_id, start, end, column_set_costs,
+                        assigned_costs);
+    if (column_sets_.get(column_set_id)->GoodColumnCount() > 1)
+      any_multi_column = true;
+  }
+  // If anything remains unassigned, the whole lot is unassigned, so
+  // arbitrarily assign id 0.
+  if (best_columns_[0] == nullptr) {
+    AssignColumnToRange(0, 0, gridheight_, column_set_costs, assigned_costs);
+  }
+  // Free memory.
+  for (int i = 0; i < set_count; ++i) {
+    delete [] column_set_costs[i];
+  }
+  delete [] assigned_costs;
+  delete [] any_columns_possible;
+  delete [] column_set_costs;
+  return any_multi_column;
+}
+
+// Searches best_columns_ for the unassigned range holding the most rows
+// for which a column assignment is possible. Sets [*best_start, *best_end)
+// and returns true if such a range exists; otherwise both are set_count.
+bool ColumnFinder::BiggestUnassignedRange(int set_count,
+                                          const bool* any_columns_possible,
+                                          int* best_start, int* best_end) {
+  *best_start = set_count;
+  *best_end = set_count;
+  int widest = 0;
+  int first = 0;
+  while (first < gridheight_) {
+    // Skip rows that are already assigned or cannot take any column.
+    while (first < set_count && (best_columns_[first] != nullptr ||
+                                 !any_columns_possible[first])) {
+      ++first;
+    }
+    // Walk to the next assigned row, counting the assignable rows passed.
+    // The count starts at 1 for the row at first.
+    int assignable = 1;
+    int limit = first + 1;
+    for (; limit < set_count && best_columns_[limit] == nullptr; ++limit) {
+      if (any_columns_possible[limit])
+        ++assignable;
+    }
+    // Keep the range if it is valid and strictly bigger than the best so far.
+    if (first < set_count && assignable > widest) {
+      widest = assignable;
+      *best_start = first;
+      *best_end = limit;
+    }
+    first = limit;
+  }
+  return *best_start < *best_end;
+}
+
+// Returns the column_sets_ index that is most often compatible (i.e. costs
+// less than the currently assigned cost) over the rows in [start, end).
+int ColumnFinder::RangeModalColumnSet(int** column_set_costs,
+                                      const int* assigned_costs,
+                                      int start, int end) {
+  const int num_column_sets = column_sets_.size();
+  STATS column_stats(0, num_column_sets);
+  for (int row = start; row < end; ++row) {
+    const int* row_costs = column_set_costs[row];
+    for (int col = 0; col < num_column_sets; ++col)
+      if (row_costs[col] < assigned_costs[row]) column_stats.add(col, 1);
+  }
+  ASSERT_HOST(column_stats.get_total() > 0);
+  return column_stats.mode();
+}
+
+// Given that there are many column_set_id compatible columns in the range,
+// shrinks [*best_start, *best_end) to the longest contiguous run of rows
+// compatible with column_set_id (cost below the row's assigned cost). Rows
+// with no possible columns may be in a run; rows needing others end it.
+void ColumnFinder::ShrinkRangeToLongestRun(int** column_set_costs,
+                                           const int* assigned_costs,
+                                           const bool* any_columns_possible,
+                                           int column_set_id,
+                                           int* best_start, int* best_end) {
+  // orig_start and orig_end are the maximum range we will look at.
+  const int orig_start = *best_start;
+  const int orig_end = *best_end;
+  int best_range_size = 0;
+  *best_start = orig_end;
+  *best_end = orig_end;
+  int end = orig_end;
+  for (int start = orig_start; start < orig_end; start = end) {
+    // Find the first row that is compatible or has no possible columns.
+    while (start < orig_end) {
+      if (column_set_costs[start][column_set_id] < assigned_costs[start] ||
+          !any_columns_possible[start])
+        break;
+      ++start;
+    }
+    // Find the first past the end, testing each row against its own cost.
+    end = start + 1;
+    while (end < orig_end) {
+      if (column_set_costs[end][column_set_id] >= assigned_costs[end] &&
+          any_columns_possible[end])
+        break;
+      ++end;
+    }
+    if (start < orig_end && end - start > best_range_size) {
+      best_range_size = end - start;
+      *best_start = start;
+      *best_end = end;
+    }
+  }
+}
+
+// Moves *start in the direction of step, up to, but not including end, while
+// the only incompatible regions are no more than kMaxIncompatibleColumnCount
+// in size, and the compatible regions beyond are at least as big.
+void ColumnFinder::ExtendRangePastSmallGaps(int** column_set_costs,
+                                            const int* assigned_costs,
+                                            const bool* any_columns_possible,
+                                            int column_set_id,
+                                            int step, int end, int* start) {
+  if (textord_debug_tabfind > 2)
+    tprintf("Starting expansion at %d, step=%d, limit=%d\n",
+            *start, step, end);
+  if (*start == end)
+    return;  // Cannot be expanded.
+
+  int barrier_size = 0;
+  int good_size = 0;
+  do {
+    // Find the size of the incompatible barrier.
+    barrier_size = 0;
+    int i;
+    for (i = *start + step; i != end; i += step) {
+      if (column_set_costs[i][column_set_id] < assigned_costs[i])
+        break;  // We are back on.
+      // Locations where none are possible don't count.
+      if (any_columns_possible[i])
+        ++barrier_size;
+    }
+    if (textord_debug_tabfind > 2)
+      tprintf("At %d, Barrier size=%d\n", i, barrier_size);
+    if (barrier_size > kMaxIncompatibleColumnCount)
+      return;  // Barrier too big.
+    if (i == end) {
+      // We can't go any further, but the barrier was small, so go to the end.
+      *start = i - step;
+      return;
+    }
+    // Now find the size of the good region on the other side.
+    good_size = 1;  // The row that ended the barrier is itself good.
+    for (i += step; i != end; i += step) {
+      if (column_set_costs[i][column_set_id] < assigned_costs[i])
+        ++good_size;
+      else if (any_columns_possible[i])
+        break;
+    }
+    if (textord_debug_tabfind > 2)
+      tprintf("At %d, good size = %d\n", i, good_size);
+    // If we had enough good ones we can extend the start and keep looking.
+    if (good_size >= barrier_size)
+      *start = i - step;
+  } while (good_size >= barrier_size);
+}
+
+// Makes the column set with index column_set_id the best column set for
+// every row in [start, end), and records that set's cost for each of those
+// rows in assigned_costs.
+void ColumnFinder::AssignColumnToRange(int column_set_id, int start, int end,
+                                       int** column_set_costs,
+                                       int* assigned_costs) {
+  ColPartitionSet* const chosen_set = column_sets_.get(column_set_id);
+  int row = start;
+  while (row < end) {
+    best_columns_[row] = chosen_set;
+    assigned_costs[row] = column_set_costs[row][column_set_id];
+    ++row;
+  }
+}
+
+// Computes mean_column_gap_ from the best column set of every grid row.
+// With any_multi_column set and at least one gap sample, the result is the
+// (integer) mean gap; otherwise it is the (integer) mean column width, or 0
+// when there are no width samples either.
+void ColumnFinder::ComputeMeanColumnGap(bool any_multi_column) {
+  int width_sum = 0;
+  int width_count = 0;
+  int gap_sum = 0;
+  int gap_count = 0;
+  for (int i = 0; i < gridheight_; ++i) {
+    ASSERT_HOST(best_columns_[i] != nullptr);
+    best_columns_[i]->AccumulateColumnWidthsAndGaps(&width_sum, &width_count,
+                                                    &gap_sum, &gap_count);
+  }
+  if (any_multi_column && gap_count > 0) {
+    mean_column_gap_ = gap_sum / gap_count;
+  } else if (width_count > 0) {
+    mean_column_gap_ = width_sum / width_count;
+  } else {
+    mean_column_gap_ = 0;
+  }
+}
+
+//////// Functions that manipulate ColPartitions in the part_grid_ /////
+//////// to split, merge, find margins, and find types.  //////////////
+
+// Helper that empties the given list. Each blob is extracted from the list;
+// a blob with no owner() is deleted together with its cblob(), while an owned
+// blob is simply left to its owner().
+static void ReleaseAllBlobsAndDeleteUnused(BLOBNBOX_LIST* blobs) {
+  BLOBNBOX_IT it(blobs);
+  while (!it.empty()) {
+    BLOBNBOX* released = it.extract();
+    if (released->owner() == nullptr) {
+      delete released->cblob();
+      delete released;
+    }
+    it.forward();
+  }
+}
+
+// Empties every blob list of the block, plus image_bblobs_, deleting the
+// blobs that have no owner. Owned blobs are only released from their list so
+// the ColPartitions can pass ownership to the output blocks.
+void ColumnFinder::ReleaseBlobsAndCleanupUnused(TO_BLOCK* block) {
+  // Processed in this order: blobs, small, noise, large, then image blobs.
+  BLOBNBOX_LIST* const blob_lists[] = {
+      &block->blobs,
+      &block->small_blobs,
+      &block->noise_blobs,
+      &block->large_blobs,
+      &image_bblobs_,
+  };
+  for (BLOBNBOX_LIST* blob_list : blob_lists) {
+    ReleaseAllBlobsAndDeleteUnused(blob_list);
+  }
+}
+
+// Splits partitions that cross columns where they have nothing in the gap.
+// For every partition in part_grid_ with blob_type() >= BRT_UNKNOWN whose
+// column range works out as exactly two adjacent physical columns, the blob
+// grid is searched in the gap between those two columns (within the
+// partition's vertical extent). If no blob overlaps the gap, the partition is
+// split at the horizontal middle of the gap and both pieces are put back into
+// part_grid_.
+void ColumnFinder::GridSplitPartitions() {
+  // Iterate the ColPartitions in the grid.
+  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+    gsearch(&part_grid_);
+  gsearch.StartFullSearch();
+  // Remembers the last partition for which SplitAt() returned nullptr, so it
+  // is skipped if the search returns it again after being re-inserted.
+  ColPartition* dont_repeat = nullptr;
+  ColPartition* part;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    if (part->blob_type() < BRT_UNKNOWN || part == dont_repeat)
+      continue;  // Only applies to text partitions.
+    // Use the column set chosen for the grid row the search is currently on.
+    ColPartitionSet* column_set = best_columns_[gsearch.GridY()];
+    int first_col = -1;
+    int last_col = -1;
+    // Find which columns the partition spans.
+    part->ColumnRange(resolution_, column_set, &first_col, &last_col);
+    // NOTE(review): the decrement presumably makes first_col round to the
+    // intended physical column in the division below - confirm against
+    // ColPartition::ColumnRange.
+    if (first_col > 0)
+      --first_col;
+    // Convert output column indices to physical column indices.
+    first_col /= 2;
+    last_col /= 2;
+    // We will only consider cases where a partition spans two columns,
+    // since a heading that spans more columns than that is most likely
+    // genuine.
+    if (last_col != first_col + 1)
+      continue;
+    // Set up a rectangle search x-bounded by the column gap and y by the part.
+    int y = part->MidY();
+    TBOX margin_box = part->bounding_box();
+    bool debug = AlignedBlob::WithinTestRegion(2, margin_box.left(),
+                                               margin_box.bottom());
+    if (debug) {
+      tprintf("Considering partition for GridSplit:");
+      part->Print();
+    }
+    // The gap runs from 2 units right of the first column's right edge to
+    // 2 units left of the last column's left edge, both measured at y.
+    ColPartition* column = column_set->GetColumnByIndex(first_col);
+    if (column == nullptr)
+      continue;
+    margin_box.set_left(column->RightAtY(y) + 2);
+    column = column_set->GetColumnByIndex(last_col);
+    if (column == nullptr)
+      continue;
+    margin_box.set_right(column->LeftAtY(y) - 2);
+    // TODO(rays) Decide whether to keep rectangular filling or not in the
+    // main grid and therefore whether we need a fancier search here.
+    // Now run the rect search on the main blob grid.
+    GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> rectsearch(this);
+    if (debug) {
+      tprintf("Searching box (%d,%d)->(%d,%d)\n",
+              margin_box.left(), margin_box.bottom(),
+              margin_box.right(), margin_box.top());
+      part->Print();
+    }
+    rectsearch.StartRectSearch(margin_box);
+    // Stop at the first blob whose box really overlaps the gap; bbox is left
+    // as nullptr when the search finishes without finding one.
+    BLOBNBOX* bbox;
+    while ((bbox = rectsearch.NextRectSearch()) != nullptr) {
+      if (bbox->bounding_box().overlap(margin_box))
+        break;
+    }
+    if (bbox == nullptr) {
+      // There seems to be nothing in the hole, so split the partition.
+      // The partition is taken out of the grid first because splitting
+      // changes it; it is re-inserted below whether or not the split worked.
+      gsearch.RemoveBBox();
+      int x_middle = (margin_box.left() + margin_box.right()) / 2;
+      if (debug) {
+        tprintf("Splitting part at %d:", x_middle);
+        part->Print();
+      }
+      ColPartition* split_part = part->SplitAt(x_middle);
+      if (split_part != nullptr) {
+        if (debug) {
+          tprintf("Split result:");
+          part->Print();
+          split_part->Print();
+        }
+        part_grid_.InsertBBox(true, true, split_part);
+      } else {
+        // Split had no effect
+        if (debug)
+          tprintf("Split had no effect\n");
+        dont_repeat = part;
+      }
+      part_grid_.InsertBBox(true, true, part);
+      // The grid contents changed under the full search, so resynchronise it.
+      gsearch.RepositionIterator();
+    } else if (debug) {
+      tprintf("Part cannot be split: blob (%d,%d)->(%d,%d) in column gap\n",
+              bbox->bounding_box().left(), bbox->bounding_box().bottom(),
+              bbox->bounding_box().right(), bbox->bounding_box().top());
+    }
+  }
+}
+
+// Merges partitions where there is vertical overlap, within a single column,
+// and the horizontal gap is small enough.
+// For each mergeable partition whose left and right edges fall in the same
+// column, a rectangle search over that column's width (and the partition's
+// vertical extent) looks for neighbours to absorb. A neighbour is absorbed
+// when it passes the overlap, type, margin and horizontal gap tests below.
+void ColumnFinder::GridMergePartitions() {
+  // Iterate the ColPartitions in the grid.
+  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+    gsearch(&part_grid_);
+  gsearch.StartFullSearch();
+  ColPartition* part;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    if (part->IsUnMergeableType())
+      continue;
+    // Set up a rectangle search x-bounded by the column and y by the part.
+    ColPartitionSet* columns = best_columns_[gsearch.GridY()];
+    TBOX box = part->bounding_box();
+    bool debug = AlignedBlob::WithinTestRegion(1, box.left(), box.bottom());
+    if (debug) {
+      tprintf("Considering part for merge at:");
+      part->Print();
+    }
+    int y = part->MidY();
+    ColPartition* left_column = columns->ColumnContaining(box.left(), y);
+    ColPartition* right_column = columns->ColumnContaining(box.right(), y);
+    // Both edges of the partition must lie in one and the same column.
+    if (left_column == nullptr || right_column != left_column) {
+      if (debug)
+        tprintf("In different columns\n");
+      continue;
+    }
+    // Widen the search box to the full width of that column at height y.
+    box.set_left(left_column->LeftAtY(y));
+    box.set_right(right_column->RightAtY(y));
+    // Now run the rect search.
+    // modified_box records whether part has absorbed anything yet, i.e.
+    // whether it has already been removed from the grid for re-insertion.
+    bool modified_box = false;
+    GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+      rsearch(&part_grid_);
+    rsearch.SetUniqueMode(true);
+    rsearch.StartRectSearch(box);
+    ColPartition* neighbour;
+
+    while ((neighbour = rsearch.NextRectSearch()) != nullptr) {
+      if (neighbour == part || neighbour->IsUnMergeableType())
+        continue;
+      const TBOX& neighbour_box = neighbour->bounding_box();
+      if (debug) {
+        tprintf("Considering merge with neighbour at:");
+        neighbour->Print();
+      }
+      if (neighbour_box.right() < box.left() ||
+          neighbour_box.left() > box.right())
+        continue;  // Not within the same column.
+      if (part->VSignificantCoreOverlap(*neighbour) &&
+          part->TypesMatch(*neighbour)) {
+        // There is vertical overlap and the gross types match, but only
+        // merge if the horizontal gap is small enough, as one of the
+        // partitions may be a figure caption within a column.
+        // If there is only one column, then the mean_column_gap_ is large
+        // enough to allow almost any merge, by being the mean column width.
+        const TBOX& part_box = part->bounding_box();
+        // Don't merge if there is something else in the way. Use the margin
+        // to decide, and check both to allow a bit of overlap.
+        if (neighbour_box.left() > part->right_margin() &&
+            part_box.right() < neighbour->left_margin())
+          continue;  // Neighbour is too far to the right.
+        if (neighbour_box.right() < part->left_margin() &&
+            part_box.left() > neighbour->right_margin())
+          continue;  // Neighbour is too far to the left.
+        // Horizontal distance between the two boxes; it is negative when
+        // they overlap horizontally.
+        int h_gap = std::max(part_box.left(), neighbour_box.left()) -
+                std::min(part_box.right(), neighbour_box.right());
+        // Merge when the gap is small, or when either box is narrower than
+        // the mean column gap.
+        if (h_gap < mean_column_gap_ * kHorizontalGapMergeFraction ||
+            part_box.width() < mean_column_gap_ ||
+            neighbour_box.width() < mean_column_gap_) {
+          if (debug) {
+            tprintf("Running grid-based merge between:\n");
+            part->Print();
+            neighbour->Print();
+          }
+          // Take the neighbour out of the grid before it is absorbed.
+          rsearch.RemoveBBox();
+          if (!modified_box) {
+            // We are going to modify part, so remove it and re-insert it after.
+            gsearch.RemoveBBox();
+            rsearch.RepositionIterator();
+            modified_box = true;
+          }
+          part->Absorb(neighbour, WidthCB());
+        } else if (debug) {
+          tprintf("Neighbour failed hgap test\n");
+        }
+      } else if (debug) {
+        tprintf("Neighbour failed overlap or typesmatch test\n");
+      }
+    }
+    if (modified_box) {
+      // We modified the box of part, so re-insert it into the grid.
+      // This does no harm in the current cell, as it already exists there,
+      // but it needs to exist in all the cells covered by its bounding box,
+      // or it will never be found by a full search.
+      // Because the box has changed, it has to be removed first, otherwise
+      // add_sorted may fail to keep a single copy of the pointer.
+      part_grid_.InsertBBox(true, true, part);
+      gsearch.RepositionIterator();
+    }
+  }
+}
+
+// Tries to give every un-owned blob in block->noise_blobs to a nearby
+// partition. The candidates are the mergeable partitions found by a rectangle
+// search around the blob (its box padded by gridsize() in both directions);
+// the one with the smallest projection_ distance wins, provided that distance
+// is below kMaxDistToPartSizeRatio times the partition's median height.
+// Blobs that find no home are marked BRT_NOISE, and finally
+// block->DeleteUnownedNoise() is called to dispose of them.
+void ColumnFinder::InsertRemainingNoise(TO_BLOCK* block) {
+  BLOBNBOX_IT noise_it(&block->noise_blobs);
+  for (noise_it.mark_cycle_pt(); !noise_it.cycled_list();
+       noise_it.forward()) {
+    BLOBNBOX* noise = noise_it.data();
+    if (noise->owner() != nullptr)
+      continue;  // Already belongs to something.
+    TBOX search_box(noise->bounding_box());
+    bool debug = WithinTestRegion(2, search_box.left(), search_box.bottom());
+    search_box.pad(gridsize(), gridsize());
+    // Look through the nearby partitions for the closest mergeable one.
+    ColPartitionGridSearch candidates(&part_grid_);
+    candidates.SetUniqueMode(true);
+    candidates.StartRectSearch(search_box);
+    ColPartition* closest = nullptr;
+    int closest_distance = 0;
+    for (ColPartition* candidate = candidates.NextRectSearch();
+         candidate != nullptr; candidate = candidates.NextRectSearch()) {
+      if (candidate->IsUnMergeableType())
+        continue;
+      int distance = projection_.DistanceOfBoxFromPartition(
+          noise->bounding_box(), *candidate, denorm_, debug);
+      if (closest != nullptr && distance >= closest_distance)
+        continue;  // Not an improvement.
+      closest = candidate;
+      closest_distance = distance;
+    }
+    const bool close_enough =
+        closest != nullptr &&
+        closest_distance < kMaxDistToPartSizeRatio * closest->median_height();
+    if (!close_enough) {
+      // Mark the blob for deletion.
+      noise->set_region_type(BRT_NOISE);
+      continue;
+    }
+    if (debug) {
+      tprintf("Adding noise blob with distance %d, thr=%g:box:",
+              closest_distance,
+              kMaxDistToPartSizeRatio * closest->median_height());
+      noise->bounding_box().print();
+      tprintf("To partition:");
+      closest->Print();
+    }
+    // The partition changes when the blob is added, so take it out of the
+    // grid while it is modified and put it back afterwards.
+    part_grid_.RemoveBBox(closest);
+    closest->AddBox(noise);
+    part_grid_.InsertBBox(true, true, closest);
+    noise->set_owner(closest);
+    noise->set_flow(closest->flow());
+    noise->set_region_type(closest->blob_type());
+  }
+  // Delete the marked blobs, clearing neighbour references.
+  block->DeleteUnownedNoise();
+}
+
+// Helper that builds a TBOX for a horizontal line. Horizontally the box runs
+// from the x of the start point to the x of the end point. Vertically it runs
+// from the lower of the two end-point y values up to the higher one plus the
+// line's mean_width(). A box that would have zero height is grown by one
+// unit: downwards if bottom is positive, otherwise upwards.
+static TBOX BoxFromHLine(const TabVector* hline) {
+  const auto start_y = hline->startpt().y();
+  const auto end_y = hline->endpt().y();
+  int bottom = std::min(start_y, end_y);
+  int top = std::max(start_y, end_y);
+  top += hline->mean_width();
+  if (top == bottom) {
+    if (bottom > 0) {
+      --bottom;
+    } else {
+      ++top;
+    }
+  }
+  return TBOX(hline->startpt().x(), bottom, hline->endpt().x(), top);
+}
+
+// Removes the partitions that come from horizontal lines which look like
+// underlines and are not part of a table.
+// Lines that intersect other lines are left alone. For each remaining line,
+// the partitions around its box (padded vertically by the box height) are
+// examined. A BRT_HLINE partition that lies inside the line box, without
+// also containing it, is removed from part_grid_ and deleted when text was
+// touched and no PT_TABLE partition was found.
+void ColumnFinder::GridRemoveUnderlinePartitions() {
+  TabVector_IT hline_it(&horizontal_lines_);
+  for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) {
+    TabVector* hline = hline_it.data();
+    if (hline->intersects_other_lines())
+      continue;
+    TBOX line_box = BoxFromHLine(hline);
+    TBOX search_box = line_box;
+    search_box.pad(0, line_box.height());
+    ColPartitionGridSearch part_search(&part_grid_);
+    part_search.SetUniqueMode(true);
+    part_search.StartRectSearch(search_box);
+    bool touched_table = false;
+    bool touched_text = false;
+    ColPartition* line_part = nullptr;
+    for (ColPartition* covered = part_search.NextRectSearch();
+         covered != nullptr; covered = part_search.NextRectSearch()) {
+      if (covered->type() == PT_TABLE) {
+        // A table rules out an underline; no need to look any further.
+        touched_table = true;
+        break;
+      }
+      if (covered->IsTextType()) {
+        // TODO(rays) Add a list of underline sections to ColPartition.
+        // Text counts as touched when its median bottom lies between the
+        // bottom of the line and the top of the padded search box.
+        int text_bottom = covered->median_bottom();
+        if (line_box.bottom() <= text_bottom && text_bottom <= search_box.top())
+          touched_text = true;
+        continue;
+      }
+      if (covered->blob_type() != BRT_HLINE)
+        continue;
+      if (!line_box.contains(covered->bounding_box()))
+        continue;
+      // not if same instance (identical to hline)
+      if (TBOX(covered->bounding_box()).contains(line_box))
+        continue;
+      line_part = covered;
+    }
+    if (line_part == nullptr || touched_table || !touched_text)
+      continue;
+    part_grid_.RemoveBBox(line_part);
+    delete line_part;
+  }
+}
+
+// Adds horizontal line separators as partitions.
+// A BRT_HLINE partition of type PT_HORZ_LINE is made from the box of every
+// line in horizontal_lines_. It is inserted into part_grid_ unless a
+// rectangle search over the line box finds an image partition, in which case
+// the new partition is deleted again.
+void ColumnFinder::GridInsertHLinePartitions() {
+  TabVector_IT hline_it(&horizontal_lines_);
+  for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) {
+    TBOX line_box = BoxFromHLine(hline_it.data());
+    ColPartition* line_part = ColPartition::MakeLinePartition(
+        BRT_HLINE, vertical_skew_,
+        line_box.left(), line_box.bottom(), line_box.right(), line_box.top());
+    line_part->set_type(PT_HORZ_LINE);
+    ColPartitionGridSearch part_search(&part_grid_);
+    part_search.SetUniqueMode(true);
+    part_search.StartRectSearch(line_box);
+    // Advance until the search is exhausted or an image partition turns up.
+    ColPartition* covered = part_search.NextRectSearch();
+    while (covered != nullptr && !covered->IsImageType()) {
+      covered = part_search.NextRectSearch();
+    }
+    if (covered != nullptr) {
+      delete line_part;  // The line runs over an image.
+    } else {
+      part_grid_.InsertBBox(true, true, line_part);
+    }
+  }
+}
+
+// Adds vertical line separators as partitions.
+// Every vector in dead_vectors() for which IsSeparator() holds becomes a
+// BRT_VLINE partition of type PT_VERT_LINE. Its horizontal extent runs from
+// the smaller end-point x to the larger end-point x plus mean_width(), and is
+// widened by one unit if that would be empty. The partition is inserted into
+// part_grid_ unless a rectangle search over its bounding box finds an image
+// partition, in which case it is deleted again.
+void ColumnFinder::GridInsertVLinePartitions() {
+  TabVector_IT vline_it(dead_vectors());
+  for (vline_it.mark_cycle_pt(); !vline_it.cycled_list(); vline_it.forward()) {
+    TabVector* vline = vline_it.data();
+    if (!vline->IsSeparator())
+      continue;
+    const auto start_x = vline->startpt().x();
+    const auto end_x = vline->endpt().x();
+    int left = std::min(start_x, end_x);
+    int right = std::max(start_x, end_x);
+    right += vline->mean_width();
+    if (left == right) {
+      // Zero width: grow to the left when possible, otherwise to the right.
+      if (left > 0) {
+        --left;
+      } else {
+        ++right;
+      }
+    }
+    ColPartition* line_part = ColPartition::MakeLinePartition(
+        BRT_VLINE, vertical_skew_,
+        left, vline->startpt().y(), right, vline->endpt().y());
+    line_part->set_type(PT_VERT_LINE);
+    ColPartitionGridSearch part_search(&part_grid_);
+    part_search.SetUniqueMode(true);
+    part_search.StartRectSearch(line_part->bounding_box());
+    // Advance until the search is exhausted or an image partition turns up.
+    ColPartition* covered = part_search.NextRectSearch();
+    while (covered != nullptr && !covered->IsImageType()) {
+      covered = part_search.NextRectSearch();
+    }
+    if (covered != nullptr) {
+      delete line_part;  // The line runs over an image.
+    } else {
+      part_grid_.InsertBBox(true, true, line_part);
+    }
+  }
+}
+
+// Visits every ColPartition in part_grid_ and lets it set its own type from
+// resolution_ and the best column set of the grid row it was found on.
+void ColumnFinder::SetPartitionTypes() {
+  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+    gsearch(&part_grid_);
+  gsearch.StartFullSearch();
+  for (ColPartition* part = gsearch.NextFullSearch(); part != nullptr;
+       part = gsearch.NextFullSearch()) {
+    ColPartitionSet* row_columns = best_columns_[gsearch.GridY()];
+    part->SetPartitionType(resolution_, row_columns);
+  }
+}
+
+// Only images remain with multiple types in a run of partners.
+// Sets the type of all in the group to the maximum of the group.
+// Each partition in part_grid_ is handled in one of two ways:
+//  - If it has a SingletonPartner(true), that partner's
+//    SingletonPartner(false) must point back at the partition. A mismatch is
+//    reported with tprintf and then trips the ASSERT_HOST.
+//  - Otherwise, if it has a SingletonPartner(false), SmoothPartnerRun() is
+//    called on it with twice the row's column count plus one.
+void ColumnFinder::SmoothPartnerRuns() {
+  // Iterate the ColPartitions in the grid.
+  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+    gsearch(&part_grid_);
+  gsearch.StartFullSearch();
+  ColPartition* part;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    ColPartition* partner = part->SingletonPartner(true);
+    if (partner != nullptr) {
+      // Query the back-link once and reuse it for the whole diagnostic.
+      ColPartition* back_partner = partner->SingletonPartner(false);
+      if (back_partner != part) {
+        tprintf("Ooops! Partition:(%d partners)",
+                part->upper_partners()->length());
+        part->Print();
+        // The closing parenthesis was missing from this message.
+        tprintf("has singleton partner:(%d partners)",
+                partner->lower_partners()->length());
+        partner->Print();
+        tprintf("but its singleton partner is:");
+        if (back_partner == nullptr)
+          tprintf("NULL\n");
+        else
+          back_partner->Print();
+      }
+      ASSERT_HOST(partner->SingletonPartner(false) == part);
+    } else if (part->SingletonPartner(false) != nullptr) {
+      // part has no singleton partner on one side but has one on the other,
+      // so it is an end of a run: smooth the run starting here.
+      ColPartitionSet* column_set = best_columns_[gsearch.GridY()];
+      int column_count = column_set->ColumnCount();
+      part->SmoothPartnerRun(column_count * 2 + 1);
+    }
+  }
+}
+
+// Helper functions for TransformToBlocks.
+// Add the part to the temp list in the correct order.
+// The list is walked from its start, and part is inserted just before the
+// first entry that is either part's SingletonPartner(false), or whose
+// (median_bottom() + median_top()) / 2 is less than part->MidY().
+// If the walk completes without stopping, part is added at the end.
+// If part or the entry is PT_NOISE, the entry is skipped without being
+// considered as an insertion point.
+void ColumnFinder::AddToTempPartList(ColPartition* part,
+                                     ColPartition_CLIST* temp_list) {
+  int mid_y = part->MidY();
+  ColPartition_C_IT it(temp_list);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    ColPartition* test_part = it.data();
+    if (part->type() == PT_NOISE || test_part->type() == PT_NOISE)
+      continue;  // Noise stays in sequence.
+    if (test_part == part->SingletonPartner(false))
+      break;  // Insert before its lower partner.
+    int neighbour_bottom = test_part->median_bottom();
+    int neighbour_top = test_part->median_top();
+    int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
+    if (neighbour_y < mid_y)
+      break;  // part is above test_part so insert it.
+    // NOTE(review): this test is the last statement in the loop body, so the
+    // continue has no effect on control flow; both outcomes simply move on
+    // to the next entry.
+    if (!part->HOverlaps(*test_part) && !part->WithinSameMargins(*test_part))
+      continue;  // Incompatibles stay in order
+  }
+  // cycled_list() is true here only if the loop ran to completion (or the
+  // list was empty - TODO confirm for the empty case); after a break the
+  // iterator is still on the entry that part has to go in front of.
+  if (it.cycled_list()) {
+    it.add_to_end(part);
+  } else {
+    it.add_before_stay_put(part);
+  }
+}
+
+// Drains temp_list from the front: each partition is extracted from the list
+// and added to work_set via AddToWorkingSet(), so the list order is taken to
+// be the desired output order. temp_list is empty on return.
+void ColumnFinder::EmptyTempPartList(ColPartition_CLIST* temp_list,
+                                     WorkingPartSet_LIST* work_set) {
+  for (ColPartition_C_IT it(temp_list); !it.empty(); it.forward()) {
+    ColPartition* next_part = it.extract();
+    next_part->AddToWorkingSet(bleft_, tright_, resolution_,
+                               &good_parts_, work_set);
+  }
+}
+
+// Transforms the grid of partitions into the output blocks.
+// Partitions are read from part_grid_ with a full search. PT_NOISE partitions
+// go straight onto noise_parts_; all others are collected per grid row in a
+// temporary list, ordered by AddToTempPartList(), and flushed into the
+// working sets whenever the grid row changes. Whenever the best column set
+// differs from that of the previous partition, the working columns are
+// changed to match. Finally every working set is asked to extract its
+// completed blocks into blocks/to_blocks and is then deleted.
+void ColumnFinder::TransformToBlocks(BLOCK_LIST* blocks,
+                                     TO_BLOCK_LIST* to_blocks) {
+  WorkingPartSet_LIST work_set;
+  ColPartition_IT noise_it(&noise_parts_);
+  // Holds the parts that share a grid y coord so they can be added in the
+  // correct order. This prevents thin objects like horizontal lines going
+  // before the text lines above them.
+  ColPartition_CLIST temp_part_list;
+  // Iterate the ColPartitions in the grid. It starts at the top
+  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+    gsearch(&part_grid_);
+  gsearch.StartFullSearch();
+  ColPartitionSet* column_set = nullptr;
+  int prev_grid_y = -1;
+  for (ColPartition* part = gsearch.NextFullSearch(); part != nullptr;
+       part = gsearch.NextFullSearch()) {
+    const int grid_y = gsearch.GridY();
+    if (grid_y != prev_grid_y) {
+      // New grid row: flush what was gathered for the previous one.
+      EmptyTempPartList(&temp_part_list, &work_set);
+      prev_grid_y = grid_y;
+    }
+    if (best_columns_[grid_y] != column_set) {
+      column_set = best_columns_[grid_y];
+      // Every line should have a non-null best column.
+      ASSERT_HOST(column_set != nullptr);
+      column_set->ChangeWorkColumns(bleft_, tright_, resolution_,
+                                    &good_parts_, &work_set);
+      if (textord_debug_tabfind) {
+        tprintf("Changed column groups at grid index %d, y=%d\n",
+                gsearch.GridY(), gsearch.GridY() * gridsize());
+      }
+    }
+    if (part->type() != PT_NOISE) {
+      AddToTempPartList(part, &temp_part_list);
+    } else {
+      noise_it.add_to_end(part);
+    }
+  }
+  // Flush whatever was gathered for the final grid row.
+  EmptyTempPartList(&temp_part_list, &work_set);
+  // Now finish all working sets and transfer ColPartitionSets to block_sets.
+  for (WorkingPartSet_IT work_it(&work_set); !work_it.empty();
+       work_it.forward()) {
+    WorkingPartSet* finished_set = work_it.extract();
+    finished_set->ExtractCompletedBlocks(bleft_, tright_, resolution_,
+                                         &good_parts_, blocks, to_blocks);
+    delete finished_set;
+  }
+}
+
+// Helper that calls reflect_box_in_y_axis() on every BLOBNBOX in the list.
+// Only the BLOBNBOX bounding box is reflected, not the blobs or outlines
+// underneath it.
+static void ReflectBlobList(BLOBNBOX_LIST* bblobs) {
+  BLOBNBOX_IT blob_it(bblobs);
+  blob_it.mark_cycle_pt();
+  while (!blob_it.cycled_list()) {
+    BLOBNBOX* blob = blob_it.data();
+    blob->reflect_box_in_y_axis();
+    blob_it.forward();
+  }
+}
+
+// Reflects the blob boxes (but not the outlines) in the y-axis so that the
+// blocks get created in the correct RTL order. The boxes in bblobs and in all
+// four blob lists of input_block are reflected.
+// The reflection is undone in RotateAndReskewBlocks by reflecting the blocks
+// themselves, and then recomputing the blob bounding boxes.
+// denorm_ is replaced by a newly allocated DENORM that is set up with the
+// previous denorm_ as its predecessor and an x scale of -1.
+void ColumnFinder::ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs) {
+  BLOBNBOX_LIST* const lists_to_reflect[] = {
+      bblobs,
+      &input_block->blobs,
+      &input_block->small_blobs,
+      &input_block->noise_blobs,
+      &input_block->large_blobs,
+  };
+  for (BLOBNBOX_LIST* blob_list : lists_to_reflect) {
+    ReflectBlobList(blob_list);
+  }
+  // Update the denorm with the reflection.
+  DENORM* reflected_denorm = new DENORM;
+  reflected_denorm->SetupNormalization(nullptr, nullptr, denorm_,
+                                       0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f);
+  denorm_ = reflected_denorm;
+}
+
+// Helper that brings the blobs of a list into line with blob_rotation.
+// A blob whose C_BLOB has a single outline is rotated (unless the rotation is
+// the identity (1, 0)), has its bounding box recomputed, and contributes its
+// box width and height to the widths and heights stats.
+// A blob with several outlines is exploded: each outline becomes a new
+// BLOBNBOX inserted after the current position, and the original blob and its
+// C_BLOB are deleted. The new blobs are then visited by later iterations of
+// the same loop, where they are rotated and counted.
+static void RotateAndExplodeBlobList(const FCOORD& blob_rotation,
+                                     BLOBNBOX_LIST* bblobs,
+                                     STATS* widths,
+                                     STATS* heights) {
+  BLOBNBOX_IT blob_it(bblobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    C_BLOB* cblob = blob->cblob();
+    C_OUTLINE_LIST* outlines = cblob->out_list();
+    C_OUTLINE_IT outline_it(outlines);
+    if (outlines->singleton()) {
+      const bool is_identity =
+          blob_rotation.x() == 1.0f && blob_rotation.y() == 0.0f;
+      if (!is_identity) {
+        cblob->rotate(blob_rotation);
+      }
+      blob->compute_bounding_box();
+      widths->add(blob->bounding_box().width(), 1);
+      heights->add(blob->bounding_box().height(), 1);
+      continue;
+    }
+    // This blob has multiple outlines from CJK repair.
+    // Explode the blob back into individual outlines.
+    while (!outline_it.empty()) {
+      C_OUTLINE* outline = outline_it.extract();
+      // Added with add_after_stay_put, so the new blob is revisited by the
+      // outer loop, where it gets rotated and added to the stats.
+      blob_it.add_after_stay_put(BLOBNBOX::RealBlob(outline));
+      outline_it.forward();
+    }
+    blob_it.extract();
+    delete cblob;
+    delete blob;
+  }
+}
+
+// Undoes the deskew that was done in FindTabVectors, as recognition is done
+// without correcting blobs or blob outlines for skew.
+// The completed blocks are reskewed to put them back to the original rotated
+// coords that were created by CorrectOrientation.
+// If input_is_rtl, deskew_ and reskew_ are exchanged first, and each block is
+// reflected in the y-axis to undo the reflection that was done before
+// FindTabVectors.
+// Blocks that were identified as vertical text (relative to the rotated
+// coordinates) are further rotated so the text lines are horizontal.
+// Blobs are rotated to match the position of the blocks that they are in, and
+// their bounding boxes are recalculated to be accurate.
+// Each block is given its skew, right-to-left flag, a 1-based index, the
+// appropriate inverse/classifier rotations, and the median blob size.
+void ColumnFinder::RotateAndReskewBlocks(bool input_is_rtl,
+                                         TO_BLOCK_LIST* blocks) {
+  if (input_is_rtl) {
+    // The skew is backwards because of the reflection.
+    const FCOORD saved_deskew = deskew_;
+    deskew_ = reskew_;
+    reskew_ = saved_deskew;
+  }
+  int next_index = 1;
+  TO_BLOCK_IT block_it(blocks);
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+       block_it.forward()) {
+    TO_BLOCK* to_block = block_it.data();
+    BLOCK* block = to_block->block;
+    // Blocks are created on the deskewed blob outlines in TransformToBlocks()
+    // so we need to reskew them back to page coordinates.
+    if (input_is_rtl)
+      block->reflect_polygon_in_y_axis();
+    block->rotate(reskew_);
+    // Copy the right_to_left flag to the created block.
+    block->set_right_to_left(input_is_rtl);
+    // Save the skew angle in the block for baseline computations.
+    block->set_skew(reskew_);
+    block->pdblk.set_index(next_index);
+    ++next_index;
+    const FCOORD blob_rotation = ComputeBlockAndClassifyRotation(block);
+    // Rotate all the blobs if needed and recompute the bounding boxes,
+    // gathering the blob widths and heights for the block medians.
+    STATS widths(0, block->pdblk.bounding_box().width());
+    STATS heights(0, block->pdblk.bounding_box().height());
+    RotateAndExplodeBlobList(blob_rotation, &to_block->blobs,
+                             &widths, &heights);
+    TO_ROW_IT row_it(to_block->get_rows());
+    row_it.mark_cycle_pt();
+    while (!row_it.cycled_list()) {
+      RotateAndExplodeBlobList(blob_rotation, row_it.data()->blob_list(),
+                               &widths, &heights);
+      row_it.forward();
+    }
+    const int median_width = static_cast<int>(widths.median() + 0.5);
+    const int median_height = static_cast<int>(heights.median() + 0.5);
+    block->set_median_size(median_width, median_height);
+    if (textord_debug_tabfind >= 2) {
+      tprintf("Block median size = (%d, %d)\n",
+              block->median_size().x(), block->median_size().y());
+    }
+  }
+}
+
+// Computes the rotations for the block (to make textlines horizontal) and
+// for the blobs (for classification), and stores the re-rotation and the
+// classify rotation in the given block. A PT_VERTICAL_TEXT block is also
+// rotated here.
+// Returns the rotation that needs to be applied to the blobs to make
+// them sit in the rotated block.
+FCOORD ColumnFinder::ComputeBlockAndClassifyRotation(BLOCK* block) {
+  // The text_rotation_ tells us the gross page text rotation that needs
+  // to be applied for classification
+  // TODO(rays) find block-level classify rotation by orientation detection.
+  // In the mean time, assume that "up" for text printed in the minority
+  // direction (PT_VERTICAL_TEXT) is perpendicular to the line of reading.
+  // Accomplish this by zero-ing out the text rotation.  This covers the
+  // common cases of image credits in documents written in Latin scripts
+  // and page headings for predominantly vertically written CJK books.
+  FCOORD classify_rotation(text_rotation_);
+  FCOORD applied_rotation(1.0f, 0.0f);
+  const bool is_vertical_text =
+      block->pdblk.poly_block()->isA() == PT_VERTICAL_TEXT;
+  if (is_vertical_text) {
+    // Vertical text needs to be 90 degrees rotated relative to the rest.
+    // If the rest has a 90 degree rotation already, use the inverse, making
+    // the vertical text the original way up. Otherwise use 90 degrees
+    // clockwise.
+    applied_rotation =
+        (rerotate_.x() == 0.0f) ? rerotate_ : FCOORD(0.0f, -1.0f);
+    block->rotate(applied_rotation);
+    classify_rotation = FCOORD(1.0f, 0.0f);
+  }
+  applied_rotation.rotate(rotation_);
+  // applied_rotation is now what we have done to the blocks. The blobs need
+  // the same rotation, so keep a copy to return before inverting it.
+  FCOORD blob_rotation(applied_rotation);
+  // The block stores the inverse rotation (y negated), as that is what is
+  // needed to DENORM back to the image coordinates.
+  applied_rotation.set_y(-applied_rotation.y());
+  block->set_re_rotation(applied_rotation);
+  block->set_classify_rotation(classify_rotation);
+  if (textord_debug_tabfind) {
+    tprintf("Blk %d, type %d rerotation(%.2f, %.2f), char(%.2f,%.2f), box:",
+            block->pdblk.index(), block->pdblk.poly_block()->isA(),
+            block->re_rotation().x(), block->re_rotation().y(),
+            classify_rotation.x(), classify_rotation.y());
+    block->pdblk.bounding_box().print();
+  }
+  return blob_rotation;
+}
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/colfind.h b/tesseract/src/textord/colfind.h
new file mode 100644
index 00000000..b7d5b672
--- /dev/null
+++ b/tesseract/src/textord/colfind.h
@@ -0,0 +1,366 @@
+///////////////////////////////////////////////////////////////////////
+// File:        colfind.h
+// Description: Class to find columns in the grid of BLOBNBOXes.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_COLFIND_H_
+#define TESSERACT_TEXTORD_COLFIND_H_
+
+#include "colpartitiongrid.h"
+#include "colpartitionset.h"
+#include "debugpixa.h"
+#include "imagefind.h"
+#include "ocrblock.h"
+#include "tabfind.h"
+#include "textlineprojection.h"
+
+class BLOCK_LIST;
+struct Boxa;
+struct Pixa;
+class DENORM;
+class ScrollView;
+class STATS;
+class TO_BLOCK;
+
+namespace tesseract {
+
+class ColPartitionSet;
+class ColPartitionSet_LIST;
+class ColSegment_LIST;
+class ColumnGroup_LIST;
+class LineSpacing;
+class StrokeWidth;
+class TempColumn_LIST;
+class EquationDetectBase;
+
+// The ColumnFinder class extends TabFind to find columns in the grid.
+class TESS_API ColumnFinder : public TabFind {
+ public:
+  // Gridsize is an estimate of the text size in the image. A suitable value
+  // is in TO_BLOCK::line_size after find_components has been used to make
+  // the blobs.
+  // bleft and tright are the bounds of the image (rectangle) being processed.
+  // vlines is a (possibly empty) list of TabVector and vertical_x and y are
+  // the sum logical vertical vector produced by LineFinder::FindVerticalLines.
+  // If cjk_script is true, then broken CJK characters are fixed during
+  // layout analysis to assist in detecting horizontal vs vertically written
+  // textlines.
+  ColumnFinder(int gridsize, const ICOORD& bleft, const ICOORD& tright,
+               int resolution, bool cjk_script, double aligned_gap_fraction,
+               TabVector_LIST* vlines, TabVector_LIST* hlines,
+               int vertical_x, int vertical_y);
+  ~ColumnFinder() override;
+
+  // Accessors for testing.
+  const DENORM* denorm() const {
+    return denorm_;
+  }
+  const TextlineProjection* projection() const {
+    return &projection_;
+  }
+  void set_cjk_script(bool is_cjk) {
+    cjk_script_ = is_cjk;
+  }
+
+  // ======================================================================
+  // The main function of ColumnFinder is broken into pieces to facilitate
+  // optional insertion of orientation and script detection in an efficient
+  // way. The calling sequence IS MANDATORY however, whether or not
+  // OSD is being used:
+  // 1. Construction.
+  // 2. SetupAndFilterNoise.
+  // 3. IsVerticallyAlignedText.
+  // 4. CorrectOrientation.
+  // 5. FindBlocks.
+  // 6. Destruction. Use of a single column finder for multiple images does not
+  //    make sense.
+  // Throughout these steps, the ColPartitions are owned by part_grid_, which
+  // means that it must be kept correct. Exception: big_parts_ owns its
+  // own ColPartitions.
+  // The BLOBNBOXes are owned by the input TO_BLOCK for the whole time, except
+  // for a phase in FindBlocks before TransformToBlocks, when they become
+  // owned by the ColPartitions. The owner() ColPartition of a BLOBNBOX
+  // indicates more of a betrothal for the majority of layout analysis, ie
+  // which ColPartition will take ownership when the blobs are released from
+  // the input TO_BLOCK. Exception: image_bblobs_ owns the fake blobs that
+  // are part of the image regions, as they are not on any TO_BLOCK list.
+  // TODO(rays) break up column finder further into smaller classes, as
+  // there is a lot more to it than column finding now.
+  // ======================================================================
+
+  // Performs initial processing on the blobs in the input_block:
+  // Setup the part_grid, stroke_width_, nontext_map_.
+  // Obvious noise blobs are filtered out and used to mark the nontext_map_.
+  // Initial stroke-width analysis is used to get local text alignment
+  // direction, so the textline projection_ map can be setup.
+  // On return, IsVerticallyAlignedText may be called (now optionally) to
+  // determine the gross textline alignment of the page.
+  void SetupAndFilterNoise(PageSegMode pageseg_mode, Pix* photo_mask_pix,
+                           TO_BLOCK* input_block);
+
+  // Tests for vertical alignment of text (returning true if so), and generates
+  // a list of blobs (in osd_blobs) for orientation and script detection.
+  // block is the single block for the whole page or rectangle to be OCRed.
+  // Note that the vertical alignment may be due to text whose writing direction
+  // is vertical, like say Japanese, or due to text whose writing direction is
+  // horizontal but whose text appears vertically aligned because the image is
+  // not the right way up.
+  // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio.
+  bool IsVerticallyAlignedText(double find_vertical_text_ratio,
+                               TO_BLOCK* block, BLOBNBOX_CLIST* osd_blobs);
+
+  // Rotates the blobs and the TabVectors so that the gross writing direction
+  // (text lines) are horizontal and lines are read down the page.
+  // Applied rotation stored in rotation_.
+  // A second rotation is calculated for application during recognition to
+  // make the rotated blobs upright for recognition.
+  // Subsequent rotation stored in text_rotation_.
+  //
+  // Arguments:
+  //   vertical_text_lines is true if the text lines are vertical.
+  //   recognition_rotation [0..3] is the number of anti-clockwise 90 degree
+  //   rotations from osd required for the text to be upright and readable.
+  void CorrectOrientation(TO_BLOCK* block, bool vertical_text_lines,
+                          int recognition_rotation);
+
+  // Finds blocks of text, image, rule line, table etc, returning them in the
+  // blocks and to_blocks
+  // (Each TO_BLOCK points to the basic BLOCK and adds more information.)
+  // Image blocks are generated by a combination of photo_mask_pix (which may
+  // NOT be nullptr) and the rejected text found during preliminary textline
+  // finding.
+  // The input_block is the result of a call to find_components, and contains
+  // the blobs found in the image or rectangle to be OCRed. These blobs will be
+  // removed and placed in the output blocks, while unused ones will be deleted.
+  // If single_column is true, the input is treated as single column, but
+  // it is still divided into blocks of equal line spacing/text size.
+  // scaled_color is scaled down by scaled_factor from the input color image,
+  // and may be nullptr if the input was not color.
+  // grey_pix is optional, but if present must match the photo_mask_pix in size,
+  // and must be a *real* grey image instead of binary_pix * 255.
+  // thresholds_pix is expected to be present iff grey_pix is present and
+  // can be an integer factor reduction of the grey_pix. It represents the
+  // thresholds that were used to create the binary_pix from the grey_pix.
+  // Small blobs that confuse the segmentation into lines are placed into
+  // diacritic_blobs, with the intention that they be put into the most
+  // appropriate word after the rest of layout analysis.
+  // Returns -1 if the user hits the 'd' key in the blocks window while running
+  // in debug mode, which requests a retry with more debug info.
+  int FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, int scaled_factor,
+                 TO_BLOCK* block, Pix* photo_mask_pix, Pix* thresholds_pix,
+                 Pix* grey_pix, DebugPixa* pixa_debug, BLOCK_LIST* blocks,
+                 BLOBNBOX_LIST* diacritic_blobs, TO_BLOCK_LIST* to_blocks);
+
+  // Get the rotation required to deskew, and its inverse rotation.
+  void GetDeskewVectors(FCOORD* deskew, FCOORD* reskew);
+
+  // Set the equation detection pointer.
+  void SetEquationDetect(EquationDetectBase* detect);
+
+ private:
+  // Displays the blob and block bounding boxes in a window called Blocks.
+  void DisplayBlocks(BLOCK_LIST* blocks);
+  // Displays the column edges at each grid y coordinate defined by
+  // best_columns_.
+  void DisplayColumnBounds(PartSetVector* sets);
+
+  ////// Functions involved in determining the columns used on the page. /////
+
+  // Sets up column_sets_ (the determined column layout at each horizontal
+  // slice). Returns false if the page is empty.
+  bool MakeColumns(bool single_column);
+  // Attempt to improve the column_candidates by expanding the columns
+  // and adding new partitions from the partition sets in src_sets.
+  // Src_sets may be equal to column_candidates, in which case it will
+  // use them as a source to improve themselves.
+  void ImproveColumnCandidates(PartSetVector* src_sets,
+                               PartSetVector* column_sets);
+  // Prints debug information on the column candidates.
+  void PrintColumnCandidates(const char* title);
+  // Finds the optimal set of columns that cover the entire image with as
+  // few changes in column partition as possible.
+  // Returns true if any part of the page is multi-column.
+  bool AssignColumns(const PartSetVector& part_sets);
+  // Finds the biggest range in part_sets_ that has no assigned column, but
+  // column assignment is possible.
+  bool BiggestUnassignedRange(int set_count, const bool* any_columns_possible,
+                              int* start, int* end);
+  // Finds the modal compatible column_set_ index within the given range.
+  int RangeModalColumnSet(int** column_set_costs, const int* assigned_costs,
+                          int start, int end);
+  // Given that there are many column_set_id compatible columns in the range,
+  // shrinks the range to the longest contiguous run of compatibility, allowing
+  // gaps where no columns are possible, but not where competing columns are
+  // possible.
+  void ShrinkRangeToLongestRun(int** column_set_costs,
+                               const int* assigned_costs,
+                               const bool* any_columns_possible,
+                               int column_set_id,
+                               int* best_start, int* best_end);
+  // Moves start in the direction of step, up to, but not including end while
+  // the only incompatible regions are no more than kMaxIncompatibleColumnCount
+  // in size, and the compatible regions beyond are bigger.
+  void ExtendRangePastSmallGaps(int** column_set_costs,
+                                const int* assigned_costs,
+                                const bool* any_columns_possible,
+                                int column_set_id,
+                                int step, int end, int* start);
+  // Assigns the given column_set_id to the part_sets_ in the given range.
+  void AssignColumnToRange(int column_set_id, int start, int end,
+                           int** column_set_costs, int* assigned_costs);
+
+  // Computes the mean_column_gap_.
+  void ComputeMeanColumnGap(bool any_multi_column);
+
+  //////// Functions that manipulate ColPartitions in the part_grid_ /////
+  //////// to split, merge, find margins, and find types.  //////////////
+
+  // Hoovers up all un-owned blobs and deletes them.
+  // The rest get released from the block so the ColPartitions can pass
+  // ownership to the output blocks.
+  void ReleaseBlobsAndCleanupUnused(TO_BLOCK* block);
+  // Splits partitions that cross columns where they have nothing in the gap.
+  void GridSplitPartitions();
+  // Merges partitions where there is vertical overlap, within a single column,
+  // and the horizontal gap is small enough.
+  void GridMergePartitions();
+  // Inserts remaining noise blobs into the most applicable partition if any.
+  // If there is no applicable partition, then the blobs are deleted.
+  void InsertRemainingNoise(TO_BLOCK* block);
+  // Remove partitions that come from horizontal lines that look like
+  // underlines, but are not part of a table.
+  void GridRemoveUnderlinePartitions();
+  // Add horizontal line separators as partitions.
+  void GridInsertHLinePartitions();
+  // Add vertical line separators as partitions.
+  void GridInsertVLinePartitions();
+  // For every ColPartition in the grid, sets its type based on position
+  // in the columns.
+  void SetPartitionTypes();
+  // Only images remain with multiple types in a run of partners.
+  // Sets the type of all in the group to the maximum of the group.
+  void SmoothPartnerRuns();
+
+  //////// Functions that make the final output blocks             ///////
+
+  // Helper functions for TransformToBlocks.
+  // Add the part to the temp list in the correct order.
+  void AddToTempPartList(ColPartition* part, ColPartition_CLIST* temp_list);
+  // Add everything from the temp list to the work_set assuming correct order.
+  void EmptyTempPartList(ColPartition_CLIST* temp_list,
+                         WorkingPartSet_LIST* work_set);
+
+  // Transform the grid of partitions to the output blocks.
+  void TransformToBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
+
+  // Reflect the blob boxes (but not the outlines) in the y-axis so that
+  // the blocks get created in the correct RTL order. Rotates the blobs
+  // in the input_block and the bblobs list.
+  // The reflection is undone in RotateAndReskewBlocks by
+  // reflecting the blocks themselves, and then recomputing the blob bounding
+  // boxes.
+  void ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs);
+
+  // Undo the deskew that was done in FindTabVectors, as recognition is done
+  // without correcting blobs or blob outlines for skew.
+  // Reskew the completed blocks to put them back to the original rotated coords
+  // that were created by CorrectOrientation.
+  // If the input_is_rtl, then reflect the blocks in the y-axis to undo the
+  // reflection that was done before FindTabVectors.
+  // Blocks that were identified as vertical text (relative to the rotated
+  // coordinates) are further rotated so the text lines are horizontal.
+  // blob polygonal outlines are rotated to match the position of the blocks
+  // that they are in, and their bounding boxes are recalculated to be accurate.
+  // Record appropriate inverse transformations and required
+  // classifier transformation in the blocks.
+  void RotateAndReskewBlocks(bool input_is_rtl, TO_BLOCK_LIST* to_blocks);
+
+  // Computes the rotations for the block (to make textlines horizontal) and
+  // for the blobs (for classification) and sets the appropriate members
+  // of the given block.
+  // Returns the rotation that needs to be applied to the blobs to make
+  // them sit in the rotated block.
+  FCOORD ComputeBlockAndClassifyRotation(BLOCK* block);
+
+  // If true then the page language is cjk, so it is safe to perform
+  // FixBrokenCJK.
+  bool cjk_script_;
+  // The minimum gutter width to apply for finding columns.
+  // Modified when vertical text is detected to prevent detection of
+  // vertical text lines as columns.
+  int min_gutter_width_;
+  // The mean gap between columns over the page.
+  int mean_column_gap_;
+  // Config param saved at construction time. Modifies min_gutter_width_ with
+  // vertical text to prevent detection of vertical text as columns.
+  double tabfind_aligned_gap_fraction_;
+  // The rotation vector needed to convert original coords to deskewed.
+  FCOORD deskew_;
+  // The rotation vector needed to convert deskewed back to original coords.
+  FCOORD reskew_;
+  // The rotation vector used to rotate vertically oriented pages.
+  FCOORD rotation_;
+  // The rotation vector needed to convert the rotated back to original coords.
+  FCOORD rerotate_;
+  // The additional rotation vector needed to rotate text for recognition.
+  FCOORD text_rotation_;
+  // The column_sets_ contain the ordered candidate ColPartitionSets that
+  // define the possible divisions of the page into columns.
+  PartSetVector column_sets_;
+  // A simple array of pointers to the best assigned column division at
+  // each grid y coordinate.
+  ColPartitionSet** best_columns_;
+  // The grid used for creating initial partitions with strokewidth.
+  StrokeWidth* stroke_width_;
+  // The grid used to hold ColPartitions after the columns have been determined.
+  ColPartitionGrid part_grid_;
+  // List of ColPartitions that are no longer needed after they have been
+  // turned into regions, but are kept around because they are referenced
+  // by the part_grid_.
+  ColPartition_LIST good_parts_;
+  // List of ColPartitions that are big and might be dropcap or vertically
+  // joined.
+  ColPartition_LIST big_parts_;
+  // List of ColPartitions that have been declared noise.
+  ColPartition_LIST noise_parts_;
+  // The fake blobs that are made from the images.
+  BLOBNBOX_LIST image_bblobs_;
+  // Horizontal line separators.
+  TabVector_LIST horizontal_lines_;
+  // Image map of photo/noise areas on the page.
+  Pix* nontext_map_;
+  // Textline projection map.
+  TextlineProjection projection_;
+  // Sequence of DENORMS that indicate how to get back to the original image
+  // coordinate space. The destructor must delete all the DENORMs in the chain.
+  DENORM* denorm_;
+
+  // Various debug windows that automatically go away on completion.
+  ScrollView* input_blobs_win_;
+
+  // The equation region detector pointer. Note: This pointer is passed in by
+  // member function SetEquationDetect, and responsibility for releasing it
+  // does NOT lie with this class.
+  EquationDetectBase* equation_detect_;
+
+  // Allow a subsequent instance to reuse the blocks window.
+  // Not thread-safe, but multiple threads shouldn't be using windows anyway.
+  static ScrollView* blocks_win_;
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_TEXTORD_COLFIND_H_
diff --git a/tesseract/src/textord/colpartition.cpp b/tesseract/src/textord/colpartition.cpp
new file mode 100644
index 00000000..6dcdda74
--- /dev/null
+++ b/tesseract/src/textord/colpartition.cpp
@@ -0,0 +1,2597 @@
+///////////////////////////////////////////////////////////////////////
+// File:        colpartition.cpp
+// Description: Class to hold partitions of the page that correspond
+//              roughly to text lines.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "colpartition.h"
+#include "colpartitiongrid.h"
+#include "colpartitionset.h"
+#include "detlinefit.h"
+#include "dppoint.h"
+#include "imagefind.h"
+#include "workingpartset.h"
+#include "host.h"              // for NearlyEqual
+
+#include <algorithm>
+
+namespace tesseract {
+
+ELIST2IZE(ColPartition)  // presumably backs ColPartition_LIST/_IT -- confirm
+CLISTIZE(ColPartition)  // presumably backs ColPartition_CLIST/_C_IT -- confirm
+
+//////////////// ColPartition Implementation ////////////////
+
+// Enum to refer to the entries in a neighbourhood of lines.
+// Used by SmoothSpacings to test for blips with OKSpacingBlip.
+enum SpacingNeighbourhood {
+  PN_ABOVE2,  // First entry, value 0.
+  PN_ABOVE1,
+  PN_UPPER,
+  PN_LOWER,
+  PN_BELOW1,
+  PN_BELOW2,
+  PN_COUNT  // Number of entries above (6); must remain the last enumerator.
+};
+
+// Maximum change in spacing (in inches) to ignore.
+const double kMaxSpacingDrift = 1.0 / 72;  // 1/72 is one point.
+// Maximum fraction of line height used as an additional allowance
+// for top spacing.
+const double kMaxTopSpacingFraction = 0.25;
+// What multiple of the largest line height should be used as an upper bound
+// for whether lines are in the same text block?
+const double kMaxSameBlockLineSpacing = 3;
+// Maximum ratio of sizes for lines to be considered the same size.
+const double kMaxSizeRatio = 1.5;
+// Fraction of max of leader width and gap for max IQR of gaps.
+const double kMaxLeaderGapFractionOfMax = 0.25;
+// Fraction of min of leader width and gap for max IQR of gaps.
+const double kMaxLeaderGapFractionOfMin = 0.5;
+// Minimum number of blobs to be considered a leader.
+const int kMinLeaderCount = 5;
+// Minimum score for a STRONG_CHAIN textline.
+const int kMinStrongTextValue = 6;
+// Minimum score for a CHAIN textline.
+const int kMinChainTextValue = 3;
+// Minimum number of blobs for strong horizontal text lines.
+const int kHorzStrongTextlineCount = 8;
+// Minimum height (in image pixels) for strong horizontal text lines.
+const int kHorzStrongTextlineHeight = 10;
+// Minimum aspect ratio for strong horizontal text lines.
+const int kHorzStrongTextlineAspect = 5;
+// Maximum upper quartile error allowed on a baseline fit as a fraction
+// of height.
+const double kMaxBaselineError = 0.4375;
+// Minimum coverage for a good baseline between vectors.
+const double kMinBaselineCoverage = 0.5;
+// Maximum RMS color noise at which colors are still compared.
+const int kMaxRMSColorNoise = 128;
+// Maximum color distance at which a partition may still be used when
+// smoothing neighbouring types. This is a squared distance.
+const int kMaxColorDistance = 900;
+
+// Creates a partition with no boxes. blob_type is the blob_region_type_ of
+// its blobs; vertical is the logical vertical on the possibly skewed image.
+ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD& vertical)
+    : left_margin_(-INT32_MAX),
+      right_margin_(INT32_MAX),
+      median_bottom_(INT32_MAX), median_top_(-INT32_MAX),
+      median_left_(INT32_MAX), median_right_(-INT32_MAX),
+      blob_type_(blob_type), vertical_(vertical) {
+  memset(special_blobs_densities_, 0, sizeof special_blobs_densities_);
+}
+
+// Builds a stand-in ColPartition around one stand-in BLOBNBOX, both derived
+// from the single TBOX box, with both margins set to the box's own edges.
+// WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and
+// the ColPartition owns the BLOBNBOX!!!
+// Call DeleteBoxes before deleting the ColPartition.
+ColPartition* ColPartition::FakePartition(
+    const TBOX& box, PolyBlockType block_type, BlobRegionType blob_type,
+    BlobTextFlowType flow) {
+  auto* fake = new ColPartition(blob_type, ICOORD(0, 1));
+  fake->set_type(block_type);
+  fake->set_flow(flow);
+  BLOBNBOX* fake_bbox = new BLOBNBOX(C_BLOB::FakeBlob(box));
+  fake->AddBox(fake_bbox);
+  fake->set_left_margin(box.left());
+  fake->set_right_margin(box.right());
+  fake->SetBlobTypes();
+  fake->ComputeLimits();
+  fake->ClaimBoxes();
+  return fake;
+}
+
+// Wraps the given real BLOBNBOX in a new single-blob "big" partition: one
+// bigger than the surrounding text that may be a dropcap, two or more
+// vertically touching characters, or some graphic element. Any previous owner
+// of the box is cleared first, and the partition is marked block-owned.
+// If the given list is not nullptr, the partition is also appended to it.
+ColPartition* ColPartition::MakeBigPartition(
+    BLOBNBOX* box, ColPartition_LIST* big_part_list) {
+  box->set_owner(nullptr);
+  auto* big_part = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
+  big_part->set_flow(BTFT_NONE);
+  big_part->AddBox(box);
+  big_part->ComputeLimits();
+  big_part->ClaimBoxes();
+  big_part->SetBlobTypes();
+  big_part->set_block_owned(true);
+  if (big_part_list == nullptr) return big_part;
+  // The caller supplied a list, so record the new partition at its end.
+  ColPartition_IT list_it(big_part_list);
+  list_it.add_to_end(big_part);
+  return big_part;
+}
+
+ColPartition::~ColPartition() {
+  // Ask every upper and lower partner to remove this partition from its
+  // own partner lists, so that none is left referring to a deleted object.
+  ColPartition_C_IT upper_it(&upper_partners_);
+  for (upper_it.mark_cycle_pt(); !upper_it.cycled_list(); upper_it.forward()) {
+    upper_it.data()->RemovePartner(false, this);
+  }
+  ColPartition_C_IT lower_it(&lower_partners_);
+  for (lower_it.mark_cycle_pt(); !lower_it.cycled_list(); lower_it.forward()) {
+    lower_it.data()->RemovePartner(true, this);
+  }
+}
+
+// Constructs a fake ColPartition with no BLOBNBOXes to represent a
+// horizontal or vertical line, given a type and a bounding box. The medians
+// are taken straight from the box; the keys from BoxLeftKey/BoxRightKey.
+ColPartition* ColPartition::MakeLinePartition(
+    BlobRegionType blob_type, const ICOORD& vertical,
+    int left, int bottom, int right, int top) {
+  auto* line_part = new ColPartition(blob_type, vertical);
+  line_part->bounding_box_ = TBOX(left, bottom, right, top);
+  line_part->median_left_ = left;
+  line_part->median_right_ = right;
+  line_part->median_width_ = right - left;
+  line_part->median_bottom_ = bottom;
+  line_part->median_top_ = top;
+  line_part->median_height_ = top - bottom;
+  line_part->left_key_ = line_part->BoxLeftKey();
+  line_part->right_key_ = line_part->BoxRightKey();
+  return line_part;
+}
+
+
+// Inserts bbox into the partition and grows the bounding box to include it.
+// boxes_ is kept sorted (by bottom for vertical types, otherwise by left)
+// via add_sorted, ensuring that no box is recorded twice.
+void ColPartition::AddBox(BLOBNBOX* bbox) {
+  const TBOX box = bbox->bounding_box();
+  // The first box defines the bounds; later boxes extend them.
+  if (boxes_.length() != 0) {
+    bounding_box_ += box;
+  } else {
+    bounding_box_ = box;
+  }
+  const bool vertical = IsVerticalType();
+  if (vertical != last_add_was_vertical_) {
+    // The sort key changed since the previous add, so re-sort first.
+    if (vertical) {
+      boxes_.sort(SortByBoxBottom<BLOBNBOX>);
+    } else {
+      boxes_.sort(SortByBoxLeft<BLOBNBOX>);
+    }
+    last_add_was_vertical_ = vertical;
+  }
+  if (vertical) {
+    boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>, true, bbox);
+  } else {
+    boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>, true, bbox);
+  }
+  // Keys flagged by left_key_tab_/right_key_tab_ are not recomputed.
+  if (!left_key_tab_) left_key_ = BoxLeftKey();
+  if (!right_key_tab_) right_key_ = BoxRightKey();
+  if (TabFind::WithinTestRegion(2, box.left(), box.bottom()))
+    tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
+            box.left(), box.bottom(), box.right(), box.top(),
+            bounding_box_.left(), bounding_box_.right());
+}
+
+// Removes the given box from the partition (first match only) and, if it
+// was present, recomputes the limits. Does nothing if box is not a member.
+void ColPartition::RemoveBox(BLOBNBOX* box) {
+  BLOBNBOX_C_IT it(&boxes_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    if (it.data() != box) continue;
+    it.extract();
+    ComputeLimits();
+    return;
+  }
+}
+
+// Returns the box with the largest extent perpendicular to the presumed text
+// flow (width for vertical types, else height), or nullptr if there are none.
+BLOBNBOX* ColPartition::BiggestBox() {
+  const bool vertical = IsVerticalType();
+  BLOBNBOX* winner = nullptr;
+  BLOBNBOX_C_IT it(&boxes_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* candidate = it.data();
+    const TBOX& cand_box = candidate->bounding_box();
+    bool bigger = winner == nullptr;
+    if (!bigger) {
+      const TBOX& best_box = winner->bounding_box();
+      bigger = vertical ? cand_box.width() > best_box.width()
+                        : cand_box.height() > best_box.height();
+    }
+    if (bigger) winner = candidate;
+  }
+  return winner;
+}
+
+// Returns the union of the bounding boxes of all members other than box.
+TBOX ColPartition::BoundsWithoutBox(BLOBNBOX* box) {
+  TBOX bounds;
+  BLOBNBOX_C_IT it(&boxes_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* member = it.data();
+    if (member == box) continue;
+    bounds += member->bounding_box();
+  }
+  return bounds;
+}
+
+// Marks every box in boxes_ as owned by this partition. A box that already
+// has an owner must be owned by this, or the host assertion fails.
+void ColPartition::ClaimBoxes() {
+  BLOBNBOX_C_IT it(&boxes_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* blob = it.data();
+    ColPartition* other = blob->owner();
+    if (other != nullptr) {
+      // Already owned: the only acceptable owner is this partition.
+      ASSERT_HOST(other == this);
+      continue;
+    }
+    blob->set_owner(this);
+  }
+}
+
+// Clears the owner of every blob here so they can be deleted independently.
+void ColPartition::DisownBoxes() {
+  BLOBNBOX_C_IT it(&boxes_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* const bblob = it.data();
+    // A blob owned by some other partition trips the host assertion.
+    ASSERT_HOST(bblob->owner() == this || bblob->owner() == nullptr);
+    bblob->set_owner(nullptr);
+  }
+}
+
+// Clears the owner of those blobs in this partition that are owned by this
+// partition, so they can be deleted independently of the ColPartition.
+// Blobs with any other owner (or none) are left exactly as they are, and
+// no assertion is raised for them.
+void ColPartition::DisownBoxesNoAssert() {
+  BLOBNBOX_C_IT it(&boxes_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* blob = it.data();
+    if (blob->owner() != this) continue;  // Not ours: keep its owner.
+    blob->set_owner(nullptr);
+  }
+}
+
+// Removes every non-leader blob from boxes_, clearing its owner if that was
+// this partition. If any (leader) boxes remain, the partition becomes a
+// leader partition with recomputed limits. Returns true if any boxes remain.
+bool ColPartition::ReleaseNonLeaderBoxes() {
+  BLOBNBOX_C_IT it(&boxes_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* blob = it.data();
+    if (blob->flow() == BTFT_LEADER) continue;  // Leaders stay in the list.
+    if (blob->owner() == this) blob->set_owner(nullptr);
+    it.extract();
+  }
+  const bool boxes_remain = !it.empty();
+  if (boxes_remain) {
+    flow_ = BTFT_LEADER;
+    ComputeLimits();
+  }
+  return boxes_remain;
+}
+
+// Deletes every BLOBNBOX in boxes_ together with its underlying C_BLOB.
+void ColPartition::DeleteBoxes() {
+  // boxes_ is a C_LIST, yet here it owns the BLOBNBOXes and their C_BLOBs.
+  BLOBNBOX_C_IT it(&boxes_);
+  while (!it.empty()) {
+    BLOBNBOX* doomed = it.extract();
+    delete doomed->cblob();
+    delete doomed;
+    it.forward();
+  }
+}
+
+// Reflects the partition in the y-axis, assuming that its blobs have already
+// been reflected. Only some members are corrected, since this is assumed to
+// be used shortly after creation, before many of the members are in use.
+void ColPartition::ReflectInYAxis() {
+  BLOBNBOX_CLIST flipped_boxes;
+  BLOBNBOX_C_IT flipped_it(&flipped_boxes);
+  BLOBNBOX_C_IT src_it(&boxes_);
+  // Reverse the order of the boxes_ by moving them through a second list.
+  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
+    BLOBNBOX* blob = src_it.extract();
+    flipped_it.add_before_then_move(blob);
+  }
+  src_it.add_list_after(&flipped_boxes);
+  ASSERT_HOST(!left_key_tab_ && !right_key_tab_);
+  const int old_left_margin = left_margin_;
+  left_margin_ = -right_margin_;
+  right_margin_ = -old_left_margin;
+  ComputeLimits();
+}
+
+// Returns true if this is a legal partition, i.e. all of these hold:
+// bounding box left <= bounding box right; the margins lie on or outside
+// the bounding box; and the keys lie on or outside the bounding box keys.
+bool ColPartition::IsLegal() {
+  if (bounding_box_.right() < bounding_box_.left()) {
+    if (textord_debug_bugs) {
+      tprintf("Bounding box invalid\n");
+      Print();
+    }
+    return false;  // Bounding box invalid.
+  }
+  const bool margins_ok = left_margin_ <= bounding_box_.left() &&
+                          right_margin_ >= bounding_box_.right();
+  if (!margins_ok) {
+    if (textord_debug_bugs) {
+      tprintf("Margins invalid\n");
+      Print();
+    }
+    return false;  // Margins invalid.
+  }
+  const bool keys_ok = left_key_ <= BoxLeftKey() &&
+                       right_key_ >= BoxRightKey();
+  if (!keys_ok) {
+    if (textord_debug_bugs) {
+      tprintf("Key inside box: %d v %d or %d v %d\n",
+              left_key_, BoxLeftKey(), right_key_, BoxRightKey());
+      Print();
+    }
+    return false;  // Keys inside the box.
+  }
+  return true;
+}
+
+// Returns true if the left and right edges of this and other are
+// approximately equal. Both edges are evaluated at the y midway between the
+// two partitions' MidY() values, scaled down by kColumnWidthFactor, and
+// compared with a tolerance of 1.
+bool ColPartition::MatchingColumns(const ColPartition& other) const {
+  const int mid_y = (MidY() + other.MidY()) / 2;
+  const bool left_matches =
+      NearlyEqual(other.LeftAtY(mid_y) / kColumnWidthFactor,
+                  LeftAtY(mid_y) / kColumnWidthFactor, 1);
+  if (!left_matches) {
+    return false;
+  }
+  const bool right_matches =
+      NearlyEqual(other.RightAtY(mid_y) / kColumnWidthFactor,
+                  RightAtY(mid_y) / kColumnWidthFactor, 1);
+  return right_matches;
+}
+
+// Returns true if the colors match for two text partitions.
+// The match is rejected outright when the L_ALPHA_CHANNEL entry of color1_
+// exceeds kMaxRMSColorNoise for both partitions. Otherwise each of the two
+// colors of one partition must lie within kMaxColorDistance of the line
+// through the two colors of the other partition, in both directions.
+bool ColPartition::MatchingTextColor(const ColPartition& other) const {
+  const bool both_noisy =
+      color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise &&
+      other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise;
+  if (both_noisy) {
+    return false;  // Too noisy.
+  }
+
+  // Distances of this partition's colors from other's color line.
+  const double mine1_to_other = ImageFind::ColorDistanceFromLine(
+      other.color1_, other.color2_, color1_);
+  const double mine2_to_other = ImageFind::ColorDistanceFromLine(
+      other.color1_, other.color2_, color2_);
+  // Distances of other's colors from this partition's color line.
+  const double other1_to_mine = ImageFind::ColorDistanceFromLine(
+      color1_, color2_, other.color1_);
+  const double other2_to_mine = ImageFind::ColorDistanceFromLine(
+      color1_, color2_, other.color2_);
+  // All 4 distances must be small enough.
+  if (mine1_to_other >= kMaxColorDistance) return false;
+  if (mine2_to_other >= kMaxColorDistance) return false;
+  if (other1_to_mine >= kMaxColorDistance) return false;
+  return other2_to_mine < kMaxColorDistance;
+}
+
+// Returns true if the sizes match for two text partitions, taking
+// orientation into account: median widths are compared if either partition
+// is BRT_VERT_TEXT, otherwise median heights. See also SizesSimilar.
+bool ColPartition::MatchingSizes(const ColPartition& other) const {
+  const bool any_vertical =
+      blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT;
+  if (any_vertical) {
+    return !TabFind::DifferentSizes(median_width_, other.median_width_);
+  }
+  return !TabFind::DifferentSizes(median_height_, other.median_height_);
+}
+
+// Returns true if there is no tabstop violation in merging this and other.
+// A violation is a pair of horizontally disjoint boxes where the box on one
+// side also lies beyond the blob rule line of the partition on the other
+// side. The rule lines are only consulted once the boxes are known to be
+// disjoint.
+bool ColPartition::ConfirmNoTabViolation(const ColPartition& other) const {
+  const TBOX& mine = bounding_box_;
+  const TBOX& theirs = other.bounding_box_;
+  // this is left of other, and left of other's left rule.
+  if (mine.right() < theirs.left() && mine.right() < other.LeftBlobRule()) {
+    return false;
+  }
+  // other is left of this, and left of this partition's left rule.
+  if (theirs.right() < mine.left() && theirs.right() < LeftBlobRule()) {
+    return false;
+  }
+  // this is right of other, and right of other's right rule.
+  if (mine.left() > theirs.right() && mine.left() > other.RightBlobRule()) {
+    return false;
+  }
+  // other is right of this, and right of this partition's right rule.
+  if (theirs.left() > mine.right() && theirs.left() > RightBlobRule()) {
+    return false;
+  }
+  return true;
+}
+
+// Returns true if other has a similar stroke width to this.
+// Blobs of the two partitions are paired by position in their lists and
+// compared with BLOBNBOX::MatchingStrokeWidth using the given tolerances;
+// the walk stops as soon as either list has been fully traversed. The
+// result is true when matching pairs strictly outnumber non-matching pairs.
+bool ColPartition::MatchingStrokeWidth(const ColPartition& other,
+                                       double fractional_tolerance,
+                                       double constant_tolerance) const {
+  int matches = 0;
+  int mismatches = 0;
+  // The iterators need non-const lists, but the lists are only read.
+  BLOBNBOX_C_IT mine_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
+  BLOBNBOX_C_IT theirs_it(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
+  mine_it.mark_cycle_pt();
+  theirs_it.mark_cycle_pt();
+  for (; !mine_it.cycled_list() && !theirs_it.cycled_list();
+       mine_it.forward(), theirs_it.forward()) {
+    const bool similar = mine_it.data()->MatchingStrokeWidth(
+        *theirs_it.data(), fractional_tolerance, constant_tolerance);
+    if (similar) {
+      ++matches;
+    } else {
+      ++mismatches;
+    }
+  }
+  return matches > mismatches;
+}
+
+// Returns true if candidate is an acceptable base-character partition to
+// merge with this, where this holds the diacritics.
+// Returns true if:
+// (1) this is a ColPartition containing only diacritics, and
+// (2) the base characters indicated on the diacritics all believably lie
+// within the text line of the candidate ColPartition.
+// If debug is true, the reason for the decision is printed.
+bool ColPartition::OKDiacriticMerge(const ColPartition& candidate,
+                                    bool debug) const {
+  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
+  // Intersection of the vertical ranges of all the base characters:
+  // [max_bottom, min_top].
+  int min_top = INT32_MAX;
+  int max_bottom = -INT32_MAX;
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* blob = it.data();
+    if (!blob->IsDiacritic()) {
+      if (debug) {
+        tprintf("Blob is not a diacritic:");
+        blob->bounding_box().print();
+      }
+      return false;  // All blobs must have diacritic bases.
+    }
+    if (blob->base_char_top() < min_top)
+      min_top = blob->base_char_top();
+    if (blob->base_char_bottom() > max_bottom)
+      max_bottom = blob->base_char_bottom();
+  }
+  // If the intersection of all vertical ranges of all base characters
+  // overlaps the median range of the candidate, then it is OK.
+  bool result = min_top > candidate.median_bottom_ &&
+                max_bottom < candidate.median_top_;
+  if (debug) {
+    if (result)
+      tprintf("OKDiacritic!\n");
+    else
+      // Report the candidate's median range, as that is what was tested.
+      tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n",
+              max_bottom, min_top,
+              candidate.median_bottom_, candidate.median_top_);
+  }
+  return result;
+}
+
+// Sets the left sort key from the tab vector, or from the bounding box if
+// tab_vector is nullptr. If the tab vector's key lies inside the bounding
+// box (is greater than the box's left key), the edge of the box is used as
+// the key anyway. left_key_tab_ records whether the tab vector's key was
+// kept.
+void ColPartition::SetLeftTab(const TabVector* tab_vector) {
+  bool key_from_tab = false;
+  if (tab_vector != nullptr) {
+    left_key_ = tab_vector->sort_key();
+    key_from_tab = left_key_ <= BoxLeftKey();
+  }
+  left_key_tab_ = key_from_tab;
+  if (!key_from_tab) {
+    left_key_ = BoxLeftKey();
+  }
+}
+
+// Sets the right sort key from the tab vector, or from the bounding box if
+// tab_vector is nullptr. If the tab vector's key lies inside the bounding
+// box (is less than the box's right key), the edge of the box is used as
+// the key anyway. right_key_tab_ records whether the tab vector's key was
+// kept.
+void ColPartition::SetRightTab(const TabVector* tab_vector) {
+  bool key_from_tab = false;
+  if (tab_vector != nullptr) {
+    right_key_ = tab_vector->sort_key();
+    key_from_tab = right_key_ >= BoxRightKey();
+  }
+  right_key_tab_ = key_from_tab;
+  if (!key_from_tab) {
+    right_key_ = BoxRightKey();
+  }
+}
+
+// Copies the left tab from the src partition, but if take_box is true (or
+// src has no left tab key), moves the left edge of this partition's
+// bounding box to src's box left key evaluated at this partition's MidY()
+// and uses the box as the key. If the left margin then lies inside the
+// box, src's left margin is taken too.
+void ColPartition::CopyLeftTab(const ColPartition& src, bool take_box) {
+  left_key_tab_ = !take_box && src.left_key_tab_;
+  if (!left_key_tab_) {
+    bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY()));
+    left_key_ = BoxLeftKey();
+  } else {
+    left_key_ = src.left_key_;
+  }
+  const bool margin_inside_box = left_margin_ > bounding_box_.left();
+  if (margin_inside_box) {
+    left_margin_ = src.left_margin_;
+  }
+}
+
+// Copies the right tab from the src partition, but if take_box is true (or
+// src has no right tab key), moves the right edge of this partition's
+// bounding box to src's box right key evaluated at this partition's MidY()
+// and uses the box as the key. If the right margin then lies inside the
+// box, src's right margin is taken too.
+void ColPartition::CopyRightTab(const ColPartition& src, bool take_box) {
+  right_key_tab_ = !take_box && src.right_key_tab_;
+  if (!right_key_tab_) {
+    bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY()));
+    right_key_ = BoxRightKey();
+  } else {
+    right_key_ = src.right_key_;
+  }
+  const bool margin_inside_box = right_margin_ < bounding_box_.right();
+  if (margin_inside_box) {
+    right_margin_ = src.right_margin_;
+  }
+}
+
+// Returns the left rule line x coord of the first blob on boxes_, which is
+// taken to be the leftmost blob. The list must not be empty.
+int ColPartition::LeftBlobRule() const {
+  // The iterator needs a non-const list, but the list is only read.
+  BLOBNBOX_CLIST* blobs = const_cast<BLOBNBOX_CLIST*>(&boxes_);
+  BLOBNBOX_C_IT first_it(blobs);
+  BLOBNBOX* first_blob = first_it.data();
+  return first_blob->left_rule();
+}
+// Returns the right rule line x coord of the last blob on boxes_, which is
+// taken to be the rightmost blob. The list must not be empty.
+int ColPartition::RightBlobRule() const {
+  // The iterator needs a non-const list, but the list is only read.
+  BLOBNBOX_CLIST* blobs = const_cast<BLOBNBOX_CLIST*>(&boxes_);
+  BLOBNBOX_C_IT last_it(blobs);
+  last_it.move_to_last();
+  BLOBNBOX* last_blob = last_it.data();
+  return last_blob->right_rule();
+}
+
+// Returns the stored density for the given special text type.
+// Asserts that type is below BSTT_COUNT.
+float ColPartition::SpecialBlobsDensity(const BlobSpecialTextType type) const {
+  ASSERT_HOST(type < BSTT_COUNT);
+  const float density = special_blobs_densities_[type];
+  return density;
+}
+
+// Returns the number of blobs on boxes_ whose special_text_type() equals
+// the given type. Asserts that type is below BSTT_COUNT.
+int ColPartition::SpecialBlobsCount(const BlobSpecialTextType type) {
+  ASSERT_HOST(type < BSTT_COUNT);
+  BLOBNBOX_C_IT box_it(&boxes_);
+  int matches = 0;
+  box_it.mark_cycle_pt();
+  while (!box_it.cycled_list()) {
+    if (box_it.data()->special_text_type() == type) {
+      ++matches;
+    }
+    box_it.forward();
+  }
+  return matches;
+}
+
+// Stores the given density for the given special text type.
+// Asserts that type is below BSTT_COUNT.
+void ColPartition::SetSpecialBlobsDensity(
+    const BlobSpecialTextType type, const float density) {
+  ASSERT_HOST(type < BSTT_COUNT);
+  float& slot = special_blobs_densities_[type];
+  slot = density;
+}
+
+// Recomputes special_blobs_densities_ from the blobs on boxes_: each entry
+// becomes the fraction of blobs having that special text type. All entries
+// are zero if the partition has no blobs.
+void ColPartition::ComputeSpecialBlobsDensity() {
+  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
+  if (boxes_.empty()) {
+    return;
+  }
+
+  // First pass: count the blobs of each type.
+  BLOBNBOX_C_IT box_it(&boxes_);
+  box_it.mark_cycle_pt();
+  while (!box_it.cycled_list()) {
+    const BlobSpecialTextType blob_type = box_it.data()->special_text_type();
+    special_blobs_densities_[blob_type]++;
+    box_it.forward();
+  }
+
+  // Second pass: turn the counts into fractions of the blob count.
+  for (float& density : special_blobs_densities_) {
+    density /= boxes_.length();
+  }
+}
+
+// Adds partner as a partner above this if upper is true, otherwise below.
+// The partnership is recorded symmetrically: this is added to the opposite
+// list of partner first, then partner is added to the matching list of
+// this. Entries are added uniquely, keeping each list sorted by box left.
+void ColPartition::AddPartner(bool upper, ColPartition* partner) {
+  ColPartition_CLIST* partners_list_for_this =
+      upper ? &partner->lower_partners_ : &partner->upper_partners_;
+  ColPartition_CLIST* own_list_for_partner =
+      upper ? &upper_partners_ : &lower_partners_;
+  partners_list_for_this->add_sorted(SortByBoxLeft<ColPartition>, true, this);
+  own_list_for_partner->add_sorted(SortByBoxLeft<ColPartition>, true,
+                                   partner);
+}
+
+// Removes the first occurrence of partner from the upper (if upper is true)
+// or lower partner list of this. this is NOT removed from partner's lists:
+// the removal is deliberately asymmetric so as not to mess up an iterator
+// that is working on partner's partner list.
+void ColPartition::RemovePartner(bool upper, ColPartition* partner) {
+  ColPartition_CLIST* side = upper ? &upper_partners_ : &lower_partners_;
+  ColPartition_C_IT side_it(side);
+  side_it.mark_cycle_pt();
+  while (!side_it.cycled_list()) {
+    if (side_it.data() == partner) {
+      side_it.extract();
+      return;
+    }
+    side_it.forward();
+  }
+}
+
+// Returns the only partner on the upper (if upper is true) or lower partner
+// list when that list holds exactly one partition, otherwise nullptr.
+ColPartition* ColPartition::SingletonPartner(bool upper) {
+  ColPartition_CLIST* side = upper ? &upper_partners_ : &lower_partners_;
+  if (side->singleton()) {
+    ColPartition_C_IT side_it(side);
+    return side_it.data();
+  }
+  return nullptr;
+}
+
+// Merges the other partition into this and deletes other.
+// - Both partitions must agree on owns_blobs() (asserted), and between them
+//   must hold at least one blob (asserted).
+// - special_blobs_densities_ becomes the average of the two partitions'
+//   densities, weighted by their blob counts.
+// - Blobs on other's list owned by other (or by nobody) move to this; blobs
+//   owned by a third partition are dropped from the list and left to their
+//   owner.
+// - Margins and keys are widened to cover both, flow/blob type are combined
+//   via DominatesInMerge, the blobs are re-sorted and limits recomputed.
+// - All of other's partners become partners of this instead.
+// - If cb is not null, it is used to recompute the column goodness.
+void ColPartition::Absorb(ColPartition* other, WidthCallback cb) {
+  // The result has to either own all of the blobs or none of them.
+  // Verify the flag is consistent.
+  ASSERT_HOST(owns_blobs() == other->owns_blobs());
+  // TODO(nbeato): check owns_blobs better. Right now owns_blobs
+  // should always be true when this is called. So there is no issues.
+  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
+                                bounding_box_.bottom()) ||
+      TabFind::WithinTestRegion(2, other->bounding_box_.left(),
+                                other->bounding_box_.bottom())) {
+    tprintf("Merging:");
+    Print();
+    other->Print();
+  }
+
+  // Update the special_blobs_densities_ to the blob-count-weighted average
+  // of the two partitions. The current densities must be read before they
+  // are overwritten, and the average applies whichever side has blobs, so
+  // there is no pre-clearing and no special-casing of an empty side.
+  const unsigned w1 = boxes_.length();
+  const unsigned w2 = other->boxes_.length();
+  ASSERT_HOST((w1 + w2) > 0);
+  for (int type = 0; type < BSTT_COUNT; ++type) {
+    special_blobs_densities_[type] =
+        (special_blobs_densities_[type] * w1 +
+         other->special_blobs_densities_[type] * w2) / (w1 + w2);
+  }
+
+  // Merge the two sorted lists.
+  BLOBNBOX_C_IT it(&boxes_);
+  BLOBNBOX_C_IT it2(&other->boxes_);
+  for (; !it2.empty(); it2.forward()) {
+    BLOBNBOX* bbox2 = it2.extract();
+    ColPartition* prev_owner = bbox2->owner();
+    if (prev_owner != other && prev_owner != nullptr) {
+      // A blob on other's list is owned by someone else; let them have it.
+      continue;
+    }
+    ASSERT_HOST(prev_owner == other || prev_owner == nullptr);
+    if (prev_owner == other)
+      bbox2->set_owner(this);
+    it.add_to_end(bbox2);
+  }
+  left_margin_ = std::min(left_margin_, other->left_margin_);
+  right_margin_ = std::max(right_margin_, other->right_margin_);
+  // Take whichever key reaches further out, along with its tab flag.
+  if (other->left_key_ < left_key_) {
+    left_key_ = other->left_key_;
+    left_key_tab_ = other->left_key_tab_;
+  }
+  if (other->right_key_ > right_key_) {
+    right_key_ = other->right_key_;
+    right_key_tab_ = other->right_key_tab_;
+  }
+  // Combine the flow and blob_type in a sensible way.
+  // Dominant flows stay.
+  if (!DominatesInMerge(flow_, other->flow_)) {
+    flow_ = other->flow_;
+    blob_type_ = other->blob_type_;
+  }
+  SetBlobTypes();
+  // The blobs were appended unsorted, so restore the order appropriate to
+  // the orientation of the merged partition.
+  if (IsVerticalType()) {
+    boxes_.sort(SortByBoxBottom<BLOBNBOX>);
+    last_add_was_vertical_ = true;
+  } else {
+    boxes_.sort(SortByBoxLeft<BLOBNBOX>);
+    last_add_was_vertical_ = false;
+  }
+  ComputeLimits();
+  // Fix partner lists. other is going away, so remove it as a
+  // partner of all its partners and add this in its place.
+  for (int upper = 0; upper < 2; ++upper) {
+    ColPartition_CLIST partners;
+    ColPartition_C_IT part_it(&partners);
+    part_it.add_list_after(upper ? &other->upper_partners_
+                                 : &other->lower_partners_);
+    for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
+      ColPartition* partner = part_it.extract();
+      partner->RemovePartner(!upper, other);
+      // Remove this first so that re-adding it re-sorts it in the list.
+      partner->RemovePartner(!upper, this);
+      partner->AddPartner(!upper, this);
+    }
+  }
+  delete other;
+  if (cb != nullptr) {
+    SetColumnGoodness(cb);
+  }
+}
+
+// merge1 and merge2 are candidates to be merged, yet their combined box
+// overlaps this. Returns true if that overlap is sufficiently trivial to be
+// allowed, which requires that:
+// - none of this, merge1 and merge2 is a vertical type,
+// - merge1 and merge2 have a significant vertical core overlap, and
+// - the merged box does not both overlap the median top/bottom range of
+//   this and intrude into the bounding box of this by more than
+//   ok_box_overlap.
+// ok_box_overlap should be set by the caller appropriate to the sizes of
+// the text involved, and is usually a fraction of the median size of merge1
+// and/or merge2, or this. If debug is true the reason for a rejection is
+// printed.
+// TODO(rays) Determine whether vertical text needs to be considered.
+bool ColPartition::OKMergeOverlap(const ColPartition& merge1,
+                                  const ColPartition& merge2,
+                                  int ok_box_overlap, bool debug) {
+  // Vertical partitions are not allowed to be involved.
+  const bool any_vertical =
+      IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType();
+  if (any_vertical) {
+    if (debug) {
+      tprintf("Vertical partition\n");
+    }
+    return false;
+  }
+  // The merging partitions must strongly overlap each other.
+  if (!merge1.VSignificantCoreOverlap(merge2)) {
+    if (debug) {
+      tprintf("Voverlap %d (%d)\n",
+              merge1.VCoreOverlap(merge2),
+              merge1.VSignificantCoreOverlap(merge2));
+    }
+    return false;
+  }
+  // The merged box must not overlap the median bounds of this.
+  TBOX union_box(merge1.bounding_box());
+  union_box += merge2.bounding_box();
+  const bool crosses_median_range = union_box.bottom() < median_top_ &&
+                                    union_box.top() > median_bottom_;
+  const bool beyond_allowed_graze =
+      union_box.bottom() < bounding_box_.top() - ok_box_overlap &&
+      union_box.top() > bounding_box_.bottom() + ok_box_overlap;
+  if (crosses_median_range && beyond_allowed_graze) {
+    if (debug) {
+      tprintf("Excessive box overlap\n");
+    }
+    return false;
+  }
+  // Looks OK!
+  return true;
+}
+
+// Finds the blob at which to split this to minimize the overlap with the
+// given box. Walks the blobs in list order, growing a box from the first
+// blob, and returns the first blob whose addition makes the grown box
+// overlap the given box; that blob is the first to go in the second
+// partition. Returns nullptr if there are fewer than two blobs or no such
+// blob is found. The first blob itself is never returned.
+BLOBNBOX* ColPartition::OverlapSplitBlob(const TBOX& box) {
+  const bool too_few_blobs = boxes_.empty() || boxes_.singleton();
+  if (too_few_blobs) {
+    return nullptr;
+  }
+  BLOBNBOX_C_IT blob_it(&boxes_);
+  TBOX grown_box(blob_it.data()->bounding_box());
+  blob_it.forward();
+  while (!blob_it.at_first()) {
+    BLOBNBOX* candidate = blob_it.data();
+    grown_box += candidate->bounding_box();
+    if (grown_box.overlap(box)) {
+      return candidate;
+    }
+    blob_it.forward();
+  }
+  return nullptr;
+}
+
+// Splits this partition, keeping the first half in this and returning the
+// second half as a new partition. split_blob and all the blobs that follow
+// it in the list go in the second half, the rest stay in the first half.
+// Returns nullptr (leaving this unchanged) if split_blob is not found.
+// Asserts that this is left with at least one blob.
+ColPartition* ColPartition::SplitAtBlob(BLOBNBOX* split_blob) {
+  ColPartition* second_half = ShallowCopy();
+  second_half->set_owns_blobs(owns_blobs());
+  BLOBNBOX_C_IT blob_it(&boxes_);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    ColPartition* old_owner = blob->owner();
+    ASSERT_HOST(!owns_blobs() || old_owner == this || old_owner == nullptr);
+    // Once the second half has its first blob, every later blob follows.
+    const bool goes_to_second_half =
+        blob == split_blob || !second_half->boxes_.empty();
+    if (!goes_to_second_half) {
+      continue;
+    }
+    second_half->AddBox(blob_it.extract());
+    if (owns_blobs() && old_owner != nullptr) {
+      blob->set_owner(second_half);
+    }
+  }
+  ASSERT_HOST(!blob_it.empty());
+  if (second_half->IsEmpty()) {
+    // Split part ended up with nothing. Possible if split_blob is not
+    // in the list of blobs.
+    delete second_half;
+    return nullptr;
+  }
+  // The edges created by the split are not tab stops.
+  right_key_tab_ = false;
+  second_half->left_key_tab_ = false;
+  ComputeLimits();
+  // TODO(nbeato) Merge Ray's CL like this:
+  // if (owns_blobs())
+  //  SetBlobTextlineGoodness();
+  second_half->ComputeLimits();
+  // TODO(nbeato) Merge Ray's CL like this:
+  // if (split_part->owns_blobs())
+  //   split_part->SetBlobTextlineGoodness();
+  return second_half;
+}
+
+// Splits this partition at the given x coordinate, returning the right
+// half as a new partition and keeping the left half in this. A blob goes to
+// the right half when the left edge of its box is at or beyond split_x.
+// Returns nullptr, leaving the blobs in this, if split_x is not strictly
+// inside the bounding box or if either half would end up empty.
+ColPartition* ColPartition::SplitAt(int split_x) {
+  const bool outside_box =
+      split_x <= bounding_box_.left() || split_x >= bounding_box_.right();
+  if (outside_box) {
+    return nullptr;  // There will be no change.
+  }
+  ColPartition* right_half = ShallowCopy();
+  right_half->set_owns_blobs(owns_blobs());
+  BLOBNBOX_C_IT blob_it(&boxes_);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    ColPartition* old_owner = blob->owner();
+    ASSERT_HOST(!owns_blobs() || old_owner == this || old_owner == nullptr);
+    const TBOX& blob_box = blob->bounding_box();
+    if (blob_box.left() < split_x) {
+      continue;  // Stays in the left half.
+    }
+    right_half->AddBox(blob_it.extract());
+    if (owns_blobs() && old_owner != nullptr) {
+      blob->set_owner(right_half);
+    }
+  }
+  if (blob_it.empty()) {
+    // Possible if split-x passes through the first blob.
+    // Everything moved right, so take it all back.
+    blob_it.add_list_after(&right_half->boxes_);
+  }
+  ASSERT_HOST(!blob_it.empty());
+  if (right_half->IsEmpty()) {
+    // Split part ended up with nothing. Possible if split_x passes
+    // through the last blob.
+    delete right_half;
+    return nullptr;
+  }
+  // The edges created by the split are not tab stops, and the split
+  // position becomes the margin between the two halves.
+  right_key_tab_ = false;
+  right_half->left_key_tab_ = false;
+  right_margin_ = split_x;
+  right_half->left_margin_ = split_x;
+  ComputeLimits();
+  right_half->ComputeLimits();
+  return right_half;
+}
+
+// Recalculates all the coordinate limits of the partition from its blobs:
+// - bounding_box_ becomes the union of the blob boxes, or, with no blobs,
+//   a zero-height box spanning left_margin_ to right_margin_;
+// - left_key_/right_key_ are reset from the box unless they are tab keys;
+// - the median top/bottom/height/left/right/width are recomputed;
+// - this is re-inserted in the partner lists of all its partners, as those
+//   are kept in bounding box order.
+// Problems with keys or margins are only reported (when textord_debug_bugs
+// is set), not corrected. With no blobs, the medians and partner lists are
+// left untouched.
+void ColPartition::ComputeLimits() {
+  bounding_box_ = TBOX();  // Clear it
+  BLOBNBOX_C_IT it(&boxes_);
+  BLOBNBOX* bbox = nullptr;
+  // Number of blobs whose flow is not BTFT_LEADER; used below to decide
+  // which blobs contribute to the medians.
+  int non_leader_count = 0;
+  if (it.empty()) {
+    // No blobs: fall back on the margins for the horizontal extent.
+    bounding_box_.set_left(left_margin_);
+    bounding_box_.set_right(right_margin_);
+    bounding_box_.set_bottom(0);
+    bounding_box_.set_top(0);
+  } else {
+    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+      bbox = it.data();
+      bounding_box_ += bbox->bounding_box();
+      if (bbox->flow() != BTFT_LEADER)
+        ++non_leader_count;
+    }
+  }
+  // Keys that do not come from a tab track the box; tab keys are kept, but
+  // are reported if they now lie inside the box.
+  if (!left_key_tab_)
+    left_key_ = BoxLeftKey();
+  if (left_key_ > BoxLeftKey() && textord_debug_bugs) {
+    // TODO(rays) investigate the causes of these error messages, to find
+    // out if they are genuinely harmful, or just indicative of junk input.
+    tprintf("Computed left-illegal partition\n");
+    Print();
+  }
+  if (!right_key_tab_)
+    right_key_ = BoxRightKey();
+  if (right_key_ < BoxRightKey() && textord_debug_bugs) {
+    tprintf("Computed right-illegal partition\n");
+    Print();
+  }
+  // Nothing more can be computed without blobs.
+  if (it.empty())
+    return;
+  if (IsImageType() || blob_type() == BRT_RECTIMAGE ||
+      blob_type() == BRT_POLYIMAGE) {
+    // For images the medians are simply taken from the bounding box.
+    median_top_ = bounding_box_.top();
+    median_bottom_ = bounding_box_.bottom();
+    median_height_ = bounding_box_.height();
+    median_left_ = bounding_box_.left();
+    median_right_ = bounding_box_.right();
+    median_width_ = bounding_box_.width();
+  } else {
+    // Otherwise gather statistics over the blob boxes, with each blob's
+    // values weighted by the area of its box.
+    STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
+    STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
+    STATS height_stats(0, bounding_box_.height() + 1);
+    STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
+    STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
+    STATS width_stats(0, bounding_box_.width() + 1);
+    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+      bbox = it.data();
+      // Leader blobs are left out of the statistics, unless every blob is
+      // a leader, in which case they are all used.
+      if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
+        const TBOX& box = bbox->bounding_box();
+        int area = box.area();
+        top_stats.add(box.top(), area);
+        bottom_stats.add(box.bottom(), area);
+        height_stats.add(box.height(), area);
+        left_stats.add(box.left(), area);
+        right_stats.add(box.right(), area);
+        width_stats.add(box.width(), area);
+      }
+    }
+    // Round the medians to the nearest integer.
+    median_top_ = static_cast<int>(top_stats.median() + 0.5);
+    median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
+    median_height_ = static_cast<int>(height_stats.median() + 0.5);
+    median_left_ = static_cast<int>(left_stats.median() + 0.5);
+    median_right_ = static_cast<int>(right_stats.median() + 0.5);
+    median_width_ = static_cast<int>(width_stats.median() + 0.5);
+  }
+
+  // Report (but do not fix) margins that lie inside the box.
+  if (right_margin_ < bounding_box_.right() && textord_debug_bugs) {
+    tprintf("Made partition with bad right coords, %d < %d\n",
+            right_margin_, bounding_box_.right());
+    Print();
+  }
+  if (left_margin_ > bounding_box_.left() && textord_debug_bugs) {
+    tprintf("Made partition with bad left coords, %d > %d\n",
+            left_margin_, bounding_box_.left());
+    Print();
+  }
+  // Fix partner lists. The bounding box has changed and partners are stored
+  // in bounding box order, so remove and reinsert this as a partner
+  // of all its partners.
+  for (int upper = 0; upper < 2; ++upper) {
+    // Move the partners to a temporary list, as AddPartner re-adds each of
+    // them to the original list of this.
+    ColPartition_CLIST partners;
+    ColPartition_C_IT part_it(&partners);
+    part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
+    for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
+      ColPartition* partner = part_it.extract();
+      partner->RemovePartner(!upper, this);
+      partner->AddPartner(!upper, this);
+    }
+  }
+  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
+                                bounding_box_.bottom())) {
+    tprintf("Recomputed box for partition %p\n", this);
+    Print();
+  }
+}
+
+// Returns the number of blobs on boxes_ whose bounding box overlaps the
+// given box.
+int ColPartition::CountOverlappingBoxes(const TBOX& box) {
+  int hits = 0;
+  BLOBNBOX_C_IT blob_it(&boxes_);
+  blob_it.mark_cycle_pt();
+  while (!blob_it.cycled_list()) {
+    const TBOX& blob_box = blob_it.data()->bounding_box();
+    if (box.overlap(blob_box)) {
+      ++hits;
+    }
+    blob_it.forward();
+  }
+  return hits;
+}
+
+// Computes and sets the type_ and first_column_, last_column_ and
+// column_set_ from the given column set.
+// resolution refers to the ppi resolution of the image.
+// A non-line pullout that covers a range of columns is forced into a
+// single column.
+void ColPartition::SetPartitionType(int resolution, ColPartitionSet* columns) {
+  int spanned_col = -1;
+  const auto short_side =
+      std::min(bounding_box_.height(), bounding_box_.width());
+  const ColumnSpanningType span_type = columns->SpanningType(
+      resolution, bounding_box_.left(), bounding_box_.right(), short_side,
+      MidY(), left_margin_, right_margin_, &first_column_, &last_column_,
+      &spanned_col);
+  column_set_ = columns;
+  const bool multi_column_pullout = first_column_ < last_column_ &&
+                                    span_type == CST_PULLOUT &&
+                                    !IsLineType();
+  if (multi_column_pullout) {
+    // Unequal columns may indicate that the pullout spans one of the columns
+    // it lies in, so force it to be allocated to just that column.
+    if (spanned_col >= 0) {
+      first_column_ = spanned_col;
+      last_column_ = spanned_col;
+    } else if ((first_column_ & 1) == 0) {
+      // No spanned column: prefer an even-numbered end of the range.
+      last_column_ = first_column_;
+    } else if ((last_column_ & 1) == 0) {
+      first_column_ = last_column_;
+    } else {
+      // Both ends are odd: use the middle of the range.
+      const int middle = (first_column_ + last_column_) / 2;
+      last_column_ = middle;
+      first_column_ = middle;
+    }
+  }
+  type_ = PartitionType(span_type);
+}
+
+// Returns the PartitionType from the current BlobRegionType and a column
+// flow spanning type ColumnSpanningType, generated by
+// ColPartitionSet::SpanningType, that indicates how the partition sits
+// in the columns.
+// CST_NOISE yields PT_NOISE, except for horizontal/vertical lines,
+// rectangular images and vertical text, which are treated as CST_FLOWING.
+// Any flow other than flowing/heading/pullout for an image or text
+// partition is a fatal error.
+PolyBlockType ColPartition::PartitionType(ColumnSpanningType flow) const {
+  if (flow == CST_NOISE) {
+    const bool survives_noise =
+        blob_type_ == BRT_HLINE || blob_type_ == BRT_VLINE ||
+        blob_type_ == BRT_RECTIMAGE || blob_type_ == BRT_VERT_TEXT;
+    if (!survives_noise) {
+      return PT_NOISE;
+    }
+    flow = CST_FLOWING;
+  }
+
+  // Blob types whose partition type does not depend on the flow.
+  if (blob_type_ == BRT_NOISE) return PT_NOISE;
+  if (blob_type_ == BRT_HLINE) return PT_HORZ_LINE;
+  if (blob_type_ == BRT_VLINE) return PT_VERT_LINE;
+  if (blob_type_ == BRT_VERT_TEXT) return PT_VERTICAL_TEXT;
+
+  const bool is_image =
+      blob_type_ == BRT_RECTIMAGE || blob_type_ == BRT_POLYIMAGE;
+  if (is_image) {
+    switch (flow) {
+      case CST_FLOWING:
+        return PT_FLOWING_IMAGE;
+      case CST_HEADING:
+        return PT_HEADING_IMAGE;
+      case CST_PULLOUT:
+        return PT_PULLOUT_IMAGE;
+      default:
+        ASSERT_HOST(!"Undefined flow type for image!");
+    }
+  } else {
+    // BRT_TEXT, BRT_UNKNOWN and anything else is treated as text.
+    switch (flow) {
+      case CST_FLOWING:
+        return PT_FLOWING_TEXT;
+      case CST_HEADING:
+        return PT_HEADING_TEXT;
+      case CST_PULLOUT:
+        return PT_PULLOUT_TEXT;
+      default:
+        ASSERT_HOST(!"Undefined flow type for text!");
+    }
+  }
+  ASSERT_HOST(!"Should never get here!");
+  return PT_NOISE;
+}
+
+// Returns, in *first_col and *last_col, the first and last column of the
+// given column set touched by this partition.
+// resolution refers to the ppi resolution of the image.
+// Note that type_ is also updated from the resulting spanning type.
+void ColPartition::ColumnRange(int resolution, ColPartitionSet* columns,
+                               int* first_col, int* last_col) {
+  int spanned_col = -1;  // Required by SpanningType, but not used here.
+  const auto short_side =
+      std::min(bounding_box_.height(), bounding_box_.width());
+  const ColumnSpanningType span_type = columns->SpanningType(
+      resolution, bounding_box_.left(), bounding_box_.right(), short_side,
+      MidY(), left_margin_, right_margin_, first_col, last_col,
+      &spanned_col);
+  type_ = PartitionType(span_type);
+}
+
+// Sets the internal flags good_width_ and good_column_.
+// good_width_ is the verdict of cb on the distance between the left and
+// right edges at MidY(); good_column_ requires a BRT_TEXT partition with a
+// tab key on both sides.
+void ColPartition::SetColumnGoodness(WidthCallback cb) {
+  const int mid_y = MidY();
+  const int width_at_mid = RightAtY(mid_y) - LeftAtY(mid_y);
+  good_width_ = cb(width_at_mid);
+  const bool tabs_both_sides = left_key_tab_ && right_key_tab_;
+  good_column_ = blob_type_ == BRT_TEXT && tabs_both_sides;
+}
+
+// Determines whether the blobs in this partition mostly represent
+// a leader (fixed pitch sequence) and sets the member blobs accordingly.
+// Note that height is assumed to have been tested elsewhere, and that this
+// function will find most fixed-pitch text as leader without a height filter.
+// Leader detection is limited to sequences of identical width objects,
+// such as .... or ----, so patterns, such as .-.-.-.-. will not be found.
+// Returns true if the partition was marked as a leader, in which case the
+// blobs are set to BRT_TEXT/BTFT_LEADER, and a first or last blob that is
+// spaced too far from its neighbour is removed from the partition.
+// In all cases the flow of every blob is first reset to BTFT_NEIGHBOURS.
+// Returns false without doing anything if the partition has no blobs.
+bool ColPartition::MarkAsLeaderIfMonospaced() {
+  bool result = false;
+  // There is nothing to measure without blobs, and the statistics below
+  // need a first blob to start from.
+  if (boxes_.empty()) {
+    return result;
+  }
+  // Gather statistics on the gaps between blobs and the widths of the blobs.
+  int part_width = bounding_box_.width();
+  STATS gap_stats(0, part_width);
+  STATS width_stats(0, part_width);
+  BLOBNBOX_C_IT it(&boxes_);
+  BLOBNBOX* prev_blob = it.data();
+  prev_blob->set_flow(BTFT_NEIGHBOURS);
+  width_stats.add(prev_blob->bounding_box().width(), 1);
+  int blob_count = 1;
+  for (it.forward(); !it.at_first(); it.forward()) {
+    BLOBNBOX* blob = it.data();
+    int left = blob->bounding_box().left();
+    int right = blob->bounding_box().right();
+    gap_stats.add(left - prev_blob->bounding_box().right(), 1);
+    width_stats.add(right - left, 1);
+    blob->set_flow(BTFT_NEIGHBOURS);
+    prev_blob = blob;
+    ++blob_count;
+  }
+  double median_gap = gap_stats.median();
+  double median_width = width_stats.median();
+  double max_width = std::max(median_gap, median_width);
+  double min_width = std::min(median_gap, median_width);
+  double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
+  if (textord_debug_tabfind >= 4) {
+    tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n",
+            gap_iqr, blob_count, max_width * kMaxLeaderGapFractionOfMax,
+            min_width * kMaxLeaderGapFractionOfMin);
+  }
+  if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax &&
+      gap_iqr < min_width * kMaxLeaderGapFractionOfMin &&
+      blob_count >= kMinLeaderCount) {
+    // This is stable enough to be called a leader, so check the widths.
+    // Since leader dashes can join, run a dp cutting algorithm and go
+    // on the cost.
+    int offset = static_cast<int>(ceil(gap_iqr * 2));
+    int min_step = static_cast<int>(median_gap + median_width + 0.5);
+    int max_step = min_step + offset;
+    min_step -= offset;
+    // Pad the buffer with min_step/2 on each end.
+    int part_left = bounding_box_.left() - min_step / 2;
+    part_width += min_step;
+    auto* projection = new DPPoint[part_width];
+    // Build the projection profile: every x column covered by a blob gets
+    // the height of that blob added to its local cost.
+    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+      BLOBNBOX* blob = it.data();
+      int left = blob->bounding_box().left();
+      int right = blob->bounding_box().right();
+      int height = blob->bounding_box().height();
+      for (int x = left; x < right; ++x) {
+        projection[x - part_left].AddLocalCost(height);
+      }
+    }
+    DPPoint* best_end = DPPoint::Solve(min_step, max_step, false,
+                                       &DPPoint::CostWithVariance,
+                                       part_width, projection);
+    if (best_end != nullptr && best_end->total_cost() < blob_count) {
+      // Good enough. Call it a leader.
+      result = true;
+      bool modified_blob_list = false;
+      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+        BLOBNBOX* blob = it.data();
+        // If the first or last blob is spaced too much, don't mark it.
+        if (it.at_first()) {
+          int gap = it.data_relative(1)->bounding_box().left() -
+                     blob->bounding_box().right();
+          if (blob->bounding_box().width() + gap > max_step) {
+            it.extract();
+            modified_blob_list = true;
+            continue;
+          }
+        }
+        if (it.at_last()) {
+          int gap = blob->bounding_box().left() -
+                     it.data_relative(-1)->bounding_box().right();
+          if (blob->bounding_box().width() + gap > max_step) {
+            it.extract();
+            modified_blob_list = true;
+            break;
+          }
+        }
+        blob->set_region_type(BRT_TEXT);
+        blob->set_flow(BTFT_LEADER);
+      }
+      // Blobs were removed, so the box and medians are out of date.
+      if (modified_blob_list) ComputeLimits();
+      blob_type_ = BRT_TEXT;
+      flow_ = BTFT_LEADER;
+    } else if (textord_debug_tabfind) {
+      if (best_end == nullptr) {
+        tprintf("No path\n");
+      } else {
+        tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(),
+                blob_count);
+      }
+    }
+    delete [] projection;
+  }
+  return result;
+}
+
+// Classifies this partition from the result of
+// TextlineProjection::EvaluateColPartition: positive for horizontal text,
+// negative for vertical text, and near zero for non-text.
+// Sets blob_type_ and flow_ to indicate line, strong/weak horizontal or
+// vertical text, or noise, then pushes the result to the blobs via
+// SetBlobTypes().
+// The function assumes that the blob neighbours are valid (from
+// StrokeWidth::SetNeighbours) and that those neighbours have their
+// region_type() set.
+void ColPartition::SetRegionAndFlowTypesFromProjectionValue(int value) {
+  // Gather the per-blob statistics in one pass over the partition.
+  int num_blobs = 0;    // Total # blobs.
+  int good_score = 0;   // Sum of GoodTextBlob() (only reported in debug).
+  int num_noisy = 0;    // Sum of NoisyNeighbours() over all blobs.
+  int num_hlines = 0;   // # blobs with region type BRT_HLINE.
+  int num_vlines = 0;   // # blobs with region type BRT_VLINE.
+  BLOBNBOX_C_IT blob_it(&boxes_);
+  blob_it.mark_cycle_pt();
+  while (!blob_it.cycled_list()) {
+    BLOBNBOX* blob = blob_it.data();
+    ++num_blobs;
+    num_noisy += blob->NoisyNeighbours();
+    good_score += blob->GoodTextBlob();
+    const auto region = blob->region_type();
+    if (region == BRT_HLINE) {
+      ++num_hlines;
+    } else if (region == BRT_VLINE) {
+      ++num_vlines;
+    }
+    blob_it.forward();
+  }
+
+  // Default classification; refined by the cases below.
+  flow_ = BTFT_NEIGHBOURS;
+  blob_type_ = BRT_UNKNOWN;
+  if (num_hlines != num_vlines) {
+    // Line blobs are present and one direction has the majority.
+    flow_ = BTFT_NONE;
+    blob_type_ = num_hlines > num_vlines ? BRT_HLINE : BRT_VLINE;
+  } else if (value < -1 || value > 1) {
+    // The projection value is far enough from zero to call it text.
+    const bool horizontal = value > 0;
+    const int long_side =
+        horizontal ? bounding_box_.width() : bounding_box_.height();
+    const int short_side =
+        horizontal ? bounding_box_.height() : bounding_box_.width();
+    blob_type_ = horizontal ? BRT_TEXT : BRT_VERT_TEXT;
+    // The older metrics (blob count, size and aspect ratio) are combined
+    // with the input value: a strong indication from them may flip the
+    // STRONG_CHAIN/CHAIN decision made from the value alone.
+    int strong_score = 0;
+    if (num_blobs >= kHorzStrongTextlineCount) ++strong_score;
+    if (short_side > kHorzStrongTextlineHeight) ++strong_score;
+    if (short_side * kHorzStrongTextlineAspect < long_side) ++strong_score;
+    const int magnitude = abs(value);
+    if (magnitude >= kMinStrongTextValue) {
+      // Strong by value, but strong vertical text is downgraded to chain
+      // when fewer than two of the other indicators agree.
+      flow_ = (value < 0 && strong_score < 2) ? BTFT_CHAIN
+                                              : BTFT_STRONG_CHAIN;
+    } else if (magnitude >= kMinChainTextValue) {
+      // Chain by value, upgraded to strong chain when all three of the
+      // other indicators are good.
+      flow_ = strong_score == 3 ? BTFT_STRONG_CHAIN : BTFT_CHAIN;
+    } else {
+      flow_ = BTFT_NEIGHBOURS;
+    }
+  }
+
+  // A partition left as NEIGHBOURS is noise if its blobs report at least as
+  // many noisy neighbours as there are blobs.
+  if (flow_ == BTFT_NEIGHBOURS && num_noisy >= num_blobs) {
+    flow_ = BTFT_NONTEXT;
+    blob_type_ = BRT_NOISE;
+  }
+
+  const bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
+                                               bounding_box_.bottom());
+  if (debug) {
+    tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
+            num_blobs, num_noisy, good_score);
+    tprintf(" Projection value=%d, flow=%d, blob_type=%d\n",
+            value, flow_, blob_type_);
+    Print();
+  }
+  SetBlobTypes();
+}
+
+// Copies this partition's blob_type_ and flow_ onto each of its blobs.
+// Does nothing unless the partition owns its blobs. A blob whose flow is
+// already BTFT_LEADER keeps that flow, as leaders must remain identifiable
+// later; its region type is still overwritten like any other blob.
+void ColPartition::SetBlobTypes() {
+  if (owns_blobs()) {
+    BLOBNBOX_C_IT blob_it(&boxes_);
+    blob_it.mark_cycle_pt();
+    while (!blob_it.cycled_list()) {
+      BLOBNBOX* blob = blob_it.data();
+      const bool is_leader = blob->flow() == BTFT_LEADER;
+      if (!is_leader) {
+        blob->set_flow(flow_);
+      }
+      blob->set_region_type(blob_type_);
+      // Each blob must be either unowned or owned by this partition.
+      ASSERT_HOST(blob->owner() == nullptr || blob->owner() == this);
+      blob_it.forward();
+    }
+  }
+}
+
+// Returns true if a decent baseline can be fitted through the blobs.
+// Works for both horizontal and vertical text: horizontal text uses the
+// bottom of each blob as the baseline, vertical text uses the right side.
+// Returns false if the partition has no blobs, or has no blob strictly
+// between the first and the last one (so that no mean blob size can be
+// computed).
+// A baseline is "good" when the fitting error reported by DetLineFit::Fit
+// is below kMaxBaselineError times the mean blob size, and the summed extent
+// of the interior blobs along the line covers at least kMinBaselineCoverage
+// of the distance between the two end points.
+bool ColPartition::HasGoodBaseline() {
+  // With no blobs there is no bounding box to read below, so there cannot
+  // be a baseline either.
+  if (boxes_.empty())
+    return false;
+  // Approximation of the baseline.
+  DetLineFit linepoints;
+  // Calculation of the mean height on this line segment. Note that these
+  // variable names apply to the context of a horizontal line, and work
+  // analogously, rather than literally in the case of a vertical line.
+  int total_height = 0;
+  int coverage = 0;
+  int height_count = 0;
+  int width = 0;
+  BLOBNBOX_C_IT it(&boxes_);
+  TBOX box(it.data()->bounding_box());
+  // Accumulate points representing the baseline at the middle of each blob,
+  // but add an additional point for each end of the line. This makes it
+  // harder to fit a severe skew angle, as it is most likely not right.
+  if (IsVerticalType()) {
+    // For a vertical line, use the right side as the baseline.
+    ICOORD first_pt(box.right(), box.bottom());
+    // Use the bottom-right of the first (bottom) box, the top-right of the
+    // last, and the middle-right of all others.
+    linepoints.Add(first_pt);
+    for (it.forward(); !it.at_last(); it.forward()) {
+      BLOBNBOX* blob = it.data();
+      box = blob->bounding_box();
+      ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
+      linepoints.Add(box_pt);
+      // Width plays the role of "height" for vertical text.
+      total_height += box.width();
+      coverage += box.height();
+      ++height_count;
+    }
+    // The iterator now rests on the last blob.
+    box = it.data()->bounding_box();
+    ICOORD last_pt(box.right(), box.top());
+    linepoints.Add(last_pt);
+    width = last_pt.y() - first_pt.y();
+
+  } else {
+    // Horizontal lines use the bottom as the baseline.
+    // Use the bottom-left of the first box, the bottom-right of the last,
+    // and the middle of all others.
+    ICOORD first_pt(box.left(), box.bottom());
+    linepoints.Add(first_pt);
+    for (it.forward(); !it.at_last(); it.forward()) {
+      BLOBNBOX* blob = it.data();
+      box = blob->bounding_box();
+      ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
+      linepoints.Add(box_pt);
+      total_height += box.height();
+      coverage += box.width();
+      ++height_count;
+    }
+    // The iterator now rests on the last blob.
+    box = it.data()->bounding_box();
+    ICOORD last_pt(box.right(), box.bottom());
+    linepoints.Add(last_pt);
+    width = last_pt.x() - first_pt.x();
+  }
+  // Without interior blobs there is no mean size to scale the error by.
+  if (height_count == 0)
+    return false;
+  // Maximum median error allowed to be a good text line.
+  double max_error = kMaxBaselineError * total_height / height_count;
+  ICOORD start_pt, end_pt;
+  double error = linepoints.Fit(&start_pt, &end_pt);
+  return error < max_error && coverage >= kMinBaselineCoverage * width;
+}
+
+// Adds this ColPartition to a matching WorkingPartSet if one can be found,
+// otherwise starts a new one in the appropriate column, ending the previous.
+// A partition that is already block_owned_ is left alone. If the upper
+// singleton partner already has a working set, that set is reused directly.
+// Otherwise the working set at index first_column_ in working_sets is used,
+// and for a non-pullout partition spanning several columns the completed
+// blocks of columns first_column_..last_column_ are first gathered into it.
+// bleft, tright, resolution and used_parts are only forwarded to
+// WorkingPartSet::ExtractCompletedBlocks.
+void ColPartition::AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright,
+                                   int resolution,
+                                   ColPartition_LIST* used_parts,
+                                   WorkingPartSet_LIST* working_sets) {
+  if (block_owned_) {
+    return;  // Done it already.
+  }
+  block_owned_ = true;
+  WorkingPartSet_IT it(working_sets);
+  // An upper partner that already has a working set decides the matter.
+  ColPartition* upper = SingletonPartner(true);
+  if (upper != nullptr) {
+    if (upper->working_set_ != nullptr) {
+      working_set_ = upper->working_set_;
+      working_set_->AddPartition(this);
+      return;
+    }
+    if (textord_debug_bugs) {
+      tprintf("Partition with partner has no working set!:");
+      Print();
+      upper->Print();
+    }
+  }
+  // Walk along the working sets to the one at index first_column_.
+  int col_index = 0;
+  it.move_to_first();
+  it.mark_cycle_pt();
+  while (!it.cycled_list() && col_index != first_column_) {
+    it.forward();
+    ++col_index;
+  }
+  if (textord_debug_tabfind >= 2) {
+    tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between");
+    Print();
+  }
+  if (it.cycled_list() && textord_debug_bugs) {
+    tprintf("Target column=%d, only had %d\n", first_column_, col_index);
+  }
+  // Running off the end of the list means first_column_ is out of range.
+  ASSERT_HOST(!it.cycled_list());
+  WorkingPartSet* target_set = it.data();
+  // If last_column_ != first_column, then we need to scoop up all blocks
+  // between here and the last_column_ and put back in target_set.
+  const bool spans_columns = last_column_ != first_column_;
+  if (!it.cycled_list() && spans_columns && !IsPulloutType()) {
+    BLOCK_LIST completed_blocks;
+    TO_BLOCK_LIST to_blocks;
+    // Visit every column up to and including the one the right edge is in.
+    while (!it.cycled_list() && col_index <= last_column_) {
+      WorkingPartSet* column_set = it.data();
+      column_set->ExtractCompletedBlocks(bleft, tright, resolution,
+                                         used_parts, &completed_blocks,
+                                         &to_blocks);
+      it.forward();
+      ++col_index;
+    }
+    target_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
+  }
+  working_set_ = target_set;
+  target_set->AddPartition(this);
+}
+
+// From the given block_parts list, builds one or more BLOCKs and
+// corresponding TO_BLOCKs, such that the line spacing is uniform in each.
+// Created blocks are appended to the end of completed_blocks and to_blocks.
+// The used partitions are put onto used_parts, as they may still be referred
+// to in the partition grid. bleft, tright and resolution are the bounds
+// and resolution of the original image.
+// On return block_parts is empty: every partition has been extracted from it
+// and handed to MakeBlock in groups.
+// Works in three stages:
+//  1. For each partition compute the median side step of its blobs and the
+//     top/bottom spacing to the next partition in the list.
+//  2. Smooth the spacings with SmoothSpacings.
+//  3. Walk the list, accumulating partitions into spacing_parts and calling
+//     MakeBlock whenever a spacing boundary is found.
+void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright,
+                                     int resolution,
+                                     ColPartition_LIST* block_parts,
+                                     ColPartition_LIST* used_parts,
+                                     BLOCK_LIST* completed_blocks,
+                                     TO_BLOCK_LIST* to_blocks) {
+  // Used as the spacing of the last partition, which has no successor.
+  int page_height = tright.y() - bleft.y();
+  // Compute the initial spacing stats.
+  ColPartition_IT it(block_parts);
+  int part_count = 0;
+  // Tallest partition bounding box seen; scales same_block_threshold below.
+  int max_line_height = 0;
+
+  // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type
+  // because their line spacing with their neighbors maybe smaller and their
+  // height may be slightly larger.
+
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    ColPartition* part = it.data();
+    // The code below reads the first blob unconditionally.
+    ASSERT_HOST(!part->boxes()->empty());
+    // Histogram of absolute bottom-to-bottom steps between successive blobs.
+    STATS side_steps(0, part->bounding_box().height());
+    if (part->bounding_box().height() > max_line_height)
+      max_line_height = part->bounding_box().height();
+    BLOBNBOX_C_IT blob_it(part->boxes());
+    int prev_bottom = blob_it.data()->bounding_box().bottom();
+    // Start at the second blob and stop when the iterator wraps to the first.
+    for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
+      BLOBNBOX* blob = blob_it.data();
+      int bottom = blob->bounding_box().bottom();
+      int step = bottom - prev_bottom;
+      if (step < 0)
+        step = -step;
+      side_steps.add(step, 1);
+      prev_bottom = bottom;
+    }
+    // Round the median step to the nearest integer.
+    part->set_side_step(static_cast<int>(side_steps.median() + 0.5));
+    if (!it.at_last()) {
+      // Spacing is measured to the next partition in the list.
+      ColPartition* next_part = it.data_relative(1);
+      part->set_bottom_spacing(part->median_bottom() -
+                               next_part->median_bottom());
+      part->set_top_spacing(part->median_top() - next_part->median_top());
+    } else {
+      // The last partition has no successor, so use the page height.
+      part->set_bottom_spacing(page_height);
+      part->set_top_spacing(page_height);
+    }
+    if (textord_debug_tabfind) {
+      part->Print();
+      tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n",
+              side_steps.median(), part->top_spacing(), part->bottom_spacing());
+    }
+    ++part_count;
+  }
+  if (part_count == 0)
+    return;
+
+  // NOTE(review): SmoothSpacings is defined elsewhere; presumably it evens
+  // out the per-partition spacings computed above - confirm there.
+  SmoothSpacings(resolution, page_height, block_parts);
+
+  // Move the partitions into individual block lists and make the blocks.
+  BLOCK_IT block_it(completed_blocks);
+  TO_BLOCK_IT to_block_it(to_blocks);
+  // Partitions collected for the block currently being assembled.
+  ColPartition_LIST spacing_parts;
+  ColPartition_IT sp_block_it(&spacing_parts);
+  // A bottom spacing above this always ends the current block.
+  int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing;
+  // Each pass extracts the current partition, so the loop ends when
+  // block_parts has been emptied.
+  for (it.mark_cycle_pt(); !it.empty();) {
+    ColPartition* part = it.extract();
+    sp_block_it.add_to_end(part);
+    // After this forward(), it.data() (if any) is the partition after part.
+    it.forward();
+    if (it.empty() || part->bottom_spacing() > same_block_threshold ||
+        !part->SpacingsEqual(*it.data(), resolution)) {
+      // There is a spacing boundary. Check to see if it.data() belongs
+      // better in the current block or the next one.
+      if (!it.empty() && part->bottom_spacing() <= same_block_threshold) {
+        ColPartition* next_part = it.data();
+        // If there is a size match one-way, then the middle line goes with
+        // its matched size, otherwise it goes with the smallest spacing.
+        ColPartition* third_part = it.at_last() ? nullptr : it.data_relative(1);
+        if (textord_debug_tabfind) {
+          tprintf("Spacings unequal: upper:%d/%d, lower:%d/%d,"
+                  " sizes %d %d %d\n",
+                  part->top_spacing(), part->bottom_spacing(),
+                  next_part->top_spacing(), next_part->bottom_spacing(),
+                  part->median_height(), next_part->median_height(),
+                  third_part != nullptr ? third_part->median_height() : 0);
+        }
+        // We can only consider adding the next line to the block if the sizes
+        // match and the lines are close enough for their size.
+        if (part->SizesSimilar(*next_part) &&
+            next_part->median_height() * kMaxSameBlockLineSpacing >
+                part->bottom_spacing() &&
+            part->median_height() * kMaxSameBlockLineSpacing >
+                part->top_spacing()) {
+          // Even now, we can only add it as long as the third line doesn't
+          // match in the same way and have a smaller bottom spacing.
+          if (third_part == nullptr ||
+              !next_part->SizesSimilar(*third_part) ||
+              third_part->median_height() * kMaxSameBlockLineSpacing <=
+                  next_part->bottom_spacing() ||
+              next_part->median_height() * kMaxSameBlockLineSpacing <=
+                  next_part->top_spacing() ||
+                  next_part->bottom_spacing() > part->bottom_spacing()) {
+            // Add to the current block.
+            sp_block_it.add_to_end(it.extract());
+            it.forward();
+            if (textord_debug_tabfind) {
+              tprintf("Added line to current block.\n");
+            }
+          }
+        }
+      }
+      // Close the current block. MakeBlock may return nullptr, in which case
+      // nothing is appended to the output lists.
+      TO_BLOCK* to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts);
+      if (to_block != nullptr) {
+        to_block_it.add_to_end(to_block);
+        block_it.add_to_end(to_block->block);
+      }
+      // Re-attach the iterator to spacing_parts ready for the next block.
+      sp_block_it.set_to_list(&spacing_parts);
+    } else {
+      // No boundary: part stays in spacing_parts and the block keeps growing.
+      if (textord_debug_tabfind && !it.empty()) {
+        ColPartition* next_part = it.data();
+        tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
+                part->top_spacing(), part->bottom_spacing(),
+                next_part->top_spacing(), next_part->bottom_spacing(),
+                part->median_height(), next_part->median_height());
+      }
+    }
+  }
+}
+
+// Helper that clamps *pos, in place, into the rectangle whose bottom-left
+// corner is bleft and whose top-right corner is tright. Each coordinate is
+// first raised to the lower bound and then lowered to the upper bound.
+static void ClipCoord(const ICOORD& bleft, const ICOORD& tright, ICOORD* pos) {
+  const auto clipped_x =
+      std::min(std::max(pos->x(), bleft.x()), tright.x());
+  const auto clipped_y =
+      std::min(std::max(pos->y(), bleft.y()), tright.y());
+  pos->set_x(clipped_x);
+  pos->set_y(clipped_y);
+}
+
+// Helper moves the blobs from the given list of block_parts into a new
+// TO_BLOCK wrapping the given block, and sets up the TO_BLOCK for (old)
+// textline formation: line_size is the median blob size (width for
+// vertical_text, height otherwise), line_spacing is the given spacing capped
+// by the block extent in that direction, and max_blob_size is that extent
+// plus one.
+// The emptied partitions are moved to used_parts, as they cannot be deleted
+// yet.
+// Returns nullptr, after deleting both block and the new TO_BLOCK, if the
+// block is a text block and no blob ended up in it.
+static TO_BLOCK* MoveBlobsToBlock(bool vertical_text, int line_spacing,
+                                  BLOCK* block,
+                                  ColPartition_LIST* block_parts,
+                                  ColPartition_LIST* used_parts) {
+  // The median blob size is computed as the blobs are moved, as the block
+  // needs to know it.
+  TBOX block_box(block->pdblk.bounding_box());
+  STATS sizes(0, std::max(block_box.width(), block_box.height()));
+  bool text_type = block->pdblk.poly_block()->IsText();
+  ColPartition_IT part_it(block_parts);
+  auto* to_block = new TO_BLOCK(block);
+  BLOBNBOX_IT blob_it(&to_block->blobs);
+  ColPartition_IT used_it(used_parts);
+  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
+    ColPartition* part = part_it.extract();
+    // Blobs are transferred from all region types. Blobs for non-text
+    // regions will be used to define the polygonal bounds of the region.
+    BLOBNBOX_C_IT bb_it(part->boxes());
+    while (!bb_it.empty()) {
+      BLOBNBOX* bblob = bb_it.extract();
+      if (bblob->owner() != part) {
+        // Report as much as possible before the assert below fires.
+        tprintf("Ownership incorrect for blob:");
+        bblob->bounding_box().print();
+        tprintf("Part=");
+        part->Print();
+        if (bblob->owner() != nullptr) {
+          tprintf("Owner part:");
+          bblob->owner()->Print();
+        } else {
+          tprintf("Not owned\n");
+        }
+      }
+      ASSERT_HOST(bblob->owner() == part);
+      // Assert failure here is caused by arbitrarily changing the partition
+      // type without also changing the blob type, such as in
+      // InsertSmallBlobsAsUnknowns.
+      ASSERT_HOST(!text_type || bblob->region_type() >= BRT_UNKNOWN);
+      C_OUTLINE_IT ol_it(bblob->cblob()->out_list());
+      ASSERT_HOST(!text_type || ol_it.data()->pathlength() > 0);
+      sizes.add(vertical_text ? bblob->bounding_box().width()
+                              : bblob->bounding_box().height(),
+                1);
+      blob_it.add_after_then_move(bblob);
+      bb_it.forward();
+    }
+    // The partition has to continue to exist until the part grid is deleted.
+    used_it.add_to_end(part);
+  }
+  if (text_type && blob_it.empty()) {
+    // A text block without any blobs is of no use.
+    delete block;
+    delete to_block;
+    return nullptr;
+  }
+  to_block->line_size = sizes.median();
+  // Extent of the block across the text lines: width for vertical text,
+  // height for horizontal text.
+  const int block_extent = vertical_text
+                               ? block->pdblk.bounding_box().width()
+                               : block->pdblk.bounding_box().height();
+  if (block_extent < line_spacing) {
+    line_spacing = block_extent;
+  }
+  to_block->line_spacing = static_cast<float>(line_spacing);
+  to_block->max_blob_size = static_cast<float>(block_extent + 1);
+  return to_block;
+}
+
+// Constructs a block from the given list of partitions.
+// Arguments are as LineSpacingBlocks above.
+// Returns nullptr if block_parts is empty. If the first partition (after
+// sorting) is PT_VERTICAL_TEXT the work is delegated to
+// MakeVerticalTextBlock. Otherwise a polygonal block is built from the edge
+// runs of the partitions, clipped to bleft/tright, and the blobs are moved
+// into it by MoveBlobsToBlock.
+TO_BLOCK* ColPartition::MakeBlock(const ICOORD& bleft, const ICOORD& tright,
+                                  ColPartition_LIST* block_parts,
+                                  ColPartition_LIST* used_parts) {
+  if (block_parts->empty()) {
+    return nullptr;  // Nothing to do.
+  }
+  // Partitions that are not in reading order would give an invalid block
+  // polygon and bounding_box, so sort by bounding box now just to make sure.
+  block_parts->sort(&ColPartition::SortByBBox);
+  ColPartition_IT part_it(block_parts);
+  ColPartition* first_part = part_it.data();
+  PolyBlockType type = first_part->type();
+  if (type == PT_VERTICAL_TEXT) {
+    return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts);
+  }
+  // LineSpacingBlocks has handed us a collection of evenly spaced lines and
+  // put the average spacing in each partition, so the line spacing can be
+  // taken from the first partition. A spacing below the median height is
+  // replaced by the height of the partition's bounding box.
+  int line_spacing = first_part->bottom_spacing();
+  if (line_spacing < first_part->median_height()) {
+    line_spacing = first_part->bounding_box().height();
+  }
+  ICOORDELT_LIST vertices;
+  ICOORDELT_IT vert_it(&vertices);
+  // Bounding box of all the (clipped) polygon vertices.
+  int min_x = INT32_MAX;
+  int max_x = -INT32_MAX;
+  int min_y = INT32_MAX;
+  int max_y = -INT32_MAX;
+  ICOORD run_start, run_end;
+  // Pass 0 collects left edge runs until the iterator is at the first
+  // partition; pass 1 collects right edge runs until it is at the last.
+  // The iterator is moved to the last partition at the end of each pass.
+  int pass = 0;
+  while (pass < 2) {
+    if (pass == 0) {
+      ColPartition::LeftEdgeRun(&part_it, &run_start, &run_end);
+    } else {
+      ColPartition::RightEdgeRun(&part_it, &run_start, &run_end);
+    }
+    ClipCoord(bleft, tright, &run_start);
+    ClipCoord(bleft, tright, &run_end);
+    vert_it.add_after_then_move(new ICOORDELT(run_start));
+    vert_it.add_after_then_move(new ICOORDELT(run_end));
+    UpdateRange(run_start.x(), &min_x, &max_x);
+    UpdateRange(run_end.x(), &min_x, &max_x);
+    UpdateRange(run_start.y(), &min_y, &max_y);
+    UpdateRange(run_end.y(), &min_y, &max_y);
+    const bool pass_done =
+        pass == 0 ? part_it.at_first() : part_it.at_last();
+    if (pass_done) {
+      ++pass;
+      part_it.move_to_last();
+    }
+  }
+  if (textord_debug_tabfind) {
+    tprintf("Making block at (%d,%d)->(%d,%d)\n",
+            min_x, min_y, max_x, max_y);
+  }
+  auto* block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y);
+  block->pdblk.set_poly_block(new POLY_BLOCK(&vertices, type));
+  return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts);
+}
+
+// Constructs a block from the given list of vertical text partitions.
+// Currently only creates rectangular blocks: the block is the union of the
+// bounding boxes of all the partitions, the block type is that of the first
+// partition, and the line spacing passed on is the width of the first
+// partition. Returns nullptr if block_parts is empty.
+TO_BLOCK* ColPartition::MakeVerticalTextBlock(const ICOORD& bleft,
+                                              const ICOORD& tright,
+                                              ColPartition_LIST* block_parts,
+                                              ColPartition_LIST* used_parts) {
+  if (block_parts->empty()) {
+    return nullptr;  // Nothing to do.
+  }
+  ColPartition_IT part_it(block_parts);
+  ColPartition* first_part = part_it.data();
+  TBOX union_box = first_part->bounding_box();
+  const int line_spacing = union_box.width();
+  PolyBlockType type = first_part->type();
+  // Grow the box to cover every partition in the list.
+  part_it.mark_cycle_pt();
+  while (!part_it.cycled_list()) {
+    union_box += part_it.data()->bounding_box();
+    part_it.forward();
+  }
+  if (textord_debug_tabfind) {
+    tprintf("Making block at:");
+    union_box.print();
+  }
+  auto* block = new BLOCK("", true, 0, 0, union_box.left(), union_box.bottom(),
+                          union_box.right(), union_box.top());
+  block->pdblk.set_poly_block(new POLY_BLOCK(union_box, type));
+  return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts);
+}
+
+// Makes a TO_ROW matching this and moves all the blobs to it, transferring
+// ownership to the returned TO_ROW. The row is created from the first blob
+// and the rest are added to it, each with its bounding box top and bottom
+// and a line size of median_width_ (vertical types) or median_height_.
+// Returns nullptr if this partition has no blobs.
+TO_ROW* ColPartition::MakeToRow() {
+  BLOBNBOX_C_IT blob_it(&boxes_);
+  const float line_size =
+      static_cast<float>(IsVerticalType() ? median_width_ : median_height_);
+  TO_ROW* row = nullptr;
+  // Drain boxes_ into a single TO_ROW. Blob bounding boxes are used as they
+  // are; they are not recomputed here.
+  while (!blob_it.empty()) {
+    BLOBNBOX* blob = blob_it.extract();
+    const float top = static_cast<float>(blob->bounding_box().top());
+    const float bottom = static_cast<float>(blob->bounding_box().bottom());
+    if (row != nullptr) {
+      row->add_blob(blob, top, bottom, line_size);
+    } else {
+      row = new TO_ROW(blob, top, bottom, line_size);
+    }
+    blob_it.forward();
+  }
+  return row;
+}
+
+// Returns a newly allocated ColPartition that copies this partition's
+// classification, geometry, medians, tab/column data and special blob
+// densities, but not the list of boxes. The copy is marked as not owning
+// blobs. The resulting ColPartition is only suitable for keeping in a column
+// candidate list.
+ColPartition* ColPartition::ShallowCopy() const {
+  auto* copy = new ColPartition(blob_type_, vertical_);
+  // Classification.
+  copy->type_ = type_;
+  copy->flow_ = flow_;
+  // Bounding box, margins and tab keys.
+  copy->bounding_box_ = bounding_box_;
+  copy->left_margin_ = left_margin_;
+  copy->right_margin_ = right_margin_;
+  copy->left_key_ = left_key_;
+  copy->right_key_ = right_key_;
+  copy->left_key_tab_ = left_key_tab_;
+  copy->right_key_tab_ = right_key_tab_;
+  // Median statistics of the blobs.
+  copy->median_bottom_ = median_bottom_;
+  copy->median_top_ = median_top_;
+  copy->median_height_ = median_height_;
+  copy->median_left_ = median_left_;
+  copy->median_right_ = median_right_;
+  copy->median_width_ = median_width_;
+  // Column fit information.
+  copy->good_width_ = good_width_;
+  copy->good_column_ = good_column_;
+  copy->first_column_ = first_column_;
+  copy->last_column_ = last_column_;
+  // The densities are a plain array, so copy the bytes.
+  memcpy(copy->special_blobs_densities_, special_blobs_densities_,
+         sizeof(special_blobs_densities_));
+  // There are no boxes in the copy, so there is nothing for it to own.
+  copy->owns_blobs_ = false;
+  return copy;
+}
+
+// Returns a ShallowCopy of this partition whose box list refers to the same
+// blobs, in the same order, as this one. The copy is marked as not owning
+// the blobs.
+ColPartition* ColPartition::CopyButDontOwnBlobs() {
+  ColPartition* duplicate = ShallowCopy();
+  duplicate->set_owns_blobs(false);
+  BLOBNBOX_C_IT dest_it(duplicate->boxes());
+  BLOBNBOX_C_IT src_it(boxes());
+  src_it.mark_cycle_pt();
+  while (!src_it.cycled_list()) {
+    dest_it.add_after_then_move(src_it.data());
+    src_it.forward();
+  }
+  return duplicate;
+}
+
+#ifndef GRAPHICS_DISABLED
+// Provides a color for BBGrid to draw the rectangle.
+// A partition with a known type_ gets the color of that PolyBlockType;
+// one that is still PT_UNKNOWN is colored by its blob type and flow.
+// Must be kept in sync with PolyBlockType.
+ScrollView::Color ColPartition::BoxColor() const {
+  if (type_ != PT_UNKNOWN) {
+    return POLY_BLOCK::ColorForPolyBlockType(type_);
+  }
+  return BLOBNBOX::TextlineColor(blob_type_, flow_);
+}
+#endif  // !GRAPHICS_DISABLED
+
+// One display character per BlobRegionType value, indexed by blob type when
+// printing a partition. Read-only lookup table, hence const.
+// Keep in sync with BlobRegionType.
+static const char kBlobTypes[BRT_COUNT + 1] = "NHSRIUVT";
+
+// Prints debug information on this: one line giving the margins, tab keys,
+// bounding box and medians of the left/bottom and right/top sides, followed
+// by the width/column flags, types, column range, box count and the four
+// space_* values. A leading 'E' marks a partition with no boxes; 'T'/'B'
+// reflect left_key_tab_ and right_key_tab_.
+void ColPartition::Print() const {
+  const int y = MidY();
+  const char empty_flag = boxes_.empty() ? 'E' : ' ';
+  const char left_tab_flag = left_key_tab_ ? 'T' : 'B';
+  const char right_tab_flag = right_key_tab_ ? 'T' : 'B';
+  tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
+          " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
+          " ts=%d bs=%d ls=%d rs=%d\n",
+          empty_flag,
+          // Left side and bottom.
+          left_margin_, left_tab_flag, LeftAtY(y),
+          bounding_box_.left(), median_left_,
+          bounding_box_.bottom(), median_bottom_,
+          // Right side and top.
+          bounding_box_.right(), RightAtY(y), right_tab_flag,
+          right_margin_, median_right_,
+          bounding_box_.top(), median_top_,
+          // Flags and types.
+          good_width_, good_column_,
+          type_, kBlobTypes[blob_type_], flow_,
+          // Columns, size and surrounding space.
+          first_column_, last_column_, boxes_.length(),
+          space_above_, space_below_, space_to_left_, space_to_right_);
+}
+
+// Prints debug information on the colors: the red, green, blue and alpha
+// entries of color1_, then the red, green and blue entries of color2_.
+void ColPartition::PrintColors() {
+  const auto& first = color1_;
+  const auto& second = color2_;
+  tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
+          first[COLOR_RED], first[COLOR_GREEN], first[COLOR_BLUE],
+          first[L_ALPHA_CHANNEL],
+          second[COLOR_RED], second[COLOR_GREEN], second[COLOR_BLUE]);
+}
+
+// Sets the types of all partitions in the run to be the max of the types.
+// The run is this partition followed by the chain reached by repeatedly
+// taking SingletonPartner(false). working_set_count bounds the column
+// statistics, which are gathered but only consumed by the disabled code.
+void ColPartition::SmoothPartnerRun(int working_set_count) {
+  STATS left_stats(0, working_set_count);
+  STATS right_stats(0, working_set_count);
+  PolyBlockType max_type = type_;
+  // First pass: find the largest type in the run and collect the column
+  // range of the partners that share this partition's column set.
+  ColPartition* partner = SingletonPartner(false);
+  while (partner != nullptr) {
+    if (partner->type_ > max_type) {
+      max_type = partner->type_;
+    }
+    if (column_set_ == partner->column_set_) {
+      left_stats.add(partner->first_column_, 1);
+      right_stats.add(partner->last_column_, 1);
+    }
+    partner = partner->SingletonPartner(false);
+  }
+  type_ = max_type;
+  // TODO(rays) Either establish that it isn't necessary to set the columns,
+  // or find a way to do it that does not cause an assert failure in
+  // AddToWorkingSet.
+#if 0
+  first_column_ = left_stats.mode();
+  last_column_ = right_stats.mode();
+  if (last_column_ < first_column_)
+    last_column_ = first_column_;
+#endif
+
+  // Second pass: give every partner in the run the common type.
+  partner = SingletonPartner(false);
+  while (partner != nullptr) {
+    partner->type_ = max_type;
+#if 0  // See TODO above
+    if (column_set_ == partner->column_set_) {
+      partner->first_column_ = first_column_;
+      partner->last_column_ = last_column_;
+    }
+#endif
+    partner = partner->SingletonPartner(false);
+  }
+}
+
+// ======= Scenario common to all Refine*Partners* functions =======
+// ColPartitions are aiming to represent textlines, or horizontal slices
+// of images, and we are trying to form bi-directional (upper/lower) chains
+// of UNIQUE partner ColPartitions that can be made into blocks.
+// The ColPartitions have previously been typed (see SetPartitionType)
+// according to a combination of the content type and
+// how they lie on the columns. We want to chain text into
+// groups of a single type, but image ColPartitions may have been typed
+// differently in different parts of the image, due to being non-rectangular.
+//
+// We previously ran a search for upper and lower partners, but there may
+// be more than one, and they may be of mixed types, so now we wish to
+// refine the partners down to at most one.
+// A heading may have multiple partners:
+// ===============================
+// ========  ==========  =========
+// ========  ==========  =========
+// but it should be a different type.
+// A regular flowing text line may have multiple partners:
+// ==================   ===================
+// =======   =================  ===========
+// This could be the start of a pull-out, or it might all be in a single
+// column and might be caused by tightly spaced text, bold words, bullets,
+// funny punctuation etc, all of which can cause textlines to be split into
+// multiple ColPartitions. Pullouts and figure captions should now be different
+// types so we can more aggressively merge groups of partners that all sit
+// in a single column.
+//
+// Cleans up the partners of the given type so that there is at most
+// one partner. This makes block creation simpler.
+// If this partition's type is similar to the given type, both the upper and
+// the lower partners are refined. If get_desperate is true, that goes to
+// more desperate merge methods to merge flowing text before breaking
+// partnerships.
+// A type of PT_COUNT selects the final pass, applied to partitions whose
+// type is not similar to PT_COUNT: partners are first filtered by type and
+// any remaining multiple partners are resolved by overlap.
+// Any other type leaves this partition untouched.
+void ColPartition::RefinePartners(PolyBlockType type, bool get_desperate,
+                                  ColPartitionGrid* grid) {
+  if (TypesSimilar(type_, type)) {
+    RefinePartnersInternal(true, get_desperate, grid);
+    RefinePartnersInternal(false, get_desperate, grid);
+    return;
+  }
+  if (type != PT_COUNT) {
+    return;
+  }
+  // This is the final pass. Make sure only the correctly typed
+  // partners survive, however many there are.
+  RefinePartnersByType(true, &upper_partners_);
+  RefinePartnersByType(false, &lower_partners_);
+  // It is possible for a merge to have given a partition multiple
+  // partners again, so the last resort is to use overlap, which is
+  // intended to leave at most one partner.
+  if (!(upper_partners_.empty() || upper_partners_.singleton())) {
+    RefinePartnersByOverlap(true, &upper_partners_);
+  }
+  if (!(lower_partners_.empty() || lower_partners_.singleton())) {
+    RefinePartnersByOverlap(false, &lower_partners_);
+  }
+}
+
+////////////////// PRIVATE CODE /////////////////////////////
+
+// Cleans up the partners above if upper is true, else below.
+// Applies successively stronger refinements, stopping as soon as the list
+// is empty or holds a single partner: by type, by removing shortcuts, by
+// merging (only for types similar to flowing text, and only when
+// get_desperate is true), and finally by overlap.
+void ColPartition::RefinePartnersInternal(bool upper, bool get_desperate,
+                                          ColPartitionGrid* grid) {
+  ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
+  // True while there are still two or more partners to choose from.
+  auto has_multiple = [partners]() {
+    return !partners->empty() && !partners->singleton();
+  };
+  if (!has_multiple()) {
+    return;
+  }
+  RefinePartnersByType(upper, partners);
+  if (!has_multiple()) {
+    return;
+  }
+  // Check for transitive partnerships and break the cycle.
+  RefinePartnerShortcuts(upper, partners);
+  if (!has_multiple()) {
+    return;
+  }
+  // Types didn't fix it. Flowing text keeps the one with the longest
+  // sequence of singleton matching partners. All others max overlap.
+  if (TypesSimilar(type_, PT_FLOWING_TEXT) && get_desperate) {
+    RefineTextPartnersByMerge(upper, false, partners, grid);
+    if (has_multiple()) {
+      RefineTextPartnersByMerge(upper, true, partners, grid);
+    }
+  }
+  // The last resort is to use overlap.
+  if (has_multiple()) {
+    RefinePartnersByOverlap(upper, partners);
+  }
+}
+
+// Cleans up the partners above if upper is true, else below.
+// Restricts the partners to only desirable types and unlinks every other
+// partner, both from partners and via partner->RemovePartner(!upper, this).
+// If this is not an image, line or table partition, a partner is desirable
+// when TypesSimilar(type_, partner->type_) holds. Otherwise a partner is
+// desirable only when both it and this have blob type BRT_POLYIMAGE.
+// NOTE(review): earlier documentation mentioned a PT_VERTICAL_TEXT exception
+// for sole partners; nothing here implements it, so confirm whether
+// TypesSimilar is meant to cover that case.
+void ColPartition::RefinePartnersByType(bool upper,
+                                        ColPartition_CLIST* partners) {
+  const bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
+                                               bounding_box_.bottom());
+  if (debug) {
+    tprintf("Refining %d %s partners by type for:\n",
+            partners->length(), upper ? "Upper" : "Lower");
+    Print();
+  }
+  // The rule is a property of this partition, so pick it once up front.
+  const bool match_by_type =
+      !IsImageType() && !IsLineType() && type() != PT_TABLE;
+  ColPartition_C_IT part_it(partners);
+  // Visit every partner; rejected ones are extracted as we go.
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    ColPartition* other = part_it.data();
+    bool keep;
+    if (match_by_type) {
+      // Purify text by type: keep only partners matching type_.
+      keep = TypesSimilar(type_, other->type_);
+    } else {
+      // Only polyimages are allowed to have partners of any kind, and then
+      // only partners that are polyimages themselves.
+      keep = other->blob_type() == BRT_POLYIMAGE &&
+             blob_type() == BRT_POLYIMAGE;
+    }
+    if (keep) {
+      if (debug) {
+        tprintf("Keeping partner:");
+        other->Print();
+      }
+      continue;
+    }
+    if (debug) {
+      tprintf("Removing partner:");
+      other->Print();
+    }
+    // Tell the partner to drop this first, then drop it from our own list.
+    other->RemovePartner(!upper, this);
+    part_it.extract();
+  }
+}
+
+// Cleans up the partners above if upper is true, else below.
+// Remove transitive partnerships: this<->a, and a<->b and this<->b.
+// Gets rid of this<->b, leaving a clean chain.
+// Also if a is a partner of this and this appears in a's partner list on the
+// same side, gets rid of this<->a. Sweeps repeat until nothing is removed.
+void ColPartition::RefinePartnerShortcuts(bool upper,
+                                          ColPartition_CLIST* partners) {
+  bool done_any = false;  // True if the current sweep removed a partnership.
+  do {
+    done_any = false;
+    ColPartition_C_IT it(partners);
+    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+      ColPartition* a = it.data();
+      // Check for a match between all of a's partners (it1/b1) and all
+      // of this's partners (it2/b2).
+      ColPartition_C_IT it1(upper ? &a->upper_partners_ : &a->lower_partners_);
+      for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
+        ColPartition* b1 = it1.data();
+        if (b1 == this) {
+          done_any = true;
+          it.extract();
+          a->RemovePartner(!upper, this);
+          break;
+        }
+        ColPartition_C_IT it2(partners);
+        for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) {
+          ColPartition* b2 = it2.data();
+          if (b1 == b2) {
+            // Jackpot! b2 should not be a partner of this.
+            it2.extract();
+            b2->RemovePartner(!upper, this);
+            done_any = true;
+            // That potentially invalidated all the iterators, so break out
+            // and start again.
+            break;
+          }
+        }
+        if (done_any)
+          break;
+      }
+      if (done_any)
+        break;
+    }
+  } while (done_any && !partners->empty() && !partners->singleton());  // Also stops at <= 1 partner.
+}
+
+// Cleans up the partners above if upper is true, else below.
+// If multiple text partners can be merged, (with each other, NOT with this),
+// then do so, one pair at a time, until one partner is left or no merge fits.
+// If desperate is true, then an increase in overlap with the merge is
+// allowed. If the overlap increases, then the desperately_merged_ flag of
+// the merged partner is set, indicating that the textlines probably need to
+// be regenerated by aggressive line fitting/splitting, as there are probably
+// vertically joined blobs that cross textlines.
+void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate,
+                                             ColPartition_CLIST* partners,
+                                             ColPartitionGrid* grid) {
+  bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
+                                         bounding_box_.bottom());
+  if (debug) {
+    tprintf("Refining %d %s partners by merge for:\n",
+            partners->length(), upper ? "Upper" : "Lower");
+    Print();
+  }
+  while (!partners->empty() && !partners->singleton()) {
+    // Absorb will mess up the iterators, so we have to merge one partition
+    // at a time and rebuild the iterators each time.
+    ColPartition_C_IT it(partners);
+    ColPartition* part = it.data();  // The first partner absorbs the others.
+    // Gather a list of merge candidates, from the rest of the partners, that
+    // are in the same single column as part (first/last columns cross-equal).
+    ColPartition_CLIST candidates;
+    ColPartition_C_IT cand_it(&candidates);
+    for (it.forward(); !it.at_first(); it.forward()) {
+      ColPartition* candidate = it.data();
+      if (part->first_column_ == candidate->last_column_ &&
+          part->last_column_ == candidate->first_column_)
+        cand_it.add_after_then_move(it.data());
+    }
+    int overlap_increase;  // Passed to BestMergeCandidate as an out-param.
+    ColPartition* candidate = grid->BestMergeCandidate(part, &candidates, debug,
+                                                       nullptr, &overlap_increase);
+    if (candidate != nullptr && (overlap_increase <= 0 || desperate)) {
+      if (debug) {
+        tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
+                part->HCoreOverlap(*candidate), part->VCoreOverlap(*candidate),
+                overlap_increase);
+      }
+      // Remove before merge and re-insert to keep the integrity of the grid.
+      grid->RemoveBBox(candidate);
+      grid->RemoveBBox(part);
+      part->Absorb(candidate, nullptr);
+      // We modified the box of part, so re-insert it into the grid.
+      grid->InsertBBox(true, true, part);
+      if (overlap_increase > 0)
+        part->desperately_merged_ = true;
+    } else {
+      break;  // Can't merge.
+    }
+  }
+}
+
+// Cleans up the partners above if upper is true, else below, keeping only the
+// partner whose bounding box overlaps this one's the most in the x direction.
+void ColPartition::RefinePartnersByOverlap(bool upper,
+                                           ColPartition_CLIST* partners) {
+  const bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
+                                               bounding_box_.bottom());
+  if (debug) {
+    tprintf("Refining %d %s partners by overlap for:\n",
+            partners->length(), upper ? "Upper" : "Lower");
+    Print();
+  }
+  ColPartition_C_IT scan(partners);
+  // Start with the first partner; only a strictly larger overlap replaces it.
+  ColPartition* winner = scan.data();
+  int widest = 0;
+  for (scan.mark_cycle_pt(); !scan.cycled_list(); scan.forward()) {
+    ColPartition* rival = scan.data();
+    const TBOX& rival_box = rival->bounding_box_;
+    const int overlap = std::min(bounding_box_.right(), rival_box.right()) -
+                        std::max(bounding_box_.left(), rival_box.left());
+    if (overlap > widest) {
+      widest = overlap;
+      winner = rival;
+    }
+  }
+  // Drop every partner other than the winner, notifying it via RemovePartner.
+  for (scan.mark_cycle_pt(); !scan.cycled_list(); scan.forward()) {
+    ColPartition* rival = scan.data();
+    if (rival == winner) continue;
+    if (debug) {
+      tprintf("Removing partner:");
+      rival->Print();
+    }
+    rival->RemovePartner(!upper, this);
+    scan.extract();
+  }
+}
+
+// Returns true if bbox belongs better in this than other. Margins decide
+// first; then the smaller vertical miss against the median top/bottom band,
+// then the larger overlap, and finally the higher (or equal) median top wins.
+bool ColPartition::ThisPartitionBetter(BLOBNBOX* bbox,
+                                       const ColPartition& other) {
+  const TBOX& box = bbox->bounding_box();
+  const int left = box.left();
+  const int right = box.right();
+  // Margins take priority: outside ours is a no, outside only theirs is a yes.
+  if (left < left_margin_ || right > right_margin_)
+    return false;
+  if (left < other.left_margin_ || right > other.right_margin_)
+    return true;
+  const int top = box.top();
+  const int bottom = box.bottom();
+  const int this_overlap =
+      std::min(top, median_top_) - std::max(bottom, median_bottom_);
+  const int other_overlap = std::min(top, other.median_top_) -
+                            std::max(bottom, other.median_bottom_);
+  const int this_miss = median_top_ - median_bottom_ - this_overlap;
+  const int other_miss =
+      other.median_top_ - other.median_bottom_ - other_overlap;
+  if (TabFind::WithinTestRegion(3, box.left(), box.bottom())) {
+    tprintf("Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n",
+            box.left(), box.bottom(), box.right(), box.top(),
+            this_overlap, other_overlap, this_miss, other_miss,
+            median_top_, other.median_top_);
+  }
+  if (this_miss != other_miss)
+    return this_miss < other_miss;
+  if (this_overlap != other_overlap)
+    return this_overlap > other_overlap;
+  return median_top_ >= other.median_top_;
+}
+
+// Returns the median of the top and bottom line spacings of the partitions
+// from the iterator's current position until the iterator reports
+// cycled_list(). The iterator is taken by value so that advancing it here
+// leaves the caller's iterator untouched.
+static int MedianSpacing(int page_height, ColPartition_IT it) {
+  STATS spacing_stats(0, page_height);
+  for (; !it.cycled_list(); it.forward()) {
+    ColPartition* current = it.data();
+    spacing_stats.add(current->bottom_spacing(), 1);
+    spacing_stats.add(current->top_spacing(), 1);
+  }
+  // Add 0.5 before truncating so the median is rounded rather than floored.
+  return static_cast<int>(spacing_stats.median() + 0.5);
+}
+
+// Returns true if this column partition is in the same column as part, i.e.
+// the column ranges [first_column_, last_column_] of the two intersect.
+// As originally documented, this only works after the SetPartitionType
+// function has been called on both column partitions. It is useful for
+// doing a SideSearch when you want things in the same page column.
+//
+// Currently called by the table detection code to identify if potential table
+// partitions exist in the same column.
+bool ColPartition::IsInSameColumnAs(const ColPartition& part) const {
+  // The ranges miss each other only when one lies wholly to one side.
+  const bool this_left_of_part = last_column_ < part.first_column_;
+  const bool this_right_of_part = first_column_ > part.last_column_;
+  return !this_left_of_part && !this_right_of_part;
+}
+
+// Smooths the spacings in parts into groups of equal linespacing, setting
+// each group's members (bar its last) to the group mean. resolution is that
+// of the original image, used for thresholds. page_height is in pixels.
+void ColPartition::SmoothSpacings(int resolution, int page_height,
+                                  ColPartition_LIST* parts) {
+  // The task would be trivial if we didn't have to allow for blips -
+  // occasional offsets in spacing caused by anomalous text, such as all
+  // caps, groups of descenders, joined words, Arabic etc.
+  // The neighbourhood stores a consecutive group of partitions so that
+  // blips can be detected correctly, yet conservatively enough to not
+  // mistake genuine spacing changes for blips. See example below.
+  ColPartition* neighbourhood[PN_COUNT];
+  ColPartition_IT it(parts);
+  it.mark_cycle_pt();
+  // Although we know nothing about the spacings in this list, the median is
+  // used as an approximation to allow blips.
+  // If parts of this block aren't spaced to the median, then we can't
+  // accept blips in those parts, but we'll recalculate it each time we
+  // split the block, so the median becomes more likely to match all the text.
+  int median_space = MedianSpacing(page_height, it);
+  ColPartition_IT start_it(it);  // First partition of the current group.
+  ColPartition_IT end_it(it);    // Tracks the partition held in PN_LOWER.
+  // Prime the window: slots before PN_UPPER start empty, the rest are filled
+  // from the list for as long as it lasts.
+  for (int i = 0; i < PN_COUNT; ++i) {
+    if (i < PN_UPPER || it.cycled_list()) {
+      neighbourhood[i] = nullptr;
+    } else {
+      if (i == PN_LOWER)
+        end_it = it;
+      neighbourhood[i] = it.data();
+      it.forward();
+    }
+  }
+  while (neighbourhood[PN_UPPER] != nullptr) {
+    // Test for end of a group. Normally SpacingsEqual is true within a group,
+    // but in the case of a blip, it will be false. Here is an example:
+    // Line enum   Spacing below (spacing between tops of lines)
+    //  1   ABOVE2    20
+    //  2   ABOVE1    20
+    //  3   UPPER     15
+    //  4   LOWER     25
+    //  5   BELOW1    20
+    //  6   BELOW2    20
+    // Line 4 is all in caps (regular caps), so the spacing between line 3
+    // and line 4 (looking at the tops) is smaller than normal, and the
+    // spacing between line 4 and line 5 is larger than normal, but the
+    // two of them add to twice the normal spacing.
+    // The following if has to accept unequal spacings 3 times to pass the
+    // blip (20/15, 15/25 and 25/20)
+    // When the blip is in the middle, OKSpacingBlip tests that one of
+    // ABOVE1 and BELOW1 matches the median.
+    // The first time, everything is shifted down 1, so we present
+    // OKSpacingBlip with neighbourhood+1 and check that PN_UPPER is median.
+    // The last time, everything is shifted up 1, so we present OKSpacingBlip
+    // with neighbourhood-1 and check that PN_LOWER matches the median.
+    if (neighbourhood[PN_LOWER] == nullptr ||
+        (!neighbourhood[PN_UPPER]->SpacingsEqual(*neighbourhood[PN_LOWER],
+                                                 resolution) &&
+         (neighbourhood[PN_UPPER] == nullptr ||  // Always false here: both
+          neighbourhood[PN_LOWER] == nullptr ||  // were dereferenced above.
+          !OKSpacingBlip(resolution, median_space, neighbourhood, 0)) &&
+         (neighbourhood[PN_UPPER - 1] == nullptr ||
+          neighbourhood[PN_LOWER - 1] == nullptr ||
+          !OKSpacingBlip(resolution, median_space, neighbourhood, -1) ||
+          !neighbourhood[PN_LOWER]->SpacingEqual(median_space, resolution)) &&
+         (neighbourhood[PN_UPPER + 1] == nullptr ||
+          neighbourhood[PN_LOWER + 1] == nullptr ||
+          !OKSpacingBlip(resolution, median_space, neighbourhood, 1) ||
+          !neighbourhood[PN_UPPER]->SpacingEqual(median_space, resolution)))) {
+      // The group has ended. PN_UPPER is the last member.
+      // Compute the mean spacing over the group.
+      ColPartition_IT sum_it(start_it);
+      ColPartition* last_part = neighbourhood[PN_UPPER];
+      double total_bottom = 0.0;
+      double total_top = 0.0;
+      int total_count = 0;
+      ColPartition* upper = sum_it.data();
+      // We do not process last_part, as its spacing is different.
+      while (upper != last_part) {
+        total_bottom += upper->bottom_spacing();
+        total_top += upper->top_spacing();
+        ++total_count;
+        sum_it.forward();
+        upper = sum_it.data();
+      }
+      if (total_count > 0) {
+        // There were at least 2 lines, so set them all to the mean.
+        int top_spacing = static_cast<int>(total_top / total_count + 0.5);
+        int bottom_spacing = static_cast<int>(total_bottom / total_count + 0.5);
+        if (textord_debug_tabfind) {
+          tprintf("Spacing run ended. Cause:");
+          if (neighbourhood[PN_LOWER] == nullptr) {
+            tprintf("No more lines\n");
+          } else {
+            tprintf("Spacing change. Spacings:\n");
+            for (int i = 0; i < PN_COUNT; ++i) {
+              if (neighbourhood[i] == nullptr) {
+                tprintf("NULL");
+                if (i > 0 && neighbourhood[i - 1] != nullptr) {
+                  if (neighbourhood[i - 1]->SingletonPartner(false) != nullptr) {
+                    tprintf(" Lower partner:");
+                    neighbourhood[i - 1]->SingletonPartner(false)->Print();
+                  } else {
+                    tprintf(" nullptr lower partner:\n");
+                  }
+                } else {
+                  tprintf("\n");
+                }
+              } else {
+                tprintf("Top = %d, bottom = %d\n",
+                        neighbourhood[i]->top_spacing(),
+                        neighbourhood[i]->bottom_spacing());
+              }
+            }
+          }
+          tprintf("Mean spacing = %d/%d\n", top_spacing, bottom_spacing);
+        }
+        // Second pass over the same range, writing the means back.
+        sum_it = start_it;
+        upper = sum_it.data();
+        while (upper != last_part) {
+          upper->set_top_spacing(top_spacing);
+          upper->set_bottom_spacing(bottom_spacing);
+          if (textord_debug_tabfind) {
+            tprintf("Setting mean on:");
+            upper->Print();
+          }
+          sum_it.forward();
+          upper = sum_it.data();
+        }
+      }
+      // PN_LOWER starts the next group and end_it is the next start_it.
+      start_it = end_it;
+      // Recalculate the median spacing to maximize the chances of detecting
+      // spacing blips.
+      median_space = MedianSpacing(page_height, end_it);
+    }
+    // Shuffle pointers: slide the window along by one partition.
+    for (int j = 1; j < PN_COUNT; ++j) {
+      neighbourhood[j - 1] = neighbourhood[j];
+    }
+    if (it.cycled_list()) {
+      neighbourhood[PN_COUNT - 1] = nullptr;
+    } else {
+      neighbourhood[PN_COUNT - 1] = it.data();
+      it.forward();
+    }
+    end_it.forward();
+  }
+}
+
+// Returns true if parts, viewed from parts + offset, matches the condition
+// for a spacing blip: PN_UPPER and PN_LOWER sum to an OK spacing and PN_ABOVE1
+// or PN_BELOW1 exists with spacing equal to the median. See SmoothSpacings.
+bool ColPartition::OKSpacingBlip(int resolution, int median_spacing,
+                                 ColPartition** parts, int offset) {
+  ColPartition** window = parts + offset;
+  if (!window[PN_UPPER]->SummedSpacingOK(*window[PN_LOWER], median_spacing,
+                                         resolution))
+    return false;
+  const ColPartition* above = window[PN_ABOVE1];
+  if (above != nullptr && above->SpacingEqual(median_spacing, resolution))
+    return true;
+  const ColPartition* below = window[PN_BELOW1];
+  return below != nullptr && below->SpacingEqual(median_spacing, resolution);
+}
+
+// Returns true if both the bottom and top spacings of this are within their
+// respective resolution-dependent margins of the given spacing.
+bool ColPartition::SpacingEqual(int spacing, int resolution) const {
+  if (!NearlyEqual(bottom_spacing_, spacing, BottomSpacingMargin(resolution)))
+    return false;
+  const int top_tolerance = TopSpacingMargin(resolution);
+  return NearlyEqual(top_spacing_, spacing, top_tolerance);
+}
+
+// Returns true if the bottom spacings of this and other match and either the
+// top spacings match or their sum matches twice this bottom spacing.
+bool ColPartition::SpacingsEqual(const ColPartition& other,
+                                 int resolution) const {
+  const int bottom_tol = std::max(BottomSpacingMargin(resolution),
+                                  other.BottomSpacingMargin(resolution));
+  const int top_tol = std::max(TopSpacingMargin(resolution),
+                               other.TopSpacingMargin(resolution));
+  return NearlyEqual(bottom_spacing_, other.bottom_spacing_, bottom_tol) &&
+         (NearlyEqual(top_spacing_, other.top_spacing_, top_tol) ||
+          NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2,
+                      bottom_tol));  // Summed tops use the bottom tolerance.
+}
+
+// Returns true if the summed spacings of this and other match the given
+// spacing, or twice it, to within a margin dictated by the image resolution.
+bool ColPartition::SummedSpacingOK(const ColPartition& other,
+                                   int spacing, int resolution) const {
+  const int bottom_tol = std::max(BottomSpacingMargin(resolution),
+                                  other.BottomSpacingMargin(resolution));
+  const int top_tol = std::max(TopSpacingMargin(resolution),
+                               other.TopSpacingMargin(resolution));
+  const int bottom_total = bottom_spacing_ + other.bottom_spacing_;
+  const int top_total = top_spacing_ + other.top_spacing_;
+  auto sums_match = [&](int target) {
+    return NearlyEqual(target, bottom_total, bottom_tol) &&
+           NearlyEqual(target, top_total, top_tol);
+  };
+  return sums_match(spacing) || sums_match(spacing * 2);
+}
+
+// Bottom-spacing margin: kMaxSpacingDrift * resolution, rounded, + side_step_.
+int ColPartition::BottomSpacingMargin(int resolution) const {
+  const int drift = static_cast<int>(kMaxSpacingDrift * resolution + 0.5);
+  return drift + side_step_;
+}
+
+// Returns the top-spacing margin: bottom margin + rounded height fraction.
+int ColPartition::TopSpacingMargin(int resolution) const {
+  const int height_slack =
+      static_cast<int>(kMaxTopSpacingFraction * median_height_ + 0.5);
+  return height_slack + BottomSpacingMargin(resolution);
+}
+
+// Returns true if neither median text height exceeds the other one
+// multiplied by kMaxSizeRatio.
+bool ColPartition::SizesSimilar(const ColPartition& other) const {
+  if (!(median_height_ <= other.median_height_ * kMaxSizeRatio)) return false;
+  return other.median_height_ <= median_height_ * kMaxSizeRatio;
+}
+
+// Helper narrows [*margin_left, *margin_right], the running bounds of the
+// left margin of part of a block, to their overlap with the left margin of
+// part. Returns false, leaving the bounds untouched, if the two are disjoint.
+static bool UpdateLeftMargin(const ColPartition& part,
+                             int* margin_left, int* margin_right) {
+  const TBOX& box = part.bounding_box();
+  const int top_y = box.top();
+  const int bottom_y = box.bottom();
+  // This part's left margin runs from left_margin() to the box's left edge.
+  // Use the sort keys of the top and bottom corners, taking the tighter one.
+  const int low_key = std::max(part.SortKey(part.left_margin(), top_y),
+                               part.SortKey(part.left_margin(), bottom_y));
+  const int high_key = std::min(part.SortKey(box.left(), top_y),
+                                part.SortKey(box.left(), bottom_y));
+  if (low_key > *margin_right || high_key < *margin_left)
+    return false;  // Disjoint from the established margin.
+  // This part is good - let's keep it, shrinking the bounds to the
+  // intersection.
+  *margin_right = std::min(*margin_right, high_key);
+  *margin_left = std::max(*margin_left, low_key);
+  return true;
+}
+
+// Computes and returns in start, end a line segment formed from a
+// forwards-iterated group of left edges of partitions that satisfy the
+// condition that the intersection of the left margins is non-empty, ie the
+// rightmost left margin is to the left of the leftmost left bounding box edge.
+// On return the iterator is set to the start of the next run.
+void ColPartition::LeftEdgeRun(ColPartition_IT* part_it,
+                               ICOORD* start, ICOORD* end) {
+  ColPartition* part = part_it->data();
+  ColPartition* start_part = part;  // Limits the backward extension below.
+  int start_y = part->bounding_box_.top();
+  if (!part_it->at_first()) {
+    // Move start_y to the previous partition's bottom if that is lower, or
+    // to the midpoint of the two if it is higher.
+    int prev_bottom = part_it->data_relative(-1)->bounding_box_.bottom();
+    if (prev_bottom < start_y)
+      start_y = prev_bottom;
+    else if (prev_bottom > start_y)
+      start_y = (start_y + prev_bottom) / 2;
+  }
+  int end_y = part->bounding_box_.bottom();  // Recomputed below before use.
+  int margin_right = INT32_MAX;
+  int margin_left = -INT32_MAX;
+  UpdateLeftMargin(*part, &margin_left, &margin_right);
+  // Advance while the margins still intersect and the list has not wrapped.
+  do {
+    part_it->forward();
+    part = part_it->data();
+  } while (!part_it->at_first() &&
+           UpdateLeftMargin(*part, &margin_left, &margin_right));
+  // The run ended. If we were pushed inwards, compute the next run and
+  // extend it backwards into the run we just calculated to find the end of
+  // this run that provides a tight box.
+  int next_margin_right = INT32_MAX;
+  int next_margin_left = -INT32_MAX;
+  UpdateLeftMargin(*part, &next_margin_left, &next_margin_right);
+  if (next_margin_left > margin_right) {
+    ColPartition_IT next_it(*part_it);  // Copy, so part_it stays put for now.
+    do {
+      next_it.forward();
+      part = next_it.data();
+    } while (!next_it.at_first() &&
+             UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
+    // Now extend the next run backwards into the original run to get the
+    // tightest fit.
+    do {
+      part_it->backward();
+      part = part_it->data();
+    } while (part != start_part &&
+             UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
+    part_it->forward();
+  }
+  // Now calculate the end_y.
+  part = part_it->data_relative(-1);
+  end_y = part->bounding_box_.bottom();
+  if (!part_it->at_first() && part_it->data()->bounding_box_.top() < end_y)
+    end_y = (end_y + part_it->data()->bounding_box_.top()) / 2;
+  start->set_y(start_y);
+  start->set_x(part->XAtY(margin_right, start_y));
+  end->set_y(end_y);
+  end->set_x(part->XAtY(margin_right, end_y));
+  if (textord_debug_tabfind && !part_it->at_first())
+    tprintf("Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
+            start_y, end_y, part->XAtY(margin_left, end_y),
+            end->x(), part->left_margin_, part->bounding_box_.left());
+}
+
+// Helper narrows [*margin_left, *margin_right], the running bounds of the
+// right margin of part of a block, to their overlap with the right margin of
+// part. Returns false, leaving the bounds untouched, if the two are disjoint.
+static bool UpdateRightMargin(const ColPartition& part,
+                              int* margin_left, int* margin_right) {
+  const TBOX& box = part.bounding_box();
+  const int top_y = box.top();
+  const int bottom_y = box.bottom();
+  // This part's right margin runs from the box's right edge to right_margin().
+  // Use the sort keys of the top and bottom corners, taking the tighter one.
+  const int low_key = std::max(part.SortKey(box.right(), top_y),
+                               part.SortKey(box.right(), bottom_y));
+  const int high_key = std::min(part.SortKey(part.right_margin(), top_y),
+                                part.SortKey(part.right_margin(), bottom_y));
+  if (low_key > *margin_right || high_key < *margin_left)
+    return false;  // Disjoint from the established margin.
+  // This part is good - let's keep it, shrinking the bounds to the
+  // intersection.
+  *margin_right = std::min(*margin_right, high_key);
+  *margin_left = std::max(*margin_left, low_key);
+  return true;
+}
+
+// Computes and returns in start, end a line segment formed from a
+// backwards-iterated group of right edges of partitions that satisfy the
+// condition that the intersection of the right margins is non-empty, ie the
+// leftmost right margin is to the right of the rightmost right bounding box
+// edge.
+// On return the iterator is set to the start of the next run.
+void ColPartition::RightEdgeRun(ColPartition_IT* part_it,
+                                ICOORD* start, ICOORD* end) {
+  ColPartition* part = part_it->data();
+  ColPartition* start_part = part;  // Limits the forward extension below.
+  int start_y = part->bounding_box_.bottom();
+  if (!part_it->at_last()) {
+    // Move start_y to the following partition's top if that is higher, or
+    // to the midpoint of the two if it is lower.
+    int next_y = part_it->data_relative(1)->bounding_box_.top();
+    if (next_y > start_y)
+      start_y = next_y;
+    else if (next_y < start_y)
+      start_y = (start_y + next_y) / 2;
+  }
+  int end_y = part->bounding_box_.top();  // Recomputed below before use.
+  int margin_right = INT32_MAX;
+  int margin_left = -INT32_MAX;
+  UpdateRightMargin(*part, &margin_left, &margin_right);
+  // Step backwards while the margins still intersect and no wrap has occurred.
+  do {
+    part_it->backward();
+    part = part_it->data();
+  } while (!part_it->at_last() &&
+           UpdateRightMargin(*part, &margin_left, &margin_right));
+  // The run ended. If we were pushed inwards, compute the next run and
+  // extend it backwards to find the end of this run for a tight box.
+  int next_margin_right = INT32_MAX;
+  int next_margin_left = -INT32_MAX;
+  UpdateRightMargin(*part, &next_margin_left, &next_margin_right);
+  if (next_margin_right < margin_left) {
+    ColPartition_IT next_it(*part_it);  // Copy, so part_it stays put for now.
+    do {
+      next_it.backward();
+      part = next_it.data();
+    } while (!next_it.at_last() &&
+             UpdateRightMargin(*part, &next_margin_left,
+                               &next_margin_right));
+    // Now extend the next run forwards into the original run to get the
+    // tightest fit.
+    do {
+      part_it->forward();
+      part = part_it->data();
+    } while (part != start_part &&
+             UpdateRightMargin(*part, &next_margin_left,
+                               &next_margin_right));
+    part_it->backward();
+  }
+  // Now calculate the end_y.
+  part = part_it->data_relative(1);
+  end_y = part->bounding_box().top();
+  if (!part_it->at_last() &&
+      part_it->data()->bounding_box_.bottom() > end_y)
+    end_y = (end_y + part_it->data()->bounding_box_.bottom()) / 2;
+  start->set_y(start_y);
+  start->set_x(part->XAtY(margin_left, start_y));
+  end->set_y(end_y);
+  end->set_x(part->XAtY(margin_left, end_y));
+  if (textord_debug_tabfind && !part_it->at_last())
+    tprintf("Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
+            start_y, end_y, end->x(), part->XAtY(margin_right, end_y),
+            part->bounding_box_.right(), part->right_margin_);
+}
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/colpartition.h b/tesseract/src/textord/colpartition.h
new file mode 100644
index 00000000..5c299b3e
--- /dev/null
+++ b/tesseract/src/textord/colpartition.h
@@ -0,0 +1,927 @@
+///////////////////////////////////////////////////////////////////////
+// File:        colpartition.h
+// Description: Class to hold partitions of the page that correspond
+//              roughly to text lines.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_COLPARTITION_H_
+#define TESSERACT_TEXTORD_COLPARTITION_H_
+
+#include "bbgrid.h"
+#include "blobbox.h"       // For BlobRegionType.
+#include "ocrblock.h"
+#include "rect.h"           // For TBOX.
+#include "scrollview.h"
+#include "tabfind.h"        // For WidthCallback.
+#include "tabvector.h"      // For BLOBNBOX_CLIST.
+
+#include <algorithm>
+
+namespace tesseract {
+
+// Number of colors in the color1, color2 arrays.
+const int kRGBRMSColors = 4;
+
+class ColPartition;
+class ColPartitionSet;
+class ColPartitionGrid;
+class WorkingPartSet;
+class WorkingPartSet_LIST;
+
+// An enum to indicate how a partition sits on the columns.
+// The order of flowing/heading/pullout must be kept consistent with
+// PolyBlockType.
+enum ColumnSpanningType {
+  CST_NOISE,        // Strictly between columns.
+  CST_FLOWING,      // Strictly within a single column.
+  CST_HEADING,      // Spans multiple columns.
+  CST_PULLOUT,      // Touches multiple columns, but doesn't span them.
+  CST_COUNT         // Number of entries (so it must remain the last one).
+};
+
+ELIST2IZEH(ColPartition)
+CLISTIZEH(ColPartition)
+
+/**
+ * ColPartition is a partition of a horizontal slice of the page.
+ * It starts out as a collection of blobs at a particular y-coord in the grid,
+ * but ends up (after merging and uniquing) as an approximate text line.
+ * ColPartitions are also used to hold a partitioning of the page into
+ * columns, each representing one column. Although a ColPartition applies
+ * to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions
+ * emerges, which represents the columns over a wide y-coordinate range.
+ */
+class TESS_API ColPartition : public ELIST2_LINK {
+ public:
+  // This empty constructor is here only so that the class can be ELISTIZED.
+  // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
+  // and eliminate CLASSNAME##_copier.
+  ColPartition() = default;
+
+  /**
+   * @param blob_type is the blob_region_type_ of the blobs in this partition.
+   * @param vertical is the direction of logical vertical on the possibly skewed image.
+   */
+  ColPartition(BlobRegionType blob_type, const ICOORD& vertical);
+  /**
+   * Constructs a fake ColPartition with no BLOBNBOXes to represent a
+   * horizontal or vertical line, given a type and a bounding box.
+   */
+  static ColPartition* MakeLinePartition(BlobRegionType blob_type,
+                                         const ICOORD& vertical,
+                                         int left, int bottom,
+                                         int right, int top);
+
+  // Constructs and returns a fake ColPartition with a single fake BLOBNBOX,
+  // all made from a single TBOX.
+  // WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and
+  // the ColPartition owns the BLOBNBOX!!!
+  // Call DeleteBoxes before deleting the ColPartition.
+  static ColPartition* FakePartition(const TBOX& box,
+                                     PolyBlockType block_type,
+                                     BlobRegionType blob_type,
+                                     BlobTextFlowType flow);
+
+  // Constructs and returns a ColPartition with the given real BLOBNBOX,
+  // and sets it up to be a "big" partition (single-blob partition bigger
+  // than the surrounding text that may be a dropcap, two or more vertically
+  // touching characters, or some graphic element).
+  // If the given list is not nullptr, the partition is also added to the list.
+  static ColPartition* MakeBigPartition(BLOBNBOX* box,
+                                        ColPartition_LIST* big_part_list);
+
+  ~ColPartition();
+
+  // Simple accessors.
+  // Read-only access to bounding_box_, the bounding box of all blobs in the
+  // partition. Returned by const reference, so no copy is made.
+  const TBOX& bounding_box() const { return bounding_box_; }
+  // Returns left_margin_, the leftmost coordinate that the region may occupy
+  // over the y limits.
+  int left_margin() const { return left_margin_; }
+  // Overwrites left_margin_ with the given margin. The value is stored as-is;
+  // no check against the bounding box is made here.
+  void set_left_margin(int margin) { left_margin_ = margin; }
+  // Returns right_margin_, the rightmost coordinate that the region may
+  // occupy over the y limits.
+  int right_margin() const { return right_margin_; }
+  // Overwrites right_margin_ with the given margin. The value is stored
+  // as-is; no check against the bounding box is made here.
+  void set_right_margin(int margin) { right_margin_ = margin; }
+  // Returns median_top_, the median top of the blobs in this partition.
+  // Plain read of the stored value; nothing is recomputed.
+  int median_top() const { return median_top_; }
+  // Returns median_bottom_, the median bottom of the blobs in this partition.
+  // Plain read of the stored value; nothing is recomputed.
+  int median_bottom() const { return median_bottom_; }
+  // Plain getter: returns the value currently stored in median_left_.
+  // Nothing is recomputed and the partition is not modified.
+  int median_left() const { return median_left_; }
+  // Plain getter: returns the value currently stored in median_right_.
+  // Nothing is recomputed and the partition is not modified.
+  int median_right() const { return median_right_; }
+  // Plain getter: returns the value currently stored in median_height_.
+  // Nothing is recomputed and the partition is not modified.
+  int median_height() const { return median_height_; }
+  // Plain setter: replaces median_height_ with the given height. The value
+  // is stored as-is; no other member is updated.
+  void set_median_height(int height) { median_height_ = height; }
+  // Plain getter: returns the value currently stored in median_width_.
+  // Nothing is recomputed and the partition is not modified.
+  int median_width() const { return median_width_; }
+  // Plain setter: replaces median_width_ with the given width. The value is
+  // stored as-is; no other member is updated.
+  void set_median_width(int width) { median_width_ = width; }
+  // Plain getter: returns the partition's BlobRegionType, blob_type_, by
+  // value. The partition is not modified.
+  BlobRegionType blob_type() const { return blob_type_; }
+  // Plain setter: replaces blob_type_ with t. Only this member changes;
+  // flow_ and type_ are left as they are.
+  void set_blob_type(BlobRegionType t) { blob_type_ = t; }
+  // Plain getter: returns the partition's BlobTextFlowType, flow_, by value.
+  // The partition is not modified.
+  BlobTextFlowType flow() const { return flow_; }
+  // Plain setter: replaces flow_ with f. Only this member changes;
+  // blob_type_ and type_ are left as they are.
+  void set_flow(BlobTextFlowType f) { flow_ = f; }
+  // Plain getter: returns the value currently stored in good_blob_score_.
+  // Nothing is recomputed and the partition is not modified.
+  int good_blob_score() const { return good_blob_score_; }
+  // Plain getter: returns the good_width_ flag as stored. The flag is not
+  // re-evaluated here.
+  bool good_width() const { return good_width_; }
+  // Plain getter: returns the good_column_ flag as stored. The flag is not
+  // re-evaluated here.
+  bool good_column() const { return good_column_; }
+  // Plain getter: returns the left_key_tab_ flag as stored.
+  // The partition is not modified.
+  bool left_key_tab() const { return left_key_tab_; }
+  // Plain getter: returns left_key_, the sort key that LeftAtY and
+  // ColumnWidth use for the left edge.
+  int left_key() const { return left_key_; }
+  // Plain getter: returns the right_key_tab_ flag as stored.
+  // The partition is not modified.
+  bool right_key_tab() const { return right_key_tab_; }
+  // Plain getter: returns right_key_, the sort key that RightAtY and
+  // ColumnWidth use for the right edge.
+  int right_key() const { return right_key_; }
+  // Plain getter: returns the partition's PolyBlockType, type_, by value.
+  // The partition is not modified.
+  PolyBlockType type() const { return type_; }
+  // Plain setter: replaces type_ with t directly. Unlike set_table_type(),
+  // the previous type is not saved anywhere.
+  void set_type(PolyBlockType t) { type_ = t; }
+  // Returns a pointer to this partition's own boxes_ list (not a copy), so
+  // callers can read or modify the list through it.
+  BLOBNBOX_CLIST* boxes() { return &boxes_; }
+  // Number of entries in boxes_, as reported by the list's length().
+  // Const query; the list is not modified.
+  int boxes_count() const { return boxes_.length(); }
+  // Plain setter: assigns v to vertical_, the direction that SortKey, XAtY
+  // and KeyWidth work with.
+  void set_vertical(const ICOORD& v) { vertical_ = v; }
+  // Returns a pointer to this partition's own upper_partners_ list (not a
+  // copy), so callers can read or modify the list through it.
+  ColPartition_CLIST* upper_partners() { return &upper_partners_; }
+  // Returns a pointer to this partition's own lower_partners_ list (not a
+  // copy), so callers can read or modify the list through it.
+  ColPartition_CLIST* lower_partners() { return &lower_partners_; }
+  // Plain setter: stores the given WorkingPartSet pointer in working_set_.
+  // Only the pointer is stored; the set itself is not copied.
+  void set_working_set(WorkingPartSet* working_set) { working_set_ = working_set; }
+  // Plain getter: returns the block_owned_ flag as stored.
+  // The partition is not modified.
+  bool block_owned() const { return block_owned_; }
+  // Plain setter: replaces the block_owned_ flag with owned.
+  // No other member is updated.
+  void set_block_owned(bool owned) { block_owned_ = owned; }
+  // Plain getter: returns the desperately_merged_ flag as stored.
+  // The partition is not modified.
+  bool desperately_merged() const { return desperately_merged_; }
+  // Plain getter: returns the column_set_ pointer as stored. The pointed-to
+  // ColPartitionSet is neither copied nor modified.
+  ColPartitionSet* column_set() const { return column_set_; }
+  // Plain setter: replaces side_step_ with the given step. The value is
+  // stored as-is; no other member is updated.
+  void set_side_step(int step) { side_step_ = step; }
+  // Plain getter: returns the value currently stored in bottom_spacing_.
+  // Nothing is recomputed and the partition is not modified.
+  int bottom_spacing() const { return bottom_spacing_; }
+  // Plain setter: replaces bottom_spacing_ with the given spacing. The value
+  // is stored as-is; top_spacing_ is not touched.
+  void set_bottom_spacing(int spacing) { bottom_spacing_ = spacing; }
+  // Plain getter: returns the value currently stored in top_spacing_.
+  // Nothing is recomputed and the partition is not modified.
+  int top_spacing() const { return top_spacing_; }
+  // Plain setter: replaces top_spacing_ with the given spacing. The value is
+  // stored as-is; bottom_spacing_ is not touched.
+  void set_top_spacing(int spacing) { top_spacing_ = spacing; }
+
+  // Switches type_ to PT_TABLE, saving the old type in type_before_table_.
+  void set_table_type() {
+    if (type_ == PT_TABLE) return;  // Already a table: keep the saved type.
+    type_before_table_ = type_;
+    type_ = PT_TABLE;
+  }
+  // If type_ is PT_TABLE, restores the type saved in type_before_table_.
+  void clear_table_type() {
+    if (type_ == PT_TABLE) { type_ = type_before_table_; }
+  }
+  // Returns the inside_table_column_ flag. Declared const, like the other
+  // getters of this class, so it can also be called on a const ColPartition.
+  bool inside_table_column() const { return inside_table_column_; }
+  // Plain setter: replaces the inside_table_column_ flag with val.
+  // No other member is updated.
+  void set_inside_table_column(bool val) { inside_table_column_ = val; }
+  // Plain getter: returns the nearest_neighbor_above_ pointer as stored.
+  // No search is performed here.
+  ColPartition* nearest_neighbor_above() const { return nearest_neighbor_above_; }
+  // Plain setter: stores part in nearest_neighbor_above_. Only the pointer is
+  // recorded; part itself is not modified.
+  void set_nearest_neighbor_above(ColPartition* part) { nearest_neighbor_above_ = part; }
+  // Plain getter: returns the nearest_neighbor_below_ pointer as stored.
+  // No search is performed here.
+  ColPartition* nearest_neighbor_below() const { return nearest_neighbor_below_; }
+  // Plain setter: stores part in nearest_neighbor_below_. Only the pointer is
+  // recorded; part itself is not modified.
+  void set_nearest_neighbor_below(ColPartition* part) { nearest_neighbor_below_ = part; }
+  // Plain getter: returns the value currently stored in space_above_.
+  // Nothing is measured or recomputed here.
+  int space_above() const { return space_above_; }
+  // Plain setter: replaces space_above_ with the given space. The value is
+  // stored as-is; no other member is updated.
+  void set_space_above(int space) { space_above_ = space; }
+  // Plain getter: returns the value currently stored in space_below_.
+  // Nothing is measured or recomputed here.
+  int space_below() const { return space_below_; }
+  // Plain setter: replaces space_below_ with the given space. The value is
+  // stored as-is; no other member is updated.
+  void set_space_below(int space) { space_below_ = space; }
+  // Plain getter: returns the value currently stored in space_to_left_.
+  // Nothing is measured or recomputed here.
+  int space_to_left() const { return space_to_left_; }
+  // Plain setter: replaces space_to_left_ with the given space. The value is
+  // stored as-is; no other member is updated.
+  void set_space_to_left(int space) { space_to_left_ = space; }
+  // Plain getter: returns the value currently stored in space_to_right_.
+  // Nothing is measured or recomputed here.
+  int space_to_right() const { return space_to_right_; }
+  // Plain setter: replaces space_to_right_ with the given space. The value
+  // is stored as-is; no other member is updated.
+  void set_space_to_right(int space) { space_to_right_ = space; }
+  // Returns color1_ as a mutable uint8_t pointer (no copy is made).
+  // NOTE(review): presumably kRGBRMSColors entries long - confirm at the member.
+  uint8_t* color1() { return color1_; }
+  // Returns color2_ as a mutable uint8_t pointer (no copy is made).
+  // NOTE(review): presumably kRGBRMSColors entries long - confirm at the member.
+  uint8_t* color2() { return color2_; }
+  // Plain getter: returns the owns_blobs_ flag as stored.
+  // The partition is not modified.
+  bool owns_blobs() const { return owns_blobs_; }
+  void set_owns_blobs(bool owns_blobs) {
+    // Do NOT change the ownership flag when there are blobs in the list:
+    // ASSERT_HOST below checks that boxes_ is empty. When creating copies,
+    // set the ownership flag immediately, before any blobs are added.
+    ASSERT_HOST(boxes_.empty());
+    owns_blobs_ = owns_blobs;
+  }
+
+  // Inline quasi-accessors that require some computation.
+
+  // Vertical centre of the bounding box: mean of bottom and top (integer math).
+  int MidY() const {
+    return (bounding_box_.bottom() + bounding_box_.top()) / 2;
+  }
+  // Mean of the median bottom and median top of the blobs (integer math).
+  int MedianY() const {
+    return (median_bottom_ + median_top_) / 2;
+  }
+  // Horizontal centre of the bounding box: mean of right and left (integer math).
+  int MidX() const {
+    return (bounding_box_.right() + bounding_box_.left()) / 2;
+  }
+  // Sort key of the point (x, y). Thin wrapper that forwards to the static
+  // TabVector::SortKey, supplying this partition's vertical_ direction so
+  // that callers only need to pass the coordinates.
+  int SortKey(int x, int y) const { return TabVector::SortKey(vertical_, x, y); }
+  // Returns the x coordinate that corresponds to sort_key at height y. Thin
+  // wrapper that forwards to the static TabVector::XAtY, supplying this
+  // partition's vertical_ direction.
+  int XAtY(int sort_key, int y) const { return TabVector::XAtY(vertical_, sort_key, y); }
+  // X distance between two sort keys: (right_key - left_key) divided, with
+  // integer division, by vertical_.y().
+  // NOTE(review): no guard against vertical_.y() == 0 here - confirm callers.
+  int KeyWidth(int left_key, int right_key) const { return (right_key - left_key) / vertical_.y(); }
+  // Width of the column between the two key edges. Applies KeyWidth to the
+  // partition's own left_key_ and right_key_, so the result is an x
+  // difference rather than a sort-key difference.
+  int ColumnWidth() const { return KeyWidth(left_key_, right_key_); }
+  // Sort key of the left edge of the bounding box. The key is evaluated at
+  // the vertical midpoint of the box (MidY()) rather than at its top or
+  // bottom.
+  int BoxLeftKey() const { return SortKey(bounding_box_.left(), MidY()); }
+  // Sort key of the right edge of the bounding box. The key is evaluated at
+  // the vertical midpoint of the box (MidY()) rather than at its top or
+  // bottom.
+  int BoxRightKey() const { return SortKey(bounding_box_.right(), MidY()); }
+  // X position of the left edge at height y. The edge is stored as the sort
+  // key left_key_, which XAtY converts back into an x coordinate for the
+  // requested y.
+  int LeftAtY(int y) const { return XAtY(left_key_, y); }
+  // X position of the right edge at height y. The edge is stored as the sort
+  // key right_key_, which XAtY converts back into an x coordinate for the
+  // requested y.
+  int RightAtY(int y) const { return XAtY(right_key_, y); }
+  // Compares right edges only: true when this partition's bounding box right
+  // edge lies strictly to the left of other's bounding box right edge.
+  bool IsLeftOf(const ColPartition& other) const {
+    return other.bounding_box_.right() > bounding_box_.right();
+  }
+  // True if x is within the column edges at y, with a slack of 1 on each side.
+  bool ColumnContains(int x, int y) const {
+    return x >= LeftAtY(y) - 1 && x <= RightAtY(y) + 1;
+  }
+  // True when the partition holds no blobs, i.e. its boxes_ list is empty.
+  // Const query; the list itself is not touched.
+  // See IsSingleton() for the exactly-one-blob test.
+  bool IsEmpty() const { return boxes_.empty(); }
+  // True when the boxes_ list reports itself as a singleton, i.e. the
+  // partition consists of a single blob. Const query; the list is not touched.
+  // See IsEmpty() for the no-blob test.
+  bool IsSingleton() const { return boxes_.singleton(); }
+  // Returns true if the bounding boxes of this and other overlap horizontally.
+  bool HOverlaps(const ColPartition& other) const {
+    return bounding_box_.x_overlap(other.bounding_box_);  // Boxes only; keys and margins are ignored.
+  }
+  // True if the bounding boxes overlap in y, i.e. their y_gap is negative.
+  // TODO(rays) Make HOverlaps and VOverlaps truly symmetric.
+  bool VOverlaps(const ColPartition& other) const {
+    return !(bounding_box_.y_gap(other.bounding_box_) >= 0);
+  }
+  // Vertical overlap of the median extents of this and other: the lower median
+  // top minus the higher median bottom, or 0 if either median_bottom_ is
+  // INT32_MAX. WARNING! Only makes sense on horizontal partitions!
+  int VCoreOverlap(const ColPartition& other) const {
+    if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) return 0;
+    const int lowest_top = std::min(median_top_, other.median_top_);
+    const int highest_bottom = std::max(median_bottom_, other.median_bottom_);
+    return lowest_top - highest_bottom;
+  }
+  // Horizontal overlap of the median extents: smaller median right minus
+  // larger median left. WARNING! Only makes sense on vertical partitions!
+  int HCoreOverlap(const ColPartition& other) const {
+    const auto nearest_right = std::min(median_right_, other.median_right_);
+    return nearest_right - std::max(median_left_, other.median_left_);
+  }
+  // True if the median (core) extents of this and other overlap vertically
+  // by more than a third of the smaller of the two median heights. Always
+  // false if either median_bottom_ is INT32_MAX.
+  // WARNING! Only makes sense on horizontal partitions!
+  bool VSignificantCoreOverlap(const ColPartition& other) const {
+    if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) return false;
+    const int this_height = median_top_ - median_bottom_;
+    const int other_height = other.median_top_ - other.median_bottom_;
+    const int min_height = std::min(this_height, other_height);
+    return 3 * VCoreOverlap(other) > min_height;
+  }
+  // True if each bounding box lies inside the other partition's margins, so
+  // merging puts no horizontal step in either edge of the resulting block.
+  bool WithinSameMargins(const ColPartition& other) const {
+    if (other.bounding_box_.left() < left_margin_) return false;
+    if (bounding_box_.left() < other.left_margin_) return false;
+    if (bounding_box_.right() > other.right_margin_) return false;
+    return other.bounding_box_.right() <= right_margin_;
+  }
+  // Returns true if the blob region types (blob_type_) of this and other match.
+  // Lines never match anything, as they should never be merged or chained.
+  bool TypesMatch(const ColPartition& other) const {
+    return TypesMatch(blob_type_, other.blob_type_);
+  }
+  static bool TypesMatch(BlobRegionType type1, BlobRegionType type2) {
+    if (type1 != type2 && type1 != BRT_UNKNOWN && type2 != BRT_UNKNOWN) return false;
+    return !(BLOBNBOX::IsLineType(type1) || BLOBNBOX::IsLineType(type2));  // Lines never match.
+  }
+
+  // Similar means equal, or a PT_FLOWING_TEXT / PT_INLINE_EQUATION pair.
+  static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2) {
+    if (type1 == type2) return true;
+    if (type1 == PT_FLOWING_TEXT) return type2 == PT_INLINE_EQUATION;
+    return type1 == PT_INLINE_EQUATION && type2 == PT_FLOWING_TEXT;
+  }
+
+  // Returns true if the partition's type_ is a line type, as decided by
+  // PTIsLineType. NOTE(review): IsVerticalLine() relies on this too, so it is
+  // presumably not limited to horizontal lines - confirm in PTIsLineType.
+  bool IsLineType() const { return PTIsLineType(type_); }
+  // Returns true if the partition's type_ is an image type, as decided by
+  // PTIsImageType. Only the PolyBlockType type_ is examined; the
+  // BlobRegionType blob_type_ plays no part.
+  bool IsImageType() const { return PTIsImageType(type_); }
+  // Returns true if the partition's type_ is a text type, as decided by
+  // PTIsTextType. Only the PolyBlockType type_ is examined; the
+  // BlobRegionType blob_type_ plays no part.
+  bool IsTextType() const { return PTIsTextType(type_); }
+  // Returns true if the partition's type_ is a pullout (inter-column) type,
+  // as decided by PTIsPulloutType. Only the PolyBlockType type_ is examined;
+  // the BlobRegionType blob_type_ plays no part.
+  bool IsPulloutType() const { return PTIsPulloutType(type_); }
+  // True if blob_type_ is an exclusively vertical kind: BRT_VLINE or BRT_VERT_TEXT.
+  bool IsVerticalType() const {
+    return blob_type_ == BRT_VLINE || blob_type_ == BRT_VERT_TEXT;
+  }
+  // True if blob_type_ is a definite horizontal kind: BRT_HLINE or BRT_TEXT.
+  bool IsHorizontalType() const {
+    return blob_type_ == BRT_HLINE || blob_type_ == BRT_TEXT;
+  }
+  // Returns true if the partition is of a type that cannot be merged.
+  bool IsUnMergeableType() const {
+    return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE;  // Noise never merges either.
+  }
+  // Returns true if this partition is both a vertical type and a line type.
+  // TODO(nbeato): Use PartitionType enum when Ray's code is submitted.
+  bool IsVerticalLine() const {
+    return IsVerticalType() && IsLineType();
+  }
+  // Returns true if this partition is both a horizontal type and a line type.
+  // TODO(nbeato): Use PartitionType enum when Ray's code is submitted.
+  bool IsHorizontalLine() const {
+    return IsHorizontalType() && IsLineType();
+  }
+
+  // Adds the given box to the partition, updating the partition bounds.
+  // The list of boxes in the partition is updated, ensuring that no box is
+  // recorded twice, and the boxes are kept in increasing left position.
+  void AddBox(BLOBNBOX* box);
+
+  // Removes the given box from the partition, updating the bounds.
+  void RemoveBox(BLOBNBOX* box);
+
+  // Returns the tallest box in the partition, as measured perpendicular to the
+  // presumed flow of text.
+  BLOBNBOX* BiggestBox();
+
+  // Returns the bounding box excluding the given box.
+  TBOX BoundsWithoutBox(BLOBNBOX* box);
+
+  // Claims the boxes in the boxes_ list by marking them with this as their
+  // owner pointer.
+  void ClaimBoxes();
+
+  // Nulls the owner of the blobs in this partition, so they can be deleted
+  // independently of the ColPartition.
+  void DisownBoxes();
+  // Nulls the owner of the blobs in this partition that are owned by this
+  // partition, so they can be deleted independently of the ColPartition.
+  // Any blobs that are not owned by this partition get to keep their owner
+  // without an assert failure.
+  void DisownBoxesNoAssert();
+  // Nulls the owner of the blobs in this partition that are owned by this
+  // partition and not leader blobs, removing them from the boxes_ list, thus
+  // turning this partition back to a leader partition if it contains a leader,
+  // or otherwise leaving it empty. Returns true if any boxes remain.
+  bool ReleaseNonLeaderBoxes();
+
+  // Delete the boxes that this partition owns.
+  void DeleteBoxes();
+
+  // Reflects the partition in the y-axis, assuming that its blobs have
+  // already been done. Corrects only a limited part of the members, since
+  // this function is assumed to be used shortly after initial creation, which
+  // is before a lot of the members are used.
+  void ReflectInYAxis();
+
+  // Returns true if this is a legal partition - meaning that the conditions
+  // left_margin <= bounding_box left
+  // left_key <= bounding box left key
+  // bounding box left <= bounding box right
+  // and likewise for right margin and key
+  // are all met.
+  bool IsLegal();
+
+  // Returns true if the left and right edges are approximately equal.
+  bool MatchingColumns(const ColPartition& other) const;
+
+  // Returns true if the colors match for two text partitions.
+  bool MatchingTextColor(const ColPartition& other) const;
+
+  // Returns true if the sizes match for two text partitions,
+  // taking orientation into account
+  bool MatchingSizes(const ColPartition& other) const;
+
+  // Returns true if there is no tabstop violation in merging this and other.
+  bool ConfirmNoTabViolation(const ColPartition& other) const;
+
+  // Returns true if other has a similar stroke width to this.
+  bool MatchingStrokeWidth(const ColPartition& other,
+                           double fractional_tolerance,
+                           double constant_tolerance) const;
+  // Returns true if candidate is an acceptable diacritic base char merge
+  // with this as the diacritic.
+  bool OKDiacriticMerge(const ColPartition& candidate, bool debug) const;
+
+  // Sets the sort key using either the tab vector, or the bounding box if
+  // the tab vector is nullptr. If the tab_vector lies inside the bounding_box,
+  // use the edge of the box as a key any way.
+  void SetLeftTab(const TabVector* tab_vector);
+  void SetRightTab(const TabVector* tab_vector);
+
+  // Copies the left/right tab from the src partition, but if take_box is
+  // true, copies the box instead and uses that as a key.
+  void CopyLeftTab(const ColPartition& src, bool take_box);
+  void CopyRightTab(const ColPartition& src, bool take_box);
+
+  // Returns the left rule line x coord of the leftmost blob.
+  int LeftBlobRule() const;
+  // Returns the right rule line x coord of the rightmost blob.
+  int RightBlobRule() const;
+
+  // Returns the density value for a particular BlobSpecialTextType.
+  float SpecialBlobsDensity(const BlobSpecialTextType type) const;
+  // Returns the number of blobs for a particular BlobSpecialTextType.
+  int SpecialBlobsCount(const BlobSpecialTextType type);
+  // Set the density value for a particular BlobSpecialTextType, should ONLY be
+  // used for debugging or testing. In production code, use
+  // ComputeSpecialBlobsDensity instead.
+  void SetSpecialBlobsDensity(
+      const BlobSpecialTextType type, const float density);
+  // Compute the SpecialTextType density of blobs, where we assume
+  // that the SpecialTextType in the boxes_ has been set.
+  void ComputeSpecialBlobsDensity();
+
+  // Add a partner above if upper, otherwise below.
+  // Add them uniquely and keep the list sorted by box left.
+  // Partnerships are added symmetrically to partner and this.
+  void AddPartner(bool upper, ColPartition* partner);
+  // Removes the partner from this, but does not remove this from partner.
+  // This asymmetric removal is so as not to mess up the iterator that is
+  // working on partner's partner list.
+  void RemovePartner(bool upper, ColPartition* partner);
+  // Returns the partner if the given partner is a singleton, otherwise nullptr.
+  ColPartition* SingletonPartner(bool upper);
+
+  // Merge with the other partition and delete it.
+  void Absorb(ColPartition* other, WidthCallback cb);
+
+  // Returns true if the overlap between this and the merged pair of
+  // merge candidates is sufficiently trivial to be allowed.
+  // The merged box can graze the edge of this by the ok_box_overlap
+  // if that exceeds the margin to the median top and bottom.
+  bool OKMergeOverlap(const ColPartition& merge1, const ColPartition& merge2,
+                      int ok_box_overlap, bool debug);
+
+  // Find the blob at which to split this to minimize the overlap with the
+  // given box. Returns the first blob to go in the second partition.
+  BLOBNBOX* OverlapSplitBlob(const TBOX& box);
+
+  // Split this partition keeping the first half in this and returning
+  // the second half.
+  // Splits by putting the split_blob and the blobs that follow
+  // in the second half, and the rest in the first half.
+  ColPartition* SplitAtBlob(BLOBNBOX* split_blob);
+
+  // Splits this partition at the given x coordinate, returning the right
+  // half and keeping the left half in this.
+  ColPartition* SplitAt(int split_x);
+
+  // Recalculates all the coordinate limits of the partition.
+  void ComputeLimits();
+
+  // Returns the number of boxes that overlap the given box.
+  int CountOverlappingBoxes(const TBOX& box);
+
+  // Computes and sets the type_, first_column_, last_column_ and column_set_.
+  // resolution refers to the ppi resolution of the image.
+  void SetPartitionType(int resolution, ColPartitionSet* columns);
+
+  // Returns the PartitionType from the current BlobRegionType and a column
+  // flow spanning type ColumnSpanningType, generated by
+  // ColPartitionSet::SpanningType, that indicates how the partition sits
+  // in the columns.
+  PolyBlockType PartitionType(ColumnSpanningType flow) const;
+
+  // Returns the first and last column touched by this partition.
+  // resolution refers to the ppi resolution of the image.
+  void ColumnRange(int resolution, ColPartitionSet* columns,
+                   int* first_col, int* last_col);
+
+  // Sets the internal flags good_width_ and good_column_.
+  void SetColumnGoodness(WidthCallback cb);
+
+  // Determines whether the blobs in this partition mostly represent
+  // a leader (fixed pitch sequence) and sets the member blobs accordingly.
+  // Note that height is assumed to have been tested elsewhere, and that this
+  // function will find most fixed-pitch text as leader without a height filter.
+  // Leader detection is limited to sequences of identical width objects,
+  // such as .... or ----, so patterns, such as .-.-.-.-. will not be found.
+  bool MarkAsLeaderIfMonospaced();
+  // Given the result of TextlineProjection::EvaluateColPartition, (positive for
+  // horizontal text, negative for vertical text, and near zero for non-text),
+  // sets the blob_type_ and flow_ for this partition to indicate whether it
+  // is strongly or weakly vertical or horizontal text, or non-text.
+  void SetRegionAndFlowTypesFromProjectionValue(int value);
+
+  // Sets all blobs with the partition blob type and flow, but never overwrite
+  // leader blobs, as we need to be able to identify them later.
+  void SetBlobTypes();
+
+  // Returns true if a decent baseline can be fitted through the blobs.
+  // Works for both horizontal and vertical text.
+  bool HasGoodBaseline();
+
+  // Adds this ColPartition to a matching WorkingPartSet if one can be found,
+  // otherwise starts a new one in the appropriate column, ending the previous.
+  void AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright,
+                       int resolution, ColPartition_LIST* used_parts,
+                       WorkingPartSet_LIST* working_set);
+
+  // From the given block_parts list, builds one or more BLOCKs and
+  // corresponding TO_BLOCKs, such that the line spacing is uniform in each.
+  // Created blocks are appended to the end of completed_blocks and to_blocks.
+  // The used partitions are put onto used_parts, as they may still be referred
+  // to in the partition grid. bleft, tright and resolution are the bounds
+  // and resolution of the original image.
+  static void LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright,
+                                int resolution,
+                                ColPartition_LIST* block_parts,
+                                ColPartition_LIST* used_parts,
+                                BLOCK_LIST* completed_blocks,
+                                TO_BLOCK_LIST* to_blocks);
+  // Constructs a block from the given list of partitions.
+  // Arguments are as LineSpacingBlocks above.
+  static TO_BLOCK* MakeBlock(const ICOORD& bleft, const ICOORD& tright,
+                             ColPartition_LIST* block_parts,
+                             ColPartition_LIST* used_parts);
+
+  // Constructs a block from the given list of vertical text partitions.
+  // Currently only creates rectangular blocks.
+  static TO_BLOCK* MakeVerticalTextBlock(const ICOORD& bleft,
+                                         const ICOORD& tright,
+                                         ColPartition_LIST* block_parts,
+                                         ColPartition_LIST* used_parts);
+
+  // Makes a TO_ROW matching this and moves all the blobs to it, transferring
+  // ownership to the returned TO_ROW.
+  TO_ROW* MakeToRow();
+
+
+  // Returns a copy of everything except the list of boxes. The resulting
+  // ColPartition is only suitable for keeping in a column candidate list.
+  ColPartition* ShallowCopy() const;
+  // Returns a copy of everything with a shallow copy of the blobs.
+  // The blobs are still owned by their original parent, so they are
+  // treated as read-only.
+  ColPartition* CopyButDontOwnBlobs();
+
+  #ifndef GRAPHICS_DISABLED
+  // Provides a color for BBGrid to draw the rectangle.
+  ScrollView::Color  BoxColor() const;
+  #endif // !GRAPHICS_DISABLED
+
+  // Prints debug information on this.
+  void Print() const;
+  // Prints debug information on the colors.
+  void PrintColors();
+
+  // Sets the types of all partitions in the run to be the max of the types.
+  void SmoothPartnerRun(int working_set_count);
+
+  // Cleans up the partners of the given type so that there is at most
+  // one partner. This makes block creation simpler.
+  // If get_desperate is true, goes to more desperate merge methods
+  // to merge flowing text before breaking partnerships.
+  void RefinePartners(PolyBlockType type, bool get_desperate,
+                      ColPartitionGrid* grid);
+
+  // Returns true if this column partition is in the same column as
+  // part. This function will only work after the SetPartitionType function
+  // has been called on both column partitions. This is useful for
+  // doing a SideSearch when you want things in the same page column.
+  bool IsInSameColumnAs(const ColPartition& part) const;
+
+  // Comparator over pointers to ColPartition pointers (p1, p2), returning a
+  // negative, zero or positive int. If either box's vertical middle falls
+  // within the other box's y range, the two are ordered by increasing x
+  // middle; otherwise they are ordered by decreasing y middle.
+  static int SortByBBox(const void* p1, const void* p2) {
+    const TBOX& box_a = (*static_cast<const ColPartition* const*>(p1))->bounding_box_;
+    const TBOX& box_b = (*static_cast<const ColPartition* const*>(p2))->bounding_box_;
+    const int y_mid_a = box_a.y_middle();
+    const int y_mid_b = box_b.y_middle();
+    const bool a_mid_in_b = box_b.bottom() <= y_mid_a && y_mid_a <= box_b.top();
+    const bool b_mid_in_a = box_a.bottom() <= y_mid_b && y_mid_b <= box_a.top();
+    // Vertically overlapping: sort by increasing x.
+    if (a_mid_in_b || b_mid_in_a) return box_a.x_middle() - box_b.x_middle();
+    // Otherwise: sort by decreasing y.
+    return y_mid_b - y_mid_a;
+  }
+
+  // Sets the column bounds. Primarily used in testing.
+  // Overwrites first_column_ with the given column, unchecked.
+  // (set_last_column does the same for last_column_.)
+  void set_first_column(int column) { first_column_ = column; }
+  // Overwrites last_column_ with the given column, unchecked. Like
+  // set_first_column, primarily used in testing.
+  void set_last_column(int column) { last_column_ = column; }
+
+ private:
+  // Cleans up the partners above if upper is true, else below.
+  // If get_desperate is true, goes to more desperate merge methods
+  // to merge flowing text before breaking partnerships.
+  void RefinePartnersInternal(bool upper, bool get_desperate,
+                              ColPartitionGrid* grid);
+  // Restricts the partners to only desirable types. For text and BRT_HLINE this
+  // means the same type_ , and for image types it means any image type.
+  void RefinePartnersByType(bool upper, ColPartition_CLIST* partners);
+  // Remove transitive partnerships: this<->a, and a<->b and this<->b.
+  // Gets rid of this<->b, leaving a clean chain.
+  // Also if we have this<->a and a<->this, then gets rid of this<->a, as
+  // this has multiple partners.
+  void RefinePartnerShortcuts(bool upper, ColPartition_CLIST* partners);
+  // If multiple text partners can be merged, then do so.
+  // If desperate is true, then an increase in overlap with the merge is
+  // allowed. If the overlap increases, then the desperately_merged_ flag
+  // is set, indicating that the textlines probably need to be regenerated
+  // by aggressive line fitting/splitting, as there are probably vertically
+  // joined blobs that cross textlines.
+  void RefineTextPartnersByMerge(bool upper, bool desperate,
+                                 ColPartition_CLIST* partners,
+                                 ColPartitionGrid* grid);
+  // Keep the partner with the biggest overlap.
+  void RefinePartnersByOverlap(bool upper, ColPartition_CLIST* partners);
+
+  // Return true if bbox belongs better in this than other.
+  bool ThisPartitionBetter(BLOBNBOX* bbox, const ColPartition& other);
+
+  // Smoothes the spacings in the list into groups of equal linespacing.
+  // resolution is the resolution of the original image, used as a basis
+  // for thresholds in change of spacing. page_height is in pixels.
+  static void SmoothSpacings(int resolution, int page_height,
+                             ColPartition_LIST* parts);
+
+  // Returns true if the parts array of pointers to partitions matches the
+  // condition for a spacing blip. See SmoothSpacings for what this means
+  // and how it is used.
+  static bool OKSpacingBlip(int resolution, int median_spacing,
+                            ColPartition** parts, int offset);
+
+  // Returns true if both the top and bottom spacings of this match the given
+  // spacing to within suitable margins dictated by the image resolution.
+  bool SpacingEqual(int spacing, int resolution) const;
+
+  // Returns true if both the top and bottom spacings of this and other
+  // match to within suitable margins dictated by the image resolution.
+  bool SpacingsEqual(const ColPartition& other, int resolution) const;
+
+  // Returns true if the sum spacing of this and other match the given
+  // spacing (or twice the given spacing) to within a suitable margin dictated
+  // by the image resolution.
+  bool SummedSpacingOK(const ColPartition& other,
+                       int spacing, int resolution) const;
+
+  // Returns a suitable spacing margin that can be applied to bottoms of
+  // text lines, based on the resolution and the stored side_step_.
+  int BottomSpacingMargin(int resolution) const;
+
+  // Returns a suitable spacing margin that can be applied to tops of
+  // text lines, based on the resolution and the stored side_step_.
+  int TopSpacingMargin(int resolution) const;
+
+  // Returns true if the median text sizes of this and other agree to within
+  // a reasonable multiplicative factor.
+  bool SizesSimilar(const ColPartition& other) const;
+
+  // Computes and returns in start, end a line segment formed from a
+  // forwards-iterated group of left edges of partitions that satisfy the
+  // condition that the rightmost left margin is to the left of the
+  // leftmost left bounding box edge.
+  // TODO(rays) Not good enough. Needs improving to tightly wrap text in both
+  // directions, and to loosely wrap images.
+  static void LeftEdgeRun(ColPartition_IT* part_it,
+                          ICOORD* start, ICOORD* end);
+  // Computes and returns in start, end a line segment formed from a
+  // backwards-iterated group of right edges of partitions that satisfy the
+  // condition that the leftmost right margin is to the right of the
+  // rightmost right bounding box edge.
+  // TODO(rays) Not good enough. Needs improving to tightly wrap text in both
+  // directions, and to loosely wrap images.
+  static void RightEdgeRun(ColPartition_IT* part_it,
+                           ICOORD* start, ICOORD* end);
+
+  // The margins are determined by the position of the nearest vertically
+  // overlapping neighbour to the side. They indicate the maximum extent
+  // that the block/column may be extended without touching something else.
+  // Leftmost coordinate that the region may occupy over the y limits.
+  int left_margin_ = 0;
+  // Rightmost coordinate that the region may occupy over the y limits.
+  int right_margin_ = 0;
+  // Bounding box of all blobs in the partition.
+  TBOX bounding_box_;
+  // Median top and bottom of blobs in this partition.
+  int median_bottom_ = 0;
+  int median_top_ = 0;
+  // Median height of blobs in this partition.
+  int median_height_ = 0;
+  // Median left and right of blobs in this partition.
+  int median_left_ = 0;
+  int median_right_ = 0;
+  // Median width of blobs in this partition.
+  int median_width_ = 0;
+  // blob_region_type_ for the blobs in this partition.
+  BlobRegionType blob_type_ = BRT_UNKNOWN;
+  BlobTextFlowType flow_ = BTFT_NONE; // Quality of text flow.
+  // Total of GoodTextBlob results for all blobs in the partition.
+  int good_blob_score_ = 0;
+  // True if this partition has a common width.
+  bool good_width_ = false;
+  // True if this is a good column candidate.
+  bool good_column_ = false;
+  // True if the left_key_ is from a tab vector.
+  bool left_key_tab_ = false;
+  // True if the right_key_ is from a tab vector.
+  bool right_key_tab_ = false;
+  // Left and right sort keys for the edges of the partition.
+  // If the respective *_key_tab_ is true then this key came from a tab vector.
+  // If not, then the class promises to keep the key equal to the sort key
+  // for the respective edge of the bounding box at the MidY, so that
+  // LeftAtY and RightAtY always return an x coordinate on the line parallel
+  // to vertical_ through the bounding box edge at MidY.
+  int left_key_ = 0;
+  int right_key_ = 0;
+  // Type of this partition after looking at its relation to the columns.
+  PolyBlockType type_ = PT_UNKNOWN;
+  // The global vertical skew direction.
+  ICOORD vertical_;
+  // All boxes in the partition stored in increasing left edge coordinate.
+  BLOBNBOX_CLIST boxes_;
+  // The partitions above that matched this.
+  ColPartition_CLIST upper_partners_;
+  // The partitions below that matched this.
+  ColPartition_CLIST lower_partners_;
+  // The WorkingPartSet it lives in while blocks are being made.
+  WorkingPartSet* working_set_ = nullptr;
+  // Column_set_ is the column layout applicable to this ColPartition.
+  ColPartitionSet* column_set_ = nullptr;
+  // Flag is true when AddBox is sorting vertically, false otherwise.
+  bool last_add_was_vertical_ = false;
+  // True when the partition's ownership has been taken from the grid and
+  // placed in a working set, or, after that, in the good_parts_ list.
+  bool block_owned_ = false;
+  // Flag to indicate that this partition was subjected to a desperate merge,
+  // and therefore the textlines need rebuilding.
+  bool desperately_merged_ = false;
+  bool owns_blobs_ = true; // Does the partition own its blobs?
+  // The first and last column that this partition applies to.
+  // Flowing partitions (see type_) will have an equal first and last value
+  // of the form 2n + 1, where n is the zero-based index into the partitions
+  // in column_set_. (See ColPartitionSet::GetColumnByIndex).
+  // Heading partitions will have unequal values of the same form.
+  // Pullout partitions will have equal values, but may have even values,
+  // indicating placement between columns.
+  int first_column_ = -1;
+  int last_column_ = -1;
+  // Linespacing data.
+  int side_step_ = 0;      // Median y-shift to next blob on same line.
+  int top_spacing_ = 0;    // Line spacing from median_top_.
+  int bottom_spacing_ = 0; // Line spacing from median_bottom_.
+
+  // Nearest neighbor above with major x-overlap
+  ColPartition* nearest_neighbor_above_ = nullptr;
+  // Nearest neighbor below with major x-overlap
+  ColPartition* nearest_neighbor_below_ = nullptr;
+  int space_above_ = 0;    // Distance from nearest_neighbor_above
+  int space_below_ = 0;    // Distance from nearest_neighbor_below
+  int space_to_left_ = 0;  // Distance from the left edge of the column
+  int space_to_right_ = 0; // Distance from the right edge of the column
+  // Color foreground/background data.
+  // Value-initialized (all zero) so that, like every other data member of
+  // this class, they have a defined value even when no constructor or
+  // setter has filled them in yet.
+  uint8_t color1_[kRGBRMSColors] = {};
+  uint8_t color2_[kRGBRMSColors] = {};
+  // The density of special blobs, one entry per special text type.
+  // Value-initialized to 0.0f for the same reason as the colors above.
+  float special_blobs_densities_[BSTT_COUNT] = {};
+  // Type of this partition before considering it as a table cell. This is
+  // used to revert the type if a partition is first marked as a table cell but
+  // later filtering steps decide it does not belong to a table
+  PolyBlockType type_before_table_ = PT_UNKNOWN;
+  // Check whether the current partition has been assigned to a table column.
+  bool inside_table_column_ = false;
+};
+
+// Convenience name for a GridSearch over ColPartitions. It is only an alias
+// for now, but is named here in case it becomes a class of its own later.
+using ColPartitionGridSearch =
+    GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>;
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_TEXTORD_COLPARTITION_H_
diff --git a/tesseract/src/textord/colpartitiongrid.cpp b/tesseract/src/textord/colpartitiongrid.cpp
new file mode 100644
index 00000000..fcf9b000
--- /dev/null
+++ b/tesseract/src/textord/colpartitiongrid.cpp
@@ -0,0 +1,1743 @@
+///////////////////////////////////////////////////////////////////////
+// File:        colpartitiongrid.cpp
+// Description: Class collecting code that acts on a BBGrid of ColPartitions.
+// Author:      Ray Smith
+//
+// (C) Copyright 2009, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "colpartitiongrid.h"
+#include "colpartitionset.h"
+#include "imagefind.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+// Max pad factor used to search the neighbourhood of a partition to smooth
+// partition types.
+const int kMaxPadFactor = 6;
+// Max multiple of size (min(height, width)) for the distance of the nearest
+// neighbour for the change of type to be used.
+const int kMaxNeighbourDistFactor = 4;
+// Maximum number of lines in a credible figure caption.
+const int kMaxCaptionLines = 7;
+// Min ratio between biggest and smallest gap to bound a caption.
+const double kMinCaptionGapRatio = 2.0;
+// Min ratio between biggest gap and mean line height to bound a caption.
+const double kMinCaptionGapHeightRatio = 0.5;
+// Min fraction of ColPartition height to be overlapping for margin purposes.
+const double kMarginOverlapFraction = 0.25;
+// Size ratio required to consider an unmerged overlapping partition to be big.
+// SplitOverlappingPartitions compares the height of a partition's biggest
+// box against this multiple of the height of what is left without it.
+const double kBigPartSizeRatio = 1.75;
+// Fraction of gridsize to allow arbitrary overlap between partitions.
+// It is multiplied by gridsize() and rounded to give the ok_overlap pixel
+// limit that is passed to ColPartition::OKMergeOverlap.
+const double kTinyEnoughTextlineOverlapFraction = 0.25;
+// Max vertical distance of neighbouring ColPartition as a multiple of
+// partition height for it to be a partner.
+// TODO(rays) fix the problem that causes a larger number to not work well.
+// There are two conflicting pressures on this value:
+// * It needs to be larger, as sparse text blocks in a page that gets
+//   marked as single column will not find adjacent lines as partners, and
+//   will merge horizontally distant, but aligned lines. See rep.4B3 p5.
+// * It needs to be small, because double-spaced legal docs written
+//   in a single column, but justified courier have widely spaced lines
+//   that need to get merged before they partner-up with the lines above
+//   and below. See legal.3B5 p13/17.
+// Neither of these should depend on the value of kMaxPartitionSpacing to be
+// successful, and ColPartition merging needs attention to fix this problem.
+const double kMaxPartitionSpacing = 1.75;
+// Margin by which text has to beat image or vice-versa to make a firm
+// decision in GridSmoothNeighbour.
+const int kSmoothDecisionMargin = 4;
+
+// Constructs the grid by forwarding gridsize and the bleft/tright corners
+// unchanged to the BBGrid base class.
+ColPartitionGrid::ColPartitionGrid(int gridsize, const ICOORD& bleft,
+                                   const ICOORD& tright)
+    : BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>(
+          gridsize, bleft, tright) {}
+
+// Responds to a click at (x, y) in a display window: runs the base-class
+// handler first, then prints every partition found by a radial search
+// whose bounding box contains the click point.
+void ColPartitionGrid::HandleClick(int x, int y) {
+  BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x,
+                                                                           y);
+  FCOORD click_pt(x, y);
+  // Radial search (in unique mode) centred on the click position.
+  ColPartitionGridSearch search(this);
+  search.SetUniqueMode(true);
+  search.StartRadSearch(x, y, 1);
+  for (ColPartition* hit = search.NextRadSearch(); hit != nullptr;
+       hit = search.NextRadSearch()) {
+    const TBOX& hit_box = hit->bounding_box();
+    if (!hit_box.contains(click_pt))
+      continue;  // The click is outside this partition.
+    tprintf("Block box:");
+    hit_box.print();
+    hit->Print();
+  }
+}
+
+// Merges ColPartitions in the grid that look like they belong in the same
+// textline.
+// Every partition returned by a full search of the grid is handed to
+// MergePart together with the two callbacks: box_cb computes the search box
+// for a partition and confirm_cb checks any further rules on a candidate.
+// The callbacks are std::function objects taken by value, so there is
+// nothing for this function to delete.
+void ColPartitionGrid::Merges(
+    std::function<bool(ColPartition*, TBOX*)> box_cb,
+    std::function<bool(const ColPartition*, const ColPartition*)> confirm_cb) {
+  ColPartitionGridSearch gsearch(this);
+  gsearch.StartFullSearch();
+  for (ColPartition* part = gsearch.NextFullSearch(); part != nullptr;
+       part = gsearch.NextFullSearch()) {
+    const bool merged = MergePart(box_cb, confirm_cb, part);
+    if (merged) {
+      // The grid was modified by the merge, so re-sync the search position.
+      gsearch.RepositionIterator();
+    }
+  }
+}
+
+// For the given partition, calls box_cb to compute the search box, collects
+// the merge candidates inside it and asks BestMergeCandidate (which is given
+// confirm_cb) for the best one. As long as the best candidate does not
+// increase overlap, it is absorbed into part and the search is repeated
+// with the enlarged part.
+// Returns true if part absorbed at least one neighbour.
+bool ColPartitionGrid::MergePart(
+    std::function<bool(ColPartition*, TBOX*)> box_cb,
+    std::function<bool(const ColPartition*, const ColPartition*)> confirm_cb,
+    ColPartition* part) {
+  if (part->IsUnMergeableType())
+    return false;
+  bool merged_any = false;
+  // Each pass performs at most one merge; the loop ends on the first pass
+  // that fails to merge anything.
+  for (;;) {
+    TBOX box = part->bounding_box();
+    const bool debug =
+        AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
+    if (debug) {
+      tprintf("Merge candidate:");
+      box.print();
+    }
+    // Let the callback turn the part's box into the search rectangle.
+    if (!box_cb(part, &box))
+      break;
+    ColPartition_CLIST merge_candidates;
+    FindMergeCandidates(part, box, debug, &merge_candidates);
+    // Pick the candidate that gives the smallest increase in overlap.
+    int overlap_increase;
+    ColPartition* neighbour = BestMergeCandidate(
+        part, &merge_candidates, debug, confirm_cb, &overlap_increase);
+    if (neighbour == nullptr) {
+      if (debug)
+        tprintf("No candidate neighbour returned\n");
+      break;
+    }
+    if (overlap_increase > 0) {
+      if (debug) {
+        tprintf("Overlapped when merged with increase %d: ", overlap_increase);
+        neighbour->bounding_box().print();
+      }
+      break;
+    }
+    if (debug) {
+      tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
+              part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour),
+              overlap_increase);
+    }
+    // Good candidate: take both partitions out of the grid, absorb the
+    // neighbour into part, and put part back now that its box has changed.
+    RemoveBBox(neighbour);
+    RemoveBBox(part);
+    part->Absorb(neighbour, nullptr);
+    InsertBBox(true, true, part);
+    merged_any = true;
+  }
+  return merged_any;
+}
+
+// Decides, by the default rules, whether part and candidate could plausibly
+// belong to one text line.
+// Normally only partitions on the same text line (overlapping median
+// limits) should merge, but diacritics and stray punctuation are allowed
+// through as exceptions.
+// A partition is never a candidate for itself, and the types of the two
+// partitions must be compatible. If debug is true, the reasons for
+// rejection are printed.
+static bool OKMergeCandidate(const ColPartition* part,
+                             const ColPartition* candidate,
+                             bool debug) {
+  if (candidate == part)
+    return false;  // Ignore itself.
+  if (!part->TypesMatch(*candidate) || candidate->IsUnMergeableType())
+    return false;  // Don't mix inappropriate types.
+
+  const TBOX& part_box = part->bounding_box();
+  const TBOX& cand_box = candidate->bounding_box();
+  if (debug) {
+    tprintf("Examining merge candidate:");
+    cand_box.print();
+  }
+
+  const bool vertical = candidate->IsVerticalType() || part->IsVerticalType();
+  if (vertical) {
+    // Vertical text: only the horizontal gap is limited.
+    const int h_dist = -part->HCoreOverlap(*candidate);
+    const int h_limit = std::max(part_box.width(), cand_box.width()) / 2;
+    if (h_dist >= h_limit) {
+      if (debug)
+        tprintf("Too far away: h_dist = %d\n", h_dist);
+      return false;
+    }
+    return true;
+  }
+
+  // Horizontal text: coarse filter on the vertical gap first.
+  const int v_dist = -part->VCoreOverlap(*candidate);
+  const int v_limit = std::max(part_box.height(), cand_box.height()) / 2;
+  if (v_dist >= v_limit) {
+    if (debug)
+      tprintf("Too far away: v_dist = %d\n", v_dist);
+    return false;
+  }
+  // Then require an overlap in median y, or that one of the two is an
+  // acceptable diacritic of the other.
+  const bool same_line = part->VSignificantCoreOverlap(*candidate) ||
+                         part->OKDiacriticMerge(*candidate, debug) ||
+                         candidate->OKDiacriticMerge(*part, debug);
+  if (!same_line) {
+    if (debug)
+      tprintf("Candidate fails overlap and diacritic tests!\n");
+    return false;
+  }
+  return true;
+}
+
+// Helper that returns how much more the partitions in parts would overlap
+// the union of merge1 and merge2 than they overlap merge1 and merge2
+// separately.
+// An overlap is ignored if it passes the OKMergeOverlap test with ok_overlap
+// as the pixel overlap limit. merge1 and merge2 must both be non-nullptr,
+// and are themselves skipped if they occur in parts.
+static int IncreaseInOverlap(const ColPartition* merge1,
+                             const ColPartition* merge2,
+                             int ok_overlap,
+                             ColPartition_CLIST* parts) {
+  ASSERT_HOST(merge1 != nullptr && merge2 != nullptr);
+  const TBOX& box1 = merge1->bounding_box();
+  const TBOX& box2 = merge2->bounding_box();
+  TBOX merged_box(box1);
+  merged_box += box2;
+  int increase = 0;
+  ColPartition_C_IT part_it(parts);
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    ColPartition* other = part_it.data();
+    if (other == merge1 || other == merge2)
+      continue;
+    const TBOX& other_box = other->bounding_box();
+    // Overlap of this partition with the would-be merged box.
+    const int merged_overlap = other_box.intersection(merged_box).area();
+    if (merged_overlap <= 0 ||
+        other->OKMergeOverlap(*merge1, *merge2, ok_overlap, false)) {
+      continue;  // Nothing to count, or the overlap is tolerable.
+    }
+    increase += merged_overlap;
+    // Inclusion-exclusion: take off the overlaps that exist already with
+    // merge1 and merge2 on their own...
+    const int overlap1 = other_box.intersection(box1).area();
+    if (overlap1 > 0)
+      increase -= overlap1;
+    TBOX common = other_box.intersection(box2);
+    const int overlap2 = common.area();
+    if (overlap2 <= 0)
+      continue;
+    increase -= overlap2;
+    // ...and put back the area shared by all three, which was taken off
+    // twice.
+    common &= box1;  // In-place intersection.
+    const int overlap_all = common.area();
+    if (overlap_all > 0)
+      increase += overlap_all;
+  }
+  return increase;
+}
+
+// Helper function to test that each partition in candidates is either a
+// good diacritic merge with part or an OK merge candidate with all others
+// in the candidates list.
+// ASCII Art Scenario:
+// We sometimes get text such as "join-this" where the - is actually a long
+// dash culled from a standard set of extra characters that don't match the
+// font of the text. This makes its strokewidth not match and forms a broken
+// set of 3 partitions for "join", "-" and "this" and the dash may slightly
+// overlap BOTH words.
+// -------  -------
+// |     ====     |
+// -------  -------
+// The standard merge rule: "you can merge 2 partitions as long as there is
+// no increase in overlap elsewhere" fails miserably here. Merge any pair
+// of partitions and the combined box overlaps more with the third than
+// before. To allow the merge, we need to consider whether it is safe to
+// merge everything, without merging separate text lines. For that we need
+// everything to be an OKMergeCandidate (which is supposed to prevent
+// separate text lines merging), but this is hard for diacritics to satisfy,
+// so an alternative to being OKMergeCandidate with everything is to be an
+// OKDiacriticMerge with part as the base character.
+static bool TestCompatibleCandidates(const ColPartition& part, bool debug,
+                                     ColPartition_CLIST* candidates) {
+  ColPartition_C_IT outer(candidates);
+  for (outer.mark_cycle_pt(); !outer.cycled_list(); outer.forward()) {
+    ColPartition* current = outer.data();
+    // A good diacritic merge with part needs no further checking.
+    if (current->OKDiacriticMerge(part, false))
+      continue;
+    // Otherwise current has to be an OK merge with every other candidate.
+    // The inner iterator starts from the outer position and goes once
+    // around the whole list.
+    ColPartition_C_IT inner(outer);
+    for (inner.mark_cycle_pt(); !inner.cycled_list(); inner.forward()) {
+      ColPartition* other = inner.data();
+      if (other == current || OKMergeCandidate(current, other, false))
+        continue;
+      if (debug) {
+        tprintf("NC overlap failed:Candidate:");
+        other->bounding_box().print();
+        tprintf("fails to be a good merge with:");
+        current->bounding_box().print();
+      }
+      return false;
+    }
+  }
+  return true;
+}
+
+// Computes and returns the total overlap of all partitions in the grid,
+// i.e. the sum, over every partition and every neighbour found for it by
+// FindOverlappingPartitions, of the area of intersection of their bounding
+// boxes.
+// If overlap_grid is non-null, it is filled with a grid that holds shallow
+// copies of the partitions that genuinely overlap (intersection area > 0):
+// a copy of each overlapping neighbour, plus one copy of the partition
+// itself the first time an overlapping neighbour is found for it.
+// *overlap_grid is allocated here on the first such overlap if it is
+// nullptr on entry.
+int ColPartitionGrid::ComputeTotalOverlap(ColPartitionGrid** overlap_grid) {
+  int total_overlap = 0;
+  // Iterate the ColPartitions in the grid.
+  ColPartitionGridSearch gsearch(this);
+  gsearch.StartFullSearch();
+  ColPartition* part;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    ColPartition_CLIST neighbors;
+    const TBOX& part_box = part->bounding_box();
+    FindOverlappingPartitions(part_box, part, &neighbors);
+    ColPartition_C_IT n_it(&neighbors);
+    // True once a copy of part has been put into the overlap grid.
+    bool any_part_overlap = false;
+    for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
+      const TBOX& n_box = n_it.data()->bounding_box();
+      int overlap = n_box.intersection(part_box).area();
+      if (overlap > 0 && overlap_grid != nullptr) {
+        if (*overlap_grid == nullptr) {
+          *overlap_grid = new ColPartitionGrid(gridsize(), bleft(), tright());
+        }
+        (*overlap_grid)->InsertBBox(true, true, n_it.data()->ShallowCopy());
+        if (!any_part_overlap) {
+          (*overlap_grid)->InsertBBox(true, true, part->ShallowCopy());
+          // Only set the flag once the copy has really been inserted.
+          // Setting it for every neighbour (including those returned by the
+          // search with zero overlap) would suppress the copy of part when
+          // a later neighbour does overlap.
+          any_part_overlap = true;
+        }
+      }
+      total_overlap += overlap;
+    }
+  }
+  return total_overlap;
+}
+
+// Runs a rectangle search of the grid over box and adds every partition
+// returned, apart from not_this (which may be nullptr), to parts.
+// Partitions are added with add_sorted using SortByBoxLeft and the unique
+// flag set, so parts ends up SortByBoxLeft(ed) and uniqued.
+void ColPartitionGrid::FindOverlappingPartitions(const TBOX& box,
+                                                 const ColPartition* not_this,
+                                                 ColPartition_CLIST* parts) {
+  ColPartitionGridSearch rsearch(this);
+  rsearch.StartRectSearch(box);
+  for (ColPartition* found = rsearch.NextRectSearch(); found != nullptr;
+       found = rsearch.NextRectSearch()) {
+    if (found == not_this)
+      continue;  // Excluded by the caller.
+    parts->add_sorted(SortByBoxLeft<ColPartition>, true, found);
+  }
+}
+
+// Finds and returns the best candidate ColPartition to merge with part,
+// selected from the candidates list, based on the minimum increase in
+// pairwise overlap among all the partitions overlapped by the combined box.
+// If overlap_increase is not nullptr then it returns the increase in overlap
+// that would result from the merge.
+// confirm_cb is a permanent callback that (if non-null) will be used to
+// confirm the validity of a proposed merge candidate before selecting it.
+//
+// ======HOW MERGING WORKS======
+// The problem:
+// We want to merge all the parts of a textline together, but avoid merging
+// separate textlines. Diacritics, i dots, punctuation, and broken characters
+// are examples of small bits that need merging with the main textline.
+// Drop-caps and descenders in one line that touch ascenders in the one below
+// are examples of cases where we don't want to merge.
+//
+// The solution:
+// Merges that increase overlap among other partitions are generally bad.
+// Those that don't increase overlap (much) and minimize the total area
+// seem to be good.
+//
+// Ascii art example:
+// The text:
+// groggy descenders
+// minimum ascenders
+// The boxes: The === represents a small box near or overlapping the lower box.
+// -----------------
+// |               |
+// -----------------
+// -===-------------
+// |               |
+// -----------------
+// In considering what to do with the small === box, we find the 2 larger
+// boxes as neighbours and possible merge candidates, but merging with the
+// upper box increases overlap with the lower box, whereas merging with the
+// lower box does not increase overlap.
+// If the small === box didn't overlap either to start with, total area
+// would be minimized by merging with the nearer (lower) box.
+//
+// This is a simple example. In reality, we have to allow some increase
+// in overlap, or tightly spaced text would end up in bits.
+ColPartition* ColPartitionGrid::BestMergeCandidate(
+    const ColPartition* part, ColPartition_CLIST* candidates, bool debug,
+    std::function<bool(const ColPartition*, const ColPartition*)> confirm_cb,
+    int* overlap_increase) {
+  if (overlap_increase != nullptr)
+    *overlap_increase = 0;
+  if (candidates->empty())
+    return nullptr;
+  // Pixel limit handed to IncreaseInOverlap for overlaps that are tiny
+  // enough to ignore.
+  const int ok_overlap =
+      static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
+  const TBOX& part_box = part->bounding_box();
+  // Bounding box of part together with every candidate.
+  TBOX full_box(part_box);
+  ColPartition_C_IT cand_it(candidates);
+  for (cand_it.mark_cycle_pt(); !cand_it.cycled_list(); cand_it.forward()) {
+    full_box += cand_it.data()->bounding_box();
+  }
+  // Anything that the merged box might overlap is a relevant neighbour, so
+  // search the whole combined box (excluding part itself).
+  ColPartition_CLIST neighbours;
+  FindOverlappingPartitions(full_box, part, &neighbours);
+  if (debug) {
+    tprintf("Finding best merge candidate from %d, %d neighbours for box:",
+            candidates->length(), neighbours.length());
+    part_box.print();
+  }
+  // The neighbours that are not candidates are kept separately. If the best
+  // increase in overlap turns out positive, the worst increase against
+  // these alone is considered instead, which handles several good
+  // candidates that overlap each other when merged.
+  ColPartition_CLIST non_candidate_neighbours;
+  non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true,
+                                        &neighbours, candidates);
+  // The winner is the candidate with the least increase in overlap, with
+  // ties broken by the least added area.
+  ColPartition* best_candidate = nullptr;
+  int best_increase = INT32_MAX;
+  int best_area = 0;
+  int worst_nc_increase = 0;
+  for (cand_it.mark_cycle_pt(); !cand_it.cycled_list(); cand_it.forward()) {
+    ColPartition* candidate = cand_it.data();
+    if (confirm_cb != nullptr && !confirm_cb(part, candidate)) {
+      if (debug) {
+        tprintf("Candidate not confirmed:");
+        candidate->bounding_box().print();
+      }
+      continue;
+    }
+    const int increase =
+        IncreaseInOverlap(part, candidate, ok_overlap, &neighbours);
+    const TBOX& cand_box = candidate->bounding_box();
+    // Area that merging adds on top of the candidate's own box.
+    const int added_area =
+        cand_box.bounding_union(part_box).area() - cand_box.area();
+    if (best_candidate == nullptr || increase < best_increase) {
+      best_candidate = candidate;
+      best_increase = increase;
+      best_area = added_area;
+      if (debug) {
+        tprintf("New best merge candidate has increase %d, area %d, over box:",
+                increase, best_area);
+        full_box.print();
+        candidate->Print();
+      }
+    } else if (increase == best_increase && added_area < best_area) {
+      best_area = added_area;
+      best_candidate = candidate;
+    }
+    // Track the worst increase measured against non-candidates only.
+    const int nc_increase = IncreaseInOverlap(part, candidate, ok_overlap,
+                                              &non_candidate_neighbours);
+    if (nc_increase > worst_nc_increase)
+      worst_nc_increase = nc_increase;
+  }
+  // If the worst non-candidate increase is less than the best increase
+  // including the candidates, then all the candidates can merge together
+  // with less increase in outside overlap, so report that value instead,
+  // but only if each candidate is either a good diacritic merge with part
+  // or an ok merge candidate with all the others.
+  // See TestCompatibleCandidates for more explanation and a picture.
+  if (best_increase > 0 && worst_nc_increase < best_increase &&
+      TestCompatibleCandidates(*part, debug, candidates)) {
+    best_increase = worst_nc_increase;
+  }
+  if (overlap_increase != nullptr)
+    *overlap_increase = best_increase;
+  return best_candidate;
+}
+
+// Helper to remove the given box from the given partition, put it in its
+// own partition, and add to the partition list.
+// box is first detached from part with RemoveBox and then handed, together
+// with part_list, to ColPartition::MakeBigPartition. part itself is not
+// re-inserted into any grid here; that is left to the caller.
+static void RemoveBadBox(BLOBNBOX* box, ColPartition* part,
+                         ColPartition_LIST* part_list) {
+  part->RemoveBox(box);
+  ColPartition::MakeBigPartition(box, part_list);
+}
+
+
+// Split partitions where it reduces overlap between their bounding boxes.
+// ColPartitions are after all supposed to be a partitioning of the blobs
+// AND of the space on the page!
+// Blobs that cause overlaps get removed, put in individual partitions
+// and added to the big_parts list. They are most likely characters on
+// 2 textlines that touch, or something big like a dropcap.
+// For each partition in the grid, its neighbours in a rectangle search of
+// its bounding box are examined, and for the first unacceptable overlap
+// one of the following is tried, in this order:
+// 1. Remove the biggest box of part, if that removes the overlap.
+// 2. Remove the biggest box of the neighbour, if that removes the overlap.
+// 3. Split either the neighbour or part at a blob chosen by
+//    OverlapSplitBlob.
+// A singleton part that contains more than 2 of its neighbours' boxes is
+// itself moved from the grid to big_parts.
+void ColPartitionGrid::SplitOverlappingPartitions(
+    ColPartition_LIST* big_parts) {
+  // Overlap limit in pixels (a fraction of the grid size, rounded) that is
+  // passed to OKMergeOverlap.
+  int ok_overlap =
+      static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
+  // Iterate the ColPartitions in the grid.
+  ColPartitionGridSearch gsearch(this);
+  gsearch.StartFullSearch();
+  ColPartition* part;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    // Set up a rectangle search bounded by the part.
+    const TBOX& box = part->bounding_box();
+    ColPartitionGridSearch rsearch(this);
+    rsearch.SetUniqueMode(true);
+    rsearch.StartRectSearch(box);
+    // Number of neighbours contained in a singleton part, for which no
+    // splitting can help.
+    int unresolved_overlaps = 0;
+
+    ColPartition* neighbour;
+    while ((neighbour = rsearch.NextRectSearch()) != nullptr) {
+      if (neighbour == part)
+        continue;
+      const TBOX& neighbour_box = neighbour->bounding_box();
+      if (neighbour->OKMergeOverlap(*part, *part, ok_overlap, false) &&
+          part->OKMergeOverlap(*neighbour, *neighbour, ok_overlap, false))
+        continue;  // The overlap is OK both ways.
+
+      // If removal of the biggest box from either partition eliminates the
+      // overlap, and it is much bigger than the box left behind, then
+      // it is either a drop-cap, an inter-line join, or some junk that
+      // we don't want anyway, so put it in the big_parts list.
+      if (!part->IsSingleton()) {
+        BLOBNBOX* excluded = part->BiggestBox();
+        TBOX shrunken = part->BoundsWithoutBox(excluded);
+        if (!shrunken.overlap(neighbour_box) &&
+            excluded->bounding_box().height() >
+              kBigPartSizeRatio * shrunken.height()) {
+          // Removing the biggest box fixes the overlap, so do it!
+          // part is taken out of the grid while its box changes, and then
+          // re-inserted. The rect search is abandoned (break) as part has
+          // changed.
+          gsearch.RemoveBBox();
+          RemoveBadBox(excluded, part, big_parts);
+          InsertBBox(true, true, part);
+          gsearch.RepositionIterator();
+          break;
+        }
+      } else if (box.contains(neighbour_box)) {
+        // part is a singleton that completely contains the neighbour.
+        ++unresolved_overlaps;
+        continue;  // No amount of splitting will fix it.
+      }
+      // Same test as above, but with the roles of part and neighbour
+      // swapped.
+      if (!neighbour->IsSingleton()) {
+        BLOBNBOX* excluded = neighbour->BiggestBox();
+        TBOX shrunken = neighbour->BoundsWithoutBox(excluded);
+        if (!shrunken.overlap(box) &&
+            excluded->bounding_box().height() >
+              kBigPartSizeRatio * shrunken.height()) {
+          // Removing the biggest box fixes the overlap, so do it!
+          rsearch.RemoveBBox();
+          RemoveBadBox(excluded, neighbour, big_parts);
+          InsertBBox(true, true, neighbour);
+          gsearch.RepositionIterator();
+          break;
+        }
+      }
+      // Removing a single big box did not help, so try to split one of the
+      // two partitions instead. The neighbour is split if it has no more
+      // boxes overlapping part than part has overlapping the neighbour, or
+      // if part is a singleton; otherwise part is split.
+      int part_overlap_count = part->CountOverlappingBoxes(neighbour_box);
+      int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box);
+      // The new partition made by a split, or nullptr if there was none.
+      ColPartition* right_part = nullptr;
+      if (neighbour_overlap_count <= part_overlap_count ||
+          part->IsSingleton()) {
+        // Try to split the neighbour to reduce overlap.
+        BLOBNBOX* split_blob = neighbour->OverlapSplitBlob(box);
+        if (split_blob != nullptr) {
+          rsearch.RemoveBBox();
+          right_part = neighbour->SplitAtBlob(split_blob);
+          InsertBBox(true, true, neighbour);
+          ASSERT_HOST(right_part != nullptr);
+        }
+      } else {
+        // Try to split part to reduce overlap.
+        BLOBNBOX* split_blob = part->OverlapSplitBlob(neighbour_box);
+        if (split_blob != nullptr) {
+          gsearch.RemoveBBox();
+          right_part = part->SplitAtBlob(split_blob);
+          InsertBBox(true, true, part);
+          ASSERT_HOST(right_part != nullptr);
+        }
+      }
+      if (right_part != nullptr) {
+        // A split happened: add the new partition to the grid, re-sync both
+        // searches with the modified grid, and stop looking at neighbours
+        // of this part.
+        InsertBBox(true, true, right_part);
+        gsearch.RepositionIterator();
+        rsearch.RepositionIterator();
+        break;
+      }
+    }
+    if (unresolved_overlaps > 2 && part->IsSingleton()) {
+      // This part is no good so just add to big_parts.
+      // It is removed from the grid and marked as block-owned before being
+      // appended to the end of the big_parts list.
+      RemoveBBox(part);
+      ColPartition_IT big_it(big_parts);
+      part->set_block_owned(true);
+      big_it.add_to_end(part);
+      gsearch.RepositionIterator();
+    }
+  }
+}
+
+// Smooths the type of every partition whose flow is source_type by
+// examining its local neighbourhood (the work is done by SmoothRegionType).
+// Per the neighbourhood majority a partition becomes text or image; one with
+// no definite neighbourhood type is left alone. Line types are skipped.
+// im_box and rotation are used to map blob coordinates onto the
+// nontext_map, which is used to prevent the spread of text neighbourhoods
+// into images.
+// Returns true if anything was changed.
+bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type,
+                                            Pix* nontext_map,
+                                            const TBOX& im_box,
+                                            const FCOORD& rotation) {
+  bool changed = false;
+  // Visit every ColPartition in the grid.
+  ColPartitionGridSearch search(this);
+  search.StartFullSearch();
+  for (ColPartition* candidate = search.NextFullSearch(); candidate != nullptr;
+       candidate = search.NextFullSearch()) {
+    const bool eligible = candidate->flow() == source_type &&
+                          !BLOBNBOX::IsLineType(candidate->blob_type());
+    if (!eligible) continue;
+    const TBOX& pbox = candidate->bounding_box();
+    bool debug = AlignedBlob::WithinTestRegion(2, pbox.left(), pbox.bottom());
+    if (SmoothRegionType(nontext_map, im_box, rotation, debug, candidate))
+      changed = true;
+  }
+  return changed;
+}
+
+// Mirrors the grid and every ColPartition in it about the y-axis. The blob
+// boxes are assumed to have been reflected already.
+void ColPartitionGrid::ReflectInYAxis() {
+  // Temporary holding list for the partitions while the grid is rebuilt.
+  ColPartition_LIST held_parts;
+  ColPartition_IT held_it(&held_parts);
+  ColPartitionGridSearch search(this);
+  search.StartFullSearch();
+  for (ColPartition* p = search.NextFullSearch(); p != nullptr;
+       p = search.NextFullSearch()) {
+    held_it.add_after_then_move(p);
+  }
+  const ICOORD mirrored_bleft(-tright().x(), bleft().y());
+  const ICOORD mirrored_tright(-bleft().x(), tright().y());
+  // Re-Init drops all the grid's pointers, so held_parts (briefly) owns the
+  // ColPartitions until each is reflected and put back below.
+  Init(gridsize(), mirrored_bleft, mirrored_tright);
+  for (held_it.move_to_first(); !held_it.empty(); held_it.forward()) {
+    ColPartition* p = held_it.extract();
+    p->ReflectInYAxis();
+    InsertBBox(true, true, p);
+  }
+}
+
+// Converts the partitions in the grid into output blocks, one block (holding
+// a single row) per partition, then empties the grid. No attempt is made to
+// organize the text into proper blocks or columns; the aim is simply to get
+// as much text out as possible. Partitions that are not at least vaguely
+// text-like, or that fail to yield a row, have their boxes deleted.
+// blocks receives the new BLOCKs and to_blocks the matching TO_BLOCKs.
+// TODO(rays) some kind of sort function would be useful and probably better
+// than the default here, which is to sort by order of the grid search.
+void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST* blocks,
+                                                 TO_BLOCK_LIST* to_blocks) {
+  BLOCK_IT block_it(blocks);
+  TO_BLOCK_IT to_block_it(to_blocks);
+  // Every partition goes on this list, which deletes them all on return.
+  ColPartition_LIST doomed_parts;
+  ColPartition_IT doomed_it(&doomed_parts);
+  // Iterate the ColPartitions in the grid to extract them.
+  ColPartitionGridSearch search(this);
+  search.StartFullSearch();
+  for (ColPartition* part = search.NextFullSearch(); part != nullptr;
+       part = search.NextFullSearch()) {
+    doomed_it.add_after_then_move(part);
+    // Only text, or unknown with more than one box, is worth keeping.
+    const BlobRegionType blob_type = part->blob_type();
+    const bool text_like =
+        BLOBNBOX::IsTextType(blob_type) ||
+        (blob_type == BRT_UNKNOWN && part->boxes_count() > 1);
+    if (!text_like) {
+      part->DeleteBoxes();  // This partition is dead.
+      continue;
+    }
+    const bool is_vertical = blob_type == BRT_VERT_TEXT;
+    // Capture the metrics before the partition is turned into a row.
+    TBOX box = part->bounding_box();
+    const int median_width = part->median_width();
+    const int median_height = part->median_height();
+    // Turn the partition into a TO_ROW.
+    TO_ROW* row = part->MakeToRow();
+    if (row == nullptr) {
+      part->DeleteBoxes();  // This partition is dead.
+      continue;
+    }
+    // Wrap the row in a block of its own that covers the partition's box.
+    auto* block = new BLOCK("", true, 0, 0, box.left(), box.bottom(),
+                             box.right(), box.top());
+    block->pdblk.set_poly_block(new POLY_BLOCK(
+        box, is_vertical ? PT_VERTICAL_TEXT : PT_FLOWING_TEXT));
+    auto* to_block = new TO_BLOCK(block);
+    TO_ROW_IT row_it(to_block->get_rows());
+    row_it.add_after_then_move(row);
+    // Vertical and horizontal text have not been differentially rotated
+    // yet, so the line metrics come from widths or heights as appropriate.
+    const int line_size = is_vertical ? median_width : median_height;
+    const int extent = is_vertical ? box.width() : box.height();
+    to_block->line_size = static_cast<float>(line_size);
+    to_block->line_spacing = static_cast<float>(extent);
+    to_block->max_blob_size = static_cast<float>(extent + 1);
+    // A zero line size is bumped to 1.
+    if (to_block->line_size == 0) to_block->line_size = 1;
+    block_it.add_to_end(block);
+    to_block_it.add_to_end(to_block);
+  }
+  Clear();
+  // doomed_parts now goes out of scope, safely deleting the ColPartitions.
+}
+
+// Rotates the grid and its ColPartitions by the deskew vector, assuming that
+// the blob boxes have been rotated already, and recomputes partition limits.
+void ColPartitionGrid::Deskew(const FCOORD& deskew) {
+  // Park all the partitions on a temporary list while the grid is rebuilt.
+  ColPartition_LIST parked_parts;
+  ColPartition_IT parked_it(&parked_parts);
+  ColPartitionGridSearch search(this);
+  search.StartFullSearch();
+  for (ColPartition* p = search.NextFullSearch(); p != nullptr;
+       p = search.NextFullSearch()) {
+    parked_it.add_after_then_move(p);
+  }
+  // Rebuild the grid to the size of its rotated extent.
+  TBOX rotated_extent(bleft_, tright_);
+  rotated_extent.rotate_large(deskew);
+  Init(gridsize(), rotated_extent.botleft(), rotated_extent.topright());
+  // Init cleared the grid's pointers, so parked_parts (briefly) owns the
+  // ColPartitions until they are reinserted here.
+  for (parked_it.move_to_first(); !parked_it.empty(); parked_it.forward()) {
+    ColPartition* p = parked_it.extract();
+    p->ComputeLimits();
+    InsertBBox(true, true, p);
+  }
+}
+
+// Sets the left and right tab stops, and the column goodness, of every
+// partition in the grid, using the tab vectors supplied by tabgrid.
+void ColPartitionGrid::SetTabStops(TabFind* tabgrid) {
+  ColPartitionGridSearch search(this);
+  search.StartFullSearch();
+  for (ColPartition* part = search.NextFullSearch(); part != nullptr;
+       part = search.NextFullSearch()) {
+    const TBOX& bounds = part->bounding_box();
+    // If the overlapping line is the wrong kind of tab, try non-overlapping.
+    TabVector* left_tab = tabgrid->LeftTabForBox(bounds, true, false);
+    if (left_tab != nullptr && !left_tab->IsLeftTab())
+      left_tab = tabgrid->LeftTabForBox(bounds, false, false);
+    if (left_tab != nullptr && left_tab->IsLeftTab())
+      part->SetLeftTab(left_tab);
+    TabVector* right_tab = tabgrid->RightTabForBox(bounds, true, false);
+    if (right_tab != nullptr && !right_tab->IsRightTab())
+      right_tab = tabgrid->RightTabForBox(bounds, false, false);
+    if (right_tab != nullptr && right_tab->IsRightTab())
+      part->SetRightTab(right_tab);
+    part->SetColumnGoodness(tabgrid->WidthCB());
+  }
+}
+
+// Makes a ColPartitionSet for each grid row from the partitions whose
+// bottom-left corner lies in it and appends the sets (nullptr for an empty
+// row) to part_sets, ready for finding column bounds. Noise and singleton
+// unknown partitions are skipped. Returns false if no partitions were found.
+bool ColPartitionGrid::MakeColPartSets(PartSetVector* part_sets) {
+  const int num_rows = gridheight();
+  auto* row_lists = new ColPartition_LIST[num_rows];
+  part_sets->reserve(num_rows);
+  // Bin the usable partitions by the grid row of their bottom-left corner.
+  bool found_any = false;
+  ColPartitionGridSearch search(this);
+  search.StartFullSearch();
+  for (ColPartition* part = search.NextFullSearch(); part != nullptr;
+       part = search.NextFullSearch()) {
+    const BlobRegionType blob_type = part->blob_type();
+    if (blob_type == BRT_NOISE ||
+        (blob_type == BRT_UNKNOWN && part->boxes()->singleton()))
+      continue;
+    const TBOX& bounds = part->bounding_box();
+    int grid_x, grid_y;
+    GridCoords(bounds.left(), bounds.bottom(), &grid_x, &grid_y);
+    ColPartition_IT row_it(&row_lists[grid_y]);
+    row_it.add_to_end(part);
+    found_any = true;
+  }
+  // Only when something was found, emit one entry per row in grid_y order.
+  for (int row = 0; found_any && row < num_rows; ++row) {
+    ColPartitionSet* row_set = row_lists[row].empty()
+                                   ? nullptr
+                                   : new ColPartitionSet(&row_lists[row]);
+    part_sets->push_back(row_set);
+  }
+  delete[] row_lists;
+  return found_any;
+}
+
+// Makes a single ColPartitionSet consisting of a single ColPartition that
+// spans the total horizontal extent of the significant content on the page.
+// Used for the single column setting in place of automatic detection.
+// cb is passed to SetColumnGoodness on the resulting partition.
+// Returns nullptr if the page is empty of significant content.
+ColPartitionSet* ColPartitionGrid::MakeSingleColumnSet(WidthCallback cb) {
+  ColPartition* column = nullptr;
+  // Scan every partition, widening the column to the extreme tab keys seen.
+  ColPartitionGridSearch search(this);
+  search.StartFullSearch();
+  for (ColPartition* part = search.NextFullSearch(); part != nullptr;
+       part = search.NextFullSearch()) {
+    const BlobRegionType blob_type = part->blob_type();
+    if (blob_type == BRT_NOISE ||
+        (blob_type == BRT_UNKNOWN && part->boxes()->singleton()))
+      continue;
+    // Only chained/leader/on-image text and images count as significant.
+    const BlobTextFlowType flow = part->flow();
+    const bool good_text =
+        blob_type == BRT_TEXT &&
+        (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN ||
+         flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE);
+    const bool is_image =
+        blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE;
+    if (!good_text && !is_image) continue;
+    if (column == nullptr) {
+      // First significant partition: start the column from a copy of it.
+      column = part->ShallowCopy();
+      column->set_blob_type(BRT_TEXT);
+      // Copy the tabs from itself to properly setup the margins.
+      column->CopyLeftTab(*column, false);
+      column->CopyRightTab(*column, false);
+      continue;
+    }
+    if (part->left_key() < column->left_key())
+      column->CopyLeftTab(*part, false);
+    if (part->right_key() > column->right_key())
+      column->CopyRightTab(*part, false);
+  }
+  if (column == nullptr) return nullptr;
+  // Make a ColPartitionSet from the column as the single-column candidate.
+  column->SetColumnGoodness(cb);
+  return new ColPartitionSet(column);
+}
+
+// Marks the BLOBNBOXes in every partition of the grid as being owned by
+// that partition, by calling ClaimBoxes on each ColPartition.
+void ColPartitionGrid::ClaimBoxes() {
+  ColPartitionGridSearch search(this);
+  search.StartFullSearch();
+  for (ColPartition* owner = search.NextFullSearch(); owner != nullptr;
+       owner = search.NextFullSearch()) {
+    owner->ClaimBoxes();
+  }
+}
+
+// Retypes all the blobs referenced by the partitions in the grid so that
+// their region type and flow agree with the owning partition.
+// Image blobs are found and returned in the im_blobs list, as they are not
+// owned by the block. Noise partitions, and partitions left with no blobs,
+// are removed from the grid and deleted on return.
+void ColPartitionGrid::ReTypeBlobs(BLOBNBOX_LIST* im_blobs) {
+  BLOBNBOX_IT im_it(im_blobs);
+  ColPartition_LIST doomed_parts;
+  ColPartition_IT doomed_it(&doomed_parts);
+  ColPartitionGridSearch search(this);
+  search.StartFullSearch();
+  for (ColPartition* part = search.NextFullSearch(); part != nullptr;
+       part = search.NextFullSearch()) {
+    const BlobRegionType part_type = part->blob_type();
+    const BlobTextFlowType part_flow = part->flow();
+    bool lost_blobs = false;
+    if (part_type == BRT_POLYIMAGE || part_type == BRT_RECTIMAGE) {
+      // Hand every blob of an image partition over to im_blobs.
+      BLOBNBOX_C_IT box_it(part->boxes());
+      for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward())
+        im_it.add_after_then_move(box_it.data());
+    } else if (part_type != BRT_NOISE) {
+      // Make sure the blobs are marked with the correct type and flow.
+      BLOBNBOX_C_IT box_it(part->boxes());
+      for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
+        BLOBNBOX* blob = box_it.data();
+        if (blob->region_type() != BRT_NOISE) {
+          blob->set_region_type(part_type);
+          if (blob->flow() != BTFT_LEADER) blob->set_flow(part_flow);
+          continue;
+        }
+        // TODO(rays) Deprecated. Change this section to an assert to verify
+        // and then delete.
+        ASSERT_HOST(blob->cblob()->area() != 0);
+        blob->set_owner(nullptr);
+        box_it.extract();
+        lost_blobs = true;
+      }
+    }
+    if (part_type == BRT_NOISE || part->boxes()->empty()) {
+      // The partition is dead: disown its blobs and take it off the grid.
+      BLOBNBOX_C_IT box_it(part->boxes());
+      part->DisownBoxes();
+      doomed_it.add_to_end(part);
+      search.RemoveBBox();
+      for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
+        BLOBNBOX* blob = box_it.data();
+        // Any blob with zero area is a fake image blob and should be deleted.
+        if (blob->cblob()->area() != 0) continue;
+        delete blob->cblob();
+        delete blob;
+      }
+    } else if (lost_blobs) {
+      // Blobs were extracted, so recompute the limits and reinsert the part.
+      search.RemoveBBox();
+      part->ComputeLimits();
+      InsertBBox(true, true, part);
+      search.RepositionIterator();
+    }
+  }
+}
+
+// Re-initializes the grid with the given gridsize, bleft and tright, then
+// sets vertical on every partition, recomputes its bounds from its boxes
+// (which have changed through deskew) and reinserts it into the new grid.
+void ColPartitionGrid::RecomputeBounds(int gridsize,
+                                       const ICOORD& bleft,
+                                       const ICOORD& tright,
+                                       const ICOORD& vertical) {
+  // Save the partitions on a list while the grid is reinitialized.
+  ColPartition_LIST parked_parts;
+  ColPartition_IT parked_it(&parked_parts);
+  ColPartitionGridSearch search(this);
+  search.StartFullSearch();
+  for (ColPartition* p = search.NextFullSearch(); p != nullptr;
+       p = search.NextFullSearch()) {
+    parked_it.add_to_end(p);
+  }
+  Init(gridsize, bleft, tright);
+  // Recompute the bounds of the parts and put them back in the new grid.
+  for (parked_it.move_to_first(); !parked_it.empty(); parked_it.forward()) {
+    ColPartition* p = parked_it.extract();
+    p->set_vertical(vertical);
+    p->ComputeLimits();
+    InsertBBox(true, true, p);
+  }
+}
+
+// Improves the margins of every ColPartition in the grid by calling
+// FindPartitionMargins on each.
+// best_columns, which may be nullptr, is an array of pointers indicating the
+// column set at each y-coordinate in the grid.
+// best_columns is usually the best_columns_ member of ColumnFinder.
+// Partitions inside the debug test region are printed after processing.
+void ColPartitionGrid::GridFindMargins(ColPartitionSet** best_columns) {
+  ColPartitionGridSearch search(this);
+  search.StartFullSearch();
+  for (ColPartition* part = search.NextFullSearch(); part != nullptr;
+       part = search.NextFullSearch()) {
+    // Pick the column set for the grid row the search is currently on.
+    ColPartitionSet* columns = nullptr;
+    if (best_columns != nullptr) columns = best_columns[search.GridY()];
+    FindPartitionMargins(columns, part);
+    // Optional debug output.
+    const TBOX& pbox = part->bounding_box();
+    if (AlignedBlob::WithinTestRegion(2, pbox.left(), pbox.bottom())) {
+      tprintf("Computed margins for part:");
+      part->Print();
+    }
+  }
+}
+
+// Improves the margins of the ColPartitions in the parts list by calling
+// FindPartitionMargins on each.
+// best_columns, which may be nullptr, is an array of pointers indicating the
+// column set at each y-coordinate in the grid.
+// best_columns is usually the best_columns_ member of ColumnFinder.
+void ColPartitionGrid::ListFindMargins(ColPartitionSet** best_columns,
+                                       ColPartition_LIST* parts) {
+  ColPartition_IT it(parts);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    ColPartition* current = it.data();
+    ColPartitionSet* column_set = nullptr;
+    if (best_columns != nullptr) {
+      // Index the columns by the grid row of the box's bottom-left corner.
+      const TBOX& bounds = current->bounding_box();
+      int col_unused, row;
+      GridCoords(bounds.left(), bounds.bottom(), &col_unused, &row);
+      column_set = best_columns[row];
+    }
+    FindPartitionMargins(column_set, current);
+  }
+}
+
+// Deletes all the partitions in the grid after disowning all their blobs.
+void ColPartitionGrid::DeleteParts() {
+  ColPartition_LIST doomed_parts;  // Parts on here are deleted on return.
+  ColPartition_IT doomed_it(&doomed_parts);
+  ColPartitionGridSearch search(this);
+  search.StartFullSearch();
+  for (ColPartition* p = search.NextFullSearch(); p != nullptr;
+       p = search.NextFullSearch()) {
+    p->DisownBoxes();
+    doomed_it.add_to_end(p);
+  }
+  Clear();
+}
+
+// Deletes all the partitions in the grid that are of type BRT_UNKNOWN and
+// all the blobs in them. The blobs are retyped as noise and disowned here,
+// then removed from block by its DeleteUnownedNoise.
+void ColPartitionGrid::DeleteUnknownParts(TO_BLOCK* block) {
+  ColPartitionGridSearch search(this);
+  search.StartFullSearch();
+  for (ColPartition* p = search.NextFullSearch(); p != nullptr;
+       p = search.NextFullSearch()) {
+    if (p->blob_type() != BRT_UNKNOWN) continue;
+    search.RemoveBBox();
+    // Once marked, the blobs will be swept up by DeleteUnownedNoise.
+    p->set_flow(BTFT_NONTEXT);
+    p->set_blob_type(BRT_NOISE);
+    p->SetBlobTypes();
+    p->DisownBoxes();
+    delete p;
+  }
+  block->DeleteUnownedNoise();
+}
+
+// Deletes all the partitions in the grid that are NOT of flow type BTFT_LEADER,
+// except those kept (and reinserted) because ReleaseNonLeaderBoxes is true.
+void ColPartitionGrid::DeleteNonLeaderParts() {
+  ColPartitionGridSearch search(this);
+  search.StartFullSearch();
+  for (ColPartition* p = search.NextFullSearch(); p != nullptr;
+       p = search.NextFullSearch()) {
+    if (p->flow() == BTFT_LEADER) continue;
+    search.RemoveBBox();
+    if (!p->ReleaseNonLeaderBoxes()) {
+      delete p;
+      continue;
+    }
+    InsertBBox(true, true, p);
+    search.RepositionIterator();
+  }
+}
+
+// Finds and marks text partitions that represent figure captions.
+// For each image partition, the candidate caption is the nearest text partner
+// (upper or lower, in a direction free of image partners) whose box lies
+// within the image's horizontal extent. It and the partners chained from it
+// are marked PT_CAPTION_TEXT if the chain is bounded within kMaxCaptionLines.
+void ColPartitionGrid::FindFigureCaptions() {
+  ColPartitionGridSearch gsearch(this);
+  gsearch.StartFullSearch();
+  ColPartition* part;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    if (part->IsImageType()) {
+      const TBOX& part_box = part->bounding_box();
+      bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(),
+                                                 part_box.bottom());
+      ColPartition* best_caption = nullptr;
+      int best_dist = 0;   // Distance to best_caption.
+      int best_upper = 0;  // Direction of best_caption.
+      // Handle both lower and upper directions.
+      for (int upper = 0; upper < 2; ++upper) {
+        ColPartition_C_IT partner_it(upper ? part->upper_partners()
+                                           : part->lower_partners());
+        // If there are no image partners, then this direction is ok.
+        for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
+             partner_it.forward()) {
+          ColPartition* partner = partner_it.data();
+          if (partner->IsImageType()) break;
+        }
+        if (!partner_it.cycled_list()) continue;
+        // Find the nearest totally overlapping text partner.
+        for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
+             partner_it.forward()) {
+          ColPartition* partner = partner_it.data();
+          if (!partner->IsTextType() || partner->type() == PT_TABLE) continue;
+          const TBOX& partner_box = partner->bounding_box();
+          if (debug) {
+            tprintf("Finding figure captions for image part:");
+            part_box.print();
+            tprintf("Considering partner:");
+            partner_box.print();
+          }
+          if (partner_box.left() >= part_box.left() &&
+              partner_box.right() <= part_box.right()) {
+            int dist = partner_box.y_gap(part_box);
+            if (best_caption == nullptr || dist < best_dist) {
+              best_dist = dist;
+              best_caption = partner;
+              best_upper = upper;
+            }
+          }
+        }
+      }
+      if (best_caption != nullptr) {
+        if (debug) {
+          tprintf("Best caption candidate:");
+          best_caption->bounding_box().print();
+        }
+        // We have a candidate caption. Qualify it as being separable from
+        // any body text. We are looking for either a small number of lines
+        // or a big gap that indicates a separation from the body text.
+        int line_count = 0;
+        int biggest_gap = 0;
+        int smallest_gap = INT16_MAX;
+        int total_height = 0;
+        int mean_height = 0;
+        ColPartition* end_partner = nullptr;
+        ColPartition* next_partner = nullptr;
+        for (ColPartition* partner = best_caption; partner != nullptr &&
+             line_count <= kMaxCaptionLines;
+             partner = next_partner) {
+          if (!partner->IsTextType()) {
+            end_partner = partner;
+            break;
+          }
+          ++line_count;
+          total_height += partner->bounding_box().height();
+          next_partner = partner->SingletonPartner(best_upper);
+          if (next_partner != nullptr) {
+            int gap = partner->bounding_box().y_gap(
+                next_partner->bounding_box());
+            if (gap > biggest_gap) {
+              biggest_gap = gap;
+              end_partner = next_partner;
+              mean_height = total_height / line_count;
+            } else if (gap < smallest_gap) {
+              smallest_gap = gap;
+            }
+            // If the gap looks big compared to the text size and the smallest
+            // gap seen so far, then we can stop.
+            if (biggest_gap > mean_height * kMinCaptionGapHeightRatio &&
+                biggest_gap > smallest_gap * kMinCaptionGapRatio)
+              break;
+          }
+        }
+        if (debug) {
+          tprintf("Line count=%d, biggest gap %d, smallest %d, mean height %d\n",
+                  line_count, biggest_gap, smallest_gap, mean_height);
+          if (end_partner != nullptr) {
+            tprintf("End partner:");
+            end_partner->bounding_box().print();
+          }
+        }
+        if (next_partner == nullptr && line_count <= kMaxCaptionLines)
+          end_partner = nullptr;  // No gap, but line count is small.
+        if (line_count <= kMaxCaptionLines) {
+          // This is a qualified caption. Mark the text as caption.
+          for (ColPartition* partner = best_caption; partner != nullptr &&
+               partner != end_partner;
+               partner = next_partner) {
+            partner->set_type(PT_CAPTION_TEXT);
+            partner->SetBlobTypes();
+            if (debug) {
+              tprintf("Set caption type for partition:");
+              partner->bounding_box().print();
+            }
+            next_partner = partner->SingletonPartner(best_upper);
+          }
+        }
+      }
+    }
+  }
+}
+
+//////// Functions that manipulate ColPartitions in the part_grid_ /////
+//////// to find chains of partner partitions of the same type.  ///////
+
+// Finds the partners, in both directions, of every ColPartition in the grid.
+void ColPartitionGrid::FindPartitionPartners() {
+  ColPartitionGridSearch search(this);
+  search.StartFullSearch();
+  for (ColPartition* p = search.NextFullSearch(); p != nullptr;
+       p = search.NextFullSearch()) {
+    if (!p->IsVerticalType()) {  // Partner with upper and lower neighbours.
+      FindPartitionPartners(true, p);
+      FindPartitionPartners(false, p);
+    } else {  // Vertical types partner with left and right neighbours.
+      FindVPartitionPartners(true, p);
+      FindVPartitionPartners(false, p);
+    }
+  }
+}
+
+// Finds the best partner for part in the given direction (upper if true,
+// otherwise lower) and stores the result with AddPartner.
+// The first neighbour of a non-matching type is kept as a fallback, but is
+// replaced by the nearest type-matching neighbour within
+// kMaxPartitionSpacing times the median height of part, if there is one.
+void ColPartitionGrid::FindPartitionPartners(bool upper, ColPartition* part) {
+  if (part->type() == PT_NOISE)
+    return;  // Noise is not allowed to partner anything.
+  const TBOX& bounds = part->bounding_box();
+  const int part_top = part->median_top();
+  const int part_bottom = part->median_bottom();
+  const int part_height = part_top - part_bottom;
+  const int part_mid_y = (part_bottom + part_top) / 2;
+  // Search upwards for an upper partner, downwards for a lower one.
+  ColPartitionGridSearch vsearch(this);
+  vsearch.StartVerticalSearch(bounds.left(), bounds.right(), part->MidY());
+  ColPartition* best = nullptr;
+  int best_dist = INT32_MAX;
+  for (ColPartition* other = vsearch.NextVerticalSearch(!upper);
+       other != nullptr; other = vsearch.NextVerticalSearch(!upper)) {
+    if (other == part || other->type() == PT_NOISE)
+      continue;  // Noise is not allowed to partner anything.
+    const int other_bottom = other->median_bottom();
+    const int other_top = other->median_top();
+    const int other_mid_y = (other_bottom + other_top) / 2;
+    // The neighbour's middle must be on the requested side of part's middle.
+    if (upper != (other_mid_y > part_mid_y)) continue;
+    if (!part->HOverlaps(*other) && !part->WithinSameMargins(*other))
+      continue;
+    if (!part->TypesMatch(*other)) {
+      // Remember the first mismatch in case nothing better turns up.
+      if (best == nullptr) best = other;
+      continue;
+    }
+    const int dist = upper ? other_bottom - part_top : part_bottom - other_top;
+    if (dist > kMaxPartitionSpacing * part_height)
+      break;  // Too far away: stop searching.
+    if (dist < best_dist) {
+      best_dist = dist;
+      best = other;
+    }
+  }
+  if (best != nullptr) part->AddPartner(upper, best);
+}
+
+// Finds the best partner to the left or right of the given (vertical)
+// partition and stores the result with AddPartner. Only type-matching
+// neighbours that overlap part vertically and lie within
+// kMaxPartitionSpacing times its median width are considered.
+void ColPartitionGrid::FindVPartitionPartners(bool to_the_left,
+                                              ColPartition* part) {
+  if (part->type() == PT_NOISE)
+    return;  // Noise is not allowed to partner anything.
+  const TBOX& bounds = part->bounding_box();
+  const int part_left = part->median_left();
+  const int part_right = part->median_right();
+  const int part_width = part_right >= part_left ? part_right - part_left : -1;
+  const int part_mid_x = (part_left + part_right) / 2;
+  // Search sideways from the middle of part in the requested direction.
+  ColPartitionGridSearch hsearch(this);
+  hsearch.StartSideSearch(part_mid_x, bounds.bottom(), bounds.top());
+  ColPartition* best = nullptr;
+  int best_dist = INT32_MAX;
+  for (ColPartition* other = hsearch.NextSideSearch(to_the_left);
+       other != nullptr; other = hsearch.NextSideSearch(to_the_left)) {
+    if (other == part || other->type() == PT_NOISE)
+      continue;  // Noise is not allowed to partner anything.
+    const int other_left = other->median_left();
+    const int other_right = other->median_right();
+    const int other_mid_x = (other_left + other_right) / 2;
+    if (to_the_left != (other_mid_x < part_mid_x)) continue;  // Wrong side.
+    if (!part->VOverlaps(*other)) continue;
+    if (!part->TypesMatch(*other))
+      continue;  // Only match to other vertical text.
+    // Horizontal gap between part and the neighbour on the searched side.
+    const int dist =
+        to_the_left ? part_left - other_right : other_left - part_right;
+    if (dist > kMaxPartitionSpacing * part_width) break;  // Too far: stop.
+    // Keep the nearest acceptable neighbour seen so far.
+    if (best == nullptr || dist < best_dist) {
+      best_dist = dist;
+      best = other;
+    }
+  }
+  // For vertical partitions, the upper partner is to the left, and lower is
+  // to the right.
+  if (best != nullptr)
+    part->AddPartner(to_the_left, best);
+}
+
+// For every ColPartition with multiple partners in the grid, reduces the
+// number of partners to 0 or 1. If get_desperate is true, goes to more
+// desperate merge methods to merge flowing text before breaking partnerships.
+void ColPartitionGrid::RefinePartitionPartners(bool get_desperate) {
+  ColPartitionGridSearch search(this);
+  // Refine in type order so that chasing multiple partners can be done
+  // before eliminating type mis-matching partners. Note that the last pass
+  // uses PT_COUNT itself as the type.
+  for (int pass = PT_UNKNOWN + 1; pass <= PT_COUNT; ++pass) {
+    const auto type = static_cast<PolyBlockType>(pass);
+    search.StartFullSearch();
+    for (ColPartition* p = search.NextFullSearch(); p != nullptr;
+         p = search.NextFullSearch()) {
+      p->RefinePartners(type, get_desperate, this);
+      // Iterator may have been messed up by a merge.
+      search.RepositionIterator();
+    }
+  }
+}
+
+
+// ========================== PRIVATE CODE ========================
+
+// Finds the partitions that could be merged with part and adds them, uniquely
+// and sorted by SortByBoxLeft, to candidates. A candidate must overlap
+// search_box, and when merged must not overlap any other partitions that are
+// not overlapped by each individually. debug enables tprintf reporting.
+void ColPartitionGrid::FindMergeCandidates(const ColPartition* part,
+                                           const TBOX& search_box, bool debug,
+                                           ColPartition_CLIST* candidates) {
+  const int ok_overlap =
+      static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
+  const TBOX& part_box = part->bounding_box();
+  ColPartitionGridSearch rsearch(this);
+  rsearch.SetUniqueMode(true);
+  rsearch.StartRectSearch(search_box);
+  for (ColPartition* candidate = rsearch.NextRectSearch();
+       candidate != nullptr; candidate = rsearch.NextRectSearch()) {
+    if (!OKMergeCandidate(part, candidate, debug)) continue;
+    const TBOX& cand_box = candidate->bounding_box();
+    // Candidate seems to be a potential merge with part. If one contains
+    // the other, then the merge is a no-brainer. Otherwise, search the
+    // combined box to see if anything else is inappropriately overlapped.
+    const bool nested =
+        part_box.contains(cand_box) || cand_box.contains(part_box);
+    ColPartition* blocker = nullptr;  // Partition that vetoes the merge.
+    if (!nested) {
+      // Search the combined rectangle to see if anything new is overlapped.
+      // This is a preliminary test designed to quickly weed-out poor
+      // merge candidates that would create a big list of overlapped objects
+      // for the squared-order overlap analysis. Eg. vertical and horizontal
+      // line-like objects that overlap real text when merged:
+      // || ==========================
+      // ||
+      // ||  r e a l  t e x t
+      // ||
+      // ||
+      TBOX merged_box(part_box);
+      merged_box += cand_box;
+      ColPartitionGridSearch msearch(this);
+      msearch.SetUniqueMode(true);
+      msearch.StartRectSearch(merged_box);
+      while ((blocker = msearch.NextRectSearch()) != nullptr) {
+        if (blocker == part || blocker == candidate ||
+            blocker->OKMergeOverlap(*part, *candidate, ok_overlap, false))
+          continue;  // Itself, or an OK kind of merge overlap.
+        TBOX n_box = blocker->bounding_box();
+        // The overlap is OK if:
+        // * the n_box already overlapped the part or the candidate OR
+        // * the n_box is a suitable merge with either part or candidate
+        if (!n_box.overlap(part_box) && !n_box.overlap(cand_box) &&
+            !OKMergeCandidate(part, blocker, false) &&
+            !OKMergeCandidate(candidate, blocker, false))
+          break;
+      }
+    }
+    if (blocker != nullptr) {
+      if (debug) {
+        tprintf("Combined box overlaps another that is not OK despite"
+                " allowance of %d:", ok_overlap);
+        blocker->bounding_box().print();
+        tprintf("Reason:");
+        OKMergeCandidate(part, blocker, true);
+        tprintf("...and:");
+        OKMergeCandidate(candidate, blocker, true);
+        tprintf("Overlap:");
+        blocker->OKMergeOverlap(*part, *candidate, ok_overlap, true);
+      }
+      continue;
+    }
+    if (debug) {
+      tprintf("Adding candidate:");
+      candidate->bounding_box().print();
+    }
+    // Unique elements as they arrive.
+    candidates->add_sorted(SortByBoxLeft<ColPartition>, true, candidate);
+  }
+}
+
+// Smoothes the region type/flow type of the given part by looking at local
+// neighbours and the given image mask. Searches a padded rectangle with the
+// padding truncated on one side of the part's box in turn for each side,
+// using the result (if any) that has the least distance to all neighbours
+// that contribute to the decision. This biases in favor of rectangular
+// regions without completely enforcing them.
+// If a good decision cannot be reached, the part is left unchanged.
+// im_box and rerotation are used to map blob coordinates onto the
+// nontext_map, which is used to prevent the spread of text neighbourhoods
+// into images.
+// If debug is true, the per-direction results and any modification made
+// are printed.
+// Returns true if the partition was changed.
+bool ColPartitionGrid::SmoothRegionType(Pix* nontext_map,
+                                        const TBOX& im_box,
+                                        const FCOORD& rerotation,
+                                        bool debug,
+                                        ColPartition* part) {
+  const TBOX& part_box = part->bounding_box();
+  if (debug) {
+    tprintf("Smoothing part at:");
+    part_box.print();
+  }
+  BlobRegionType best_type = BRT_UNKNOWN;
+  int best_dist = INT32_MAX;
+  // A decision is only trusted if its neighbours are within max_dist, which
+  // scales with the smaller dimension of the part but is never less than
+  // two grid cells.
+  int max_dist = std::min(part_box.width(), part_box.height());
+  max_dist = std::max(max_dist * kMaxNeighbourDistFactor, gridsize() * 2);
+  // Search with the pad truncated on each side of the box in turn.
+  bool any_image = false;  // At least one direction voted image.
+  bool all_image = true;   // Every direction voted image.
+  for (int d = 0; d < BND_COUNT; ++d) {
+    int dist = INT32_MAX;
+    auto dir = static_cast<BlobNeighbourDir>(d);
+    BlobRegionType type = SmoothInOneDirection(dir, nontext_map, im_box,
+                                               rerotation, debug, *part,
+                                               &dist);
+    if (debug) {
+      tprintf("Result in dir %d = %d at dist %d\n", dir, type, dist);
+    }
+    // Keep the decisive result with the nearest supporting neighbours.
+    if (type != BRT_UNKNOWN && dist < best_dist) {
+      best_dist = dist;
+      best_type = type;
+    }
+    if (type == BRT_POLYIMAGE)
+      any_image = true;
+    else
+      all_image = false;
+  }
+  if (best_dist > max_dist)
+    return false;  // Too far away to set the type with it.
+  if (part->flow() == BTFT_STRONG_CHAIN && !all_image) {
+    return false;  // We are not modifying it.
+  }
+  BlobRegionType new_type = part->blob_type();
+  BlobTextFlowType new_flow = part->flow();
+  // Text is only adopted when no direction saw image; image always wins.
+  if (best_type == BRT_TEXT && !any_image) {
+    new_flow = BTFT_STRONG_CHAIN;
+    new_type = BRT_TEXT;
+  } else if (best_type == BRT_VERT_TEXT && !any_image) {
+    new_flow = BTFT_STRONG_CHAIN;
+    new_type = BRT_VERT_TEXT;
+  } else if (best_type == BRT_POLYIMAGE) {
+    new_flow = BTFT_NONTEXT;
+    new_type = BRT_UNKNOWN;
+  }
+  if (new_type == part->blob_type() && new_flow == part->flow())
+    return false;  // Nothing to change.
+  part->set_flow(new_flow);
+  part->set_blob_type(new_type);
+  part->SetBlobTypes();
+  if (debug) {
+    tprintf("Modified part:");
+    part->Print();
+  }
+  return true;
+}
+
+// Derives search_box from part_box by padding it on every side and then
+// pulling the side named by direction back to the edge of part_box, so the
+// search does not extend in that direction. Also sets dist_scaling, the
+// per-axis weights applied to x and y gaps: (2, 1) for the left/right
+// cases and (1, 2) for the above/below cases.
+// The padding is max(min dimension of part_box, min_padding) scaled by
+// kMaxPadFactor. Any other direction value fails ASSERT_HOST.
+static void ComputeSearchBoxAndScaling(BlobNeighbourDir direction,
+                                       const TBOX& part_box,
+                                       int min_padding,
+                                       TBOX* search_box,
+                                       ICOORD* dist_scaling) {
+  const int min_side = std::min(part_box.height(), part_box.width());
+  const int padding = std::max(min_side, min_padding) * kMaxPadFactor;
+  *search_box = part_box;
+  search_box->pad(padding, padding);
+  // Undo the padding on one side and pick the matching distance weights.
+  switch (direction) {
+    case BND_LEFT:
+    case BND_RIGHT:
+      if (direction == BND_LEFT)
+        search_box->set_left(part_box.left());
+      else
+        search_box->set_right(part_box.right());
+      *dist_scaling = ICOORD(2, 1);
+      break;
+    case BND_BELOW:
+    case BND_ABOVE:
+      if (direction == BND_BELOW)
+        search_box->set_bottom(part_box.bottom());
+      else
+        search_box->set_top(part_box.top());
+      *dist_scaling = ICOORD(1, 2);
+      break;
+    default:
+      ASSERT_HOST(false);
+  }
+}
+
+// Local enum used by SmoothInOneDirection and AccumulatePartDistances
+// to index the per-type distance vectors of partition neighbours.
+enum NeighbourPartitionType {
+  NPT_HTEXT,       // Definite horizontal text.
+  NPT_VTEXT,       // Definite vertical text.
+  NPT_WEAK_HTEXT,  // Weakly horizontal text. Counts as HTEXT for HTEXT, but
+                   // as image for image and VTEXT.
+  NPT_WEAK_VTEXT,  // Weakly vertical text. Counts as VTEXT for VTEXT, but
+                   // as image for image and HTEXT.
+  NPT_IMAGE,       // Definite non-text.
+  NPT_COUNT        // Number of array elements.
+};
+
+// Runs the SmoothRegionType search for a single direction.
+// Builds a box around part that is padded on every side except direction,
+// gathers the scaled distances of the neighbouring partitions in it, and
+// then adds neighbours nearest-first until one type leads the others by
+// kSmoothDecisionMargin. Returns that type, with the distance of its
+// nearest supporting neighbour in *best_distance. Returns BRT_UNKNOWN if
+// no decision is reached; *best_distance is then INT32_MAX.
+// If the nontext_map has any pixels in the search box, the decision is
+// biased towards image.
+BlobRegionType ColPartitionGrid::SmoothInOneDirection(
+    BlobNeighbourDir direction, Pix* nontext_map,
+    const TBOX& im_box, const FCOORD& rerotation,
+    bool debug, const ColPartition& part, int* best_distance) {
+  // Set up a rectangle search bounded by the part.
+  const TBOX& part_box = part.bounding_box();
+  TBOX search_box;
+  ICOORD dist_scaling;
+  ComputeSearchBoxAndScaling(direction, part_box, gridsize(),
+                             &search_box, &dist_scaling);
+  const bool image_region =
+      ImageFind::CountPixelsInRotatedBox(search_box, im_box, rerotation,
+                                         nontext_map) > 0;
+  // One sorted vector of neighbour distances per neighbour type.
+  GenericVector<int> dists[NPT_COUNT];
+  AccumulatePartDistances(part, dist_scaling, search_box,
+                          nontext_map, im_box, rerotation, debug, dists);
+  // counts[t] is how many entries of dists[t] have been consumed so far.
+  // Consuming the smallest remaining distance across all vectors each time
+  // (as in a merge sort) makes the indices double as per-type counts of the
+  // nearest neighbours.
+  int counts[NPT_COUNT] = {};
+  // If there is image in the search box, tip the balance in image's favor.
+  const int image_bias = image_region ? kSmoothDecisionMargin / 2 : 0;
+  const BlobRegionType text_dir = part.blob_type();
+  const BlobTextFlowType flow_type = part.flow();
+  // A BTFT_CHAIN part keeps its orientation: it cannot flip to the other
+  // text direction.
+  const bool htext_allowed =
+      text_dir != BRT_VERT_TEXT || flow_type != BTFT_CHAIN;
+  const bool vtext_allowed =
+      text_dir != BRT_TEXT || flow_type != BTFT_CHAIN;
+  int nearest = 0;
+  do {
+    // Find the smallest not-yet-consumed distance across the vectors.
+    nearest = INT32_MAX;
+    for (int t = 0; t < NPT_COUNT; ++t) {
+      if (counts[t] < dists[t].size() && dists[t][counts[t]] < nearest)
+        nearest = dists[t][counts[t]];
+    }
+    // Consume every entry that is no further away than that.
+    for (int t = 0; t < NPT_COUNT; ++t) {
+      while (counts[t] < dists[t].size() && dists[t][counts[t]] <= nearest)
+        ++counts[t];
+    }
+    *best_distance = nearest;
+    if (debug) {
+      tprintf("Totals: htext=%d+%d, vtext=%d+%d, image=%d+%d, at dist=%d\n",
+              counts[NPT_HTEXT], counts[NPT_WEAK_HTEXT],
+              counts[NPT_VTEXT], counts[NPT_WEAK_VTEXT],
+              counts[NPT_IMAGE], image_bias, nearest);
+    }
+    // Score each text direction: its strong and weak votes minus the image
+    // votes and the weak votes of the other direction.
+    const int image_count = counts[NPT_IMAGE];
+    const int htext_score = counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] -
+        (image_count + counts[NPT_WEAK_VTEXT]);
+    const int vtext_score = counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] -
+        (image_count + counts[NPT_WEAK_HTEXT]);
+    if (image_count > 0 &&
+        image_bias - htext_score >= kSmoothDecisionMargin &&
+        image_bias - vtext_score >= kSmoothDecisionMargin) {
+      // Image wins. Its distance is that of the nearest image or weak-text
+      // neighbour.
+      int image_dist = dists[NPT_IMAGE][0];
+      if (!dists[NPT_WEAK_VTEXT].empty())
+        image_dist = std::min(image_dist, dists[NPT_WEAK_VTEXT][0]);
+      if (!dists[NPT_WEAK_HTEXT].empty())
+        image_dist = std::min(image_dist, dists[NPT_WEAK_HTEXT][0]);
+      *best_distance = image_dist;
+      return BRT_POLYIMAGE;
+    }
+    if (htext_allowed && counts[NPT_HTEXT] > 0 &&
+        htext_score >= kSmoothDecisionMargin) {
+      *best_distance = dists[NPT_HTEXT][0];
+      return BRT_TEXT;
+    }
+    if (vtext_allowed && counts[NPT_VTEXT] > 0 &&
+        vtext_score >= kSmoothDecisionMargin) {
+      *best_distance = dists[NPT_VTEXT][0];
+      return BRT_VERT_TEXT;
+    }
+  } while (nearest < INT32_MAX);  // Stop once every vector is exhausted.
+  return BRT_UNKNOWN;
+}
+
+// Records the neighbours of base_part found in search_box. For each usable
+// neighbour, the gap between its box and base_part's box (x and y gaps
+// weighted by dist_scaling) is appended to the dists vector matching the
+// neighbour's type, once per box in the neighbour, capped at
+// kSmoothDecisionMargin entries. Before returning, every vector in dists
+// is sorted.
+// The nontext_map (+im_box, rerotation) is used to make text invisible if
+// there is non-text in between.
+// dists must be an array of GenericVectors of size NPT_COUNT.
+void ColPartitionGrid::AccumulatePartDistances(const ColPartition& base_part,
+                                               const ICOORD& dist_scaling,
+                                               const TBOX& search_box,
+                                               Pix* nontext_map,
+                                               const TBOX& im_box,
+                                               const FCOORD& rerotation,
+                                               bool debug,
+                                               GenericVector<int>* dists) {
+  const TBOX& part_box = base_part.bounding_box();
+  ColPartitionGridSearch rsearch(this);
+  rsearch.SetUniqueMode(true);
+  rsearch.StartRectSearch(search_box);
+  for (ColPartition* neighbour = rsearch.NextRectSearch();
+       neighbour != nullptr; neighbour = rsearch.NextRectSearch()) {
+    // Skip unmergeable types, anything on the other side of a tab vector,
+    // and the base partition itself.
+    if (neighbour->IsUnMergeableType() ||
+        !base_part.ConfirmNoTabViolation(*neighbour) ||
+        neighbour == &base_part)
+      continue;
+    const TBOX& nbox = neighbour->bounding_box();
+    const BlobRegionType n_type = neighbour->blob_type();
+    const bool is_text = n_type == BRT_TEXT || n_type == BRT_VERT_TEXT;
+    if (is_text &&
+        !ImageFind::BlankImageInBetween(part_box, nbox, im_box, rerotation,
+                                        nontext_map))
+      continue;  // Text not visible the other side of image.
+    if (BLOBNBOX::IsLineType(n_type))
+      continue;  // Don't use lines as neighbours.
+    // Weighted gap between the boxes; overlapping boxes have zero gap.
+    const int x_gap = std::max(part_box.x_gap(nbox), 0);
+    const int y_gap = std::max(part_box.y_gap(nbox), 0);
+    const int n_dist = x_gap * dist_scaling.x() + y_gap * dist_scaling.y();
+    if (debug) {
+      tprintf("Part has x-gap=%d, y=%d, dist=%d at:",
+              x_gap, y_gap, n_dist);
+      nbox.print();
+    }
+    // Truncate the number of boxes, so text doesn't get too much advantage.
+    const int n_boxes =
+        std::min(neighbour->boxes_count(), kSmoothDecisionMargin);
+    const BlobTextFlowType n_flow = neighbour->flow();
+    // Choose the vector for this neighbour: strong chains are definite
+    // text, medium text is weak text, and everything else is image.
+    NeighbourPartitionType bucket = NPT_IMAGE;
+    if (n_flow == BTFT_STRONG_CHAIN) {
+      bucket = n_type == BRT_TEXT ? NPT_HTEXT : NPT_VTEXT;
+      if (debug) {
+        tprintf("%s %d\n", n_type == BRT_TEXT ? "Htext" : "Vtext", n_boxes);
+      }
+    } else if (is_text &&
+               (n_flow == BTFT_CHAIN || n_flow == BTFT_NEIGHBOURS)) {
+      bucket = n_type == BRT_TEXT ? NPT_WEAK_HTEXT : NPT_WEAK_VTEXT;
+      if (debug) tprintf("Weak %d\n", n_boxes);
+    } else {
+      if (debug) tprintf("Image %d\n", n_boxes);
+    }
+    // One entry per (capped) box, all at the same distance.
+    GenericVector<int>& bucket_dists = dists[bucket];
+    for (int i = 0; i < n_boxes; ++i)
+      bucket_dists.push_back(n_dist);
+    if (debug) {
+      neighbour->Print();
+    }
+  }
+  for (int t = 0; t < NPT_COUNT; ++t)
+    dists[t].sort();
+}
+
+// Recomputes the left and right margins of part and stores them on it.
+// Each margin starts at the grid edge, or at the edge (at the part's mid-y)
+// of the column in columns that contains the matching side of the part,
+// is widened by kColumnWidthFactor, and is then reduced by FindMargin to
+// the nearest neighbour with enough vertical overlap.
+// columns may be nullptr, in which case only the grid edges are used.
+void ColPartitionGrid::FindPartitionMargins(ColPartitionSet* columns,
+                                            ColPartition* part) {
+  const TBOX box = part->bounding_box();
+  const int mid_y = part->MidY();
+  // Start from the grid extent, then tighten to the enclosing column(s).
+  int left_limit = bleft().x();
+  int right_limit = tright().x();
+  if (columns != nullptr) {
+    if (ColPartition* left_col = columns->ColumnContaining(box.left(), mid_y))
+      left_limit = left_col->LeftAtY(mid_y);
+    if (ColPartition* right_col =
+            columns->ColumnContaining(box.right(), mid_y))
+      right_limit = right_col->RightAtY(mid_y);
+  }
+  left_limit -= kColumnWidthFactor;
+  right_limit += kColumnWidthFactor;
+  // Search leftwards, starting one box-height inside the left edge, for
+  // partitions that reduce the left margin.
+  const int left_margin = FindMargin(box.left() + box.height(), true,
+                                     left_limit, box.bottom(), box.top(),
+                                     part);
+  part->set_left_margin(left_margin);
+  // Likewise rightwards from one box-height inside the right edge.
+  const int right_margin = FindMargin(box.right() - box.height(), false,
+                                      right_limit, box.bottom(), box.top(),
+                                      part);
+  part->set_right_margin(right_margin);
+}
+
+// Side-searches from x in the direction given by right_to_left, over the
+// y range [y_bottom, y_top], for partitions that bring the margin closer
+// than x_limit. not_this is ignored, as is any partition whose vertical
+// overlap with the range is too small. Returns the resulting margin, which
+// is x_limit itself if nothing closer qualifies.
+int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit,
+                                 int y_bottom, int y_top,
+                                 const ColPartition* not_this) {
+  const int range_height = y_top - y_bottom;
+  ColPartitionGridSearch side_search(this);
+  side_search.SetUniqueMode(true);
+  side_search.StartSideSearch(x, y_bottom, y_top);
+  for (ColPartition* neighbour = side_search.NextSideSearch(right_to_left);
+       neighbour != nullptr;
+       neighbour = side_search.NextSideSearch(right_to_left)) {
+    if (neighbour == not_this)
+      continue;  // Ignore itself.
+    const TBOX& n_box = neighbour->bounding_box();
+    // The required overlap is based on the smaller of the two heights, so
+    // large partitions can't smash through small ones.
+    const int shorter =
+        std::min(range_height, static_cast<int>(n_box.height()));
+    const int min_overlap =
+        static_cast<int>(shorter * kMarginOverlapFraction + 0.5);
+    const int overlap_top = std::min(y_top, static_cast<int>(n_box.top()));
+    const int overlap_bottom =
+        std::max(y_bottom, static_cast<int>(n_box.bottom()));
+    if (overlap_top - overlap_bottom < min_overlap)
+      continue;
+    // The edge facing x must be on the side we are searching towards.
+    const int x_edge = right_to_left ? n_box.right() : n_box.left();
+    const bool edge_left_of_x = x_edge < x;
+    if (edge_left_of_x != right_to_left)
+      continue;
+    // Once the search has gone past x_limit, x_limit will do.
+    const bool edge_left_of_limit = x_edge < x_limit;
+    if (edge_left_of_limit == right_to_left)
+      break;
+    // This edge is closer than the current limit, so it becomes the limit.
+    x_limit = x_edge;
+  }
+  return x_limit;
+}
+
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/colpartitiongrid.h b/tesseract/src/textord/colpartitiongrid.h
new file mode 100644
index 00000000..85ab7f3d
--- /dev/null
+++ b/tesseract/src/textord/colpartitiongrid.h
@@ -0,0 +1,252 @@
+///////////////////////////////////////////////////////////////////////
+// File:        colpartitiongrid.h
+// Description: Class collecting code that acts on a BBGrid of ColPartitions.
+// Author:      Ray Smith
+//
+// (C) Copyright 2009, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_COLPARTITIONGRID_H_
+#define TESSERACT_TEXTORD_COLPARTITIONGRID_H_
+
+#include "bbgrid.h"
+#include "colpartition.h"
+#include "colpartitionset.h"
+
+namespace tesseract {
+
+class TabFind;
+
+// ColPartitionGrid is a BBGrid of ColPartition.
+// It collects functions that work on the grid.
+class TESS_API ColPartitionGrid : public BBGrid<ColPartition,
+                                       ColPartition_CLIST,
+                                       ColPartition_C_IT> {
+ public:
+  ColPartitionGrid() = default;
+  ColPartitionGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+
+  ~ColPartitionGrid() override = default;
+
+  // Handles a click event in a display window.
+  void HandleClick(int x, int y) override;
+
+  // Merges ColPartitions in the grid that look like they belong in the same
+  // textline.
+  // For all partitions in the grid, calls the box_cb callback
+  // to compute the search box, searches the box, and if a candidate is found,
+  // calls the confirm_cb to check any more rules. If the confirm_cb returns
+  // true, then the partitions are merged.
+  // Both callbacks are std::function objects passed by value.
+  void Merges(std::function<bool(ColPartition*, TBOX*)> box_cb,
+              std::function<bool(const ColPartition*,
+                                 const ColPartition*)> confirm_cb);
+
+  // For the given partition, calls the box_cb callback
+  // to compute the search box, searches the box, and if a candidate is found,
+  // calls the confirm_cb to check any more rules. If the confirm_cb returns
+  // true, then the partitions are merged.
+  // Returns true if the partition is consumed by one or more merges.
+  bool MergePart(std::function<bool(ColPartition*, TBOX*)> box_cb,
+                 std::function<bool(const ColPartition*,
+                                    const ColPartition*)> confirm_cb,
+                 ColPartition* part);
+
+  // Computes and returns the total overlap of all partitions in the grid.
+  // If overlap_grid is non-null, it is filled with a grid that holds empty
+  // partitions representing the union of all overlapped partitions.
+  int ComputeTotalOverlap(ColPartitionGrid** overlap_grid);
+
+  // Finds all the ColPartitions in the grid that overlap with the given
+  // box and returns them SortByBoxLeft(ed) and uniqued in the given list.
+  // Any partition equal to not_this (may be nullptr) is excluded.
+  void FindOverlappingPartitions(const TBOX& box, const ColPartition* not_this,
+                                 ColPartition_CLIST* parts);
+
+  // Finds and returns the best candidate ColPartition to merge with part,
+  // selected from the candidates list, based on the minimum increase in
+  // pairwise overlap among all the partitions overlapped by the combined box.
+  // If overlap_increase is not nullptr then it returns the increase in overlap
+  // that would result from the merge.
+  // See colpartitiongrid.cpp for a diagram.
+  ColPartition* BestMergeCandidate(
+      const ColPartition* part, ColPartition_CLIST* candidates, bool debug,
+      std::function<bool(const ColPartition*,
+                         const ColPartition*)> confirm_cb,
+      int* overlap_increase);
+
+  // Split partitions where it reduces overlap between their bounding boxes.
+  // ColPartitions are after all supposed to be a partitioning of the blobs
+  // AND of the space on the page!
+  // Blobs that cause overlaps get removed, put in individual partitions
+  // and added to the big_parts list. They are most likely characters on
+  // 2 textlines that touch, or something big like a dropcap.
+  void SplitOverlappingPartitions(ColPartition_LIST* big_parts);
+
+  // Filters partitions of source_type by looking at local neighbours.
+  // Where a majority of neighbours have a text type, the partitions are
+  // changed to text, where the neighbours have image type, they are changed
+  // to image, and partitions that have no definite neighbourhood type are
+  // left unchanged.
+  // im_box and rerotation are used to map blob coordinates onto the
+  // nontext_map, which is used to prevent the spread of text neighbourhoods
+  // into images.
+  // Returns true if anything was changed.
+  bool GridSmoothNeighbours(BlobTextFlowType source_type, Pix* nontext_map,
+                            const TBOX& im_box, const FCOORD& rerotation);
+
+  // Reflects the grid and its colpartitions in the y-axis, assuming that
+  // all blob boxes have already been done.
+  void ReflectInYAxis();
+
+  // Rotates the grid and its colpartitions by the given angle, assuming that
+  // all blob boxes have already been done.
+  void Deskew(const FCOORD& deskew);
+
+  // Transforms the grid of partitions to the output blocks, putting each
+  // partition into a separate block. We don't really care about the order,
+  // as we just want to get as much text as possible without trying to organize
+  // it into proper blocks or columns.
+  void ExtractPartitionsAsBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
+
+  // Sets the left and right tabs of the partitions in the grid.
+  void SetTabStops(TabFind* tabgrid);
+
+  // Makes the ColPartSets and puts them in the PartSetVector ready
+  // for finding column bounds. Returns false if no partitions were found.
+  // Each ColPartition in the grid is placed in a single ColPartSet based
+  // on the bottom-left of its bounding box.
+  bool MakeColPartSets(PartSetVector* part_sets);
+
+  // Makes a single ColPartitionSet consisting of a single ColPartition that
+  // represents the total horizontal extent of the significant content on the
+  // page. Used for the single column setting in place of automatic detection.
+  // Returns nullptr if the page is empty of significant content.
+  ColPartitionSet* MakeSingleColumnSet(WidthCallback cb);
+
+  // Mark the BLOBNBOXes in each partition as being owned by that partition.
+  void ClaimBoxes();
+
+  // Retypes all the blobs referenced by the partitions in the grid.
+  // Image blobs are sliced on the grid boundaries to give the tab finder
+  // a better handle on the edges of the images, and the actual blobs are
+  // returned in the im_blobs list, as they are not owned by the block.
+  void ReTypeBlobs(BLOBNBOX_LIST* im_blobs);
+
+  // The boxes within the partitions have changed (by deskew) so recompute
+  // the bounds of all the partitions and reinsert them into the grid.
+  void RecomputeBounds(int gridsize, const ICOORD& bleft,
+                       const ICOORD& tright, const ICOORD& vertical);
+
+  // Improves the margins of the ColPartitions in the grid by calling
+  // FindPartitionMargins on each.
+  void GridFindMargins(ColPartitionSet** best_columns);
+
+  // Improves the margins of the ColPartitions in the list by calling
+  // FindPartitionMargins on each.
+  void ListFindMargins(ColPartitionSet** best_columns,
+                       ColPartition_LIST* parts);
+
+  // Deletes all the partitions in the grid after disowning all the blobs.
+  void DeleteParts();
+
+  // Deletes all the partitions in the grid that are of type BRT_UNKNOWN and
+  // all the blobs in them.
+  void DeleteUnknownParts(TO_BLOCK* block);
+
+  // Deletes all the partitions in the grid that are NOT of flow type
+  // BTFT_LEADER.
+  void DeleteNonLeaderParts();
+
+  // Finds and marks text partitions that represent figure captions.
+  void FindFigureCaptions();
+
+  //////// Functions that manipulate ColPartitions in the grid     ///////
+  //////// to find chains of partner partitions of the same type.  ///////
+  // For every ColPartition in the grid, finds its upper and lower neighbours.
+  void FindPartitionPartners();
+  // Finds the best partner in the given direction for the given partition.
+  // Stores the result with AddPartner.
+  void FindPartitionPartners(bool upper, ColPartition* part);
+  // Finds the best partner in the given direction for the given partition.
+  // Stores the result with AddPartner.
+  void FindVPartitionPartners(bool to_the_left, ColPartition* part);
+  // For every ColPartition with multiple partners in the grid, reduces the
+  // number of partners to 0 or 1. If get_desperate is true, goes to more
+  // desperate merge methods to merge flowing text before breaking partnerships.
+  void RefinePartitionPartners(bool get_desperate);
+
+ private:
+  // Finds and returns a list of candidate ColPartitions to merge with part.
+  // The candidates must overlap search_box, and when merged must not
+  // overlap any other partitions that are not overlapped by each individually.
+  void FindMergeCandidates(const ColPartition* part, const TBOX& search_box,
+                           bool debug, ColPartition_CLIST* candidates);
+
+  // Smoothes the region type/flow type of the given part by looking at local
+  // neighbours and the given image mask. Searches a padded rectangle with the
+  // padding truncated on one side of the part's box in turn for each side,
+  // using the result (if any) that has the least distance to all neighbours
+  // that contribute to the decision. This biases in favor of rectangular
+  // regions without completely enforcing them.
+  // If a good decision cannot be reached, the part is left unchanged.
+  // im_box and rerotation are used to map blob coordinates onto the
+  // nontext_map, which is used to prevent the spread of text neighbourhoods
+  // into images.
+  // Returns true if the partition was changed.
+  bool SmoothRegionType(Pix* nontext_map,
+                        const TBOX& im_box,
+                        const FCOORD& rerotation,
+                        bool debug,
+                        ColPartition* part);
+  // Executes the search for SmoothRegionType in a single direction.
+  // Creates a bounding box that is padded in all directions except direction,
+  // and searches it for other partitions. Finds the nearest collection of
+  // partitions that makes a decisive result (if any) and returns the type
+  // and the distance of the collection. If there are any pixels in the
+  // nontext_map, then the decision is biased towards image.
+  BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction,
+                                      Pix* nontext_map,
+                                      const TBOX& im_box,
+                                      const FCOORD& rerotation,
+                                      bool debug,
+                                      const ColPartition& part,
+                                      int* best_distance);
+  // Counts the partitions in the given search_box by appending the gap
+  // distance (scaled by dist_scaling) of the part from the base_part to the
+  // vector of the appropriate type for the partition. Prior to return, the
+  // vectors in the dists array are sorted in increasing order.
+  // dists must be an array of GenericVectors of size NPT_COUNT.
+  void AccumulatePartDistances(const ColPartition& base_part,
+                               const ICOORD& dist_scaling,
+                               const TBOX& search_box,
+                               Pix* nontext_map,
+                               const TBOX& im_box,
+                               const FCOORD& rerotation,
+                               bool debug,
+                               GenericVector<int>* dists);
+
+  // Improves the margins of the ColPartition by searching for
+  // neighbours that vertically overlap significantly.
+  void FindPartitionMargins(ColPartitionSet* columns, ColPartition* part);
+
+  // Starting at x, and going in the specified direction, up to x_limit, finds
+  // the margin for the given y range by searching sideways,
+  // and ignoring not_this.
+  int FindMargin(int x, bool right_to_left, int x_limit,
+                 int y_bottom, int y_top, const ColPartition* not_this);
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_TEXTORD_COLPARTITIONGRID_H_
diff --git a/tesseract/src/textord/colpartitionset.cpp b/tesseract/src/textord/colpartitionset.cpp
new file mode 100644
index 00000000..c53235e6
--- /dev/null
+++ b/tesseract/src/textord/colpartitionset.cpp
@@ -0,0 +1,667 @@
+///////////////////////////////////////////////////////////////////////
+// File:        colpartitionset.cpp
+// Description: Class to hold a list of ColPartitions of the page that
+//              correspond roughly to columns.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "colpartitionset.h"
+#include "workingpartset.h"
+#include "tablefind.h"
+
+namespace tesseract {
+
+// Minimum interesting column width, in multiples of the resolution (ppi).
+const double kMinColumnWidth = 2.0 / 3;
+
+ELISTIZE(ColPartitionSet)
+
+ColPartitionSet::ColPartitionSet(ColPartition_LIST* partitions) {
+  ColPartition_IT dest_it(&parts_);
+  dest_it.add_list_after(partitions);  // Add the given list to parts_.
+  ComputeCoverage();  // Derive the coverage statistics and bounding box.
+}
+
+ColPartitionSet::ColPartitionSet(ColPartition* part) {
+  ColPartition_IT dest_it(&parts_);
+  dest_it.add_after_then_move(part);  // parts_ starts out as just this part.
+  ComputeCoverage();  // Derive the coverage statistics and bounding box.
+}
+
+// Counts the partitions in parts_ that report good_width().
+int ColPartitionSet::GoodColumnCount() const {
+  // The list iterator wants a non-const list, but this loop only reads.
+  ColPartition_IT part_it(const_cast<ColPartition_LIST*>(&parts_));
+  int good_count = 0;
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    good_count += part_it.data()->good_width() ? 1 : 0;
+  }
+  return good_count;
+}
+
+// Returns the index'th element of parts_, or nullptr if the list cycles first.
+ColPartition* ColPartitionSet::GetColumnByIndex(int index) {
+  ColPartition_IT part_it(&parts_);
+  part_it.mark_cycle_pt();
+  for (int steps = 0; steps < index && !part_it.cycled_list(); ++steps) {
+    part_it.forward();
+  }
+  return part_it.cycled_list() ? nullptr : part_it.data();
+}
+
+// Returns the first ColPartition in parts_ whose ColumnContains(x, y) is
+// true, or nullptr if no partition contains the point.
+ColPartition* ColPartitionSet::ColumnContaining(int x, int y) {
+  ColPartition_IT col_it(&parts_);
+  for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
+    if (col_it.data()->ColumnContains(x, y))
+      return col_it.data();
+  }
+  return nullptr;
+}
+
+// Empties parts_ by extracting every element, so ownership is given up.
+void ColPartitionSet::RelinquishParts() {
+  ColPartition_IT part_it(&parts_);
+  for (; !part_it.empty(); part_it.forward()) {
+    // The extracted pointer is deliberately dropped, not deleted.
+    part_it.extract();
+  }
+}
+
+// Tries to improve this by adding or widening parts using those in src_sets.
+void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb,
+                                             PartSetVector* src_sets) {
+  int set_size = src_sets->size();
+  // Every provided column set is tried in turn (null entries are skipped),
+  // as each one may have something to improve this.
+  for (int i = 0; i < set_size; ++i) {
+    ColPartitionSet* column_set = src_sets->get(i);
+    if (column_set == nullptr)
+      continue;
+    // Iterate over the parts in this and column_set, adding bigger or
+    // new parts in column_set to this.
+    ColPartition_IT part_it(&parts_);
+    ASSERT_HOST(!part_it.empty());
+    int prev_right = INT32_MIN;
+    part_it.mark_cycle_pt();
+    ColPartition_IT col_it(&column_set->parts_);
+    for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
+      ColPartition* col_part = col_it.data();
+      if (col_part->blob_type() < BRT_UNKNOWN)
+        continue;  // Ignore image partitions.
+      int col_left = col_part->left_key();
+      int col_right = col_part->right_key();
+      // Advance part_it (in this) past parts lying wholly left of col_part.
+      ColPartition* part = part_it.data();
+      while (!part_it.at_last() && part->right_key() < col_left) {
+        prev_right = part->right_key();
+        part_it.forward();
+        part = part_it.data();
+      }
+      int part_left = part->left_key();
+      int part_right = part->right_key();
+      if (part_right < col_left || col_right < part_left) {
+        // There is no overlap so add a shallow copy as a new partition.
+        AddPartition(col_part->ShallowCopy(), &part_it);
+        continue;
+      }
+      // Check the edges of col_part to see if they can improve part.
+      bool part_width_ok = cb(part->KeyWidth(part_left, part_right));
+      if (col_left < part_left && col_left > prev_right) {
+        // The left edge of the column is further left without reaching the
+        // previous part (prev_right), so we can potentially expand to it.
+        int col_box_left = col_part->BoxLeftKey();
+        bool tab_width_ok = cb(part->KeyWidth(col_left, part_right));
+        bool box_width_ok = cb(part->KeyWidth(col_box_left, part_right));
+        if (tab_width_ok || (!part_width_ok)) {
+          // The tab is leaving the good column metric at least as good as
+          // it was before, so use the tab.
+          part->CopyLeftTab(*col_part, false);
+          part->SetColumnGoodness(cb);
+        } else if (col_box_left < part_left &&
+                   (box_width_ok || !part_width_ok)) {
+          // The box is leaving the good column metric at least as good as
+          // it was before, so use the box.
+          part->CopyLeftTab(*col_part, true);
+          part->SetColumnGoodness(cb);
+        }
+        part_left = part->left_key();
+      }
+      if (col_right > part_right &&
+          (part_it.at_last() ||
+           part_it.data_relative(1)->left_key() > col_right)) {
+        // The right edge is further right and stops short of the next part.
+        int col_box_right = col_part->BoxRightKey();
+        bool tab_width_ok = cb(part->KeyWidth(part_left, col_right));
+        bool box_width_ok = cb(part->KeyWidth(part_left, col_box_right));
+        if (tab_width_ok || (!part_width_ok)) {
+          // The tab is leaving the good column metric at least as good as
+          // it was before, so use the tab.
+          part->CopyRightTab(*col_part, false);
+          part->SetColumnGoodness(cb);
+        } else if (col_box_right > part_right &&
+                   (box_width_ok || !part_width_ok)) {
+          // The box is leaving the good column metric at least as good as
+          // it was before, so use the box.
+          part->CopyRightTab(*col_part, true);
+          part->SetColumnGoodness(cb);
+        }
+      }
+    }
+  }
+  ComputeCoverage();
+}
+
+// Inserts this into column_sets ahead of the first entry it beats, or at the
+// end; deletes this instead if it is illegal or duplicates an entry.
+void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector* column_sets,
+                                              WidthCallback cb) {
+  const bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
+                                               bounding_box_.bottom());
+  if (debug) {
+    tprintf("Considering new column candidate:\n");
+    Print();
+  }
+  if (!LegalColumnCandidate()) {
+    if (debug) {
+      tprintf("Not a legal column candidate:\n");
+      Print();
+    }
+    delete this;
+    return;
+  }
+  for (int pos = 0; pos < column_sets->size(); ++pos) {
+    ColPartitionSet* rival = column_sets->get(pos);
+    // Compare good_coverage_ first, break ties on good_column_count_, and
+    // break any remaining tie on bad_coverage_.
+    bool better;
+    if (good_coverage_ != rival->good_coverage_) {
+      better = good_coverage_ > rival->good_coverage_;
+    } else if (good_column_count_ != rival->good_column_count_) {
+      better = good_column_count_ > rival->good_column_count_;
+    } else {
+      better = bad_coverage_ > rival->bad_coverage_;
+    }
+    if (better) {
+      // Ahead of the rival at pos, so this is inserted at that position.
+      if (debug) tprintf("Good one\n");
+      column_sets->insert(this, pos);
+      return;
+    }
+    if (rival->CompatibleColumns(false, this, cb)) {
+      if (debug)
+        tprintf("Duplicate\n");
+      delete this;
+      return;  // It is not unique.
+    }
+  }
+  if (debug)
+    tprintf("Added to end\n");
+  column_sets->push_back(this);
+}
+
+// Returns true if every non-image partition in other is compatible with the
+// columns in this. cb is the test for a good (column-like) width.
+bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other,
+                                        WidthCallback cb) {
+  if (debug) {
+    tprintf("CompatibleColumns testing compatibility\n");
+    Print();
+    other->Print();
+  }
+  if (other->parts_.empty()) {
+    if (debug)
+      tprintf("CompatibleColumns true due to empty other\n");
+    return true;
+  }
+  ColPartition_IT it(&other->parts_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    ColPartition* part = it.data();
+    if (part->blob_type() < BRT_UNKNOWN) {
+      if (debug) {
+        tprintf("CompatibleColumns ignoring image partition\n");
+        part->Print();
+      }
+      continue;  // Image partitions are irrelevant to column compatibility.
+    }
+    int y = part->MidY();
+    int left = part->bounding_box().left();
+    int right = part->bounding_box().right();
+    ColPartition* left_col = ColumnContaining(left, y);
+    ColPartition* right_col = ColumnContaining(right, y);
+    if (right_col == nullptr || left_col == nullptr) {
+      if (debug) {
+        tprintf("CompatibleColumns false due to partition edge outside\n");
+        part->Print();
+      }
+      return false;  // A partition edge lies outside of all columns
+    }
+    if (right_col != left_col && cb(right - left)) {
+      if (debug) {
+        tprintf("CompatibleColumns false due to good width in multiple cols\n");
+        part->Print();
+      }
+      return false;  // Partition with a good width must be in a single column.
+    }
+    // Compare part against the next text partition in the list, if any.
+    ColPartition_IT it2= it;
+    while (!it2.at_last()) {
+      it2.forward();
+      ColPartition* next_part = it2.data();
+      if (!BLOBNBOX::IsTextType(next_part->blob_type()))
+        continue;  // Non-text partitions are irrelevant.
+      int next_left = next_part->bounding_box().left();
+      if (next_left == right) {
+        break;  // They share the same edge, so one must be a pull-out.
+      }
+      // Search to see if right and next_left fall within a single column.
+      ColPartition* next_left_col = ColumnContaining(next_left, y);
+      if (right_col == next_left_col) {
+        // There is a column break in this column.
+        // This can be due to a figure caption within a column, a pull-out
+        // block, or a simple broken textline that remains to be merged:
+        // all allowed, or a change in column layout: not allowed.
+        // If both partitions are of good width, then it is likely
+        // a change in column layout, otherwise probably an allowed situation.
+        if (part->good_width() && next_part->good_width()) {
+          if (debug) {
+            int next_right = next_part->bounding_box().right();
+            tprintf("CompatibleColumns false due to 2 parts of good width\n");
+            tprintf("part1 %d-%d, part2 %d-%d\n",
+                    left, right, next_left, next_right);
+            right_col->Print();
+          }
+          return false;
+        }
+      }
+      break;  // Only the first text partition after part is examined.
+    }
+  }
+  if (debug)
+    tprintf("CompatibleColumns true!\n");
+  return true;
+}
+
+// Returns the total width of all blobs in the text partitions of part_set
+// that do not lie within a column of this. Used as a cost measure for using
+// this column set over another that might be compatible.
+int ColPartitionSet::UnmatchedWidth(ColPartitionSet* part_set) {
+  int total_width = 0;
+  ColPartition_IT it(&part_set->parts_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    ColPartition* part = it.data();
+    if (!BLOBNBOX::IsTextType(part->blob_type())) {
+      continue;  // Non-text partitions are irrelevant to column compatibility.
+    }
+    int y = part->MidY();
+    BLOBNBOX_C_IT box_it(part->boxes());
+    for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
+      const TBOX& box = box_it.data()->bounding_box();
+      // Use the blob's own box, not the partition's. The whole blob is assumed
+      // outside any column iff its x-middle (at the partition's mid-y) is.
+      int x = (box.left() + box.right()) / 2;
+      ColPartition* col = ColumnContaining(x, y);
+      if (col == nullptr)
+        total_width += box.width();
+    }
+  }
+  return total_width;
+}
+
+// A legal column candidate has at least one text partition, all of its text
+// partitions pass IsLegal(), and no partition's left key is less than the
+// right key of the partition before it.
+bool ColPartitionSet::LegalColumnCandidate() {
+  ColPartition_IT part_it(&parts_);
+  if (part_it.empty())
+    return false;
+  bool found_text = false;
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    ColPartition* current = part_it.data();
+    if (BLOBNBOX::IsTextType(current->blob_type())) {
+      if (!current->IsLegal())
+        return false;  // Individual partition is illegal.
+      found_text = true;
+    }
+    // Adjacent partitions must not overlap.
+    if (part_it.at_last())
+      continue;  // No successor to compare against.
+    if (part_it.data_relative(1)->left_key() < current->right_key())
+      return false;
+  }
+  return found_text;
+}
+
+// Returns a new set holding shallow copies of the text partitions of this
+// (only those with good_width() or good_column() if good_only), or nullptr
+// if no partition qualifies.
+ColPartitionSet* ColPartitionSet::Copy(bool good_only) {
+  ColPartition_LIST copies;
+  ColPartition_IT out_it(&copies);
+  ColPartition_IT in_it(&parts_);
+  for (in_it.mark_cycle_pt(); !in_it.cycled_list(); in_it.forward()) {
+    ColPartition* src = in_it.data();
+    if (!BLOBNBOX::IsTextType(src->blob_type())) continue;
+    if (good_only && !src->good_width() && !src->good_column()) continue;
+    out_it.add_after_then_move(src->ShallowCopy());
+  }
+  return out_it.empty() ? nullptr : new ColPartitionSet(&copies);
+}
+
+// Appends one ColSegment per column to segments, boxed over y_bottom..y_top.
+void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top,
+                                     ColSegment_LIST *segments) {
+  ColSegment_IT seg_it(segments);
+  seg_it.move_to_last();
+  ColPartition_IT part_it(&parts_);
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    ColPartition* column = part_it.data();
+    ICOORD lower_left(column->LeftAtY(y_top), y_bottom);
+    ICOORD upper_right(column->RightAtY(y_bottom), y_top);
+    auto *segment = new ColSegment();
+    segment->InsertBox(TBOX(lower_left, upper_right));
+    seg_it.add_after_then_move(segment);
+  }
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Draws the left and right edge of every column between y_top and y_bottom.
+void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top,
+                                         ScrollView* win) {
+  ColPartition_IT col_it(&parts_);
+  for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
+    ColPartition* col = col_it.data();
+    win->Line(col->LeftAtY(y_top), y_top, col->LeftAtY(y_bottom), y_bottom);
+    win->Line(col->RightAtY(y_top), y_top, col->RightAtY(y_bottom), y_bottom);
+  }
+}
+
+#endif // !GRAPHICS_DISABLED
+
+// Return the ColumnSpanningType that best explains the columns overlapped
+// by the given coords(left,right,y), with the given margins.
+// Also return the first and last column index touched by the coords and
+// the leftmost spanned column (-1 if no column was spanned).
+// Column indices are 2n + 1 for real columns (0 based) and even values
+// represent the gaps in between columns, with 0 being left of the leftmost.
+// resolution refers to the ppi resolution of the image.
+ColumnSpanningType ColPartitionSet::SpanningType(int resolution,
+                                                 int left, int right,
+                                                 int height, int y,
+                                                 int left_margin,
+                                                 int right_margin,
+                                                 int* first_col,
+                                                 int* last_col,
+                                                 int* first_spanned_col) {
+  *first_col = -1;
+  *last_col = -1;
+  *first_spanned_col = -1;
+  int margin_columns = 0;
+  ColPartition_IT it(&parts_);
+  int col_index = 1;
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), col_index += 2) {
+    ColPartition* part = it.data();
+    if (part->ColumnContains(left, y) ||
+        (it.at_first() && part->ColumnContains(left + height, y))) {
+      // first_col is set here even if this column is not fully spanned. In
+      // that case margin_columns stays zero and first_spanned_col is left
+      // for a later column to set. A span that touches no column never
+      // reaches this branch, which is how noise is told apart.
+      *first_col = col_index;
+      if (part->ColumnContains(right, y) ||
+          (it.at_last() && part->ColumnContains(right - height, y))) {
+        // Both within a single column.
+        *last_col = col_index;
+        return CST_FLOWING;
+      }
+      if (left_margin <= part->LeftAtY(y)) {
+        // It completely spans this column.
+        *first_spanned_col = col_index;
+        margin_columns = 1;
+      }
+    } else if (part->ColumnContains(right, y) ||
+               (it.at_last() && part->ColumnContains(right - height, y))) {
+      if (*first_col < 0) {
+        // It started in-between.
+        *first_col = col_index - 1;
+      }
+      if (right_margin >= part->RightAtY(y)) {
+        // It completely spans this column.
+        if (margin_columns == 0)
+          *first_spanned_col = col_index;
+        ++margin_columns;
+      }
+      *last_col = col_index;
+      break;
+    } else if (left < part->LeftAtY(y) && right > part->RightAtY(y)) {
+      // Neither left nor right are contained within, so it spans this
+      // column.
+      if (*first_col < 0) {
+        // It started in between the previous column and the current column.
+        *first_col = col_index - 1;
+      }
+      if (margin_columns == 0)
+        *first_spanned_col = col_index;
+      *last_col = col_index;
+    } else if (right < part->LeftAtY(y)) {
+      // We have gone past the end.
+      *last_col = col_index - 1;
+      if (*first_col < 0) {
+        // It must lie completely between columns =>noise.
+        *first_col = col_index - 1;
+      }
+      break;
+    }
+  }
+  if (*first_col < 0)
+    *first_col = col_index - 1;  // The last in-between.
+  if (*last_col < 0)
+    *last_col = col_index - 1;  // The last in-between.
+  ASSERT_HOST(*first_col >= 0 && *last_col >= 0);
+  ASSERT_HOST(*first_col <= *last_col);
+  if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) {
+    // Neither end was in a column, and it didn't span any, so it lies
+    // entirely between columns, therefore noise.
+    return CST_NOISE;
+  } else if (margin_columns <= 1) {
+    // An exception for headings that stick outside of single-column text.
+    if (margin_columns == 1 && parts_.singleton()) {
+      return CST_HEADING;
+    }
+    // It is a pullout, as left and right were not in the same column, but
+    // it doesn't go to the edge of its start and end.
+    return CST_PULLOUT;
+  }
+  // Its margins went to the edges of first and last columns => heading.
+  return CST_HEADING;
+}
+
+// The column_set has changed. Close down all in-progress WorkingPartSets in
+// columns that do not match and start new ones for the new columns in this.
+// As ColPartitions are turned into BLOCKs, the used ones are put in
+// used_parts, as they still need to be referenced in the grid.
+void ColPartitionSet::ChangeWorkColumns(const ICOORD& bleft,
+                                        const ICOORD& tright,
+                                        int resolution,
+                                        ColPartition_LIST* used_parts,
+                                        WorkingPartSet_LIST* working_set_list) {
+  // Move the input list to a temporary location so we can delete its elements
+  // as we add them to the output working_set.
+  WorkingPartSet_LIST work_src;
+  WorkingPartSet_IT src_it(&work_src);
+  src_it.add_list_after(working_set_list);
+  src_it.move_to_first();
+  WorkingPartSet_IT dest_it(working_set_list);
+  // Completed blocks and to_blocks are accumulated and given to the first new
+  // one whenever we keep a column, or at the end.
+  BLOCK_LIST completed_blocks;
+  TO_BLOCK_LIST to_blocks;
+  WorkingPartSet* first_new_set = nullptr;
+  WorkingPartSet* working_set = nullptr;
+  ColPartition_IT col_it(&parts_);
+  for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
+    ColPartition* column = col_it.data();
+    // Any existing column to the left of column is completed.
+    while (!src_it.empty() &&
+           ((working_set = src_it.data())->column() == nullptr ||
+            working_set->column()->right_key() <= column->left_key())) {
+      src_it.extract();
+      working_set->ExtractCompletedBlocks(bleft, tright, resolution,
+                                          used_parts, &completed_blocks,
+                                          &to_blocks);
+      delete working_set;
+      src_it.forward();
+    }
+    // Make a new between-column WorkingSet for before the current column.
+    working_set = new WorkingPartSet(nullptr);
+    dest_it.add_after_then_move(working_set);
+    if (first_new_set == nullptr)
+      first_new_set = working_set;
+    // A matching column gets to stay, and first_new_set gets all the
+    // completed blocks and to_blocks accumulated so far.
+    working_set = src_it.empty() ? nullptr : src_it.data();
+    if (working_set != nullptr &&
+        working_set->column()->MatchingColumns(*column)) {
+      working_set->set_column(column);
+      dest_it.add_after_then_move(src_it.extract());
+      src_it.forward();
+      first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
+      first_new_set = nullptr;
+    } else {
+      // Just make a new working set for the current column.
+      working_set = new WorkingPartSet(column);
+      dest_it.add_after_then_move(working_set);
+    }
+  }
+  // Complete any remaining src working sets.
+  while (!src_it.empty()) {
+    working_set = src_it.extract();
+    working_set->ExtractCompletedBlocks(bleft, tright, resolution,
+                                        used_parts, &completed_blocks,
+                                        &to_blocks);
+    delete working_set;
+    src_it.forward();
+  }
+  // Make a new between-column WorkingSet for after the last column.
+  working_set = new WorkingPartSet(nullptr);
+  dest_it.add_after_then_move(working_set);
+  if (first_new_set == nullptr)
+    first_new_set = working_set;
+  // The first_new_set now gets any accumulated completed blocks/to_blocks.
+  first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
+}
+
+// Adds every column width and inter-column gap to the given running totals.
+void ColPartitionSet::AccumulateColumnWidthsAndGaps(int* total_width,
+                                                    int* width_samples,
+                                                    int* total_gap,
+                                                    int* gap_samples) {
+  ColPartition_IT col_it(&parts_);
+  for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
+    ColPartition* column = col_it.data();
+    *total_width += column->ColumnWidth();
+    *width_samples += 1;
+    if (col_it.at_last())
+      continue;  // No gap is measured after the last column.
+    // The gap runs from the right key of this column to the left key of
+    // the next column in the list.
+    int gap_start = column->right_key();
+    int gap_end = col_it.data_relative(1)->left_key();
+    *total_gap += column->KeyWidth(gap_start, gap_end);
+    *gap_samples += 1;
+  }
+}
+
+// Prints a one-line summary of this set, then each ColPartition in turn.
+void ColPartitionSet::Print() {
+  ColPartition_IT part_it(&parts_);
+  // Summary: part count, good count, good+bad coverage and bounding box.
+  tprintf("Partition set of %d parts, %d good, coverage=%d+%d"
+          " (%d,%d)->(%d,%d)\n",
+          part_it.length(), good_column_count_, good_coverage_, bad_coverage_,
+          bounding_box_.left(), bounding_box_.bottom(),
+          bounding_box_.right(), bounding_box_.top());
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    part_it.data()->Print();
+  }
+}
+
+// PRIVATE CODE.
+
+// Inserts new_part before the element at it if that element's left key is
+// at or beyond new_part's right key, otherwise after it.
+void ColPartitionSet::AddPartition(ColPartition* new_part,
+                                   ColPartition_IT* it) {
+  AddPartitionCoverageAndBox(*new_part);
+  if (new_part->right_key() <= it->data()->left_key())
+    it->add_before_stay_put(new_part);
+  else
+    it->add_after_stay_put(new_part);
+}
+
+// Compute the coverage and good column count. Coverage is the amount of the
+// width of the page (in pixels) that is covered by ColPartitions, which are
+// used to provide candidate column layouts.
+// Coverage is split into good and bad. Good coverage is provided by
+// ColPartitions of a frequent width (according to the callback function
+// provided by TabFinder::WidthCB, which accesses stored statistics on the
+// widths of ColPartitions) and bad coverage is provided by all other
+// ColPartitions, even if they have tab vectors at both sides. Thus:
+// |-----------------------------------------------------------------|
+// |        Double     width    heading                              |
+// |-----------------------------------------------------------------|
+// |-------------------------------| |-------------------------------|
+// |   Common width ColPartition   | |  Common width ColPartition    |
+// |-------------------------------| |-------------------------------|
+// the layout with two common-width columns has better coverage than the
+// double width heading, because the coverage is "good," even though less in
+// total coverage than the heading, because the heading coverage is "bad."
+void ColPartitionSet::ComputeCoverage() {
+  // Reset every statistic, then re-accumulate it one partition at a time.
+  good_column_count_ = 0;
+  good_coverage_ = 0;
+  bad_coverage_ = 0;
+  bounding_box_ = TBOX();
+  ColPartition_IT part_it(&parts_);
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    // Adds this partition's coverage, good column count and bounding box.
+    AddPartitionCoverageAndBox(*part_it.data());
+  }
+}
+
+// Folds one partition into bounding_box_, the coverage totals and the good
+// column count; the partition itself is not inserted into parts_.
+void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition& part) {
+  bounding_box_ += part.bounding_box();
+  int width = part.ColumnWidth();
+  if (part.good_width()) {
+    // A good width counts double towards good_column_count_.
+    good_column_count_ += 2;
+    good_coverage_ += width;
+    return;
+  }
+  // Partitions typed below BRT_UNKNOWN only contribute half their width.
+  if (part.blob_type() < BRT_UNKNOWN) width /= 2;
+  if (part.good_column()) ++good_column_count_;
+  bad_coverage_ += width;
+}
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/colpartitionset.h b/tesseract/src/textord/colpartitionset.h
new file mode 100644
index 00000000..57b61b34
--- /dev/null
+++ b/tesseract/src/textord/colpartitionset.h
@@ -0,0 +1,171 @@
+///////////////////////////////////////////////////////////////////////
+// File:        colpartitionset.h
+// Description: Class to hold a list of ColPartitions of the page that
+//              correspond roughly to columns.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_COLPARTITIONSET_H_
+#define TESSERACT_TEXTORD_COLPARTITIONSET_H_
+
+#include "colpartition.h"   // For ColPartition_LIST.
+#include "genericvector.h"  // For GenericVector.
+#include "rect.h"           // For TBOX.
+#include "tabvector.h"      // For BLOBNBOX_CLIST.
+
+namespace tesseract {
+
+class WorkingPartSet_LIST;
+class ColSegment_LIST;
+class ColPartitionSet;
+using PartSetVector = GenericVector<ColPartitionSet*>;
+
+// ColPartitionSet is a class that holds a list of ColPartitions.
+// Its main use is in holding a candidate partitioning of the width of the
+// image into columns, where each member ColPartition is a single column.
+// ColPartitionSets are used in building the column layout of a page.
+class ColPartitionSet : public ELIST_LINK {
+ public:
+  ColPartitionSet() = default;
+  explicit ColPartitionSet(ColPartition_LIST* partitions);
+  explicit ColPartitionSet(ColPartition* partition);
+
+  ~ColPartitionSet() = default;
+
+  // Simple accessors.
+  const TBOX& bounding_box() const {
+    return bounding_box_;
+  }
+  bool Empty() const {
+    return parts_.empty();
+  }
+  int ColumnCount() const {
+    return parts_.length();
+  }
+
+  // Returns the number of columns of good width.
+  int GoodColumnCount() const;
+
+  // Return an element of the parts_ list from its index.
+  ColPartition* GetColumnByIndex(int index);
+
+  // Return the ColPartition that contains the given coords, if any, else nullptr.
+  ColPartition* ColumnContaining(int x, int y);
+
+  // Return the bounding boxes of columns at the given y-range
+  void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments);
+
+  // Extract all the parts from the list, relinquishing ownership.
+  void RelinquishParts();
+
+  // Attempt to improve this by adding partitions or expanding partitions.
+  void ImproveColumnCandidate(WidthCallback cb, PartSetVector* src_sets);
+
+  // If this set is good enough to represent a new partitioning into columns,
+  // add it to the vector of sets, otherwise delete it.
+  void AddToColumnSetsIfUnique(PartSetVector* column_sets, WidthCallback cb);
+
+  // Return true if the partitions in other are all compatible with the columns
+  // in this.
+  bool CompatibleColumns(bool debug, ColPartitionSet* other, WidthCallback cb);
+
+  // Returns the total width of all blobs in the part_set that do not lie
+  // within an approved column. Used as a cost measure for using this
+  // column set over another that might be compatible.
+  int UnmatchedWidth(ColPartitionSet* part_set);
+
+  // Return true if this ColPartitionSet makes a legal column candidate by
+  // having legal individual partitions and non-overlapping adjacent pairs.
+  bool LegalColumnCandidate();
+
+  // Return a copy of this. If good_only will only copy the Good ColPartitions.
+  ColPartitionSet* Copy(bool good_only);
+
+  // Display the edges of the columns at the given y coords.
+  void DisplayColumnEdges(int y_bottom, int y_top, ScrollView* win);
+
+  // Return the ColumnSpanningType that best explains the columns overlapped
+  // by the given coords(left,right,y), with the given margins.
+  // Also return the first and last column index touched by the coords and
+  // the leftmost spanned column.
+  // Column indices are 2n + 1 for real columns (0 based) and even values
+  // represent the gaps in between columns, with 0 being left of the leftmost.
+  // resolution refers to the ppi resolution of the image. It may be 0 if only
+  // the first_col and last_col are required.
+  ColumnSpanningType SpanningType(int resolution,
+                                  int left, int right, int height, int y,
+                                  int left_margin, int right_margin,
+                                  int* first_col, int* last_col,
+                                  int* first_spanned_col);
+
+  // The column_set has changed. Close down all in-progress WorkingPartSets in
+  // columns that do not match and start new ones for the new columns in this.
+  // As ColPartitions are turned into BLOCKs, the used ones are put in
+  // used_parts, as they still need to be referenced in the grid.
+  void ChangeWorkColumns(const ICOORD& bleft, const ICOORD& tright,
+                         int resolution, ColPartition_LIST* used_parts,
+                         WorkingPartSet_LIST* working_set);
+
+  // Accumulate the widths and gaps into the given variables.
+  void AccumulateColumnWidthsAndGaps(int* total_width, int* width_samples,
+                                     int* total_gap, int* gap_samples);
+
+  // Provide debug output for this ColPartitionSet and all the ColPartitions.
+  void Print();
+
+ private:
+  // Add the given partition to the list in the appropriate place.
+  void AddPartition(ColPartition* new_part, ColPartition_IT* it);
+
+  // Compute the coverage and good column count. Coverage is the amount of the
+  // width of the page (in pixels) that is covered by ColPartitions, which are
+  // used to provide candidate column layouts.
+  // Coverage is split into good and bad. Good coverage is provided by
+  // ColPartitions of a frequent width (according to the callback function
+  // provided by TabFinder::WidthCB, which accesses stored statistics on the
+  // widths of ColPartitions) and bad coverage is provided by all other
+  // ColPartitions, even if they have tab vectors at both sides. Thus:
+  // |-----------------------------------------------------------------|
+  // |        Double     width    heading                              |
+  // |-----------------------------------------------------------------|
+  // |-------------------------------| |-------------------------------|
+  // |   Common width ColPartition   | |  Common width ColPartition    |
+  // |-------------------------------| |-------------------------------|
+  // the layout with two common-width columns has better coverage than the
+  // double width heading, because the coverage is "good," even though less in
+  // total coverage than the heading, because the heading coverage is "bad."
+  void ComputeCoverage();
+
+  // Adds the coverage, column count and box for a single partition,
+  // without adding it to the list. (Helper factored from ComputeCoverage.)
+  void AddPartitionCoverageAndBox(const ColPartition& part);
+
+  // The partitions in this column candidate.
+  ColPartition_LIST parts_;
+  // Goodness score: +2 per part of good width, +1 per other good column.
+  int good_column_count_ = 0;
+  // Total width of all the good ColPartitions.
+  int good_coverage_ = 0;
+  // Total width of all the bad ColPartitions (image partitions count half).
+  int bad_coverage_ = 0;
+  // Bounding box of all partitions in the set.
+  TBOX bounding_box_;
+};
+
+ELISTIZEH(ColPartitionSet)
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_TEXTORD_COLPARTITIONSET_H_
diff --git a/tesseract/src/textord/devanagari_processing.cpp b/tesseract/src/textord/devanagari_processing.cpp
new file mode 100644
index 00000000..2ea0d942
--- /dev/null
+++ b/tesseract/src/textord/devanagari_processing.cpp
@@ -0,0 +1,502 @@
+/**********************************************************************
+ * File:        devanagari_processing.cpp
+ * Description: Methods to process images containing devanagari symbols,
+ *              prior to classification.
+ * Author:      Shobhit Saxena
+ * Created:     Mon Nov 17 20:26:01 IST 2008
+ *
+ * (C) Copyright 2008, Google Inc.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "devanagari_processing.h"
+
+#include "debugpixa.h"
+#include "statistc.h"
+#include "tordmain.h"
+
+#include "allheaders.h"
+
+namespace tesseract {
+
+// Debug controls for shiro-rekha splitting: a debug level above 0 enables
+// tprintf tracing, and the image flag makes Split() build a debug image.
+INT_VAR(devanagari_split_debuglevel, 0,
+        "Debug level for split shiro-rekha process.");
+
+BOOL_VAR(devanagari_split_debugimage, 0,
+         "Whether to create a debug image for split shiro-rekha process.");
+
+// Starts with no images, no segmentation and NO_SPLIT for both phases.
+ShiroRekhaSplitter::ShiroRekhaSplitter()
+    : orig_pix_(nullptr),
+      splitted_image_(nullptr),
+      pageseg_split_strategy_(NO_SPLIT),
+      ocr_split_strategy_(NO_SPLIT),
+      debug_image_(nullptr),
+      segmentation_block_list_(nullptr),
+      global_xheight_(kUnspecifiedXheight),
+      perform_close_(false) {}
+
+// Destroying the splitter releases the images it holds; Clear() does the
+// actual work.
+ShiroRekhaSplitter::~ShiroRekhaSplitter() { Clear(); }
+
+// Drops all held images and restores every setting to its constructed value.
+void ShiroRekhaSplitter::Clear() {
+  pixDestroy(&orig_pix_);
+  pixDestroy(&splitted_image_);
+  pixDestroy(&debug_image_);
+  pageseg_split_strategy_ = ocr_split_strategy_ = NO_SPLIT;
+  segmentation_block_list_ = nullptr;
+  global_xheight_ = kUnspecifiedXheight;
+  perform_close_ = false;
+}
+
+// Stores a clone of pix, owned by this class. The clone is taken before the
+// old image is released so that passing the currently held pix is safe.
+void ShiroRekhaSplitter::set_orig_pix(Pix* pix) {
+  Pix* new_pix = pixClone(pix);
+  pixDestroy(&orig_pix_);  // No-op when nothing is held.
+  orig_pix_ = new_pix;
+}
+
+// Top-level method to perform splitting based on current settings, leaving
+// the result in splitted_image_. Returns false only if the selected strategy
+// is NO_SPLIT; otherwise true, even when no region ended up being cleared.
+// split_for_pageseg selects pageseg_split_strategy_ (true) or
+// ocr_split_strategy_ (false). pixa_debug may be null and is debug-only.
+bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa* pixa_debug) {
+  SplitStrategy split_strategy = split_for_pageseg ? pageseg_split_strategy_ :
+      ocr_split_strategy_;
+  if (split_strategy == NO_SPLIT) {
+    return false;  // Nothing to do.
+  }
+  ASSERT_HOST(split_strategy == MINIMAL_SPLIT ||
+              split_strategy == MAXIMAL_SPLIT);
+  ASSERT_HOST(orig_pix_);
+  if (devanagari_split_debuglevel > 0) {
+    tprintf("Splitting shiro-rekha ...\n");
+    tprintf("Split strategy = %s\n",
+            split_strategy == MINIMAL_SPLIT ? "Minimal" : "Maximal");
+    tprintf("Initial pageseg available = %s\n",
+            segmentation_block_list_ ? "yes" : "no");
+  }
+  // Start the output as a full copy of the original; regions are cleared later.
+  pixDestroy(&splitted_image_);
+  splitted_image_ = pixCopy(nullptr, orig_pix_);
+
+  // Initialize debug image if required (any previous one is discarded).
+  if (devanagari_split_debugimage) {
+    pixDestroy(&debug_image_);
+    debug_image_ = pixConvertTo32(orig_pix_);
+  }
+
+  // Determine all connected components in the input image. A close operation
+  // may be required prior to this, depending on the current settings.
+  Pix* pix_for_ccs = pixClone(orig_pix_);
+  if (perform_close_ && global_xheight_ != kUnspecifiedXheight &&
+      !segmentation_block_list_) {
+    if (devanagari_split_debuglevel > 0) {
+      tprintf("Performing a global close operation..\n");
+    }
+    // Only a global xheight is available (no per-row information). Close a
+    // separate copy so that orig_pix_ itself is left unmodified.
+    pixDestroy(&pix_for_ccs);
+    pix_for_ccs = pixCopy(nullptr, orig_pix_);
+    PerformClose(pix_for_ccs, global_xheight_);
+  }
+  Pixa* ccs;
+  Boxa* tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8);
+  boxaDestroy(&tmp_boxa);
+  pixDestroy(&pix_for_ccs);
+
+  // Iterate over all connected components. Get their bounding boxes and clip
+  // out the image regions corresponding to these boxes from the original image.
+  // Conditionally run splitting on each of them.
+  Boxa* regions_to_clear = boxaCreate(0);
+  int num_ccs = 0;
+  if (ccs != nullptr) num_ccs = pixaGetCount(ccs);
+  for (int i = 0; i < num_ccs; ++i) {
+    Box* box = ccs->boxa->box[i];
+    Pix* word_pix = pixClipRectangle(orig_pix_, box, nullptr);
+    ASSERT_HOST(word_pix);
+    int xheight = GetXheightForCC(box);
+    if (xheight == kUnspecifiedXheight && segmentation_block_list_ &&
+        devanagari_split_debugimage) {
+      pixRenderBoxArb(debug_image_, box, 1, 255, 0, 0);
+    }
+    // If some xheight measure is available, attempt to pre-eliminate small
+    // blobs from the shiro-rekha process. This is primarily to save the CCs
+    // corresponding to punctuation marks/small dots etc which are part of
+    // larger graphemes.
+    if (xheight == kUnspecifiedXheight ||
+        (box->w > xheight / 3 && box->h > xheight / 2)) {
+      SplitWordShiroRekha(split_strategy, word_pix, xheight,
+                          box->x, box->y, regions_to_clear);
+    } else if (devanagari_split_debuglevel > 0) {
+      tprintf("CC dropped from splitting: %d,%d (%d, %d)\n",
+              box->x, box->y, box->w, box->h);
+    }
+    pixDestroy(&word_pix);
+  }
+  // Actually clear the boxes now, in the output copy only.
+  for (int i = 0; i < boxaGetCount(regions_to_clear); ++i) {
+    Box* box = boxaGetBox(regions_to_clear, i, L_CLONE);
+    pixClearInRect(splitted_image_, box);
+    boxDestroy(&box);
+  }
+  boxaDestroy(&regions_to_clear);
+  pixaDestroy(&ccs);
+  if (devanagari_split_debugimage && pixa_debug != nullptr) {
+    pixa_debug->AddPix(debug_image_,
+                       split_for_pageseg ? "pageseg_split" : "ocr_split");
+  }
+  return true;
+}
+
+// Runs pixCloseBrick on pix, passing it as both destination and source, with
+// a brick of xheight_estimate / 8 by xheight_estimate / 3 (integer division).
+void ShiroRekhaSplitter::PerformClose(Pix* pix, int xheight_estimate) {
+  pixCloseBrick(pix, pix, xheight_estimate / 8, xheight_estimate / 3);
+}
+
+// Returns the xheight of the row that cc_bbox falls in, global_xheight_ when
+// no block list is set, or kUnspecifiedXheight when no row matches.
+int ShiroRekhaSplitter::GetXheightForCC(Box* cc_bbox) {
+  if (!segmentation_block_list_) {
+    return global_xheight_;
+  }
+  // Flip the y-axis to get the box in Tesseract's coordinate system.
+  TBOX bbox(cc_bbox->x,
+            pixGetHeight(orig_pix_) - cc_bbox->y - cc_bbox->h - 1,
+            cc_bbox->x + cc_bbox->w,
+            pixGetHeight(orig_pix_) - cc_bbox->y - 1);
+  // Iterate over all blocks.
+  BLOCK_IT block_it(segmentation_block_list_);
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
+    BLOCK* block = block_it.data();
+    // Iterate over all rows in the block.
+    ROW_IT row_it(block->row_list());
+    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+      ROW* row = row_it.data();
+      if (!row->bounding_box().major_overlap(bbox)) {
+        continue;
+      }
+      // Row could be skewed, warped, etc. Use the position of the box to
+      // determine the baseline position of the row for that x-coordinate.
+      // Create a square TBOX whose baseline's mid-point lies at this point
+      // and side is row's xheight. Take the overlap of this box with the input
+      // box and check if it is a 'major overlap'. If so, this box lies in this
+      // row. In that case, return the xheight for this row.
+      float box_middle = 0.5 * (bbox.left() + bbox.right());
+      int baseline = static_cast<int>(row->base_line(box_middle) + 0.5);
+      TBOX test_box(box_middle - row->x_height() / 2,
+                    baseline,
+                    box_middle + row->x_height() / 2,
+                    static_cast<int>(baseline + row->x_height()));
+      // Compute overlap. If it is a major overlap, this is the right row.
+      if (bbox.major_overlap(test_box)) {
+        return row->x_height();
+      }
+    }
+  }
+  // No row found for this bbox.
+  return kUnspecifiedXheight;
+}
+
+// Returns a list of regions (boxes) which should be cleared in the original
+// image so as to perform shiro-rekha splitting. Pix is assumed to carry one
+// (or less) word only. Xheight measure could be the global estimate, the row
+// estimate, or unspecified. If unspecified, over splitting may occur, since a
+// conservative estimate of stroke width along with an associated multiplier
+// is used in its place. It is advisable to have a specified xheight when
+// splitting for classification/training.
+// The regions are appended to regions_to_clear in the coordinates of the
+// original image, using word_left and word_top as the offset of pix in it.
+// The row with the largest on-pixel count is regarded as the approximate
+// location of the shiro-rekha. By descending from that peak on both sides,
+// the stroke width of the shiro-rekha is estimated.
+// Per-column on-pixel counts are then computed for a copy of the input in
+// which the shiro-rekha band and everything below a certain leeway under it
+// are cleared. The leeway depends on the input xheight, if provided, else a
+// multiplier on the stroke width is used (which may lead to over-splitting).
+void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy,
+                                             Pix* pix,
+                                             int xheight,
+                                             int word_left,
+                                             int word_top,
+                                             Boxa* regions_to_clear) {
+  if (split_strategy == NO_SPLIT) {
+    return;
+  }
+  int width = pixGetWidth(pix);
+  int height = pixGetHeight(pix);
+  // Statistically determine the yextents of the shiro-rekha.
+  int shirorekha_top, shirorekha_bottom, shirorekha_ylevel;
+  GetShiroRekhaYExtents(pix, &shirorekha_top, &shirorekha_bottom,
+                        &shirorekha_ylevel);
+  // Since the shiro rekha is also a stroke, its width is equal to the stroke
+  // width.
+  int stroke_width = shirorekha_bottom - shirorekha_top + 1;
+
+  // Some safeguards to protect CCs we do not want to be split.
+  // These are particularly useful when the word wasn't eliminated earlier
+  // because xheight information was unavailable.
+  if (shirorekha_ylevel > height / 2) {
+    // Shirorekha shouldn't be in the bottom half of the word.
+    if (devanagari_split_debuglevel > 0) {
+      tprintf("Skipping splitting CC at (%d, %d): shirorekha in lower half..\n",
+              word_left, word_top);
+    }
+    return;
+  }
+  if (stroke_width > height / 3) {
+    // Even the boldest of fonts shouldn't do this.
+    if (devanagari_split_debuglevel > 0) {
+      tprintf("Skipping splitting CC at (%d, %d): stroke width too huge..\n",
+              word_left, word_top);
+    }
+    return;
+  }
+
+  // Work on a copy of the word in which the shiro-rekha band (plus a margin
+  // of about stroke_width / 3 on each side) is cleared.
+  Box* box_to_clear = boxCreate(0, shirorekha_top - stroke_width / 3,
+                                width, 5 * stroke_width / 3);
+  Pix* word_in_xheight = pixCopy(nullptr, pix);
+  pixClearInRect(word_in_xheight, box_to_clear);
+  // Also clear any pixels which are below shirorekha_bottom + some leeway.
+  // The leeway is set to xheight if the information is available, else it is a
+  // multiplier applied to the stroke width.
+  int leeway_to_keep = stroke_width * 3;
+  if (xheight != kUnspecifiedXheight) {
+    // This is because the xheight-region typically includes the shiro-rekha
+    // inside it, i.e., the top of the xheight range corresponds to the top of
+    // shiro-rekha.
+    leeway_to_keep = xheight - stroke_width;
+  }
+  box_to_clear->y = shirorekha_bottom + leeway_to_keep;
+  box_to_clear->h = height - box_to_clear->y;
+  pixClearInRect(word_in_xheight, box_to_clear);
+  boxDestroy(&box_to_clear);
+
+  PixelHistogram vert_hist;
+  vert_hist.ConstructVerticalCountHist(word_in_xheight);
+  pixDestroy(&word_in_xheight);
+
+  // If the number of black pixel in any column of the image is less than a
+  // fraction of the stroke width, treat it as noise / a stray mark. Perform
+  // these changes inside the vert_hist data itself, as that is used later on as
+  // a bit vector for the final split decision at every column.
+  for (int i = 0; i < width; ++i) {
+    if (vert_hist.hist()[i] <= stroke_width / 4)
+      vert_hist.hist()[i] = 0;
+    else
+      vert_hist.hist()[i] = 1;
+  }
+  // Only split at a gap that is at least half the stroke width wide and that
+  // comes after at least half a stroke width of inked columns.
+  int i = 0;
+  int cur_component_width = 0;
+  while (i < width) {
+    if (!vert_hist.hist()[i]) {
+      int j = 0;
+      while (i + j < width && !vert_hist.hist()[i+j])
+        ++j;
+      if (j >= stroke_width / 2 && cur_component_width >= stroke_width / 2) {
+        // Perform a shiro-rekha split. The intervening region lies from i to
+        // i+j-1.
+        // A minimal single-pixel split makes the estimation of intra- and
+        // inter-word spacing easier during page layout analysis,
+        // whereas a maximal split may be needed for OCR, depending on
+        // how the engine was trained.
+        bool minimal_split = (split_strategy == MINIMAL_SPLIT);
+        int split_width = minimal_split ? 1 : j;
+        int split_left = minimal_split ? i + (j / 2) - (split_width / 2) : i;
+        if (!minimal_split || (i != 0 && i + j != width)) {
+          Box* box_to_clear =
+              boxCreate(word_left + split_left,
+                        word_top + shirorekha_top - stroke_width / 3,
+                        split_width,
+                        5 * stroke_width / 3);
+          if (box_to_clear) {
+            boxaAddBox(regions_to_clear, box_to_clear, L_CLONE);
+            // Mark this in the debug image if needed.
+            if (devanagari_split_debugimage) {
+              pixRenderBoxArb(debug_image_, box_to_clear, 1, 128, 255, 128);
+            }
+            boxDestroy(&box_to_clear);
+            cur_component_width = 0;
+          }
+        }
+      }
+      i += j;
+    } else {
+      ++i;
+      ++cur_component_width;
+    }
+  }
+}
+
+// Refreshes the words in the segmentation block list by using blobs in the
+// input blob list (new_blobs), via RefreshWordBlobsFromNewBlobs.
+// The segmentation block list must be set; this is enforced by ASSERT_HOST.
+void ShiroRekhaSplitter::RefreshSegmentationWithNewBlobs(
+    C_BLOB_LIST* new_blobs) {
+  // The segmentation block list must have been specified.
+  ASSERT_HOST(segmentation_block_list_);
+  if (devanagari_split_debuglevel > 0) {
+    tprintf("Before refreshing blobs:\n");
+    PrintSegmentationStats(segmentation_block_list_);
+    tprintf("New Blobs found: %d\n", new_blobs->length());
+  }
+
+  C_BLOB_LIST not_found_blobs;  // Passed on only when a debug image exists.
+  RefreshWordBlobsFromNewBlobs(segmentation_block_list_,
+                               new_blobs,
+                               ((devanagari_split_debugimage && debug_image_) ?
+                                &not_found_blobs : nullptr));
+
+  if (devanagari_split_debuglevel > 0) {
+    tprintf("After refreshing blobs:\n");
+    PrintSegmentationStats(segmentation_block_list_);
+  }
+  if (devanagari_split_debugimage && debug_image_) {
+    // Plot out the original blobs for which no match was found in the new
+    // all_blobs list.
+    C_BLOB_IT not_found_it(&not_found_blobs);
+    for (not_found_it.mark_cycle_pt(); !not_found_it.cycled_list();
+         not_found_it.forward()) {
+      C_BLOB* not_found = not_found_it.data();
+      TBOX not_found_box = not_found->bounding_box();
+      Box* box_to_plot = GetBoxForTBOX(not_found_box);
+      pixRenderBoxArb(debug_image_, box_to_plot, 1, 255, 0, 255);
+      boxDestroy(&box_to_plot);
+    }
+
+    // Plot out whatever blobs are still in new_blobs after the refresh.
+    C_BLOB_IT all_blobs_it(new_blobs);
+    for (all_blobs_it.mark_cycle_pt(); !all_blobs_it.cycled_list();
+         all_blobs_it.forward()) {
+      C_BLOB* a_blob = all_blobs_it.data();
+      Box* box_to_plot = GetBoxForTBOX(a_blob->bounding_box());
+      pixRenderBoxArb(debug_image_, box_to_plot, 3, 0, 127, 0);
+      boxDestroy(&box_to_plot);
+    }
+  }
+}
+
+// Converts tbox to a newly created Leptonica Box by flipping its y-axis
+// against the height of the original image.
+Box* ShiroRekhaSplitter::GetBoxForTBOX(const TBOX& tbox) const {
+  const int box_top = pixGetHeight(orig_pix_) - tbox.top() - 1;
+  return boxCreate(tbox.left(), box_top, tbox.width(), tbox.height());
+}
+
+// Returns the most frequent blob height among the 8-connected components of
+// pix. Blobs smaller than 3 pixels in both dimensions are left out.
+int ShiroRekhaSplitter::GetModeHeight(Pix* pix) {
+  Boxa* cc_boxes = pixConnComp(pix, nullptr, 8);
+  STATS height_stats(0, pixGetHeight(pix));
+  height_stats.clear();
+  const int num_boxes = boxaGetCount(cc_boxes);
+  for (int b = 0; b < num_boxes; ++b) {
+    Box* cc_box = boxaGetBox(cc_boxes, b, L_CLONE);
+    // Skip specks: a blob counts once either dimension reaches 3 pixels.
+    const bool big_enough = cc_box->h >= 3 || cc_box->w >= 3;
+    if (big_enough) height_stats.add(cc_box->h, 1);
+    boxDestroy(&cc_box);
+  }
+  boxaDestroy(&cc_boxes);
+  return height_stats.mode();
+}
+
+// Estimates the y-extents of the shiro-rekha in word_pix from its per-row
+// on-pixel counts. Any of the three output pointers may be null.
+void ShiroRekhaSplitter::GetShiroRekhaYExtents(Pix* word_pix,
+                                               int* shirorekha_top,
+                                               int* shirorekha_bottom,
+                                               int* shirorekha_ylevel) {
+  PixelHistogram row_counts;
+  row_counts.ConstructHorizontalCountHist(word_pix);
+  // The row with the most on-pixels is taken as the level of the top-line.
+  int peak_count = 0;
+  const int peak_row = row_counts.GetHistogramMaximum(&peak_count);
+
+  // Walk away from the peak, upwards and downwards, for as long as the rows
+  // still hold at least 70% of the peak count.
+  const int min_count = (peak_count * 70) / 100;
+  const int num_rows = pixGetHeight(word_pix);
+  int upper = peak_row;
+  int lower = peak_row;
+  for (; upper > 0 && row_counts.hist()[upper] >= min_count; --upper) {
+  }
+  for (; lower < num_rows && row_counts.hist()[lower] >= min_count; ++lower) {
+  }
+
+  if (shirorekha_top) *shirorekha_top = upper;
+  if (shirorekha_bottom) *shirorekha_bottom = lower;
+  if (shirorekha_ylevel) *shirorekha_ylevel = peak_row;
+}
+
+// Returns the index of the global maximum of the histogram (the first one on
+// ties) and stores its frequency in count, if specified. An empty histogram
+// yields index 0 with a frequency of 0 instead of reading out of bounds.
+int PixelHistogram::GetHistogramMaximum(int* count) const {
+  int best_value = 0;
+  // Index 0 is the initial best, so the scan can start at 1.
+  for (int i = 1; i < length_; ++i) {
+    if (hist_[i] > hist_[best_value]) best_value = i;
+  }
+  if (count) {
+    *count = length_ > 0 ? hist_[best_value] : 0;
+  }
+  return best_value;
+}
+
+// Rebuilds the histogram as per-column on-pixel counts of pix (one entry per
+// x position). Any previously held data is discarded first.
+void PixelHistogram::ConstructVerticalCountHist(Pix* pix) {
+  Clear();
+  const int num_cols = pixGetWidth(pix);
+  const int num_rows = pixGetHeight(pix);
+  length_ = num_cols;
+  hist_ = new int[num_cols]();  // Value-initialized: every count starts at 0.
+  const int words_per_line = pixGetWpl(pix);
+  l_uint32* row_data = pixGetData(pix);
+  for (int y = 0; y < num_rows; ++y, row_data += words_per_line) {
+    for (int x = 0; x < num_cols; ++x) {
+      if (GET_DATA_BIT(row_data, x)) {
+        ++hist_[x];
+      }
+    }
+  }
+}
+
+void PixelHistogram::ConstructHorizontalCountHist(Pix* pix) {
+  Clear();  // Drop any previous histogram before rebuilding.
+  Numa* row_counts = pixCountPixelsByRow(pix, nullptr);  // On-pixels per row.
+  length_ = numaGetCount(row_counts);
+  hist_ = new int[length_];
+  for (int row = 0; row < length_; ++row) {
+    l_int32 on_pixels = 0;
+    numaGetIValue(row_counts, row, &on_pixels);
+    hist_[row] = on_pixels;
+  }
+  numaDestroy(&row_counts);  // The counts now live in hist_.
+}
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/devanagari_processing.h b/tesseract/src/textord/devanagari_processing.h
new file mode 100644
index 00000000..cd0bfeb6
--- /dev/null
+++ b/tesseract/src/textord/devanagari_processing.h
@@ -0,0 +1,210 @@
+// Copyright 2008 Google Inc. All Rights Reserved.
+// Author: shobhitsaxena@google.com (Shobhit Saxena)
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
+#define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
+
+#include "allheaders.h"
+#include "ocrblock.h"
+#include "params.h"
+
+struct Pix;
+struct Box;
+struct Boxa;
+
+namespace tesseract {
+
+extern
+INT_VAR_H(devanagari_split_debuglevel, 0,
+          "Debug level for split shiro-rekha process.");
+
+extern
+BOOL_VAR_H(devanagari_split_debugimage, 0,
+           "Whether to create a debug image for split shiro-rekha process.");
+
+class TBOX;
+class DebugPixa;
+
+// Owns an array of per-row or per-column on-pixel counts taken from a Pix.
+class PixelHistogram {
+ public:
+  PixelHistogram() : hist_(nullptr), length_(0) {}
+
+  ~PixelHistogram() { Clear(); }
+
+  // Frees the histogram data. hist_ is reset to nullptr so that a second
+  // Clear() (e.g. from the destructor) cannot delete the array twice.
+  void Clear() {
+    delete[] hist_;
+    hist_ = nullptr;
+    length_ = 0;
+  }
+
+  // Returns the raw count array; it has length() entries and is owned by
+  // this object. It is nullptr until a Construct* method has been called.
+  int* hist() const { return hist_; }
+
+  // Returns the number of entries in hist().
+  int length() const { return length_; }
+
+  // Methods to construct histograms from images. These clear any existing data.
+  void ConstructVerticalCountHist(Pix* pix);
+  void ConstructHorizontalCountHist(Pix* pix);
+
+  // This method returns the global-maxima for the histogram. The frequency of
+  // the global maxima is returned in count, if specified.
+  int GetHistogramMaximum(int* count) const;
+
+ private:
+  int* hist_;   // Owned array of counts, or nullptr when empty.
+  int length_;  // Number of entries in hist_.
+};
+
+class ShiroRekhaSplitter {
+ public:
+  enum SplitStrategy {
+    NO_SPLIT = 0,   // No splitting is performed for the phase.
+    MINIMAL_SPLIT,  // A 1-pixel-wide cut is made in each chosen gap.
+    MAXIMAL_SPLIT   // The whole width of each chosen gap is cut.
+  };
+
+  ShiroRekhaSplitter();
+  virtual ~ShiroRekhaSplitter();
+
+  // Top-level method to perform splitting based on current settings.
+  // Returns false only if the strategy selected for the phase is NO_SPLIT.
+  // If split_for_pageseg is true, the pageseg_split_strategy_ is used for
+  // splitting. If false, the ocr_split_strategy_ is used.
+  bool Split(bool split_for_pageseg, DebugPixa* pixa_debug);
+
+  // Releases the held images and resets all settings to their defaults.
+  void Clear();
+
+  // Refreshes the words in the segmentation block list by using blobs in the
+  // input blob list.
+  // The segmentation block list must be set.
+  void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs);
+
+  // Returns true if the split strategies for pageseg and ocr are different.
+  bool HasDifferentSplitStrategies() const {
+    return pageseg_split_strategy_ != ocr_split_strategy_;
+  }
+
+  // This only keeps a copy of the block list pointer. At split call, the list
+  // object should still be alive. This block list is used as a golden
+  // segmentation when performing splitting.
+  void set_segmentation_block_list(BLOCK_LIST* block_list) {
+    segmentation_block_list_ = block_list;
+  }
+
+  static const int kUnspecifiedXheight = -1;  // Sentinel: no xheight known.
+
+  void set_global_xheight(int xheight) {
+    global_xheight_ = xheight;
+  }
+
+  void set_perform_close(bool perform) {
+    perform_close_ = perform;
+  }
+
+  // Returns the image obtained from shiro-rekha splitting. The returned object
+  // is owned by this class. Callers may want to clone the returned pix to keep
+  // it alive beyond the life of ShiroRekhaSplitter object.
+  Pix* splitted_image() {
+    return splitted_image_;
+  }
+
+  // On setting the input image, a clone of it is owned by this class.
+  void set_orig_pix(Pix* pix);
+
+  // Returns the input image provided to the object. This object is owned by
+  // this class. Callers may want to clone the returned pix to work with it.
+  Pix* orig_pix() {
+    return orig_pix_;
+  }
+
+  SplitStrategy ocr_split_strategy() const {
+    return ocr_split_strategy_;
+  }
+
+  void set_ocr_split_strategy(SplitStrategy strategy) {
+    ocr_split_strategy_ = strategy;
+  }
+
+  SplitStrategy pageseg_split_strategy() const {
+    return pageseg_split_strategy_;
+  }
+
+  void set_pageseg_split_strategy(SplitStrategy strategy) {
+    pageseg_split_strategy_ = strategy;
+  }
+
+  BLOCK_LIST* segmentation_block_list() {
+    return segmentation_block_list_;
+  }
+
+  // This method returns the computed mode-height of blobs in the pix.
+  // It also prunes very small blobs from calculation. Could be used to provide
+  // a global xheight estimate for images which have the same point-size text.
+  static int GetModeHeight(Pix* pix);
+
+ private:
+  // Method to perform a close operation on the input image. The xheight
+  // estimate decides the size of sel used.
+  static void PerformClose(Pix* pix, int xheight_estimate);
+
+  // This method resolves the cc bbox to a particular row and returns the row's
+  // xheight. This uses segmentation_block_list_ if available, else just
+  // returns the global_xheight_ estimate currently set in the object.
+  int GetXheightForCC(Box* cc_bbox);
+
+  // Returns a list of regions (boxes) which should be cleared in the original
+  // image so as to perform shiro-rekha splitting. Pix is assumed to carry one
+  // (or less) word only. Xheight measure could be the global estimate, the row
+  // estimate, or unspecified. If unspecified, over splitting may occur, since a
+  // conservative estimate of stroke width along with an associated multiplier
+  // is used in its place. It is advisable to have a specified xheight when
+  // splitting for classification/training.
+  void SplitWordShiroRekha(SplitStrategy split_strategy,
+                           Pix* pix,
+                           int xheight,
+                           int word_left,
+                           int word_top,
+                           Boxa* regions_to_clear);
+
+  // Returns a new box object for the corresponding TBOX, based on the original
+  // image's coordinate system.
+  Box* GetBoxForTBOX(const TBOX& tbox) const;
+
+  // This method returns y-extents of the shiro-rekha computed from the input
+  // word image.
+  static void GetShiroRekhaYExtents(Pix* word_pix,
+                                    int* shirorekha_top,
+                                    int* shirorekha_bottom,
+                                    int* shirorekha_ylevel);
+
+  Pix* orig_pix_;         // Just a clone of the input image passed.
+  Pix* splitted_image_;   // Image produced after the last splitting round. The
+                          // object is owned by this class.
+  SplitStrategy pageseg_split_strategy_;  // Used when split_for_pageseg.
+  SplitStrategy ocr_split_strategy_;      // Used otherwise.
+  Pix* debug_image_;      // Debug rendering, destroyed in Clear(); may be null.
+  // This block list is used as a golden segmentation when performing splitting.
+  BLOCK_LIST* segmentation_block_list_;
+  int global_xheight_;  // kUnspecifiedXheight until set_global_xheight().
+  bool perform_close_;  // Whether a morphological close operation should be
+                        // performed before CCs are run through splitting.
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
diff --git a/tesseract/src/textord/drawtord.cpp b/tesseract/src/textord/drawtord.cpp
new file mode 100644
index 00000000..3e02653a
--- /dev/null
+++ b/tesseract/src/textord/drawtord.cpp
@@ -0,0 +1,423 @@
+/**********************************************************************
+ * File:        drawtord.cpp  (Formerly drawto.c)
+ * Description: Draw things to do with textord.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include          "drawtord.h"
+
+#include          "pithsync.h"
+#include          "topitch.h"
+
+namespace tesseract {
+
+#define TO_WIN_XPOS     0       // default x position of the textord window
+#define TO_WIN_YPOS     0       // default y position of the textord window
+#define TO_WIN_NAME     "Textord"
+                                 // title of the textord window
+
+BOOL_VAR (textord_show_fixed_cuts, false,
+"Draw fixed pitch cell boundaries");
+
+ScrollView* to_win = nullptr;  // shared window; set by create_to_win()
+
+#ifndef GRAPHICS_DISABLED
+
+/**********************************************************************
+ * create_to_win
+ *
+ * Create the to window used to show the fit.
+ **********************************************************************/
+
+ScrollView* create_to_win(ICOORD page_tr) {
+  // The window is created on first use only; later calls reuse it.
+  if (to_win == nullptr)
+    to_win = new ScrollView(TO_WIN_NAME, TO_WIN_XPOS, TO_WIN_YPOS, page_tr.x() + 1,
+                            page_tr.y() + 1, page_tr.x(), page_tr.y(), true);
+  return to_win;
+}
+
+
+void close_to_win() {
+  if (to_win == nullptr) return;  // window was never created
+  // The window is deliberately not deleted (i.e. leaked) so that the user
+  // can still view its contents; only Update() is called on it.
+  to_win->Update();
+}
+
+
+/**********************************************************************
+ * plot_box_list
+ *
+ * Draw a list of blobs.
+ **********************************************************************/
+
+// Plots the bounding box of every blob in the list into win, with the pen
+// set to body_colour and the brush set to ScrollView::NONE.
+//   win:         window to draw in
+//   list:        blob list
+//   body_colour: colour to draw
+void plot_box_list(ScrollView* win, BLOBNBOX_LIST *list,
+                   ScrollView::Color body_colour) {
+  win->Pen(body_colour);
+  win->Brush(ScrollView::NONE);
+  BLOBNBOX_IT blob_it(list);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward())
+    blob_it.data()->bounding_box().plot(win);
+}
+
+
+/**********************************************************************
+ * plot_to_row
+ *
+ * Draw the blobs of a row in a given colour and draw the line fit.
+ **********************************************************************/
+
+// Plots the blobs of a row into the global to_win, followed by the row's
+// fitted line y = line_m() * x + line_c(), drawn from the left edge of the
+// first blob in the list to the right edge of the last one.
+//   row:      row to draw
+//   colour:   colour to draw in
+//   rotation: rotation applied to both end points of the line
+void plot_to_row(TO_ROW *row, ScrollView::Color colour, FCOORD rotation) {
+  BLOBNBOX_IT blob_it = row->blob_list();
+  if (blob_it.empty()) {
+    tprintf("No blobs in row at %g\n", row->parallel_c());
+    return;
+  }
+  const float left = blob_it.data()->bounding_box().left();
+  blob_it.move_to_last();
+  const float right = blob_it.data()->bounding_box().right();
+  plot_blob_list(to_win, row->blob_list(), colour, ScrollView::BROWN);
+  to_win->Pen(colour);
+
+  FCOORD line_start(left, row->line_m() * left + row->line_c());
+  line_start.rotate(rotation);
+  to_win->SetCursor(line_start.x(), line_start.y());
+
+  FCOORD line_end(right, row->line_m() * right + row->line_c());
+  line_end.rotate(rotation);
+  to_win->DrawTo(line_end.x(), line_end.y());
+}
+
+
+/**********************************************************************
+ * plot_parallel_row
+ *
+ * Draw the blobs of a row in a given colour and draw its parallel line.
+ **********************************************************************/
+
+// Plots the blobs of a row into the global to_win, a segment at x = left
+// between the row's max_y() and min_y() levels, and the line
+// y = gradient * x + parallel_c() from left to the right edge of the last
+// blob. Like plot_to_row, reports and skips a row that has no blobs.
+//   row: row to draw; gradient: gradient of the lines; left: edge of block;
+//   colour: colour to draw in; rotation: rotation applied to every point.
+void plot_parallel_row(TO_ROW *row, float gradient, int32_t left,
+                       ScrollView::Color colour, FCOORD rotation) {
+  BLOBNBOX_IT it = row->blob_list();
+  if (it.empty()) {  // no last blob to take the right-hand end from
+    tprintf("No blobs in row at %g\n", row->parallel_c());
+    return;
+  }
+  const auto fleft = static_cast<float>(left);
+  it.move_to_last();
+  const float right = it.data()->bounding_box().right();
+  plot_blob_list(to_win, row->blob_list(), colour, ScrollView::BROWN);
+  to_win->Pen(colour);
+  FCOORD plot_pt(fleft, gradient * left + row->max_y());
+  plot_pt.rotate(rotation);
+  to_win->SetCursor(plot_pt.x(), plot_pt.y());
+  plot_pt = FCOORD(fleft, gradient * left + row->min_y());
+  plot_pt.rotate(rotation);
+  to_win->DrawTo(plot_pt.x(), plot_pt.y());
+  plot_pt = FCOORD(fleft, gradient * left + row->parallel_c());
+  plot_pt.rotate(rotation);
+  to_win->SetCursor(plot_pt.x(), plot_pt.y());
+  plot_pt = FCOORD(right, gradient * right + row->parallel_c());
+  plot_pt.rotate(rotation);
+  to_win->DrawTo(plot_pt.x(), plot_pt.y());
+}
+
+
+/**********************************************************************
+ * draw_occupation
+ *
+ * Draw the row occupation with points at or above the threshold in white
+ * and points below the threshold in blue.
+ **********************************************************************/
+
+// Draws the row occupation into the global to_win as a polyline with one
+// vertex per y in [min_y, max_y] at x = xleft + occupation / 10.0. The pen
+// is BLUE while the occupation is below its threshold and WHITE otherwise.
+// The thresholds are then drawn the same way in STEEL_BLUE.
+//   xleft:        edge of block
+//   ybottom:      bottom of block; y of the start point of both polylines
+//   min_y, max_y: coordinate limits; both arrays are indexed by y - min_y
+//   occupation:   projection counts
+//   thresholds:   for drop out
+void draw_occupation(int32_t xleft, int32_t ybottom, int32_t min_y,
+                     int32_t max_y, int32_t occupation[],
+                     int32_t thresholds[]) {
+  const auto fleft = static_cast<float>(xleft);
+  const auto fbottom = static_cast<float>(ybottom);
+
+  ScrollView::Color colour = ScrollView::WHITE;
+  to_win->Pen(colour);
+  to_win->SetCursor(fleft, fbottom);
+  for (int32_t y = min_y; y <= max_y; y++) {
+    const int32_t i = y - min_y;
+    // Only touch the pen when the colour actually changes.
+    const ScrollView::Color wanted =
+        occupation[i] < thresholds[i] ? ScrollView::BLUE : ScrollView::WHITE;
+    if (colour != wanted) {
+      colour = wanted;
+      to_win->Pen(colour);
+    }
+    to_win->DrawTo(fleft + occupation[i] / 10.0, static_cast<float>(y));
+  }
+
+  // Second pass: the thresholds themselves.
+  to_win->Pen(ScrollView::STEEL_BLUE);
+  to_win->SetCursor(fleft, fbottom);
+  for (int32_t y = min_y; y <= max_y; y++) {
+    to_win->DrawTo(fleft + thresholds[y - min_y] / 10.0, static_cast<float>(y));
+  }
+}
+
+
+/**********************************************************************
+ * draw_meanlines
+ *
+ * Draw the meanlines of the given block in the given colour.
+ **********************************************************************/
+
+// Draws the meanline of every row of the block into the global to_win:
+// the line y = gradient * x + parallel_c() + xheight, from left to the
+// right edge of the last blob of the row.
+//   block:    block to draw
+//   gradient: gradient of the lines
+//   left:     left edge of the block
+//   colour:   colour to draw in
+//   rotation: rotation applied to both end points of each line
+void draw_meanlines(TO_BLOCK *block, float gradient, int32_t left,
+                    ScrollView::Color colour, FCOORD rotation) {
+  const auto fleft = static_cast<float>(left);
+  TO_ROW_IT row_it = block->get_rows();
+  BLOBNBOX_IT blob_it;
+
+  to_win->Pen(colour);
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    TO_ROW *row = row_it.data();
+    blob_it.set_to_list(row->blob_list());
+    blob_it.move_to_last();
+    const float right = blob_it.data()->bounding_box().right();
+
+    FCOORD line_start(fleft,
+                      gradient * left + row->parallel_c() + row->xheight);
+    line_start.rotate(rotation);
+    to_win->SetCursor(line_start.x(), line_start.y());
+    FCOORD line_end(right,
+                    gradient * right + row->parallel_c() + row->xheight);
+    line_end.rotate(rotation);
+    to_win->DrawTo(line_end.x(), line_end.y());
+  }
+}
+
+
+/**********************************************************************
+ * plot_word_decisions
+ *
+ * Plot a row with words in different colours and fuzzy spaces
+ * highlighted.
+ **********************************************************************/
+
+// Plots the blobs of a row in win with the colour changing from word to
+// word, and highlights fuzzy spaces with filled rectangles. The colour
+// steps on at every unjoined blob whose gap to the previous blob exceeds
+// row->max_nonspace. If that gap is still below row->min_space it is filled
+// GOLDENROD when above row->space_threshold and CORAL otherwise. When
+// pitch > 0 and textord_show_fixed_cuts is set, plot_fp_cells() is called
+// for each run of counted blobs.
+//   win:   window to draw in
+//   pitch: pitch of the block
+//   row:   row to draw
+void plot_word_decisions(ScrollView* win, int16_t pitch, TO_ROW *row) {
+  ScrollView::Color colour = ScrollView::MAGENTA;  // current colour
+  int32_t prev_x = -INT16_MAX;  // right edge of the previous unjoined blob
+  int16_t blob_count = 0;       // blobs counted since start_it
+  BLOBNBOX_IT blob_it = row->blob_list();
+  BLOBNBOX_IT start_it = blob_it;  // start of the current run
+
+  // Draws the fixed pitch cells of the current run, if that is enabled.
+  const auto plot_run_cells = [&]() {
+    if (pitch > 0 && textord_show_fixed_cuts)
+      plot_fp_cells(win, colour, &start_it, pitch, blob_count,
+                    &row->projection, row->projection_left,
+                    row->projection_right,
+                    row->xheight * textord_projection_scale);
+  };
+
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX *blob = blob_it.data();
+    TBOX blob_box = blob->bounding_box();
+    const bool joined = blob->joined_to_prev();
+    const int32_t gap = blob_box.left() - prev_x;
+    // A gap wider than max_nonspace in front of an unjoined blob is a break.
+    if (!joined && gap > row->max_nonspace) {
+      const bool over_threshold = gap > row->space_threshold;
+      // The break also ends the current run when it reaches min_space or
+      // exceeds space_threshold, provided the run has any counted blobs.
+      if (blob_count > 0 && (gap >= row->min_space || over_threshold)) {
+        plot_run_cells();
+        blob_count = 0;
+        start_it = blob_it;
+      }
+      // Step to the next colour, wrapping from MAGENTA back to RED.
+      colour = colour == ScrollView::MAGENTA
+                   ? ScrollView::RED
+                   : static_cast<ScrollView::Color>(colour + 1);
+      // A break narrower than min_space is fuzzy: fill it with a rectangle.
+      if (gap < row->min_space) {
+        win->Brush(over_threshold ? ScrollView::GOLDENROD : ScrollView::CORAL);
+        win->Rectangle(prev_x, blob_box.bottom(), blob_box.left(),
+                       blob_box.top());
+      }
+    }
+    // Joined blobs neither move prev_x nor count towards the run.
+    if (!joined) prev_x = blob_box.right();
+    if (blob->cblob() != nullptr) {
+      blob->cblob()->plot(win, colour, colour);
+      if (!joined) blob_count++;
+    }
+  }
+  // Cells of the final run.
+  if (blob_count > 0) plot_run_cells();
+}
+
+
+/**********************************************************************
+ * plot_fp_cells
+ *
+ * Make a list of fixed pitch cuts and draw them.
+ **********************************************************************/
+
+// Computes the fixed pitch cuts for a run of blobs and draws each cut in
+// win as a vertical line over the height of the run's bounding box. Cuts
+// marked faked are drawn WHITE and all others in colour; colour itself is
+// never overwritten, so a faked cut cannot change the cuts drawn after it.
+//   win:        window to draw in
+//   colour:     colour of the non-faked cuts
+//   blob_it:    blobs of the run; handed to box_next() blob_count times
+//   pitch:      pitch of the block
+//   blob_count: number of real blobs
+//   projection, projection_left, projection_right, projection_scale:
+//               forwarded to check_pitch_sync2() / check_pitch_sync()
+void plot_fp_cells(ScrollView* win, ScrollView::Color colour,
+                   BLOBNBOX_IT *blob_it, int16_t pitch, int16_t blob_count,
+                   STATS *projection, int16_t projection_left,
+                   int16_t projection_right, float projection_scale) {
+  int16_t occupation = 0;  // passed to check_pitch_sync2(); not read here
+  FPSEGPT_LIST seg_list;   // list of cuts
+
+  if (pitsync_linear_version)
+    check_pitch_sync2(blob_it, blob_count, pitch, 2, projection,
+                      projection_left, projection_right,
+                      projection_scale, occupation, &seg_list, 0, 0);
+  else
+    check_pitch_sync(blob_it, blob_count, pitch, 2, projection, &seg_list);
+
+  TBOX word_box = blob_it->data()->bounding_box();
+  for (; blob_count > 0; blob_count--)
+    word_box += box_next(blob_it);
+  FPSEGPT_IT seg_it(&seg_list);
+  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
+    FPSEGPT *segpt = seg_it.data();
+    win->Pen(segpt->faked ? ScrollView::WHITE : colour);
+    win->Line(segpt->position(), word_box.bottom(),
+              segpt->position(), word_box.top());
+  }
+}
+
+
+/**********************************************************************
+ * plot_fp_cells2
+ *
+ * Draw the given list of fixed pitch cuts.
+ **********************************************************************/
+
+// Draws the given fixed pitch cuts in win as vertical lines over the height
+// of the bounding box of the row's blobs. Cuts marked faked are drawn WHITE
+// and all others in colour; colour itself is never overwritten, so a faked
+// cut cannot change the cuts drawn after it.
+//   win:      window to draw in
+//   colour:   colour of the non-faked cuts
+//   row:      row whose blobs give the vertical extent of the lines
+//   seg_list: segments to plot
+void plot_fp_cells2(ScrollView* win, ScrollView::Color colour, TO_ROW *row,
+                    FPSEGPT_LIST *seg_list) {
+  BLOBNBOX_IT blob_it = row->blob_list();
+  TBOX word_box = blob_it.data()->bounding_box();
+  // No increment here: the loop relies on box_next() to move blob_it on.
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();)
+    word_box += box_next(&blob_it);
+
+  FPSEGPT_IT seg_it = seg_list;
+  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
+    FPSEGPT *segpt = seg_it.data();
+    // Pick the pen per cut instead of changing colour.
+    win->Pen(segpt->faked ? ScrollView::WHITE : colour);
+    win->Line(segpt->position(), word_box.bottom(),
+              segpt->position(), word_box.top());
+  }
+}
+
+
+/**********************************************************************
+ * plot_row_cells
+ *
+ * Draw the given list of cells, shifted horizontally by xshift.
+ **********************************************************************/
+
+// Draws each cell of the list in win as a vertical line at
+// cell->x() + xshift over the height of the bounding box of the row's blobs.
+//   win:    window to draw in
+//   colour: colour of lines
+//   row:    row whose blobs give the vertical extent of the lines
+//   xshift: amount of shift
+//   cells:  cells to draw
+void plot_row_cells(ScrollView* win, ScrollView::Color colour, TO_ROW *row,
+                    float xshift, ICOORDELT_LIST *cells) {
+  BLOBNBOX_IT blob_it = row->blob_list();
+  TBOX word_box = blob_it.data()->bounding_box();
+  // No increment here: the loop relies on box_next() to move blob_it on.
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();)
+    word_box += box_next(&blob_it);
+
+  win->Pen(colour);
+  ICOORDELT_IT cell_it = cells;
+  for (cell_it.mark_cycle_pt(); !cell_it.cycled_list(); cell_it.forward()) {
+    const auto x = cell_it.data()->x() + xshift;
+    win->Line(x, word_box.bottom(), x, word_box.top());
+  }
+}
+
+#endif // !GRAPHICS_DISABLED
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/drawtord.h b/tesseract/src/textord/drawtord.h
new file mode 100644
index 00000000..e88c4896
--- /dev/null
+++ b/tesseract/src/textord/drawtord.h
@@ -0,0 +1,103 @@
+/**********************************************************************
+ * File:        drawtord.h  (Formerly drawto.h)
+ * Description: Draw things to do with textord.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           DRAWTORD_H
+#define           DRAWTORD_H
+
+#include          "params.h"
+#include          "scrollview.h"
+#include          "pitsync1.h"
+#include          "blobbox.h"
+
+namespace tesseract {
+
+#define NO_SMD        "none"
+
+extern BOOL_VAR_H (textord_show_fixed_cuts, false,
+"Draw fixed pitch cell boundaries");
+extern STRING_VAR_H (to_debugfile, DEBUG_WIN_NAME, "Name of debugfile");
+extern STRING_VAR_H (to_smdfile, NO_SMD, "Name of SMD file");
+extern ScrollView* to_win;
+extern FILE *to_debug;
+// Creates a static display window for textord, and returns a pointer to it.
+ScrollView* create_to_win(ICOORD page_tr);
+void close_to_win();  // Updates the textord window if any; it is not destroyed.
+void create_todebug_win();  //make gradients win
+void plot_box_list(                      //draw a list of blobs
+                   ScrollView* win,           //window to draw in
+                   BLOBNBOX_LIST *list,  //blob list
+                   ScrollView::Color body_colour    //colour to draw
+                  );
+void plot_to_row(                 //draw a row
+                 TO_ROW *row,     //row to draw
+                 ScrollView::Color colour,   //colour to draw in
+                 FCOORD rotation  //rotation for line
+                );
+void plot_parallel_row(                 //draw a row
+                       TO_ROW *row,     //row to draw
+                       float gradient,  //gradients of lines
+                       int32_t left,      //edge of block
+                       ScrollView::Color colour,   //colour to draw in
+                       FCOORD rotation  //rotation for line
+                      );
+void draw_occupation (           //draw projection
+int32_t xleft,                     //edge of block
+int32_t ybottom,                   //bottom of block
+int32_t min_y,                     //coordinate limits
+int32_t max_y, int32_t occupation[], //projection counts
+int32_t thresholds[]               //for drop out
+);
+void draw_meanlines(                  //draw a block
+                    TO_BLOCK *block,  //block to draw
+                    float gradient,   //gradients of lines
+                    int32_t left,       //edge of block
+                    ScrollView::Color colour,    //colour to draw in
+                    FCOORD rotation   //rotation for line
+                   );
+void plot_word_decisions(              //draw words
+                         ScrollView* win,   //window to draw in
+                         int16_t pitch,  //of block
+                         TO_ROW *row   //row to draw
+                        );
+void plot_fp_cells(                        //compute and draw fixed pitch cuts
+                   ScrollView* win,             //window to draw in
+                   ScrollView::Color colour,          //colour of lines
+                   BLOBNBOX_IT *blob_it,   //blobs
+                   int16_t pitch,            //of block
+                   int16_t blob_count,       //no of real blobs
+                   STATS *projection,      //vertical
+                   int16_t projection_left,  //edges
+                   int16_t projection_right,
+                   float projection_scale);  //scale factor
+void plot_fp_cells2(                        //draw given fixed pitch cuts
+                    ScrollView* win,             //window to draw in
+                    ScrollView::Color colour,          //colour of lines
+                    TO_ROW *row,            //for location
+                    FPSEGPT_LIST *seg_list  //segments to plot
+                   );
+void plot_row_cells(                       //draw given cells
+                    ScrollView* win,            //window to draw in
+                    ScrollView::Color colour,         //colour of lines
+                    TO_ROW *row,           //for location
+                    float xshift,          //amount of shift
+                    ICOORDELT_LIST *cells  //cells to draw
+                   );
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/edgblob.cpp b/tesseract/src/textord/edgblob.cpp
new file mode 100644
index 00000000..4383907f
--- /dev/null
+++ b/tesseract/src/textord/edgblob.cpp
@@ -0,0 +1,462 @@
+/**********************************************************************
+ * File:        edgblob.cpp (Formerly edgeloop.c)
+ * Description: Functions to clean up an outline before approximation.
+ * Author:      Ray Smith
+ *
+ *(C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0(the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+ // Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "edgblob.h"
+
+#include "scanedg.h"
+#include "edgloop.h"
+
+namespace tesseract {
+
+// Control parameters used in outline_complexity(), which rejects an outline
+// if any one of the 3 conditions is satisfied:
+//  - number of children exceeds edges_max_children_per_outline
+//  - number of nested layers exceeds edges_max_children_layers
+//  - joint complexity exceeds edges_children_count_limit(as in child_count())
+static BOOL_VAR(edges_use_new_outline_complexity, false,
+                "Use the new outline complexity module");
+static INT_VAR(edges_max_children_per_outline, 10,
+               "Max number of children inside a character outline");
+static INT_VAR(edges_max_children_layers, 5,
+               "Max layers of nested children inside a character outline");
+static BOOL_VAR(edges_debug, false,
+                "turn on debugging for this module");
+
+static INT_VAR(edges_children_per_grandchild, 10,
+               "Importance ratio for chucking outlines");
+static INT_VAR(edges_children_count_limit, 45,
+               "Max holes allowed in blob");
+static BOOL_VAR(edges_children_fix, false,
+                "Remove boxy parents of char-like children");
+static INT_VAR(edges_min_nonhole, 12,
+               "Min pixels for potential char in box");
+static INT_VAR(edges_patharea_ratio, 40,
+               "Max lensq/area for acceptable child outline");
+static double_VAR(edges_childarea, 0.5,
+                  "Min area fraction of child outline");
+static double_VAR(edges_boxarea, 0.875,
+                  "Min area fraction of grandchild for box");
+
+/**
+ * @name OL_BUCKETS::OL_BUCKETS
+ *
+ * Construct an array of buckets for associating outlines into blobs.
+ */
+
+OL_BUCKETS::OL_BUCKETS(ICOORD bleft, ICOORD tright)
+    : bl(bleft), tr(tright) {
+  // One bucket per BUCKETSIZE x BUCKETSIZE cell; the +1 turns the index of
+  // the cell holding the top-right corner into a cell count.
+  bxdim = (tright.x() - bleft.x()) / BUCKETSIZE + 1;
+  bydim = (tright.y() - bleft.y()) / BUCKETSIZE + 1;
+  buckets.reset(new C_OUTLINE_LIST[bxdim * bydim]);
+  index = 0;
+}
+
+
+/**
+ * @name OL_BUCKETS::operator()
+ *
+ * Return a pointer to a list of C_OUTLINEs corresponding to the
+ * given pixel coordinates.
+ */
+
+C_OUTLINE_LIST *OL_BUCKETS::operator()(int16_t x, int16_t y) {
+  // Buckets are stored row by row, bxdim buckets per row.
+  const auto row = (y - bl.y()) / BUCKETSIZE;
+  const auto col = (x - bl.x()) / BUCKETSIZE;
+  return &buckets[row * bxdim + col];
+}
+
+
+/**
+ * @name OL_BUCKETS::outline_complexity
+ *
+ * This is the new version of count_child.
+ *
+ * The goal of this function is to determine if an outline and its
+ * interiors could be part of a character blob.  This is done by
+ * computing a "complexity" index for the outline, which is the return
+ * value of this function, and checking it against a threshold.
+ * The max_count is used for short-circuiting the recursion and forcing
+ * a rejection that guarantees to fail the threshold test.
+ * The complexity F for outline X with N children X[i] is
+ *   F(X) = N + sum_i F(X[i]) * edges_children_per_grandchild
+ * so each layer of nesting increases complexity exponentially.
+ * An outline can be rejected as a text blob candidate if its complexity
+ * is too high, has too many children(likely a container), or has too
+ * many layers of nested inner loops.  This has the side-effect of
+ * flattening out boxed or reversed video text regions.
+ */
+
+// Returns the complexity of outline: its number of children plus
+// edges_children_per_grandchild times the complexity of each child that is
+// recursed into (only done while part of max_count is still unused). Stops
+// early with a result above max_count when the nesting is deeper than
+// edges_max_children_layers, when more than edges_max_children_per_outline
+// children are found, or when the running total exceeds max_count.
+//   outline:   parent outline
+//   max_count: max output
+//   depth:     recursion depth
+int32_t OL_BUCKETS::outline_complexity(C_OUTLINE *outline, int32_t max_count,
+                                       int16_t depth) {
+  TBOX olbox = outline->bounding_box();
+  // Range of buckets overlapped by the bounding box of the outline.
+  const int16_t xmin = (olbox.left() - bl.x()) / BUCKETSIZE;
+  const int16_t xmax = (olbox.right() - bl.x()) / BUCKETSIZE;
+  const int16_t ymin = (olbox.bottom() - bl.y()) / BUCKETSIZE;
+  const int16_t ymax = (olbox.top() - bl.y()) / BUCKETSIZE;
+  if (++depth > edges_max_children_layers)  // nested loops are too deep
+    return max_count + depth;
+
+  int32_t child_count = 0;
+  int32_t grandchild_count = 0;
+  C_OUTLINE_IT child_it;
+  for (int16_t yindex = ymin; yindex <= ymax; yindex++) {
+    for (int16_t xindex = xmin; xindex <= xmax; xindex++) {
+      child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
+      if (child_it.empty())
+        continue;
+      for (child_it.mark_cycle_pt(); !child_it.cycled_list();
+           child_it.forward()) {
+        C_OUTLINE *child = child_it.data();
+        // Skip the parent itself and anything that is not below it.
+        const bool is_child = child != outline && *child < *outline;
+        if (!is_child)
+          continue;
+        child_count++;
+        if (child_count > edges_max_children_per_outline) {   // too fragmented
+          if (edges_debug)
+            tprintf("Discard outline on child_count=%d > "
+                    "max_children_per_outline=%d\n",
+                    child_count,
+                    static_cast<int32_t>(edges_max_children_per_outline));
+          return max_count + child_count;
+        }
+        // Recurse into the child only while some of max_count is left.
+        const int32_t remaining_count =
+            max_count - child_count - grandchild_count;
+        if (remaining_count > 0)
+          grandchild_count += edges_children_per_grandchild *
+                              outline_complexity(child, remaining_count, depth);
+        if (child_count + grandchild_count > max_count) {  // too complex
+          if (edges_debug)
+            tprintf("Disgard outline on child_count=%d + grandchild_count=%d "
+                    "> max_count=%d\n",
+                    child_count, grandchild_count, max_count);
+          return child_count + grandchild_count;
+        }
+      }
+    }
+  }
+  return child_count + grandchild_count;
+}
+
+
+/**
+ * @name OL_BUCKETS::count_children
+ *
+ * Find number of descendants of this outline.
+ */
+// TODO(rays) Merge with outline_complexity.
+int32_t OL_BUCKETS::count_children(                   // count descendants; > max_count = discard
+                                 C_OUTLINE *outline,  // parent outline
+                                 int32_t max_count    // count above which the parent is discarded
+                                ) {
+  bool parent_box;              // could it be boxy
+  int16_t xmin, xmax;           // bucket index limits
+  int16_t ymin, ymax;
+  int16_t xindex, yindex;       // current bucket
+  C_OUTLINE *child;             // current child
+  int32_t child_count;          // no of children
+  int32_t grandchild_count;     // weighted no of deeper descendants
+  int32_t parent_area;          // |outer_area()| of the parent
+  float max_parent_area;        // parent bounding box area * edges_boxarea
+  int32_t child_area;           // |outer_area()| of current child
+  int32_t child_length;         // pathlength() of current child
+  TBOX olbox;                   // bounding box of the parent
+  C_OUTLINE_IT child_it;        // search iterator
+
+  olbox = outline->bounding_box();
+  xmin =(olbox.left() - bl.x()) / BUCKETSIZE;  // bucket range covered by olbox
+  xmax =(olbox.right() - bl.x()) / BUCKETSIZE;
+  ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE;
+  ymax =(olbox.top() - bl.y()) / BUCKETSIZE;
+  child_count = 0;
+  grandchild_count = 0;
+  parent_area = 0;
+  max_parent_area = 0;
+  parent_box = true;  // until a test below says otherwise
+  for (yindex = ymin; yindex <= ymax; yindex++) {
+    for (xindex = xmin; xindex <= xmax; xindex++) {
+      child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
+      if (child_it.empty())
+        continue;
+      for (child_it.mark_cycle_pt(); !child_it.cycled_list();
+           child_it.forward()) {
+        child = child_it.data();
+        if (child != outline && *child < *outline) {  // skip parent and non-children
+          child_count++;
+          if (child_count <= max_count) {  // children alone are still within max_count
+            int max_grand =(max_count - child_count) /
+                            edges_children_per_grandchild;
+            if (max_grand > 0)  // recurse with a share of what is left
+              grandchild_count += count_children(child, max_grand) *
+                                  edges_children_per_grandchild;
+            else  // no share left: recurse with a limit of 1, unweighted
+              grandchild_count += count_children(child, 1);
+          }
+          if (child_count + grandchild_count > max_count) {  // too many descendants
+            if (edges_debug)
+              tprintf("Discarding parent with child count=%d, gc=%d\n",
+                      child_count,grandchild_count);
+            return child_count + grandchild_count;
+          }
+          if (parent_area == 0) {  // parent not measured yet (or measured as 0)
+            parent_area = outline->outer_area();
+            if (parent_area < 0)
+              parent_area = -parent_area;
+            max_parent_area = outline->bounding_box().area() * edges_boxarea;
+            if (parent_area < max_parent_area)  // parent fills too little of its box
+              parent_box = false;
+          }
+          if (parent_box &&
+              (!edges_children_fix ||
+               child->bounding_box().height() > edges_min_nonhole)) {
+            child_area = child->outer_area();
+            if (child_area < 0)
+              child_area = -child_area;
+            if (edges_children_fix) {
+              if (parent_area - child_area < max_parent_area) {
+                parent_box = false;
+                continue;  // remaining checks are skipped for this child
+              }
+              if (grandchild_count > 0) {  // deeper descendants have been counted
+                if (edges_debug)
+                  tprintf("Discarding parent of area %d, child area=%d, max%g "
+                          "with gc=%d\n",
+                          parent_area, child_area, max_parent_area,
+                          grandchild_count);
+                return max_count + 1;  // a value above max_count
+              }
+              child_length = child->pathlength();
+              if (child_length * child_length >  // path long relative to area
+                  child_area * edges_patharea_ratio) {
+                if (edges_debug)
+                  tprintf("Discarding parent of area %d, child area=%d, max%g "
+                          "with child length=%d\n",
+                          parent_area, child_area, max_parent_area,
+                          child_length);
+                return max_count + 1;  // a value above max_count
+              }
+            }
+            if (child_area < child->bounding_box().area() * edges_childarea) {
+              if (edges_debug)
+                tprintf("Discarding parent of area %d, child area=%d, max%g "
+                        "with child rect=%d\n",
+                        parent_area, child_area, max_parent_area,
+                        child->bounding_box().area());
+              return max_count + 1;  // a value above max_count
+            }
+          }
+        }
+      }
+    }
+  }
+  return child_count + grandchild_count;
+}
+
+
+
+
+/**
+ * @name OL_BUCKETS::extract_children
+ *
+ * Move the children of this outline (those that compare below it) out of
+ * the buckets spanned by its bounding box and onto the destination list.
+ */
+
+void OL_BUCKETS::extract_children(                     // single level get
+                                  C_OUTLINE *outline,  // parent outline
+                                  C_OUTLINE_IT *it     // destination iterator
+                                 ) {
+  // Range of bucket columns/rows covered by the parent's bounding box.
+  const TBOX parent_box = outline->bounding_box();
+  const int16_t first_col = (parent_box.left() - bl.x()) / BUCKETSIZE;
+  const int16_t last_col = (parent_box.right() - bl.x()) / BUCKETSIZE;
+  const int16_t first_row = (parent_box.bottom() - bl.y()) / BUCKETSIZE;
+  const int16_t last_row = (parent_box.top() - bl.y()) / BUCKETSIZE;
+
+  C_OUTLINE_IT bucket_it;          // walks one bucket at a time
+  for (int16_t row = first_row; row <= last_row; row++) {
+    for (int16_t col = first_col; col <= last_col; col++) {
+      bucket_it.set_to_list(&buckets[row * bxdim + col]);
+      bucket_it.mark_cycle_pt();
+      while (!bucket_it.cycled_list()) {
+        // operator< picks the outlines that are treated as children.
+        if (*bucket_it.data() < *outline) {
+          it->add_after_then_move(bucket_it.extract());
+        }
+        bucket_it.forward();
+      }
+    }
+  }
+}
+
+
+/**
+ * @name extract_edges
+ *
+ * Collect the outlines that block_edges finds in the block, then pass them
+ * to outlines_to_blobs along with the corners of the block's bounding box.
+ */
+void extract_edges(Pix* pix,        // thresholded image
+                   BLOCK *block) {  // block to scan
+  ICOORD box_bottom_left;          // corners of the block box
+  ICOORD box_top_right;
+  C_OUTLINE_LIST found_outlines;   // outlines in block
+  C_OUTLINE_IT found_it(&found_outlines);
+
+  // Run the edge detector; it is given the iterator over the local list.
+  block_edges(pix, &(block->pdblk), &found_it);
+  block->pdblk.bounding_box(box_bottom_left, box_top_right);
+  outlines_to_blobs(block, box_bottom_left, box_top_right, &found_outlines);
+}
+
+
+/**
+ * @name outlines_to_blobs
+ *
+ * Bucket-sort the outlines over the block box, then drain the buckets so
+ * that the outlines end up grouped into blobs of the block.
+ */
+void outlines_to_blobs(BLOCK *block,    // block to scan
+                       ICOORD bleft,    // bottom-left of block box
+                       ICOORD tright,   // top-right of block box
+                       C_OUTLINE_LIST *outlines) {  // outlines in block
+  // Buckets are set up from the two corners of the block box.
+  OL_BUCKETS outline_grid(bleft, tright);
+
+  // Step 1: distribute the outlines; step 2: turn them into blobs.
+  fill_buckets(outlines, &outline_grid);
+  empty_buckets(block, &outline_grid);
+}
+
+
+/**
+ * @name fill_buckets
+ *
+ * Move every outline from the list into the bucket selected by the
+ * left and bottom edges of its bounding box.
+ */
+void fill_buckets(                           // distribute outlines
+                  C_OUTLINE_LIST *outlines,  // outlines in block
+                  OL_BUCKETS *buckets        // output buckets
+                 ) {
+  C_OUTLINE_IT source_it(outlines);  // walks the input list
+  C_OUTLINE_IT target_it;            // points into the chosen bucket
+
+  source_it.mark_cycle_pt();
+  while (!source_it.cycled_list()) {
+    C_OUTLINE *detached = source_it.extract();  // take off list
+    const TBOX box = detached->bounding_box();
+    // The bucket is looked up from the box's left/bottom corner.
+    target_it.set_to_list((*buckets)(box.left(), box.bottom()));
+    target_it.add_to_end(detached);
+    source_it.forward();
+  }
+}
+
+
+/**
+ * @name empty_buckets
+ *
+ * Drain the buckets: repeatedly pick an outermost outline, gather its
+ * children, and construct blobs on the block's blob or reject list.
+ */
+void empty_buckets(                     // buckets to blobs
+                   BLOCK *block,        // block to scan
+                   OL_BUCKETS *buckets  // filled buckets
+                  ) {
+  C_OUTLINE_LIST family;         // one parent plus its children
+  C_OUTLINE_IT family_it = &family;
+  C_OUTLINE_IT scan_it = buckets->start_scan();
+  C_OUTLINE_IT outer_it;         // parent candidate so far
+  C_BLOB_IT accepted_it = block->blob_list();
+  C_BLOB_IT rejected_it = block->reject_blobs();
+
+  while (!scan_it.empty()) {
+    family_it.set_to_list(&family);
+    // Walk the bucket back round to its first element; whenever the candidate
+    // compares below the outline reached, that outline becomes the candidate.
+    do {
+      outer_it = scan_it;
+      do {
+        scan_it.forward();
+      } while (!(scan_it.at_first() ||
+                 *outer_it.data() < *scan_it.data()));
+    } while (!scan_it.at_first());
+
+    // Detach the parent, pull in its children, and make blobs.
+    family_it.add_after_then_move(outer_it.extract());
+    const bool good_blob = capture_children(buckets, &rejected_it, &family_it);
+    C_BLOB::ConstructBlobsFromOutlines(good_blob, &family, &accepted_it,
+                                       &rejected_it);
+
+    scan_it.set_to_list(buckets->scan_next());
+  }
+}
+
+
+/**
+ * @name capture_children
+ *
+ * Obtain the child count of the outline under the iterator. When the count
+ * is over edges_children_count_limit, declare the outline illegal by
+ * returning false; otherwise move any children to the output list and
+ * return true.
+ */
+bool capture_children(         // find children
+        OL_BUCKETS* buckets,   // bucket sort class
+        C_BLOB_IT* reject_it,  // dead grandchildren (not used here)
+        C_OUTLINE_IT* blob_it  // output outlines
+) {
+  C_OUTLINE *parent = blob_it->data();  // master outline
+
+  // edges_use_new_outline_complexity selects the counting routine.
+  const int32_t descendants =
+      edges_use_new_outline_complexity
+          ? buckets->outline_complexity(parent, edges_children_count_limit, 0)
+          : buckets->count_children(parent, edges_children_count_limit);
+
+  // Over the limit: reject the outline without extracting anything.
+  if (descendants > edges_children_count_limit) {
+    return false;
+  }
+  if (descendants > 0) {
+    buckets->extract_children(parent, blob_it);
+  }
+
+  return true;
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/edgblob.h b/tesseract/src/textord/edgblob.h
new file mode 100644
index 00000000..a3b7ac1b
--- /dev/null
+++ b/tesseract/src/textord/edgblob.h
@@ -0,0 +1,100 @@
+/**********************************************************************
+ * File:        edgblob.h  (Formerly edgeloop.h)
+ * Description: Functions to clean up an outline before approximation.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           EDGBLOB_H
+#define           EDGBLOB_H
+
+#include          "scrollview.h"
+#include          "params.h"
+#include          "ocrblock.h"
+#include          "coutln.h"
+#include          "crakedge.h"
+
+#include <memory>
+
+namespace tesseract {
+
+#define BUCKETSIZE      16
+
+class OL_BUCKETS  //array of outline lists used to bucket-sort outlines
+{
+  public:
+    OL_BUCKETS(               //constructor
+               ICOORD bleft,  //bottom-left corner
+               ICOORD tright);  //top-right corner
+
+    ~OL_BUCKETS () = default;
+
+    C_OUTLINE_LIST *operator () (//bucket list for a point
+      int16_t x,                   //image coords
+      int16_t y);
+                                 //reset scan; first non-empty bucket, or the last bucket if all are empty
+    C_OUTLINE_LIST *start_scan() {
+      for (index = 0; buckets[index].empty () && index < bxdim * bydim - 1;
+        index++);
+      return &buckets[index];
+    }
+                                 //resume at current bucket; returns it if non-empty, else the next non-empty (or last) one
+    C_OUTLINE_LIST *scan_next() {
+      for (; buckets[index].empty () && index < bxdim * bydim - 1; index++);
+      return &buckets[index];
+    }
+    int32_t count_children(                     //recursive sum
+                         C_OUTLINE *outline,  //parent outline
+                         int32_t max_count);    // max output
+    int32_t outline_complexity(                 // new version of count_children
+                         C_OUTLINE *outline,  // parent outline
+                         int32_t max_count,     // max output
+                         int16_t depth);        // level of recursion
+    void extract_children(                     //single level get
+                          C_OUTLINE *outline,  //parent outline
+                          C_OUTLINE_IT *it);   //destination iterator
+
+  private:
+    std::unique_ptr<C_OUTLINE_LIST[]> buckets;    //array of buckets, indexed y * bxdim + x
+    int16_t bxdim;                 //size of array in x
+    int16_t bydim;                 //size of array in y
+    ICOORD bl;                   //bottom-left corner
+    ICOORD tr;                   //top-right corner
+    int32_t index;                 //current bucket of the extraction scan
+};
+
+void extract_edges(Pix* pix,        // thresholded image
+                   BLOCK* block);   // block to scan
+void outlines_to_blobs(               //outlines to blobs
+                       BLOCK *block,  //block to scan
+                       ICOORD bleft,  //block box corners
+                       ICOORD tright,
+                       C_OUTLINE_LIST *outlines);  //outlines in block
+void fill_buckets(                           //outlines into buckets
+                  C_OUTLINE_LIST *outlines,  //outlines in block
+                  OL_BUCKETS *buckets        //output buckets
+                 );
+void empty_buckets(                     //buckets into blobs
+                   BLOCK *block,        //block to scan
+                   OL_BUCKETS *buckets  //buckets to drain
+                  );
+bool capture_children(                       //find children
+        OL_BUCKETS* buckets,   //bucket sort class
+        C_BLOB_IT* reject_it,  //dead grandchildren
+        C_OUTLINE_IT* blob_it  //output outlines
+);
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/edgloop.cpp b/tesseract/src/textord/edgloop.cpp
new file mode 100644
index 00000000..33cf3a02
--- /dev/null
+++ b/tesseract/src/textord/edgloop.cpp
@@ -0,0 +1,162 @@
+/**********************************************************************
+ * File:        edgloop.cpp  (Formerly edgeloop.c)
+ * Description: Functions to clean up an outline before approximation.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+ // Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "scanedg.h"
+
+#include "edgloop.h"
+
+namespace tesseract {
+
+#define MINEDGELENGTH   8        // min decent length
+
+/**********************************************************************
+ * complete_edge
+ *
+ * Check the closed edge loop and, when check_path_legal classifies it as
+ * RED or BLUE, turn it into a C_OUTLINE added after the iterator position.
+ * Loops with any other classification are ignored.
+ **********************************************************************/
+void complete_edge(CRACKEDGE *start,  //start of loop
+                   C_OUTLINE_IT* outline_it) {
+  // Only RED and BLUE results lead to an outline being built.
+  const ScrollView::Color verdict = check_path_legal (start);
+  if (verdict != ScrollView::RED && verdict != ScrollView::BLUE) {
+    return;
+  }
+
+  // Measuring the loop also moves start to its top-left edge step.
+  ICOORD box_min;                //bounding box corners
+  ICOORD box_max;
+  const int16_t step_count = loop_bounding_box (start, box_min, box_max);
+
+                                 //add to list
+  outline_it->add_after_then_move (
+      new C_OUTLINE (start, box_min, box_max, step_count));
+}
+
+
+/**********************************************************************
+ * check_path_legal
+ *
+ * Classify an edge loop by its length and by the sum of its chain code
+ * differences. The result doubles as the colour for drawing the raw outline:
+ *   RED     - normal black-inside outline (chain sum +4)
+ *   BLUE    - white-inside outline (chain sum -4)
+ *   MAGENTA - closed, but shorter than MINEDGELENGTH
+ *   YELLOW  - not closed within C_OUTLINE::kMaxOutlineLength steps
+ *   GREEN   - closed and long enough, but the chain sum is illegal
+ **********************************************************************/
+
+ScrollView::Color check_path_legal(                  //certify outline
+                        CRACKEDGE *start  //start of loop
+                       ) {
+  constexpr ERRCODE ED_ILLEGAL_SUM("Illegal sum of chain codes");
+  int32_t step_count = 0;        //steps walked so far
+  int32_t turn_total = 0;        //sum of chain code differences
+  int previous_dir = start->prev->stepdir;
+  CRACKEDGE *cursor = start;     //current point
+
+  do {
+    step_count++;
+    const int current_dir = cursor->stepdir;
+    if (current_dir != previous_dir) {
+      // Fold differences beyond +/-2 back by a full turn of 4.
+      int turn = current_dir - previous_dir;
+      if (turn > 2) {
+        turn -= 4;
+      } else if (turn < -2) {
+        turn += 4;
+      }
+      turn_total += turn;
+      previous_dir = current_dir;
+    }
+    cursor = cursor->next;
+  } while (cursor != start && step_count < C_OUTLINE::kMaxOutlineLength);
+
+  // Failure cases in priority order: an unclosed loop wins over a short
+  // one, which in turn wins over an illegal chain sum.
+  if (cursor != start) {
+    return ScrollView::YELLOW;
+  }
+  if (step_count < MINEDGELENGTH) {
+    return ScrollView::MAGENTA;
+  }
+  if (turn_total != 4 && turn_total != -4) {
+    ED_ILLEGAL_SUM.error ("check_path_legal", TESSLOG, "chainsum=%d",
+      turn_total);
+    return ScrollView::GREEN;
+  }
+
+                                 //colour on inside
+  return turn_total < 0 ? ScrollView::BLUE : ScrollView::RED;
+}
+
+/**********************************************************************
+ * loop_bounding_box
+ *
+ * Walk once round the edge loop to find its bounding box and its length.
+ * On return, start has been moved to the leftmost step on the top row.
+ **********************************************************************/
+
+int16_t loop_bounding_box(                    //get bounding box
+                        CRACKEDGE *&start,  //edge loop
+                        ICOORD &botleft,    //bounding box
+                        ICOORD &topright) {
+  CRACKEDGE *cursor = start;          //current point
+  CRACKEDGE *top_left_step = start;   //leftmost step on the top row
+  int16_t top_row_min_x = start->pos.x ();
+  int16_t step_count = 0;             //steps in the loop
+
+  // The box starts as the single point at the original start position.
+  botleft = topright = ICOORD (start->pos.x (), start->pos.y ());
+  do {
+    cursor = cursor->next;
+    const auto x = cursor->pos.x ();
+    const auto y = cursor->pos.y ();
+    // Stretch the box horizontally.
+    if (x < botleft.x ()) {
+      botleft.set_x (x);
+    } else if (x > topright.x ()) {
+      topright.set_x (x);
+    }
+    // Stretch the box vertically and track the top-left step.
+    if (y < botleft.y ()) {
+      botleft.set_y (y);
+    } else if (y > topright.y ()) {
+      // New top row: this step is the leftmost seen on it so far.
+      top_left_step = cursor;
+      top_row_min_x = x;
+      topright.set_y (y);
+    } else if (y == topright.y () && x < top_row_min_x) {
+      // Same top row, further left.
+      top_row_min_x = x;
+      top_left_step = cursor;
+    }
+    step_count++;                     //count elements
+  } while (cursor != start);
+
+  start = top_left_step;              //shift it to topleft
+  return step_count;
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/edgloop.h b/tesseract/src/textord/edgloop.h
new file mode 100644
index 00000000..26cd2f21
--- /dev/null
+++ b/tesseract/src/textord/edgloop.h
@@ -0,0 +1,44 @@
+/**********************************************************************
+ * File:        edgloop.h  (Formerly edgeloop.h)
+ * Description: Functions to clean up an outline before approximation.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           EDGLOOP_H
+#define           EDGLOOP_H
+
+#include          "scrollview.h"
+#include          "params.h"
+#include          "pdblock.h"
+#include          "coutln.h"
+#include          "crakedge.h"
+
+namespace tesseract {
+
+#define BUCKETSIZE      16
+
+void complete_edge(CRACKEDGE *start,  //start of loop
+                   C_OUTLINE_IT* outline_it);  //receives the new outline
+ScrollView::Color check_path_legal(                  //certify outline
+                        CRACKEDGE *start  //start of loop
+                       );
+int16_t loop_bounding_box(                    //get bounding box, returns loop length
+                        CRACKEDGE *&start,  //edge loop; moved to its topleft step
+                        ICOORD &botleft,    //bounding box
+                        ICOORD &topright);
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/equationdetectbase.cpp b/tesseract/src/textord/equationdetectbase.cpp
new file mode 100644
index 00000000..1d40ed8e
--- /dev/null
+++ b/tesseract/src/textord/equationdetectbase.cpp
@@ -0,0 +1,64 @@
+///////////////////////////////////////////////////////////////////////
+// File:        equationdetectbase.cpp
+// Description: The base class equation detection class.
+// Author:      Zongyi (Joe) Liu (joeliu@google.com)
+// Created:     Fri Aug 31 11:13:01 PST 2011
+//
+// (C) Copyright 2011, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "allheaders.h"
+#include "blobbox.h"
+#include "equationdetectbase.h"
+
+namespace tesseract {
+
+// Destructor (defaulted, but defined out of line).
+// It is defined in this file so that the compiler can create a single vtable
+// instead of weak vtables in every compilation unit.
+EquationDetectBase::~EquationDetectBase() = default;
+
+// Debug helper: draw blob's bounding box on the 32 bpp pix in the colour
+// that corresponds to blob->special_text_type().
+void EquationDetectBase::RenderSpecialText(Pix* pix, BLOBNBOX* blob) {
+  ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32 && blob != nullptr);
+  const TBOX& tbox = blob->bounding_box();
+  int height = pixGetHeight(pix);
+  const int box_width = 5;
+
+  // Tesseract's origin is the bottom left, leptonica's the top left: flip y.
+  Box *box = boxCreate(tbox.left(), height - tbox.top(),
+                         tbox.width(), tbox.height());
+  switch (blob->special_text_type()) {
+    case BSTT_MATH:  // Red box.
+      pixRenderBoxArb(pix, box, box_width, 255, 0, 0);
+      break;
+    case BSTT_DIGIT:  // cyan box.
+      pixRenderBoxArb(pix, box, box_width, 0, 255, 255);
+      break;
+    case BSTT_ITALIC:  // Green box.
+      pixRenderBoxArb(pix, box, box_width, 0, 255, 0);
+      break;
+    case BSTT_UNCLEAR:  // blue box (must differ from the green of BSTT_ITALIC).
+      pixRenderBoxArb(pix, box, box_width, 0, 0, 255);
+      break;
+    case BSTT_NONE:
+    default:
+      // yellow box.
+      pixRenderBoxArb(pix, box, box_width, 255, 255, 0);
+      break;
+  }
+  boxDestroy(&box);
+}
+
+}  // namespace tesseract
diff --git a/tesseract/src/textord/equationdetectbase.h b/tesseract/src/textord/equationdetectbase.h
new file mode 100644
index 00000000..7f84bd09
--- /dev/null
+++ b/tesseract/src/textord/equationdetectbase.h
@@ -0,0 +1,59 @@
+///////////////////////////////////////////////////////////////////////
+// File:        equationdetectbase.h
+// Description: The base class equation detection class.
+// Author:      Zongyi (Joe) Liu (joeliu@google.com)
+// Created:     Fri Aug 31 11:13:01 PST 2011
+//
+// (C) Copyright 2011, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_
+#define TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_
+
+class BLOBNBOX_LIST;
+class TO_BLOCK;
+struct Pix;
+
+namespace tesseract {
+
+class ColPartitionGrid;
+class ColPartitionSet;
+
+class TESS_API EquationDetectBase {
+ public:
+  EquationDetectBase() = default;
+  virtual ~EquationDetectBase();  // defined in equationdetectbase.cpp
+
+  // Iterate over the blobs inside to_block, and set the blobs that we want to
+  // process to BSTT_NONE. (By default, they should be BSTT_SKIP). The function
+  // returns 0 upon success.
+  virtual int LabelSpecialText(TO_BLOCK* to_block) = 0;
+
+  // Interface to find possible equation partitions in part_grid. Call after
+  // IdentifySpecialText (not declared here; LabelSpecialText? TODO confirm).
+  virtual int FindEquationParts(ColPartitionGrid* part_grid,
+                                ColPartitionSet** best_columns) = 0;
+
+  // Debug function: Render the blob's bounding box on pix, in a colour chosen
+  // by the blob's special_text_type, specifically:
+  // BSTT_MATH: red box
+  // BSTT_DIGIT: cyan box
+  // BSTT_ITALIC: green box
+  // BSTT_UNCLEAR: blue box
+  // All others: yellow box
+  static void RenderSpecialText(Pix* pix, BLOBNBOX* blob);
+};
+
+}  // namespace tesseract
+
+#endif  // TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_
diff --git a/tesseract/src/textord/fpchop.cpp b/tesseract/src/textord/fpchop.cpp
new file mode 100644
index 00000000..91444a4d
--- /dev/null
+++ b/tesseract/src/textord/fpchop.cpp
@@ -0,0 +1,890 @@
+/**********************************************************************
+ * File:        fpchop.cpp  (Formerly fp_chop.c)
+ * Description: Code to chop fixed pitch text into character cells.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+ // Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "fpchop.h"
+
+#include "blobbox.h"
+#include "statistc.h"
+#include "drawtord.h"
+#include "tovars.h"
+#include "topitch.h"
+
+namespace tesseract {
+
+INT_VAR (textord_fp_chop_error, 2,
+"Max allowed bending of chop cells");  // + 0.5 is passed to split_to_blob
+double_VAR (textord_fp_chop_snap, 0.5,
+"Max distance of chop pt from vertex");
+
+ELISTIZE(C_OUTLINE_FRAG)
+
+static WERD* add_repeated_word(  //move repeated word
+  WERD_IT* rep_it,  //repeated words
+  int16_t& rep_left,  //left edge of word
+  int16_t& prev_chop_coord,  //previous word end
+  uint8_t& blanks,  //no of blanks
+  float pitch,  //char cell size
+  WERD_IT* word_it  //list of words
+);
+
+static void fixed_chop_cblob(
+  C_BLOB* blob,
+  int16_t chop_coord,
+  float pitch_error,
+  C_OUTLINE_LIST* left_outlines,
+  C_OUTLINE_LIST* right_outlines
+);
+
+static void fixed_split_coutline(
+  C_OUTLINE* srcline,
+  int16_t chop_coord,
+  float pitch_error,
+  C_OUTLINE_IT* left_it,
+  C_OUTLINE_IT* right_it
+);
+
+static bool fixed_chop_coutline(
+        C_OUTLINE* srcline,
+        int16_t chop_coord,
+        float pitch_error,
+        C_OUTLINE_FRAG_LIST* left_frags,
+        C_OUTLINE_FRAG_LIST* right_frags
+);
+
+static void save_chop_cfragment(
+  int16_t head_index,
+  ICOORD head_pos,
+  int16_t tail_index,
+  ICOORD tail_pos,
+  C_OUTLINE* srcline,
+  C_OUTLINE_FRAG_LIST* frags
+);
+
+static void add_frag_to_list(
+  C_OUTLINE_FRAG* frag,
+  C_OUTLINE_FRAG_LIST* frags
+);
+
+static void close_chopped_cfragments(
+  C_OUTLINE_FRAG_LIST* frags,
+  C_OUTLINE_LIST* children,
+  float pitch_error,
+  C_OUTLINE_IT* dest_it
+);
+
+static C_OUTLINE* join_chopped_fragments(
+  C_OUTLINE_FRAG* bottom,
+  C_OUTLINE_FRAG* top
+);
+
+static void join_segments(
+  C_OUTLINE_FRAG* bottom,
+  C_OUTLINE_FRAG* top
+);
+
+/**********************************************************************
+ * fixed_pitch_words
+ * Make a ROW from a fixed pitch TO_ROW: chop the row's blobs at its char cell
+ * boundaries and group the pieces into words. Returns nullptr if the row has no blobs or < 2 cells.
+ **********************************************************************/
+ROW *fixed_pitch_words(                 //find lines
+                       TO_ROW *row,     //row to do
+                       FCOORD rotation  //for drawing (not referenced in this function)
+                      ) {
+  bool bol;                     //start of line
+  uint8_t blanks;                  //in front of word
+  uint8_t new_blanks;              //blanks in empty cell
+  int16_t chop_coord;              //chop boundary
+  int16_t prev_chop_coord;         //start of cell
+  int16_t rep_left;                //left edge of rep word
+  ROW *real_row;                 //output row
+  C_OUTLINE_LIST left_coutlines;
+  C_OUTLINE_LIST right_coutlines;
+  C_BLOB_LIST cblobs;
+  C_BLOB_IT cblob_it = &cblobs;
+  WERD_LIST words;
+  WERD_IT word_it = &words;      //new words
+                                 //repeated blobs
+  WERD_IT rep_it = &row->rep_words;
+  WERD *word;                    //new word
+  int32_t xstarts[2];              //row ends (computed, but not passed on to the ROW)
+  int32_t prev_x;                  //rightmost blob edge seen so far
+                                 //iterator
+  BLOBNBOX_IT box_it = row->blob_list ();
+                                 //boundaries
+  ICOORDELT_IT cell_it = &row->char_cells;
+
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_page_cuts && to_win != nullptr) {
+    plot_row_cells (to_win, ScrollView::RED, row, 0, &row->char_cells);
+  }
+#endif
+
+  prev_x = -INT16_MAX;
+  bol = true;
+  blanks = 0;
+  if (rep_it.empty ())
+    rep_left = INT16_MAX;
+  else
+    rep_left = rep_it.data ()->bounding_box ().left ();
+  if (box_it.empty ())
+    return nullptr;                 //empty row
+  xstarts[0] = box_it.data ()->bounding_box ().left ();
+  if (rep_left < xstarts[0]) {
+    xstarts[0] = rep_left;
+  }
+  if (cell_it.empty () || row->char_cells.singleton ()) {
+    tprintf ("Row without enough char cells!\n");
+    tprintf ("Leftmost blob is at (%d,%d)\n",
+      box_it.data ()->bounding_box ().left (),
+      box_it.data ()->bounding_box ().bottom ());
+    return nullptr;  //at least two cell boundaries are needed
+  }
+  ASSERT_HOST (!cell_it.empty () && !row->char_cells.singleton ());  //same condition already returned above
+  prev_chop_coord = cell_it.data ()->x ();
+  word = nullptr;
+  while (rep_left < cell_it.data ()->x ()) {  //repeated words left of the first cell boundary
+    word = add_repeated_word (&rep_it, rep_left, prev_chop_coord,
+      blanks, row->fixed_pitch, &word_it);
+  }
+  cell_it.mark_cycle_pt ();
+  if (prev_chop_coord >= cell_it.data ()->x ())
+    cell_it.forward ();
+  for (; !cell_it.cycled_list (); cell_it.forward ()) {
+    chop_coord = cell_it.data ()->x ();
+    while (!box_it.empty ()
+    && box_it.data ()->bounding_box ().left () <= chop_coord) {
+      if (box_it.data ()->bounding_box ().right () > prev_x)
+        prev_x = box_it.data ()->bounding_box ().right ();
+      split_to_blob (box_it.extract (), chop_coord,
+        textord_fp_chop_error + 0.5f,
+        &left_coutlines,
+        &right_coutlines);
+      box_it.forward ();
+      while (!box_it.empty() && box_it.data()->cblob() == nullptr) {  //discard blobs with no cblob
+        delete box_it.extract();
+        box_it.forward();
+      }
+    }
+    if (!right_coutlines.empty() && left_coutlines.empty())
+      split_to_blob (nullptr, chop_coord,
+        textord_fp_chop_error + 0.5f,
+        &left_coutlines,
+        &right_coutlines);
+    if (!left_coutlines.empty()) {
+      cblob_it.add_after_then_move(new C_BLOB(&left_coutlines));
+    } else {  //nothing left of this boundary: count blanks and close any pending word
+      if (rep_left < chop_coord) {
+        if (rep_left > prev_chop_coord)
+          new_blanks = static_cast<uint8_t>(floor ((rep_left - prev_chop_coord)
+            / row->fixed_pitch + 0.5));
+        else
+          new_blanks = 0;
+      }
+      else {
+        if (chop_coord > prev_chop_coord)
+          new_blanks = static_cast<uint8_t>(floor ((chop_coord - prev_chop_coord)
+            / row->fixed_pitch + 0.5));
+        else
+          new_blanks = 0;
+      }
+      if (!cblob_it.empty()) {
+        if (blanks < 1 && word != nullptr && !word->flag (W_REP_CHAR))
+          blanks = 1;
+        word = new WERD (&cblobs, blanks, nullptr);
+        cblob_it.set_to_list (&cblobs);
+        word->set_flag (W_DONT_CHOP, true);
+        word_it.add_after_then_move (word);
+        if (bol) {
+          word->set_flag (W_BOL, true);
+          bol = false;
+        }
+        blanks = new_blanks;
+      }
+      else
+        blanks += new_blanks;
+      while (rep_left < chop_coord) {
+        word = add_repeated_word (&rep_it, rep_left, prev_chop_coord,
+          blanks, row->fixed_pitch, &word_it);
+      }
+    }
+    if (prev_chop_coord < chop_coord)
+      prev_chop_coord = chop_coord;
+  }
+  if (!cblob_it.empty()) {  //blobs still pending after the last cell form the final word
+    word = new WERD(&cblobs, blanks, nullptr);
+    word->set_flag (W_DONT_CHOP, true);
+    word_it.add_after_then_move (word);
+    if (bol)
+      word->set_flag (W_BOL, true);
+  }
+  ASSERT_HOST (word != nullptr);
+  while (!rep_it.empty ()) {
+    add_repeated_word (&rep_it, rep_left, prev_chop_coord,
+      blanks, row->fixed_pitch, &word_it);
+  }
+                                 //at end of line
+  word_it.data ()->set_flag (W_EOL, true);
+  if (prev_chop_coord > prev_x)
+    prev_x = prev_chop_coord;
+  xstarts[1] = prev_x + 1;
+  real_row = new ROW (row, static_cast<int16_t>(row->kern_size), static_cast<int16_t>(row->space_size));
+  word_it.set_to_list (real_row->word_list ());
+                                 //put words in row
+  word_it.add_list_after (&words);
+  real_row->recalc_bounding_box ();
+  return real_row;
+}
+
+
+/**********************************************************************
+ * add_repeated_word
+ *
+ * Move the word at the front of the repeated-word list into the row's
+ * word list. Any gap between prev_chop_coord and rep_left is converted
+ * to a count of character cells (rounded to nearest) and added to
+ * blanks before blanks is stored on the moved word. On return
+ * prev_chop_coord is the right edge of the moved word, rep_left is the
+ * left edge of the next repeated word (INT16_MAX if none remain) and
+ * blanks is reset to 0. Returns the word that was moved.
+ **********************************************************************/
+
+static WERD *add_repeated_word(WERD_IT *rep_it,           // repeated words
+                               int16_t &rep_left,         // left edge of word
+                               int16_t &prev_chop_coord,  // previous word end
+                               uint8_t &blanks,           // no of blanks
+                               float pitch,               // char cell size
+                               WERD_IT *word_it) {        // list of words
+  if (rep_left > prev_chop_coord) {
+    // Gap before the word, in character cells, rounded to nearest.
+    const int16_t gap_cells =
+        static_cast<uint8_t>(floor((rep_left - prev_chop_coord) / pitch + 0.5));
+    blanks += gap_cells;
+  }
+  WERD *moved = rep_it->extract();
+  prev_chop_coord = moved->bounding_box().right();
+  word_it->add_after_then_move(moved);
+  moved->set_blanks(blanks);
+  rep_it->forward();
+  // With the list exhausted, INT16_MAX makes any "rep_left < x" test fail.
+  rep_left = rep_it->empty() ? INT16_MAX : rep_it->data()->bounding_box().left();
+  blanks = 0;  // consumed by the word just placed
+  return moved;
+}
+
+
+/**********************************************************************
+ * split_to_blob
+ *
+ * Split a BLOBNBOX across a vertical chop line and put the pieces
+ * into a left outline list and a right outline list. blob may be
+ * nullptr, in which case only outlines already waiting in
+ * right_coutlines are re-chopped. The BLOBNBOX itself is deleted
+ * here; its C_BLOB is handed to fixed_chop_cblob, which deletes it
+ * after extracting the outlines.
+ **********************************************************************/
+
+void split_to_blob(BLOBNBOX *blob,                  // blob to split
+                   int16_t chop_coord,              // place to chop
+                   float pitch_error,               // allowed deviation
+                   C_OUTLINE_LIST *left_coutlines,  // for cblobs
+                   C_OUTLINE_LIST *right_coutlines) {
+  // The underlying cblob, if there is a blob at all.
+  C_BLOB *real_cblob = (blob == nullptr) ? nullptr : blob->cblob();
+
+  // Nothing to do when there is neither a new blob nor leftover
+  // right-hand outlines waiting to be chopped again.
+  const bool have_work = real_cblob != nullptr || !right_coutlines->empty();
+  if (have_work) {
+    fixed_chop_cblob(real_cblob, chop_coord, pitch_error,
+                     left_coutlines, right_coutlines);
+  }
+
+  delete blob;
+}
+
+/**********************************************************************
+ * fixed_chop_cblob
+ *
+ * Chop the given cblob (if any) and the existing right outlines to
+ * produce a list of outlines left of the chop point and more to the right.
+ * Outlines already in right_outlines are re-split first; pieces that stay
+ * on the right are collected in a temporary list while the old list is
+ * drained, then put back. The blob's own outlines are split the same way
+ * and the emptied blob is deleted.
+ **********************************************************************/
+
+static void fixed_chop_cblob(C_BLOB *blob,                   // blob to split
+                             int16_t chop_coord,             // place to chop
+                             float pitch_error,              // allowed deviation
+                             C_OUTLINE_LIST *left_outlines,  // left half of chop
+                             C_OUTLINE_LIST *right_outlines) {  // right half
+  C_OUTLINE_LIST still_right;             // new right ones
+  C_OUTLINE_IT left_it(left_outlines);    // output iterator
+  C_OUTLINE_IT right_it(right_outlines);  // in/out iterator
+  C_OUTLINE_IT still_right_it(&still_right);
+
+  if (!right_it.empty()) {
+    // Drain the old right-hand list through the splitter.
+    do {
+      C_OUTLINE *pending = right_it.extract();  // already there
+      right_it.forward();
+      fixed_split_coutline(pending, chop_coord, pitch_error,
+                           &left_it, &still_right_it);
+    } while (!right_it.empty());
+    // Whatever stayed on the right goes back into the caller's list.
+    right_it.add_list_before(&still_right);
+  }
+
+  if (blob == nullptr) {
+    return;
+  }
+  // Now split the outlines belonging to the blob itself.
+  C_OUTLINE_IT blob_it(blob->out_list());
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    fixed_split_coutline(blob_it.extract(), chop_coord, pitch_error,
+                         &left_it, &right_it);
+  }
+  // Every outline has been extracted, so only the shell is left to free.
+  delete blob;
+}
+
+
+/**********************************************************************
+ * fixed_split_coutline
+ *
+ * Chop the given outline (if necessary) placing the fragments which
+ * fall either side of the chop line into the appropriate list.
+ * Ownership of srcline passes to this function: it is either moved
+ * whole into one of the output lists or deleted after being chopped.
+ **********************************************************************/
+
+static void fixed_split_coutline(C_OUTLINE *srcline,       // source outline
+                                 int16_t chop_coord,       // place to chop
+                                 float pitch_error,        // allowed deviation
+                                 C_OUTLINE_IT *left_it,    // left half of chop
+                                 C_OUTLINE_IT *right_it) { // right half of chop
+  C_OUTLINE_LIST left_children;     // whole children on the left
+  C_OUTLINE_LIST right_children;    // whole children on the right
+  C_OUTLINE_FRAG_LIST left_frags;   // chopped fragments
+  C_OUTLINE_FRAG_LIST right_frags;
+  C_OUTLINE_IT left_child_it(&left_children);
+  C_OUTLINE_IT right_child_it(&right_children);
+
+  TBOX outline_box = srcline->bounding_box();
+  // True when the middle of the outline's box is at or left of the chop.
+  const bool centre_is_left =
+      outline_box.left() + outline_box.right() <= chop_coord * 2;
+
+  if (centre_is_left && outline_box.right() < chop_coord + pitch_error) {
+    // Whole outline is in the left side or not far over the chop_coord,
+    // so put the whole thing on the left.
+    left_it->add_after_then_move(srcline);
+    return;
+  }
+  if (!centre_is_left && outline_box.left() > chop_coord - pitch_error) {
+    // Whole outline is in the right side or not far over the chop_coord,
+    // so put the whole thing on the right.
+    right_it->add_before_stay_put(srcline);
+    return;
+  }
+
+  // Needs real chopping.
+  if (!fixed_chop_coutline(srcline, chop_coord, pitch_error,
+                           &left_frags, &right_frags)) {
+    // Chop failed. Just use middle coord.
+    if (centre_is_left) {
+      left_it->add_after_then_move(srcline);  // Stick whole in left.
+    } else {
+      right_it->add_before_stay_put(srcline);
+    }
+    return;
+  }
+
+  // Distribute the holes: whole ones go to a side, straddlers get chopped.
+  C_OUTLINE_IT child_it(srcline->child());
+  for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) {
+    C_OUTLINE *child = child_it.extract();
+    TBOX child_box = child->bounding_box();
+    if (child_box.right() < chop_coord) {
+      left_child_it.add_after_then_move(child);   // Whole child is on the left.
+      continue;
+    }
+    if (child_box.left() > chop_coord) {
+      right_child_it.add_after_then_move(child);  // Whole child is on the right.
+      continue;
+    }
+    // No pitch_error is allowed when chopping children to prevent
+    // impossible outlines from being created.
+    if (fixed_chop_coutline(child, chop_coord, 0.0f, &left_frags, &right_frags)) {
+      delete child;  // Its steps have been copied into the fragment lists.
+    } else if (child_box.left() + child_box.right() <= chop_coord * 2) {
+      left_child_it.add_after_then_move(child);
+    } else {
+      right_child_it.add_after_then_move(child);
+    }
+  }
+  close_chopped_cfragments(&left_frags, &left_children, pitch_error, left_it);
+  close_chopped_cfragments(&right_frags, &right_children, pitch_error, right_it);
+  ASSERT_HOST(left_children.empty() && right_children.empty());
+  // No children left.
+  delete srcline;  // Smashed up.
+}
+
+
+/**********************************************************************
+ * fixed_chop_coutline
+ *
+ * Walk the edge steps of srcline and cut it wherever it reaches the chop
+ * line, saving pieces to the left in left_frags and to the right in
+ * right_frags. Returns false, having saved nothing, if it is not chopped.
+ **********************************************************************/
+
+static
+bool fixed_chop_coutline(                                  //chop the outline
+        C_OUTLINE* srcline,               //source outline
+        int16_t chop_coord,                 //x coord of the chop line
+        float pitch_error,                //allowed deviation
+        C_OUTLINE_FRAG_LIST* left_frags,  //left half of chop
+        C_OUTLINE_FRAG_LIST* right_frags  //right half of chop
+) {
+  bool first_frag;              //true until first crossing found
+  int16_t left_edge;               //leftmost x of outline
+  int16_t startindex;              //step index of leftmost point
+  int32_t length;                  //no of steps in outline
+  int16_t stepindex;               //into outline
+  int16_t head_index;              //start of fragment
+  ICOORD head_pos;               //start of fragment
+  int16_t tail_index;              //end of fragment
+  ICOORD tail_pos;               //end of fragment
+  ICOORD pos;                    //current point
+  int16_t first_index = 0;         //step index of first crossing
+  ICOORD first_pos;              //position of first crossing
+
+  length = srcline->pathlength ();
+  pos = srcline->start_pos ();
+  left_edge = pos.x ();
+  tail_index = 0;
+  tail_pos = pos;
+  for (stepindex = 0; stepindex < length; stepindex++) {  //find the leftmost point
+    if (pos.x () < left_edge) {
+      left_edge = pos.x ();
+      tail_index = stepindex;
+      tail_pos = pos;
+    }
+    pos += srcline->step (stepindex);
+  }
+  if (left_edge >= chop_coord - pitch_error)
+    return false;                //not worth it
+
+  startindex = tail_index;  //the walk starts at the leftmost point
+  first_frag = true;
+  head_index = tail_index;
+  head_pos = tail_pos;
+  do {
+    do {  //step along until on the chop line or back at the start
+      tail_pos += srcline->step (tail_index);
+      tail_index++;
+      if (tail_index == length)
+        tail_index = 0;  //wrap round the outline
+    }
+    while (tail_pos.x () != chop_coord && tail_index != startindex);
+    if (tail_index == startindex) {
+      if (first_frag)
+        return false;            //doesn't cross line
+      else
+        break;  //been all the way round
+    }
+    ASSERT_HOST (head_index != tail_index);
+    if (!first_frag) {
+      save_chop_cfragment(head_index,
+                          head_pos,
+                          tail_index,
+                          tail_pos,
+                          srcline,
+                          left_frags);
+    }
+    else {
+      first_index = tail_index;  //remember first crossing; its left piece is saved last
+      first_pos = tail_pos;
+      first_frag = false;
+    }
+    while (srcline->step (tail_index).x () == 0) {  //skip steps with no x movement
+      tail_pos += srcline->step (tail_index);
+      tail_index++;
+      if (tail_index == length)
+        tail_index = 0;
+    }
+    head_index = tail_index;
+    head_pos = tail_pos;
+    while (srcline->step (tail_index).x () > 0) {  //next step heads right of the line
+      do {
+        tail_pos += srcline->step (tail_index);
+        tail_index++;
+        if (tail_index == length)
+          tail_index = 0;
+      }
+      while (tail_pos.x () != chop_coord);  //until back on the chop line
+      ASSERT_HOST (head_index != tail_index);
+      save_chop_cfragment(head_index,
+                          head_pos,
+                          tail_index,
+                          tail_pos,
+                          srcline,
+                          right_frags);
+      while (srcline->step (tail_index).x () == 0) {  //skip steps with no x movement
+        tail_pos += srcline->step (tail_index);
+        tail_index++;
+        if (tail_index == length)
+          tail_index = 0;
+      }
+      head_index = tail_index;
+      head_pos = tail_pos;
+    }
+  }
+  while (tail_index != startindex);
+  save_chop_cfragment(head_index,  //left piece from the last crossing round to the first
+                      head_pos,
+                      first_index,
+                      first_pos,
+                      srcline,
+                      left_frags);
+  return true;                   //did some chopping
+}
+
+/**********************************************************************
+ * save_chop_cfragment
+ *
+ * Store the given fragment in the given fragment list, as a head entry
+ * holding the steps plus a tail entry marking its other end. Nothing is
+ * stored when the step count equals the vertical distance between the
+ * two ends.
+ **********************************************************************/
+
+static void save_chop_cfragment(int16_t head_index,   // head of fragment
+                                ICOORD head_pos,      // head of fragment
+                                int16_t tail_index,   // tail of fragment
+                                ICOORD tail_pos,      // tail of fragment
+                                C_OUTLINE *srcline,   // source of edgesteps
+                                C_OUTLINE_FRAG_LIST *frags) {  // fragment list
+  ASSERT_HOST(tail_pos.x() == head_pos.x());
+  ASSERT_HOST(tail_index != head_index);
+
+  // Number of steps from head to tail, allowing for wrap-around.
+  int16_t stepcount = tail_index - head_index;
+  if (stepcount < 0) {
+    stepcount += srcline->pathlength();
+  }
+  // Absolute vertical distance between the two ends.
+  const int16_t dy = tail_pos.y() - head_pos.y();
+  const int16_t jump = dy < 0 ? -dy : dy;
+  if (jump == stepcount) {
+    return;  // its a nop
+  }
+
+  // The head owns the steps; the tail only marks the other end.
+  const int16_t tail_y = tail_pos.y();
+  auto *head = new C_OUTLINE_FRAG(head_pos, tail_pos, srcline, head_index, tail_index);
+  auto *tail = new C_OUTLINE_FRAG(head, tail_y);
+  head->other_end = tail;
+  // Both ends go into the list, which is kept in ascending ycoord order.
+  add_frag_to_list(head, frags);
+  add_frag_to_list(tail, frags);
+}
+
+
+/**********************************************************************
+ * C_OUTLINE_FRAG::C_OUTLINE_FRAG
+ *
+ * Constructors for C_OUTLINE_FRAG. This one records a head fragment by
+ * copying steps start_index..end_index-1 (wrapping past the end of the
+ * outline when end_index <= start_index) out of the source outline.
+ **********************************************************************/
+
+C_OUTLINE_FRAG::C_OUTLINE_FRAG(ICOORD start_pt,     // start coord
+                               ICOORD end_pt,       // end coord
+                               C_OUTLINE *outline,  // source of steps
+                               int16_t start_index,
+                               int16_t end_index) {
+  start = start_pt;
+  end = end_pt;
+  ycoord = start_pt.y();
+  stepcount = end_index - start_index;
+  if (stepcount < 0)
+    stepcount += outline->pathlength();
+  ASSERT_HOST(stepcount > 0);
+  steps = new DIR128[stepcount];
+  DIR128 *dest = steps;  // next slot to fill
+  if (end_index > start_index) {
+    for (int src = start_index; src < end_index; ++src)
+      *dest++ = outline->step_dir(src);
+  } else {
+    // Wrapped range: run to the end of the path, then on from step 0.
+    const int path_len = outline->pathlength();
+    for (int src = start_index; src < path_len; ++src)
+      *dest++ = outline->step_dir(src);
+    for (int src = 0; src < end_index; ++src)
+      *dest++ = outline->step_dir(src);
+  }
+  other_end = nullptr;
+  delete close();  // NOTE(review): outline is built then discarded - sanity check? confirm
+}
+
+
+// Tail-end constructor: marks the other end of head at y coordinate tail_y.
+// It shares head's start/end points but carries no steps of its own.
+C_OUTLINE_FRAG::C_OUTLINE_FRAG(C_OUTLINE_FRAG *head,  // other end
+                               int16_t tail_y)
+    : start(head->start),
+      end(head->end),
+      steps(nullptr),
+      stepcount(0),
+      other_end(head),
+      ycoord(tail_y) {}
+
+
+/**********************************************************************
+ * add_frag_to_list
+ *
+ * Insert the fragment in the list at the appropriate place to keep
+ * them in ascending ycoord order. Among entries with equal ycoord the
+ * new fragment goes first only if its other end has a smaller ycoord.
+ **********************************************************************/
+
+static void add_frag_to_list(C_OUTLINE_FRAG *frag,           // fragment to add
+                             C_OUTLINE_FRAG_LIST *frags) {   // fragment list
+  C_OUTLINE_FRAG_IT frag_it(frags);  // output list
+
+  if (!frags->empty()) {
+    const int16_t new_y = frag->ycoord;
+    for (frag_it.mark_cycle_pt(); !frag_it.cycled_list(); frag_it.forward()) {
+      const int16_t here_y = frag_it.data()->ycoord;
+      // Stop at the first entry that must come after the new fragment.
+      const bool insert_here =
+          here_y > new_y || (here_y == new_y && frag->other_end->ycoord < new_y);
+      if (insert_here) {
+        frag_it.add_before_then_move(frag);
+        return;
+      }
+    }
+  }
+  // No such entry (or an empty list): the fragment belongs at the end.
+  frag_it.add_to_end(frag);
+}
+
+
+/**********************************************************************
+ * close_chopped_cfragments
+ *
+ * Clear the given list of fragments joining them up into outlines.
+ * Each outline made soaks up any of the child outlines which it encloses.
+ * Finished outlines no wider than pitch_error are discarded; children
+ * that no outline claimed are passed to dest_it as outlines themselves.
+ **********************************************************************/
+
+static void close_chopped_cfragments(C_OUTLINE_FRAG_LIST *frags,  // list to clear
+                                     C_OUTLINE_LIST *children,    // potential children
+                                     float pitch_error,           // allowed shrinkage
+                                     C_OUTLINE_IT *dest_it) {     // output list
+  C_OUTLINE_FRAG_IT frag_it(frags);
+  C_OUTLINE_IT child_it(children);
+
+  while (!frag_it.empty()) {
+    frag_it.move_to_first();
+    C_OUTLINE_FRAG *bottom_frag = frag_it.extract();  // get bottom one
+    frag_it.forward();
+    C_OUTLINE_FRAG *top_frag = frag_it.data();  // look at next
+    // When both are heads or both are tails, prefer the following entry
+    // if it sits at the same ycoord.
+    const bool same_kind = (bottom_frag->steps == nullptr) == (top_frag->steps == nullptr);
+    if (same_kind && frag_it.data_relative(1)->ycoord == top_frag->ycoord) {
+      frag_it.forward();
+    }
+    top_frag = frag_it.extract();
+
+    // Must be tested before the join, which deletes both fragments.
+    const bool closes_loop = top_frag->other_end == bottom_frag;
+    C_OUTLINE *outline = join_chopped_fragments(bottom_frag, top_frag);
+    if (!closes_loop) {
+      ASSERT_HOST(outline == nullptr);
+      continue;
+    }
+    if (outline == nullptr) {
+      continue;
+    }
+    // Hand over every candidate child that the new outline encloses.
+    C_OUTLINE_IT olchild_it(outline->child());
+    for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) {
+      C_OUTLINE *child = child_it.data();
+      if (*child < *outline) {
+        olchild_it.add_to_end(child_it.extract());
+      }
+    }
+    // Slivers no wider than pitch_error are dropped.
+    if (outline->bounding_box().width() > pitch_error) {
+      dest_it->add_after_then_move(outline);
+    } else {
+      delete outline;  // Make it disappear.
+    }
+  }
+  // Leftover children become entries in the output in their own right.
+  while (!child_it.empty()) {
+    dest_it->add_after_then_move(child_it.extract());
+    child_it.forward();
+  }
+}
+
+
+/**********************************************************************
+ * join_chopped_fragments
+ *
+ * Join the bottom and top entries of a cut. If they are the two ends of
+ * the same fragment it is closed into an outline, which is returned.
+ * Otherwise the two step fragments are spliced into one, their surviving
+ * ends are re-linked, and nullptr is returned. Both operands are deleted.
+ **********************************************************************/
+
+static C_OUTLINE *join_chopped_fragments(C_OUTLINE_FRAG *bottom,  // bottom of cut
+                                         C_OUTLINE_FRAG *top) {   // top of cut
+  if (bottom->other_end == top) {
+    // Same fragment: whichever end holds the steps turns into the outline.
+    C_OUTLINE *outline = (bottom->steps == nullptr) ? top->close() : bottom->close();
+    delete top;
+    delete bottom;
+    return outline;
+  }
+
+  // Different fragments: the entry holding steps here is appended to the
+  // fragment whose step-less end is here.
+  if (bottom->steps != nullptr) {
+    ASSERT_HOST(top->steps == nullptr);
+    join_segments(top->other_end, bottom);
+  } else {
+    ASSERT_HOST(top->steps != nullptr);
+    join_segments(bottom->other_end, top);
+  }
+
+  // The far ends now belong to each other.
+  top->other_end->other_end = bottom->other_end;
+  bottom->other_end->other_end = top->other_end;
+  // Neither operand is needed any more.
+  delete bottom;
+  delete top;
+  return nullptr;
+}
+
+/**********************************************************************
+ * join_segments
+ *
+ * Join the two edgestep fragments such that the second comes after
+ * the first and the gap between them is closed. The combined steps
+ * replace bottom's; top is only read and is left for the caller to
+ * delete. The gap is bridged with identical fake steps, one per unit
+ * of vertical distance between bottom's end and top's start.
+ **********************************************************************/
+
+static void join_segments(C_OUTLINE_FRAG *bottom,  // bottom of cut
+                          C_OUTLINE_FRAG *top) {   // top of cut
+  ASSERT_HOST(bottom->end.x() == top->start.x());
+
+  // Signed vertical gap from the end of bottom to the start of top.
+  const int16_t gap = top->start.y() - bottom->end.y();
+  const int16_t fake_count = gap < 0 ? -gap : gap;  // fake steps
+  // DIR128 value used for every fake step, chosen by the sign of the gap.
+  DIR128 fake_step;                                 // step entry
+  fake_step = gap < 0 ? 32 : 96;
+
+  // New array: bottom's steps, then the fake steps, then top's steps.
+  const int32_t bottom_count = bottom->stepcount;
+  const int32_t joined_count = bottom_count + fake_count + top->stepcount;
+  DIR128 *joined = new DIR128[joined_count];
+  DIR128 *filler = joined + bottom_count;
+  memmove(joined, bottom->steps, bottom_count);
+  memset(filler, fake_step.get_dir(), fake_count);
+  memmove(filler + fake_count, top->steps, top->stepcount);
+
+  // bottom takes over the combined run and now ends where top ended.
+  delete[] bottom->steps;
+  bottom->steps = joined;
+  bottom->stepcount = joined_count;
+  bottom->end = top->end;
+  // Keep the partner entry's copy of the end point in step.
+  bottom->other_end->end = top->end;
+}
+
+
+/**********************************************************************
+ * C_OUTLINE_FRAG::close
+ *
+ * Join the ends of this fragment and turn it into an outline.
+ * Returns a newly allocated C_OUTLINE, or nullptr if the closed path
+ * would be longer than C_OUTLINE::kMaxOutlineLength. The fragment
+ * itself is not modified.
+ **********************************************************************/
+
+C_OUTLINE *C_OUTLINE_FRAG::close() {  // join pieces
+  ASSERT_HOST(start.x() == end.x());
+
+  // Signed vertical gap from the end back to the start.
+  const int16_t gap = start.y() - end.y();
+  const int16_t fake_count = gap < 0 ? -gap : gap;  // fake steps
+  DIR128 fake_step;                                 // step entry
+  fake_step = gap < 0 ? 32 : 96;
+
+  const int32_t closed_count = stepcount + fake_count;
+  if (closed_count > C_OUTLINE::kMaxOutlineLength) {
+    return nullptr;  // Can't join them
+  }
+
+  // Temporary step buffer: own steps followed by the fake closing steps.
+  DIR128 *closed_steps = new DIR128[closed_count];
+  memmove(closed_steps, steps, stepcount);
+  memset(closed_steps + stepcount, fake_step.get_dir(), fake_count);
+  auto *result = new C_OUTLINE(start, closed_steps, closed_count);
+  delete[] closed_steps;
+  return result;
+}
+
+
+/**********************************************************************
+ * C_OUTLINE_FRAG::operator=
+ *
+ * Copy this fragment: start, end, ycoord and a fresh copy of the steps.
+ * other_end is not copied. Safe for self-assignment and for step-less src.
+ **********************************************************************/
+
+C_OUTLINE_FRAG &C_OUTLINE_FRAG::operator=(const C_OUTLINE_FRAG &src) {
+  if (this == &src) return *this;  // nothing to do; also avoids freeing src.steps
+  // Duplicate before releasing our own array. A null src.steps stays null.
+  DIR128 *copied = src.steps == nullptr ? nullptr : new DIR128[src.stepcount];
+  if (copied != nullptr) memmove(copied, src.steps, src.stepcount * sizeof(DIR128));
+  delete[] steps;
+  steps = copied;
+  stepcount = src.stepcount;
+  start = src.start;
+  end = src.end;
+  ycoord = src.ycoord;
+  return *this;
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/fpchop.h b/tesseract/src/textord/fpchop.h
new file mode 100644
index 00000000..cc938ba9
--- /dev/null
+++ b/tesseract/src/textord/fpchop.h
@@ -0,0 +1,84 @@
+/**********************************************************************
+ * File:        fpchop.h  (Formerly fp_chop.h)
+ * Description: Code to chop fixed pitch text into character cells.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           FPCHOP_H
+#define           FPCHOP_H
+
+#include          "params.h"
+#include          "blobbox.h"
+
+namespace tesseract {
+
+// One end of a chopped piece of outline. The head entry owns the steps;
+// the matching tail entry has steps == nullptr and points back at the head.
+class C_OUTLINE_FRAG : public ELIST_LINK {
+  public:
+    C_OUTLINE_FRAG() {  //empty constructor
+      steps = nullptr;
+      stepcount = 0;
+      other_end = nullptr;       //no partner yet
+      ycoord = 0;
+    }
+    ~C_OUTLINE_FRAG() { delete[] steps; }
+    // Head entry: copies steps start_index..end_index out of outline.
+    C_OUTLINE_FRAG(ICOORD start_pt,     //start coord
+                   ICOORD end_pt,       //end coord
+                   C_OUTLINE *outline,  //source of steps
+                   int16_t start_index,
+                   int16_t end_index);
+    // Tail entry for head, with the cut at y coordinate tail_y.
+    C_OUTLINE_FRAG(C_OUTLINE_FRAG *head, int16_t tail_y);
+    C_OUTLINE *close();  //copy to outline
+    C_OUTLINE_FRAG &operator=(const C_OUTLINE_FRAG &src);  //assign
+
+    ICOORD start;                //start coord
+    ICOORD end;                  //end coord
+    DIR128 *steps;               //step array (nullptr for a tail)
+    int32_t stepcount;           //no of steps
+    C_OUTLINE_FRAG *other_end;   //tail if a head, head if a tail
+    int16_t ycoord;              //coord of cut pt
+
+  private:
+    // Copy constructor (currently unused, therefore private).
+    C_OUTLINE_FRAG(const C_OUTLINE_FRAG& other);
+};
+
+ELISTIZEH(C_OUTLINE_FRAG)  // NOTE(review): presumably declares C_OUTLINE_FRAG_LIST/_IT - confirm
+
+extern  // fpchop.cpp adds 0.5 to this to form the pitch_error given to split_to_blob
+INT_VAR_H (textord_fp_chop_error, 2,
+"Max allowed bending of chop cells");
+extern
+double_VAR_H (textord_fp_chop_snap, 0.5,
+"Max distance of chop pt from vertex");
+
+ROW *fixed_pitch_words(                 //find lines
+                       TO_ROW *row,     //row to do
+                       FCOORD rotation  //for drawing
+                      );
+
+void split_to_blob(                                 //split the blob; blob itself is deleted
+                   BLOBNBOX *blob,                  //blob to split, may be nullptr
+                   int16_t chop_coord,              //place to chop
+                   float pitch_error,               //allowed deviation
+                   C_OUTLINE_LIST *left_coutlines,  //receives outlines left of the chop
+                   C_OUTLINE_LIST *right_coutlines);  //in/out: outlines right of the chop
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/gap_map.cpp b/tesseract/src/textord/gap_map.cpp
new file mode 100644
index 00000000..e31328f8
--- /dev/null
+++ b/tesseract/src/textord/gap_map.cpp
@@ -0,0 +1,189 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gap_map.h"
+
+#include "statistc.h"
+
+namespace tesseract {
+
+BOOL_VAR(gapmap_debug, false, "Say which blocks have tables");  // GAPMAP ctor prints "Table found" when set
+BOOL_VAR(gapmap_use_ends, false, "Use large space at start and end of rows");  // also count leading/trailing row space
+BOOL_VAR(gapmap_no_isolated_quanta, false,  // GAPMAP ctor zeroes over-threshold buckets with no over-threshold neighbour
+"Ensure gaps not less than 2quanta wide");
+double_VAR(gapmap_big_gaps, 1.75, "xht multiplier");  // a gap is wide if > this * row xheight (and > 2)
+
+/*************************************************************************
+ * A block gap map is a quantised histogram of whitespace regions in the
+ * block. It is a vertical projection of wide gaps WITHIN lines
+ *
+ * The map is held as an array of counts of rows which have a wide gap
+ * covering that region of the row. Each bucket in the map represents a width
+ * of about half an xheight - (The median of the xhts in the rows is used.)
+ *
+ * The block is considered RECTANGULAR - delimited by the left and right
+ * extremes of the rows in the block. However, ONLY wide gaps WITHIN a row are
+ * counted.
+ *
+ *************************************************************************/
+
+GAPMAP::GAPMAP(                 //Constructor
+               TO_BLOCK *block  //block
+              ) {
+  TO_ROW *row;                   //current row
+  BLOBNBOX_IT blob_it;           //iterator
+  TBOX blob_box;
+  TBOX prev_blob_box;
+  int16_t gap_width;
+  int16_t start_of_row;
+  int16_t end_of_row;
+  STATS xht_stats (0, 128);
+  int16_t min_quantum;
+  int16_t max_quantum;
+  int16_t i;
+
+  // Pass 1: find left and right extremes and gather the row xheights.
+  map = nullptr;
+  min_left = INT16_MAX;
+  max_right = -INT16_MAX;
+  total_rows = 0;
+  any_tabs = false;
+
+  TO_ROW_IT row_it(block->get_rows());
+  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+    row = row_it.data ();
+    if (!row->blob_list ()->empty ()) {
+      total_rows++;
+      xht_stats.add (static_cast<int16_t>(floor (row->xheight + 0.5)), 1);
+      blob_it.set_to_list (row->blob_list ());
+      start_of_row = blob_it.data ()->bounding_box ().left ();
+      end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
+      if (min_left > start_of_row)
+        min_left = start_of_row;
+      if (max_right < end_of_row)
+        max_right = end_of_row;
+    }
+  }
+  if ((total_rows < 3) || (min_left >= max_right)) {
+    // Too little to go on: no map, and any_tabs stays false.
+    bucket_size = 0;
+    map_max = 0;
+    total_rows = 0;
+    min_left = max_right = 0;
+    return;
+  }
+  bucket_size = static_cast<int16_t>(floor (xht_stats.median () + 0.5)) / 2;
+  // A median xheight below 2 would give a zero bucket and a division by
+  // zero just below, so never let a bucket be narrower than one unit.
+  if (bucket_size < 1)
+    bucket_size = 1;
+  map_max = (max_right - min_left) / bucket_size;
+  map = new int16_t[map_max + 1];
+  for (i = 0; i <= map_max; i++)
+    map[i] = 0;
+
+  // Pass 2: for every wide gap in a row, count the row in each bucket
+  // that the gap covers.
+  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+    row = row_it.data ();
+    if (!row->blob_list ()->empty ()) {
+      blob_it.set_to_list (row->blob_list ());
+      blob_it.mark_cycle_pt ();
+      blob_box = box_next (&blob_it);
+      prev_blob_box = blob_box;
+      if (gapmap_use_ends) {
+        /* Leading space */
+        gap_width = blob_box.left () - min_left;
+        if ((gap_width > gapmap_big_gaps * row->xheight) && gap_width > 2) {
+          max_quantum = (blob_box.left () - min_left) / bucket_size;
+          if (max_quantum > map_max) max_quantum = map_max;
+          for (i = 0; i <= max_quantum; i++)
+            map[i]++;
+        }
+      }
+      while (!blob_it.cycled_list ()) {
+        blob_box = box_next (&blob_it);
+        gap_width = blob_box.left () - prev_blob_box.right ();
+        if ((gap_width > gapmap_big_gaps * row->xheight) && gap_width > 2) {
+          min_quantum = (prev_blob_box.right () - min_left) / bucket_size;
+          max_quantum = (blob_box.left () - min_left) / bucket_size;
+          if (min_quantum < 0) min_quantum = 0;  // same clip as trailing space
+          if (max_quantum > map_max) max_quantum = map_max;
+          for (i = min_quantum; i <= max_quantum; i++)
+            map[i]++;
+        }
+        prev_blob_box = blob_box;
+      }
+      if (gapmap_use_ends) {
+        /* Trailing space */
+        gap_width = max_right - prev_blob_box.right ();
+        if ((gap_width > gapmap_big_gaps * row->xheight) && gap_width > 2) {
+          min_quantum = (prev_blob_box.right () - min_left) / bucket_size;
+          if (min_quantum < 0) min_quantum = 0;
+          for (i = min_quantum; i <= map_max; i++)
+            map[i]++;
+        }
+      }
+    }
+  }
+
+  // Pass 3: a bucket marks a table gap when more than half the rows have
+  // a wide gap there. With gapmap_no_isolated_quanta, such a bucket is
+  // cleared unless a neighbouring bucket is over the threshold too.
+  const int16_t threshold = total_rows / 2;
+  for (i = 0; i <= map_max; i++) {
+    if (map[i] > threshold) {
+      // Testing the index first keeps map[i - 1] / map[i + 1] inside the
+      // array, even when the map has a single bucket.
+      const bool left_low = (i == 0) || (map[i - 1] <= threshold);
+      const bool right_low = (i == map_max) || (map[i + 1] <= threshold);
+      if (gapmap_no_isolated_quanta && left_low && right_low)
+        map[i] = 0;              //prevent isolated quantum
+      else
+        any_tabs = true;
+    }
+  }
+  if (gapmap_debug && any_tabs)
+    tprintf ("Table found\n");
+}
+
+
+/*************************************************************************
+ * GAPMAP::table_gap()
+ * Is there a bucket in the specified range where more than half the rows in the
+ * block have a wide gap?
+ *************************************************************************/
+
+bool GAPMAP::table_gap(int16_t left,     // From here
+                       int16_t right) {  // To here
+  if (!any_tabs) {
+    return false;  // no bucket qualified when the map was built
+  }
+
+  // Buckets holding the two ends of the range.
+  int16_t first_bucket = (left - min_left) / bucket_size;
+  int16_t last_bucket = (right - min_left) / bucket_size;
+  // Clip to the bounds of the array. In some circumstances (big blob followed
+  // by small blob) the last bucket can exceed the map_max bounds, but we clip
+  // here instead, as it provides better long-term safety.
+  if (first_bucket < 0) first_bucket = 0;
+  if (last_bucket > map_max) last_bucket = map_max;
+
+  // Any bucket with a wide gap in more than half the rows will do.
+  const int half_rows = total_rows / 2;
+  for (int16_t bucket = first_bucket; bucket <= last_bucket; bucket++) {
+    if (map[bucket] > half_rows) {
+      return true;
+    }
+  }
+  return false;
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/gap_map.h b/tesseract/src/textord/gap_map.h
new file mode 100644
index 00000000..7ed9aae6
--- /dev/null
+++ b/tesseract/src/textord/gap_map.h
@@ -0,0 +1,53 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GAP_MAP_H
+#define GAP_MAP_H
+
+#include "blobbox.h"
+
+namespace tesseract {
+
+class GAPMAP {  // Per-block map of wide gaps, bucketed along x.
+  public:
+    GAPMAP(  //constructor
+           TO_BLOCK *block);
+    ~GAPMAP() { delete[] map; }  //destructor; map is owned by this object
+
+    // Not copyable: a copy would delete[] the same map array a second time.
+    GAPMAP(const GAPMAP &) = delete;
+    GAPMAP &operator=(const GAPMAP &) = delete;
+
+    bool table_gap(               //Is gap a table?
+            int16_t left,    //From here
+            int16_t right);  //To here
+
+  private:
+    int16_t total_rows;            //in block
+    int16_t min_left;              //Left extreme
+    int16_t max_right;             //Right extreme
+    int16_t bucket_size;           // half an x ht
+    int16_t *map;                  //empty counts
+    int16_t map_max;               //map[0..map_max] defined
+    bool any_tabs;                 //some bucket exceeds total_rows / 2
+};
+
+// Parameters read by the gap map code; extern declarations only.
+
+extern BOOL_VAR_H (gapmap_debug, false, "Say which blocks have tables");
+extern BOOL_VAR_H (gapmap_use_ends, false,
+"Use large space at start and end of rows");
+extern BOOL_VAR_H (gapmap_no_isolated_quanta, false,
+"Ensure gaps not less than 2quanta wide");
+extern double_VAR_H (gapmap_big_gaps, 1.75, "xht multiplier");
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/imagefind.cpp b/tesseract/src/textord/imagefind.cpp
new file mode 100644
index 00000000..dc5f19b9
--- /dev/null
+++ b/tesseract/src/textord/imagefind.cpp
@@ -0,0 +1,1366 @@
+///////////////////////////////////////////////////////////////////////
+// File:        imagefind.cpp
+// Description: Function to find image and drawing regions in an image
+//              and create a corresponding list of empty blobs.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "imagefind.h"
+
+#include "colpartitiongrid.h"
+#include "linlsq.h"
+#include "statistc.h"
+#include "params.h"
+
+#include "allheaders.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+static INT_VAR(textord_tabfind_show_images, false, "Show image blobs");
+
+// Fraction of the width or height in on pixels below which a row or column
+// can be discarded from a roughly rectangular image.
+const double kMinRectangularFraction = 0.125;
+// Fraction of width or height to consider a row or column completely used.
+const double kMaxRectangularFraction = 0.75;
+// Fraction of width or height to allow transition from kMinRectangularFraction
+// to kMaxRectangularFraction, equivalent to a dy/dx skew.
+const double kMaxRectangularGradient = 0.1;  // About 6 degrees.
+// Minimum width and height in pixels to be worth looking for images on.
+const int kMinImageFindSize = 100;
+// Scale factor applied to the summed rms color fit error.
+const double kRMSFitScaling = 8.0;
+// Min 8th-ile range of the most deviant channel to call it two colors.
+const int kMinColorDifference = 16;
+// Pixel padding for noise blobs and partitions when rendering on the image
+// mask to encourage them to join together. Make it too big and images
+// will fatten out too much and have to be clipped to text.
+const int kNoisePadding = 4;
+
+// Finds image regions within the BINARY source pix (page image) and returns
+// the image regions as a mask image the same size as pix.
+// When pix is too small or no halftone is found, the mask is a newly created
+// pix with nothing drawn in it. The result must be pixDestroyed by the caller.
+// If textord_tabfind_show_images, debug images are appended to pixa_debug.
+Pix* ImageFind::FindImages(Pix* pix, DebugPixa* pixa_debug) {
+  // Not worth looking at small images.
+  if (pixGetWidth(pix) < kMinImageFindSize ||
+      pixGetHeight(pix) < kMinImageFindSize)
+    return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
+
+  // Reduce by factor 2.
+  Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
+  if (textord_tabfind_show_images && pixa_debug != nullptr)
+    pixa_debug->AddPix(pixr, "CascadeReduced");
+
+  // Get the halftone mask directly from Leptonica.
+  //
+  // Leptonica will print an error message and return nullptr if we call
+  // pixGenerateHalftoneMask(pixr, nullptr, ...) with too small an image, so
+  // we want to bypass that by repeating the size test on the reduced pix.
+  if (pixGetWidth(pixr) < kMinImageFindSize ||
+      pixGetHeight(pixr) < kMinImageFindSize) {
+    pixDestroy(&pixr);
+    return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
+  }
+  // Get the halftone mask, collecting Leptonica's debug pix if requested.
+  l_int32 ht_found = 0;
+  Pixa* pixadb = (textord_tabfind_show_images && pixa_debug != nullptr)
+                     ? pixaCreate(0)
+                     : nullptr;
+  Pix* pixht2 = pixGenerateHalftoneMask(pixr, nullptr, &ht_found, pixadb);
+  if (pixadb) {  // Only non-null when debug output was requested above.
+    Pix* pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2);
+    if (textord_tabfind_show_images && pixa_debug != nullptr)
+      pixa_debug->AddPix(pixdb, "HalftoneMask");
+    pixDestroy(&pixdb);
+    pixaDestroy(&pixadb);
+  }
+  pixDestroy(&pixr);
+  if (!ht_found && pixht2 != nullptr)
+    pixDestroy(&pixht2);  // Leaves pixht2 null so the next test returns.
+  if (pixht2 == nullptr)
+    return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
+
+  // Expand back up again to undo the reduction by factor 2.
+  Pix *pixht = pixExpandReplicate(pixht2, 2);
+  if (textord_tabfind_show_images && pixa_debug != nullptr)
+    pixa_debug->AddPix(pixht, "HalftoneReplicated");
+  pixDestroy(&pixht2);
+
+  // Fill to capture pixels near the mask edges that were missed
+  Pix *pixt = pixSeedfillBinary(nullptr, pixht, pix, 8);
+  pixOr(pixht, pixht, pixt);
+  pixDestroy(&pixt);
+
+  // Eliminate lines and bars that may be joined to images.
+  Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
+  pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
+  if (textord_tabfind_show_images && pixa_debug != nullptr)
+    pixa_debug->AddPix(pixfinemask, "FineMask");
+  Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
+  Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
+  pixDestroy(&pixreduced);
+  pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
+  Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
+  pixDestroy(&pixreduced2);
+  if (textord_tabfind_show_images && pixa_debug != nullptr)
+    pixa_debug->AddPix(pixcoarsemask, "CoarseMask");
+  // Combine the coarse and fine image masks.
+  pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask);
+  pixDestroy(&pixfinemask);
+  // Dilate a bit to make sure we get everything.
+  pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
+  Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16);
+  pixDestroy(&pixcoarsemask);
+  if (textord_tabfind_show_images && pixa_debug != nullptr)
+    pixa_debug->AddPix(pixmask, "MaskDilated");
+  // And the image mask with the line and bar remover.
+  pixAnd(pixht, pixht, pixmask);
+  pixDestroy(&pixmask);
+  if (textord_tabfind_show_images && pixa_debug != nullptr)
+    pixa_debug->AddPix(pixht, "FinalMask");
+  // Make the result image the same size as the input.
+  Pix* result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
+  pixOr(result, result, pixht);
+  pixDestroy(&pixht);
+  return result;
+}
+
+// Splits the binary image-mask pix into connected components, returned as a
+// Boxa, Pixa pair in the manner of pixConnComp, except that any component
+// found to be nearly rectangular is replaced with a solid rectangle.
+// The returned boxa, pixa may be nullptr, meaning no images found.
+// If not nullptr, they must be destroyed by the caller.
+// Resolution of pix should match the source image (Tesseract::pix_binary_)
+// so the output coordinate systems match.
+void ImageFind::ConnCompAndRectangularize(Pix* pix, DebugPixa* pixa_debug,
+                                          Boxa** boxa, Pixa** pixa) {
+  *boxa = nullptr;
+  *pixa = nullptr;
+
+  if (textord_tabfind_show_images && pixa_debug != nullptr)
+    pixa_debug->AddPix(pix, "Conncompimage");
+  *boxa = pixConnComp(pix, pixa, 8);
+  const bool have_components = *boxa != nullptr && *pixa != nullptr;
+  const int component_count = have_components ? pixaGetCount(*pixa) : 0;
+  // A sharp edge in the vertical and/or horizontal occupancy of a component
+  // suggests a rectangular image with unwanted bits merged on: clip those.
+  for (int index = 0; index < component_count; ++index) {
+    Pix* component = pixaGetPix(*pixa, index, L_CLONE);
+    if (textord_tabfind_show_images && pixa_debug != nullptr)
+      pixa_debug->AddPix(component, "A component");
+    int x_start, x_end, y_start, y_end;
+    const bool rectangular = pixNearlyRectangular(
+        component, kMinRectangularFraction, kMaxRectangularFraction,
+        kMaxRectangularGradient, &x_start, &y_start, &x_end, &y_end);
+    if (!rectangular) {
+      pixDestroy(&component);
+      continue;
+    }
+    const int rect_w = x_end - x_start;
+    const int rect_h = y_end - y_start;
+    Pix* solid = pixCreate(rect_w, rect_h, 1);
+    pixSetAll(solid);
+    pixDestroy(&component);
+    // pixaReplacePix takes ownership of solid; the box is moved to match.
+    pixaReplacePix(*pixa, index, solid, nullptr);
+    component = pixaGetPix(*pixa, index, L_CLONE);
+    l_int32 x, y, width, height;
+    boxaGetBoxGeometry(*boxa, index, &x, &y, &width, &height);
+    boxaReplaceBox(*boxa, index,
+                   boxCreate(x + x_start, y + y_start, rect_w, rect_h));
+    pixDestroy(&component);
+  }
+}
+
+// Walks rows from y=*y_start towards y_end (exclusive) in steps of y_step,
+// counting the black pixels of each row over x=[x_start,x_end).
+// Returns true if the per-row count, pix_count, fits this pattern:
+// 0 or more rows with pix_count < min_count then
+// <= mid_width rows with min_count <= pix_count <= max_count then
+// a row with pix_count > max_count.
+// *y_start is moved to the first y with pix_count >= min_count, if any.
+static bool HScanForEdge(uint32_t* data, int wpl, int x_start, int x_end,
+                         int min_count, int mid_width, int max_count,
+                         int y_end, int y_step, int* y_start) {
+  int rows_in_middle = 0;
+  for (int row = *y_start; row != y_end; row += y_step) {
+    // Counted bit by bit, as the total is needed over a subset of the row
+    // rather than over the whole of it.
+    uint32_t* row_data = data + wpl * row;
+    int black = 0;
+    for (int col = x_start; col < x_end; ++col)
+      black += GET_DATA_BIT(row_data, col) ? 1 : 0;
+    // rows_in_middle stays 0 until a row reaches min_count.
+    if (rows_in_middle == 0) {
+      if (black < min_count)
+        continue;      // Still in the min phase.
+      *y_start = row;  // This row ends the min phase.
+    }
+    if (black > max_count)
+      return true;     // Found the pattern.
+    if (++rows_in_middle > mid_width)
+      return false;    // Middle too big.
+  }
+  return false;        // Never found max_count.
+}
+
+// Walks columns from x=*x_start towards x_end (exclusive) in steps of x_step,
+// counting the black pixels of each column over y=[y_start,y_end).
+// Returns true if the per-column count, pix_count, fits this pattern:
+// 0 or more cols with pix_count < min_count then
+// <= mid_width cols with min_count <= pix_count <= max_count then
+// a column with pix_count > max_count.
+// *x_start is moved to the first x with pix_count >= min_count, if any.
+static bool VScanForEdge(uint32_t* data, int wpl, int y_start, int y_end,
+                         int min_count, int mid_width, int max_count,
+                         int x_end, int x_step, int* x_start) {
+  int cols_in_middle = 0;
+  for (int col = *x_start; col != x_end; col += x_step) {
+    int black = 0;
+    for (int row = y_start; row < y_end; ++row) {
+      uint32_t* row_data = data + row * wpl;
+      black += GET_DATA_BIT(row_data, col) ? 1 : 0;
+    }
+    // cols_in_middle stays 0 until a column reaches min_count.
+    if (cols_in_middle == 0) {
+      if (black < min_count)
+        continue;      // Still in the min phase.
+      *x_start = col;  // This column ends the min phase.
+    }
+    if (black > max_count)
+      return true;     // Found the pattern.
+    if (++cols_in_middle > mid_width)
+      return false;    // Middle too big.
+  }
+  return false;        // Never found max_count.
+}
+
+// Returns true if there is a rectangle in the source pix, such that all
+// pixel rows and column slices outside of it have less than
+// min_fraction of the pixels black, and within max_skew_gradient fraction
+// of the pixels on the inside, there are at least max_fraction of the
+// pixels black. In other words, the inside of the rectangle looks roughly
+// rectangular, and the outside of it looks like extra bits.
+// The rectangle is x=[*x_start,*x_end), y=[*y_start,*y_end), and the four
+// outputs are written even when false is returned. The search repeats while
+// any edge is newly found, so a cut on one edge can enable a cut on another.
+bool ImageFind::pixNearlyRectangular(Pix* pix,
+                                     double min_fraction, double max_fraction,
+                                     double max_skew_gradient,
+                                     int* x_start, int* y_start,
+                                     int* x_end, int* y_end) {
+  ASSERT_HOST(pix != nullptr);
+  // Start from the full extent of the pix.
+  *x_start = 0;
+  *x_end = pixGetWidth(pix);
+  *y_start = 0;
+  *y_end = pixGetHeight(pix);
+
+  uint32_t* data = pixGetData(pix);
+  int wpl = pixGetWpl(pix);
+  bool any_cut = false;
+  // Each flag latches once the scan for that edge has succeeded.
+  bool left_done = false;
+  bool right_done = false;
+  bool top_done = false;
+  bool bottom_done = false;
+  do {
+    any_cut = false;
+    // Find the top/bottom edges.
+    int width = *x_end - *x_start;
+    int min_count = static_cast<int>(width * min_fraction);
+    int max_count = static_cast<int>(width * max_fraction);
+    int edge_width = static_cast<int>(width * max_skew_gradient);
+    if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
+                     max_count, *y_end, 1, y_start) && !top_done) {
+      top_done = true;
+      any_cut = true;
+    }
+    --(*y_end);  // Point at the last row so the upward scan starts on it.
+    if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
+                     max_count, *y_start, -1, y_end) && !bottom_done) {
+      bottom_done = true;
+      any_cut = true;
+    }
+    ++(*y_end);  // Back to an exclusive end.
+
+    // Find the left/right edges.
+    int height = *y_end - *y_start;
+    min_count = static_cast<int>(height * min_fraction);
+    max_count = static_cast<int>(height * max_fraction);
+    edge_width = static_cast<int>(height * max_skew_gradient);
+    if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
+                     max_count, *x_end, 1, x_start) && !left_done) {
+      left_done = true;
+      any_cut = true;
+    }
+    --(*x_end);  // Point at the last column for the leftward scan.
+    if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
+                     max_count, *x_start, -1, x_end) && !right_done) {
+      right_done = true;
+      any_cut = true;
+    }
+    ++(*x_end);  // Back to an exclusive end.
+  } while (any_cut);
+
+  // All edges must satisfy the condition of sharp gradient in pixel density
+  // in order for the full rectangle to be present.
+  return left_done && right_done && top_done && bottom_done;
+}
+
+// Shrinks the rectangle x=[*x_start,*x_end), y=[*y_start,*y_end) inwards
+// until its sides bound the black pixels of pix found within the original
+// rectangle. Returns false, leaving the rectangle as it was, if the
+// rectangle contains no black pixels at all.
+bool ImageFind::BoundsWithinRect(Pix* pix, int* x_start, int* y_start,
+                                 int* x_end, int* y_end) {
+  Box* search_area = boxCreate(*x_start, *y_start, *x_end - *x_start,
+                               *y_end - *y_start);
+  Box* clipped = nullptr;
+  pixClipBoxToForeground(pix, search_area, nullptr, &clipped);
+  boxDestroy(&search_area);
+  if (clipped == nullptr)
+    return false;
+
+  l_int32 left, top, box_width, box_height;
+  boxGetGeometry(clipped, &left, &top, &box_width, &box_height);
+  boxDestroy(&clipped);
+  *x_start = left;
+  *y_start = top;
+  *x_end = left + box_width;
+  *y_end = top + box_height;
+  return true;
+}
+
+// Returns the squared Euclidean distance, in 3-D (RGB) space, of point from
+// the line through line1 and line2. The alpha entries play no part.
+// Returns 0 when line1 and line2 have the same RGB value.
+double ImageFind::ColorDistanceFromLine(const uint8_t* line1,
+                                        const uint8_t* line2,
+                                        const uint8_t* point) {
+  // Direction of the line, and the offset of point, both relative to line1.
+  int direction[kRGBRMSColors];
+  int offset[kRGBRMSColors];
+  for (int c = 0; c < kRGBRMSColors; ++c) {
+    const int origin = line1[c];
+    direction[c] = static_cast<int>(line2[c]) - origin;
+    offset[c] = static_cast<int>(point[c]) - origin;
+  }
+  direction[L_ALPHA_CHANNEL] = 0;
+  double direction_sq = 0.0;
+  for (int c = 0; c < kRGBRMSColors; ++c)
+    direction_sq += static_cast<double>(direction[c]) * direction[c];
+  if (direction_sq == 0.0)
+    return 0.0;  // The two line points coincide in RGB.
+
+  // |direction x offset|^2 / |direction|^2 is the squared distance.
+  const double cross_red =
+      direction[COLOR_GREEN] * offset[COLOR_BLUE] -
+      direction[COLOR_BLUE] * offset[COLOR_GREEN];
+  const double cross_green =
+      direction[COLOR_BLUE] * offset[COLOR_RED] -
+      direction[COLOR_RED] * offset[COLOR_BLUE];
+  const double cross_blue =
+      direction[COLOR_RED] * offset[COLOR_GREEN] -
+      direction[COLOR_GREEN] * offset[COLOR_RED];
+  return (cross_red * cross_red + cross_green * cross_green +
+          cross_blue * cross_blue) / direction_sq;
+}
+
+
+// Packs the given r, g, b values into leptonica's combined pixel code.
+uint32_t ImageFind::ComposeRGB(uint32_t r, uint32_t g, uint32_t b) {
+  l_uint32 composed;
+  composeRGBPixel(r, g, b, &composed);
+  return composed;
+}
+
+// Returns the input value clipped to a uint8_t; NaN is mapped to 0.
+uint8_t ImageFind::ClipToByte(double pixel) {
+  // Negated comparison so that NaN, for which every comparison is false,
+  // is caught here instead of reaching the cast, where it is undefined.
+  if (!(pixel >= 0.0))
+    return 0;
+  return pixel >= 255.0 ? 255 : static_cast<uint8_t>(pixel);
+}
+
+// Computes the light and dark extremes of color in the given rectangle of
+// the given pix, which is factor smaller than the coordinate system in rect.
+// color1 and color2 are taken at the lower and upper 8th-ile of the most
+// deviant of R, G and B, with the other 2 channels computed by linear fit
+// against it and alpha set to the summed rms of the two fits scaled by
+// kRMSFitScaling. If that 8th-ile range is below kMinColorDifference, both
+// colors are the channel medians with alpha 0. A too-small padded rectangle
+// writes nothing. If color_map1 is not null then it, color_map2 and rms_map
+// (all assumed to be the same scale as pix) get the padded rect filled with
+// the two colors and the alpha of color1 respectively.
+void ImageFind::ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor,
+                                       Pix* color_map1, Pix* color_map2,
+                                       Pix* rms_map,
+                                       uint8_t* color1, uint8_t* color2) {
+  ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32);
+  // Pad the rectangle outwards by 2 (scaled) pixels if possible to get more
+  // background.
+  int width = pixGetWidth(pix);
+  int height = pixGetHeight(pix);
+  int left_pad = std::max(rect.left() - 2 * factor, 0) / factor;
+  int top_pad = (rect.top() + 2 * factor + (factor - 1)) / factor;
+  top_pad = std::min(height, top_pad);
+  int right_pad = (rect.right() + 2 * factor + (factor - 1)) / factor;
+  right_pad = std::min(width, right_pad);
+  int bottom_pad = std::max(rect.bottom() - 2 * factor, 0) / factor;
+  int width_pad = right_pad - left_pad;
+  int height_pad = top_pad - bottom_pad;
+  if (width_pad < 1 || height_pad < 1 || width_pad + height_pad < 4)
+    return;
+  // Now crop the pix to the rectangle, flipping y to count down from the top.
+  Box* scaled_box = boxCreate(left_pad, height - top_pad,
+                              width_pad, height_pad);
+  Pix* scaled = pixClipRectangle(pix, scaled_box, nullptr);
+
+  // Compute per-channel stats over the whole of the clipped image.
+  STATS red_stats(0, 256);
+  STATS green_stats(0, 256);
+  STATS blue_stats(0, 256);
+  uint32_t* data = pixGetData(scaled);
+  ASSERT_HOST(pixGetWpl(scaled) == width_pad);  // Lets ++data walk all rows.
+  for (int y = 0; y < height_pad; ++y) {
+    for (int x = 0; x < width_pad; ++x, ++data) {
+      int r = GET_DATA_BYTE(data, COLOR_RED);
+      int g = GET_DATA_BYTE(data, COLOR_GREEN);
+      int b = GET_DATA_BYTE(data, COLOR_BLUE);
+      red_stats.add(r, 1);
+      green_stats.add(g, 1);
+      blue_stats.add(b, 1);
+    }
+  }
+  // Find the RGB component with the greatest 8th-ile-range.
+  // 8th-iles are used instead of quartiles to get closer to the true
+  // foreground color, which is going to be faint at best because of the
+  // pre-scaling of the input image.
+  int best_l8 = static_cast<int>(red_stats.ile(0.125f));
+  int best_u8 = static_cast<int>(ceil(red_stats.ile(0.875f)));
+  int best_i8r = best_u8 - best_l8;
+  int x_color = COLOR_RED;     // Channel with the widest range so far.
+  int y1_color = COLOR_GREEN;  // The two channels to be fitted against it.
+  int y2_color = COLOR_BLUE;
+  int l8 = static_cast<int>(green_stats.ile(0.125f));
+  int u8 = static_cast<int>(ceil(green_stats.ile(0.875f)));
+  if (u8 - l8 > best_i8r) {
+    best_i8r = u8 - l8;
+    best_l8 = l8;
+    best_u8 = u8;
+    x_color = COLOR_GREEN;
+    y1_color = COLOR_RED;
+  }
+  l8 = static_cast<int>(blue_stats.ile(0.125f));
+  u8 = static_cast<int>(ceil(blue_stats.ile(0.875f)));
+  if (u8 - l8 > best_i8r) {
+    best_i8r = u8 - l8;
+    best_l8 = l8;
+    best_u8 = u8;
+    x_color = COLOR_BLUE;
+    y1_color = COLOR_GREEN;
+    y2_color = COLOR_RED;
+  }
+  if (best_i8r >= kMinColorDifference) {
+    LLSQ line1;
+    LLSQ line2;
+    uint32_t* data = pixGetData(scaled);  // Fresh pass; shadows outer data.
+    for (int im_y = 0; im_y < height_pad; ++im_y) {
+      for (int im_x = 0; im_x < width_pad; ++im_x, ++data) {
+        int x = GET_DATA_BYTE(data, x_color);
+        int y1 = GET_DATA_BYTE(data, y1_color);
+        int y2 = GET_DATA_BYTE(data, y2_color);
+        line1.add(x, y1);
+        line2.add(x, y2);
+      }
+    }
+    double m1 = line1.m();
+    double c1 = line1.c(m1);
+    double m2 = line2.m();
+    double c2 = line2.c(m2);
+    double rms = line1.rms(m1, c1) + line2.rms(m2, c2);
+    rms *= kRMSFitScaling;
+    // Save the results: each color sits on the fitted lines at an 8th-ile.
+    color1[x_color] = ClipToByte(best_l8);
+    color1[y1_color] = ClipToByte(m1 * best_l8 + c1 + 0.5);
+    color1[y2_color] = ClipToByte(m2 * best_l8 + c2 + 0.5);
+    color1[L_ALPHA_CHANNEL] = ClipToByte(rms);
+    color2[x_color] = ClipToByte(best_u8);
+    color2[y1_color] = ClipToByte(m1 * best_u8 + c1 + 0.5);
+    color2[y2_color] = ClipToByte(m2 * best_u8 + c2 + 0.5);
+    color2[L_ALPHA_CHANNEL] = ClipToByte(rms);
+  } else {
+    // There is only one color.
+    color1[COLOR_RED] = ClipToByte(red_stats.median());
+    color1[COLOR_GREEN] = ClipToByte(green_stats.median());
+    color1[COLOR_BLUE] = ClipToByte(blue_stats.median());
+    color1[L_ALPHA_CHANNEL] = 0;
+    memcpy(color2, color1, 4);
+  }
+  if (color_map1 != nullptr) {
+    pixSetInRectArbitrary(color_map1, scaled_box,
+                          ComposeRGB(color1[COLOR_RED],
+                              color1[COLOR_GREEN],
+                              color1[COLOR_BLUE]));
+    pixSetInRectArbitrary(color_map2, scaled_box,
+                          ComposeRGB(color2[COLOR_RED],
+                              color2[COLOR_GREEN],
+                              color2[COLOR_BLUE]));
+    pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]);
+  }
+  pixDestroy(&scaled);
+  boxDestroy(&scaled_box);
+}
+
+// ================ CUTTING POLYGONAL IMAGES FROM A RECTANGLE ================
+// The following functions are responsible for cutting a polygonal image from
+// a rectangle: CountPixelsInRotatedBox, AttemptToShrinkBox, CutChunkFromParts
+// with DivideImageIntoParts as the master.
+// Problem statement:
+// We start with a single connected component from the image mask: we get
+// a Pix of the component, and its location on the page (im_box).
+// The objective of cutting a polygonal image from its rectangle is to avoid
+// interfering text, but not text that completely overlaps the image.
+//     ------------------------------      ------------------------------
+//     |   Single input partition   |      | 1 Cut up output partitions |
+//     |                            |      ------------------------------
+//   Av|oid                         |    Avoid |                        |
+//     |                            |          |________________________|
+//  Int|erfering                    |   Interfering  |                  |
+//     |                            |           _____|__________________|
+//    T|ext                         |     Text |                        |
+//     |        Text-on-image       |          |     Text-on-image      |
+//     ------------------------------          --------------------------
+// DivideImageIntoParts does this by building a ColPartition_LIST (not in the
+// grid) with each ColPartition representing one of the rectangles needed,
+// starting with a single rectangle for the whole image component, and cutting
+// bits out of it with CutChunkFromParts as needed to avoid text. The output
+// ColPartitions are supposed to be ordered from top to bottom.
+
+// The problem is complicated by the fact that we have rotated the coordinate
+// system to make text lines horizontal, so if we need to look at the component
+// image, we have to rotate the coordinates. Throughout the functions in this
+// section im_box is the rectangle representing the image component in the
+// rotated page coordinates (where we are building our output ColPartitions),
+// rotation is the rotation that we used to get there, and rerotation is the
+// rotation required to get back to original page image coordinates.
+// To get to coordinates in the component image, pix, we rotate the im_box,
+// the point we want to locate, and subtract the rotated point from the top-left
+// of the rotated im_box.
+// im_box is therefore essential to calculating coordinates within the pix.
+
+// Returns true if there are no black pixels in between the boxes, or if the
+// boxes have no positive gap between them. The im_box must represent the
+// bounding box of the pix in tesseract coordinates, which may be negative,
+// due to rotations to make the textlines horizontal. The boxes are rotated by
+// rotation, which should undo such rotations, before mapping onto the pix.
+bool ImageFind::BlankImageInBetween(const TBOX& box1, const TBOX& box2,
+                                    const TBOX& im_box, const FCOORD& rotation,
+                                    Pix* pix) {
+  const int gap_x = box1.x_gap(box2);
+  const int gap_y = box1.y_gap(box2);
+  const bool side_by_side = gap_x >= gap_y;
+  if ((side_by_side ? gap_x : gap_y) <= 0) return true;  // Nothing between.
+  TBOX between(box1);
+  between += box2;
+  if (side_by_side) {
+    between.set_left(std::min(box1.right(), box2.right()));
+    between.set_right(std::max(box1.left(), box2.left()));
+  } else {
+    between.set_top(std::max(box1.bottom(), box2.bottom()));
+    between.set_bottom(std::min(box1.top(), box2.top()));
+  }
+  return CountPixelsInRotatedBox(between, im_box, rotation, pix) == 0;
+}
+
+// Returns the number of black pixels in box in the pix (0 if box misses
+// im_box). rotation, pix and im_box are defined in the large comment above.
+int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX& im_box,
+                                       const FCOORD& rotation, Pix* pix) {
+  box &= im_box;  // In-place intersection with the image box.
+  if (box.null_box())
+    return 0;
+  box.rotate(rotation);
+  TBOX rotated_im_box(im_box);
+  rotated_im_box.rotate(rotation);
+  Pix* rect_pix = pixCreate(box.width(), box.height(), 1);
+  if (rect_pix == nullptr) return 0;  // No scratch pix, nothing to count.
+  pixRasterop(rect_pix, 0, 0, box.width(), box.height(),
+              PIX_SRC, pix, box.left() - rotated_im_box.left(),
+              rotated_im_box.top() - box.top());
+  l_int32 result = 0;  // Stays 0 should pixCountPixels fail to write it.
+  pixCountPixels(rect_pix, &result, nullptr);
+  pixDestroy(&rect_pix);
+  return result;
+}
+
+// Given a slice known to contain some black pixels, though not necessarily
+// across its whole width, pulls in the left and right bounds of slice to the
+// outermost columns holding a black pixel. The y bounds are left alone.
+// rotation, rerotation, pix and im_box are defined in the large comment above.
+static void AttemptToShrinkBox(const FCOORD& rotation, const FCOORD& rerotation,
+                               const TBOX& im_box, Pix* pix, TBOX* slice) {
+  TBOX image_frame(im_box);
+  image_frame.rotate(rerotation);
+  const int origin_x = image_frame.left();
+  const int origin_y = image_frame.top();
+  TBOX shrunk(*slice);
+  shrunk.rotate(rerotation);
+  // Into pix coordinates: x from the image's left, y down from its top.
+  int x0 = shrunk.left() - origin_x;
+  int x1 = shrunk.right() - origin_x;
+  int y0 = origin_y - shrunk.top();
+  int y1 = origin_y - shrunk.bottom();
+  ImageFind::BoundsWithinRect(pix, &x0, &y0, &x1, &y1);
+  shrunk.set_to_given_coords(x0 + origin_x, origin_y - y1,
+                             x1 + origin_x, origin_y - y0);
+  shrunk.rotate(rotation);
+  slice->set_left(shrunk.left());
+  slice->set_right(shrunk.right());
+}
+
+// The meat of cutting a polygonal image around text.
+// This function covers the general case of cutting a box out of a box
+// as shown:
+// Input                               Output
+// ------------------------------      ------------------------------
+// |   Single input partition   |      | 1 Cut up output partitions |
+// |                            |      ------------------------------
+// |         ----------         |      ---------           ----------
+// |         |  box   |         |      |   2   |   box     |    3   |
+// |         |        |         |      |       |  is cut   |        |
+// |         ----------         |      ---------   out     ----------
+// |                            |      ------------------------------
+// |                            |      |   4                        |
+// ------------------------------      ------------------------------
+// In the context in which this function is used, at most 3 of the above
+// output boxes will be created, as the overlapping box is never contained by
+// the input.
+// The cut is executed for each element of part_list whose bounding box
+// overlaps box. Each such ColPartition is replaced in place in the list by
+// its pieces, in the order 1-4 shown above, so if no holes are ever created,
+// the output will be in top-to-bottom order, but in extreme cases, hole
+// creation is possible, and the output order may cause strange block polygons.
+// part_list must not be empty on entry; this is checked with ASSERT_HOST.
+// rotation, rerotation, pix and im_box are defined in the large comment above.
+static void CutChunkFromParts(const TBOX& box, const TBOX& im_box,
+                              const FCOORD& rotation, const FCOORD& rerotation,
+                              Pix* pix, ColPartition_LIST* part_list) {
+  ASSERT_HOST(!part_list->empty());
+  ColPartition_IT part_it(part_list);
+  do {
+    ColPartition* part = part_it.data();
+    TBOX part_box = part->bounding_box();
+    if (part_box.overlap(box)) {
+      // This part must be cut and replaced with the remains. There are
+      // up to 4 pieces to be made, each inserted with add_before_stay_put
+      // so that the iterator stays on the part being cut. A piece that has
+      // no black pixels left in pix is simply not made.
+      // Piece 1: the full-width strip of the part above box.
+      if (box.top() < part_box.top()) {
+        TBOX slice(part_box);
+        slice.set_bottom(box.top());
+        if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
+                                               pix) > 0) {
+          AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
+          part_it.add_before_stay_put(
+              ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE,
+                                          BTFT_NONTEXT));
+        }
+      }
+      // Piece 2: left of box, clipped vertically so it cannot overlap 1 or 4.
+      if (box.left() > part_box.left()) {
+        TBOX slice(part_box);
+        slice.set_right(box.left());
+        if (box.top() < part_box.top())  // Piece 1 exists: stop below it.
+          slice.set_top(box.top());
+        if (box.bottom() > part_box.bottom())  // Piece 4 exists: stop above it.
+          slice.set_bottom(box.bottom());
+        if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
+                                               pix) > 0) {
+          AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
+          part_it.add_before_stay_put(
+              ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE,
+                                          BTFT_NONTEXT));
+        }
+      }
+      // Piece 3: right of box, clipped vertically in the same way as piece 2.
+      if (box.right() < part_box.right()) {
+        TBOX slice(part_box);
+        slice.set_left(box.right());
+        if (box.top() < part_box.top())
+          slice.set_top(box.top());
+        if (box.bottom() > part_box.bottom())
+          slice.set_bottom(box.bottom());
+        if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
+                                               pix) > 0) {
+          AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
+          part_it.add_before_stay_put(
+              ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE,
+                                          BTFT_NONTEXT));
+        }
+      }
+      // Piece 4: the full-width strip of the part below box.
+      if (box.bottom() > part_box.bottom()) {
+        TBOX slice(part_box);
+        slice.set_top(box.bottom());
+        if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
+                                               pix) > 0) {
+          AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
+          part_it.add_before_stay_put(
+              ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE,
+                                          BTFT_NONTEXT));
+        }
+      }
+      part->DeleteBoxes();  // The part that was cut is discarded entirely.
+      delete part_it.extract();
+    }
+    part_it.forward();
+  } while (!part_it.at_first());  // Until the iterator is back at the start.
+}
+
+// Starts with the bounding box of the image component and cuts it up
+// so that it doesn't intersect text where possible. The pieces are returned
+// in part_list, which is left empty if a text partition covers all of im_box.
+// Strong text contained in im_box and at least half covered by black pixels
+// is marked as text on image, and does not cause a division of the image.
+// For more detail see the large comment above on cutting polygonal images.
+// rotation, rerotation, pix and im_box are defined in the large comment above.
+static void DivideImageIntoParts(const TBOX& im_box, const FCOORD& rotation,
+                                 const FCOORD& rerotation, Pix* pix,
+                                 ColPartitionGridSearch* rectsearch,
+                                 ColPartition_LIST* part_list) {
+  // Add the full im_box partition to the list to begin with.
+  ColPartition* pix_part = ColPartition::FakePartition(im_box, PT_UNKNOWN,
+                                                       BRT_RECTIMAGE,
+                                                       BTFT_NONTEXT);
+  ColPartition_IT part_it(part_list);
+  part_it.add_after_then_move(pix_part);
+
+  rectsearch->StartRectSearch(im_box);
+  ColPartition* part;
+  while ((part = rectsearch->NextRectSearch()) != nullptr) {
+    TBOX part_box = part->bounding_box();
+    if (part_box.contains(im_box) && part->flow() >= BTFT_CHAIN) {
+      // This image is completely covered by an existing text partition,
+      // so throw away every piece made so far, leaving part_list empty.
+      for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
+        ColPartition* pix_part = part_it.extract();  // Shadows outer pix_part.
+        pix_part->DeleteBoxes();
+        delete pix_part;
+      }
+    } else if (part->flow() == BTFT_STRONG_CHAIN) {
+      // Strong text was found by the search: clip its box to the image box.
+      TBOX overlap_box = part_box.intersection(im_box);
+      // Count the black pixels of the image inside the clipped box.
+      int black_area = ImageFind::CountPixelsInRotatedBox(overlap_box, im_box,
+                                                          rerotation, pix);
+      if (black_area * 2 < part_box.area() || !im_box.contains(part_box)) {
+        // Less than half black, or sticking out of the image:
+        // eat a piece out of the image, padded so the result looks decent.
+        int padding = part->blob_type() == BRT_VERT_TEXT
+                    ? part_box.width() : part_box.height();
+        part_box.set_top(part_box.top() + padding / 2);
+        part_box.set_bottom(part_box.bottom() - padding / 2);
+        CutChunkFromParts(part_box, im_box, rotation, rerotation,
+                          pix, part_list);
+      } else {
+        // Strong overlap with the black area, so call it text on image.
+        part->set_flow(BTFT_TEXT_ON_IMAGE);
+      }
+    }
+    if (part_list->empty()) {  // Nothing of the image is left to divide.
+      break;
+    }
+  }
+}
+
+// Search for the rightmost text that overlaps vertically and is to the left
+// of the given box, but within the given left limit. Returns the right edge
+// of that text, or left_limit unchanged if no such text is found.
+static int ExpandImageLeft(const TBOX& box, int left_limit,
+                           ColPartitionGrid* part_grid) {
+  ColPartitionGridSearch search(part_grid);
+  // Pass 1: side search from the left edge of box, stopping at the first
+  // CHAIN/STRONG_CHAIN partition that overlaps box vertically.
+  search.StartSideSearch(box.left(), box.bottom(), box.top());
+  ColPartition* hit = search.NextSideSearch(true);
+  for (; hit != nullptr; hit = search.NextSideSearch(true)) {
+    const auto flow = hit->flow();
+    if (flow != BTFT_STRONG_CHAIN && flow != BTFT_CHAIN) continue;
+    const TBOX& hit_box = hit->bounding_box();
+    if (hit_box.y_gap(box) >= 0) continue;
+    const int edge = hit_box.right();
+    if (edge > left_limit && edge < box.left()) left_limit = edge;
+    break;
+  }
+  // The side search ran out without meeting such text: nothing to refine.
+  if (hit == nullptr) return left_limit;
+  // Pass 2: rectangle search between the (possibly tightened) limit and box
+  // for any text that is nearer to box than the one already found.
+  const TBOX search_box(left_limit, box.bottom(), box.left(), box.top());
+  search.StartRectSearch(search_box);
+  while ((hit = search.NextRectSearch()) != nullptr) {
+    const auto flow = hit->flow();
+    if (flow != BTFT_STRONG_CHAIN && flow != BTFT_CHAIN) continue;
+    const TBOX& hit_box = hit->bounding_box();
+    if (hit_box.y_gap(box) >= 0) continue;
+    const int edge = hit_box.right();
+    if (edge > left_limit && edge < box.left()) left_limit = edge;
+  }
+  return left_limit;
+}
+
+// Search for the leftmost text that overlaps vertically and is to the right
+// of the given box, but within the given right limit. Returns the left edge
+// of that text, or right_limit unchanged if no such text is found.
+static int ExpandImageRight(const TBOX& box, int right_limit,
+                            ColPartitionGrid* part_grid) {
+  ColPartitionGridSearch search(part_grid);
+  // Pass 1: side search from the right edge of box, stopping at the first
+  // CHAIN/STRONG_CHAIN partition that overlaps box vertically.
+  search.StartSideSearch(box.right(), box.bottom(), box.top());
+  ColPartition* hit = search.NextSideSearch(false);
+  for (; hit != nullptr; hit = search.NextSideSearch(false)) {
+    const auto flow = hit->flow();
+    if (flow != BTFT_STRONG_CHAIN && flow != BTFT_CHAIN) continue;
+    const TBOX& hit_box = hit->bounding_box();
+    if (hit_box.y_gap(box) >= 0) continue;
+    const int edge = hit_box.left();
+    if (edge < right_limit && edge > box.right()) right_limit = edge;
+    break;
+  }
+  if (hit == nullptr) return right_limit;  // No such text: nothing to refine.
+  // Pass 2: rectangle search for nearer text. The rectangle starts at
+  // box.left(), so hits inside box itself are rejected by the edge test.
+  const TBOX search_box(box.left(), box.bottom(), right_limit, box.top());
+  search.StartRectSearch(search_box);
+  while ((hit = search.NextRectSearch()) != nullptr) {
+    const auto flow = hit->flow();
+    if (flow != BTFT_STRONG_CHAIN && flow != BTFT_CHAIN) continue;
+    const TBOX& hit_box = hit->bounding_box();
+    if (hit_box.y_gap(box) >= 0) continue;
+    const int edge = hit_box.left();
+    if (edge < right_limit && edge > box.right()) right_limit = edge;
+  }
+  return right_limit;
+}
+
+// Search for the topmost text that overlaps horizontally and is below
+// the given box, but within the given bottom limit. Returns the top edge
+// of that text, or bottom_limit unchanged if no such text is found.
+static int ExpandImageBottom(const TBOX& box, int bottom_limit,
+                             ColPartitionGrid* part_grid) {
+  ColPartitionGridSearch search(part_grid);
+  // Pass 1: vertical search from the bottom edge of box, stopping at the
+  // first CHAIN/STRONG_CHAIN partition that overlaps box horizontally.
+  search.StartVerticalSearch(box.left(), box.right(), box.bottom());
+  ColPartition* hit = search.NextVerticalSearch(true);
+  for (; hit != nullptr; hit = search.NextVerticalSearch(true)) {
+    const auto flow = hit->flow();
+    if (flow != BTFT_STRONG_CHAIN && flow != BTFT_CHAIN) continue;
+    const TBOX& hit_box = hit->bounding_box();
+    if (hit_box.x_gap(box) >= 0) continue;
+    const int edge = hit_box.top();
+    if (edge > bottom_limit && edge < box.bottom()) bottom_limit = edge;
+    break;
+  }
+  if (hit == nullptr) return bottom_limit;  // No such text: nothing to refine.
+  // Pass 2: rectangle search between the (possibly tightened) limit and box
+  // for any text that is nearer to box than the one already found.
+  const TBOX search_box(box.left(), bottom_limit, box.right(), box.bottom());
+  search.StartRectSearch(search_box);
+  while ((hit = search.NextRectSearch()) != nullptr) {
+    const auto flow = hit->flow();
+    if (flow != BTFT_STRONG_CHAIN && flow != BTFT_CHAIN) continue;
+    const TBOX& hit_box = hit->bounding_box();
+    if (hit_box.x_gap(box) >= 0) continue;
+    const int edge = hit_box.top();
+    if (edge > bottom_limit && edge < box.bottom()) bottom_limit = edge;
+  }
+  return bottom_limit;
+}
+
+// Search for the bottommost text that overlaps horizontally and is above
+// the given box, but within the given top limit. Returns the bottom edge
+// of that text, or top_limit unchanged if no such text is found.
+static int ExpandImageTop(const TBOX& box, int top_limit,
+                          ColPartitionGrid* part_grid) {
+  ColPartitionGridSearch search(part_grid);
+  // Pass 1: vertical search from the top edge of box, stopping at the
+  // first CHAIN/STRONG_CHAIN partition that overlaps box horizontally.
+  search.StartVerticalSearch(box.left(), box.right(), box.top());
+  ColPartition* hit = search.NextVerticalSearch(false);
+  for (; hit != nullptr; hit = search.NextVerticalSearch(false)) {
+    const auto flow = hit->flow();
+    if (flow != BTFT_STRONG_CHAIN && flow != BTFT_CHAIN) continue;
+    const TBOX& hit_box = hit->bounding_box();
+    if (hit_box.x_gap(box) >= 0) continue;
+    const int edge = hit_box.bottom();
+    if (edge < top_limit && edge > box.top()) top_limit = edge;
+    break;
+  }
+  if (hit == nullptr) return top_limit;  // No such text: nothing to refine.
+  // Pass 2: rectangle search between box and the (possibly tightened) limit
+  // for any text that is nearer to box than the one already found.
+  const TBOX search_box(box.left(), box.top(), box.right(), top_limit);
+  search.StartRectSearch(search_box);
+  while ((hit = search.NextRectSearch()) != nullptr) {
+    const auto flow = hit->flow();
+    if (flow != BTFT_STRONG_CHAIN && flow != BTFT_CHAIN) continue;
+    const TBOX& hit_box = hit->bounding_box();
+    if (hit_box.x_gap(box) >= 0) continue;
+    const int edge = hit_box.bottom();
+    if (edge < top_limit && edge > box.top()) top_limit = edge;
+  }
+  return top_limit;
+}
+
+// Grows im_box in the single direction dir until it meets text, but never
+// beyond the matching edge of limit_box. The grown box is written to
+// *expanded_box and the increase in area is returned. For any other value
+// of dir, *expanded_box is set to im_box and 0 is returned.
+static int ExpandImageDir(BlobNeighbourDir dir, const TBOX& im_box,
+                          const TBOX& limit_box,
+                          ColPartitionGrid* part_grid, TBOX* expanded_box) {
+  // Start from the unexpanded box; at most one edge is moved below.
+  *expanded_box = im_box;
+  if (dir == BND_LEFT) {
+    const int new_left = ExpandImageLeft(im_box, limit_box.left(), part_grid);
+    expanded_box->set_left(new_left);
+  } else if (dir == BND_RIGHT) {
+    const int new_right =
+        ExpandImageRight(im_box, limit_box.right(), part_grid);
+    expanded_box->set_right(new_right);
+  } else if (dir == BND_ABOVE) {
+    const int new_top = ExpandImageTop(im_box, limit_box.top(), part_grid);
+    expanded_box->set_top(new_top);
+  } else if (dir == BND_BELOW) {
+    const int new_bottom =
+        ExpandImageBottom(im_box, limit_box.bottom(), part_grid);
+    expanded_box->set_bottom(new_bottom);
+  } else {
+    return 0;  // Not one of the four directions handled here.
+  }
+  const int area_gain = expanded_box->area() - im_box.area();
+  return area_gain;
+}
+
+// Expands *im_box into surrounding non-text until it touches text or the
+// bounds of part_grid. Each round tries every direction not yet used and
+// commits the one with the smallest increase in area, as a heuristic to
+// find the best rectangle by expanding the most constrained direction first.
+static void MaximalImageBoundingBox(ColPartitionGrid* part_grid, TBOX* im_box) {
+  // The grid bounds are the outermost limit of any expansion.
+  const TBOX limit_box(part_grid->bleft().x(), part_grid->bleft().y(),
+                       part_grid->tright().x(), part_grid->tright().y());
+  bool dir_used[BND_COUNT] = {};
+  TBOX grown_box(*im_box);
+  for (int round = 0; round < BND_COUNT; ++round) {
+    // Trial-expand in every unused direction, remembering each result.
+    TBOX trial_boxes[BND_COUNT];
+    BlobNeighbourDir cheapest_dir = BND_LEFT;
+    int cheapest_delta = -1;  // Negative means no candidate seen yet.
+    for (int d = 0; d < BND_COUNT; ++d) {
+      const auto dir = static_cast<BlobNeighbourDir>(d);
+      if (dir_used[dir]) {
+        continue;
+      }
+      const int delta = ExpandImageDir(dir, grown_box, limit_box, part_grid,
+                                       &trial_boxes[dir]);
+      if (cheapest_delta < 0 || delta < cheapest_delta) {
+        cheapest_delta = delta;
+        cheapest_dir = dir;
+      }
+    }
+    // Commit the cheapest expansion and retire that direction.
+    dir_used[cheapest_dir] = true;
+    grown_box = trial_boxes[cheapest_dir];
+  }
+  *im_box = grown_box;
+}
+
+// Deletes part after dealing with its blobs according to who owns them.
+// Image partitions (BRT_RECTIMAGE/BRT_POLYIMAGE) own their boxes, so those
+// are deleted outright. For any other type the blobs are marked as noise,
+// so they get deleted later, and are then disowned before part is deleted.
+static void DeletePartition(ColPartition* part) {
+  const BlobRegionType type = part->blob_type();
+  const bool owns_boxes = type == BRT_RECTIMAGE || type == BRT_POLYIMAGE;
+  if (!owns_boxes) {
+    // Once marked, the blobs will be swept up by TidyBlobs.
+    part->set_flow(BTFT_NONTEXT);
+    part->set_blob_type(BRT_NOISE);
+    part->SetBlobTypes();
+    part->DisownBoxes();  // Created before FindImagePartitions.
+  } else {
+    part->DeleteBoxes();  // From a previous iteration.
+  }
+  delete part;
+}
+
+// The meat of joining fragmented images and consuming ColPartitions of
+// uncertain type.
+// *part_ptr is an input/output BRT_RECTIMAGE ColPartition that is to be
+// expanded to consume overlapping and nearby ColPartitions of uncertain type
+// and other BRT_RECTIMAGE partitions, but NOT to be expanded beyond
+// max_image_box. *part_ptr is NOT in the part_grid.
+// rectsearch is already constructed on the part_grid, and is used for
+// searching for overlapping and nearby ColPartitions.
+// ExpandImageIntoParts is called iteratively until it returns false. Each
+// call deletes everything fully contained within part_ptr's bounding box and
+// absorbs the nearest other candidate, returning true if there was one.
+// TODO(rays) what if it just eats everything inside max_image_box in one go?
+static bool ExpandImageIntoParts(const TBOX& max_image_box,
+                                 ColPartitionGridSearch* rectsearch,
+                                 ColPartitionGrid* part_grid,
+                                 ColPartition** part_ptr) {
+  ColPartition* image_part = *part_ptr;
+  TBOX im_part_box = image_part->bounding_box();
+  if (textord_tabfind_show_images > 1) {
+    tprintf("Searching for merge with image part:");
+    im_part_box.print();
+    tprintf("Text box=");
+    max_image_box.print();
+  }
+  rectsearch->StartRectSearch(max_image_box);
+  ColPartition* part;
+  ColPartition* best_part = nullptr;
+  int best_dist = 0;  // Only meaningful once best_part is non-null.
+  while ((part = rectsearch->NextRectSearch()) != nullptr) {
+    if (textord_tabfind_show_images > 1) {
+      tprintf("Considering merge with part:");
+      part->Print();
+      if (im_part_box.contains(part->bounding_box()))
+        tprintf("Fully contained\n");
+      else if (!max_image_box.contains(part->bounding_box()))
+        tprintf("Not within text box\n");
+      else if (part->flow() == BTFT_STRONG_CHAIN)
+        tprintf("Too strong text\n");
+      else
+        tprintf("Real candidate\n");
+    }
+    if (part->flow() == BTFT_STRONG_CHAIN ||
+        part->flow() == BTFT_TEXT_ON_IMAGE ||
+        part->blob_type() == BRT_POLYIMAGE)
+      continue;  // Strong text, text on image and poly images are left alone.
+    TBOX box = part->bounding_box();
+    if (max_image_box.contains(box) && part->blob_type() != BRT_NOISE) {
+      if (im_part_box.contains(box)) {
+        // Already inside the image partition: eat it completely.
+        rectsearch->RemoveBBox();
+        DeletePartition(part);
+        continue;
+      }
+      int x_dist = std::max(0, box.x_gap(im_part_box));
+      int y_dist = std::max(0, box.y_gap(im_part_box));
+      int dist = x_dist * x_dist + y_dist * y_dist;  // Squared gap distance.
+      if (dist > box.area() || dist > im_part_box.area())
+        continue;  // Not close enough.
+      if (best_part == nullptr || dist < best_dist) {
+        // Keep the nearest part that passed all of the tests above, which
+        // is not necessarily the nearest part overall.
+        best_part = part;
+        best_dist = dist;
+      }
+    }
+  }
+  if (best_part != nullptr) {
+    // It needs expanding. We can do it without touching text.
+    TBOX box = best_part->bounding_box();
+    if (textord_tabfind_show_images > 1) {
+      tprintf("Merging image part:");
+      im_part_box.print();
+      tprintf("with part:");
+      box.print();
+    }
+    im_part_box += box;
+    *part_ptr = ColPartition::FakePartition(im_part_box, PT_UNKNOWN,
+                                            BRT_RECTIMAGE,
+                                            BTFT_NONTEXT);
+    DeletePartition(image_part);  // Superseded by the new *part_ptr.
+    part_grid->RemoveBBox(best_part);
+    DeletePartition(best_part);
+    rectsearch->RepositionIterator();  // best_part has just left the grid.
+    return true;
+  }
+  return false;
+}
+
+// Returns the sum, over every partition in part_list, of the area of the
+// intersection between box and that partition's bounding box.
+static int IntersectArea(const TBOX& box, ColPartition_LIST* part_list) {
+  ColPartition_IT it(part_list);
+  int total_area = 0;
+  // One full cycle of the list, accumulating each overlap.
+  it.mark_cycle_pt();
+  while (!it.cycled_list()) {
+    const TBOX overlap = box.intersection(it.data()->bounding_box());
+    total_area += overlap.area();
+    it.forward();
+  }
+  return total_area;
+}
+
+// part_list is a set of ColPartitions representing a polygonal image, and
+// im_box is the union of the bounding boxes of all the parts in part_list.
+// Decides whether part should be consumed by that polygonal image.
+// Returns true only if all of the following hold: part is weaker than
+// BTFT_STRONG_CHAIN, its bounding box is contained in im_box, and more than
+// half of its bounding-box area is intersected by parts from part_list.
+static bool TestWeakIntersectedPart(const TBOX& im_box,
+                                    ColPartition_LIST* part_list,
+                                    ColPartition* part) {
+  // Strong text is never consumed.
+  if (part->flow() >= BTFT_STRONG_CHAIN) {
+    return false;
+  }
+  const TBOX& part_box = part->bounding_box();
+  // Anything sticking out of the image bounds is left alone too.
+  if (!im_box.contains(part_box)) {
+    return false;
+  }
+  const int covered_area = IntersectArea(part_box, part_list);
+  return part_box.area() < 2 * covered_area;
+}
+
+// A rectangular or polygonal image has been completed in part_list, with
+// bounding box im_box. Eliminates weak text and other uncertain partitions
+// (basically anything below BTFT_STRONG_CHAIN) from both the part_grid and
+// the big_parts list, provided they are contained within im_box and
+// overlapped enough by the possibly polygonal image.
+static void EliminateWeakParts(const TBOX& im_box,
+                               ColPartitionGrid* part_grid,
+                               ColPartition_LIST* big_parts,
+                               ColPartition_LIST* part_list) {
+  // First sweep: partitions returned by a grid search over im_box.
+  ColPartitionGridSearch rectsearch(part_grid);
+  rectsearch.StartRectSearch(im_box);
+  for (ColPartition* part = rectsearch.NextRectSearch(); part != nullptr;
+       part = rectsearch.NextRectSearch()) {
+    if (!TestWeakIntersectedPart(im_box, part_list, part)) continue;
+    const BlobRegionType type = part->blob_type();
+    if (type != BRT_POLYIMAGE && type != BRT_RECTIMAGE) {
+      // The part is mostly covered, so mark it. Non-image partitions are
+      // kept hanging around to mark the image for pass2
+      part->set_flow(BTFT_NONTEXT);
+      part->set_blob_type(BRT_NOISE);
+      part->SetBlobTypes();
+      continue;
+    }
+    // Weak image partitions are taken out of the grid and destroyed.
+    rectsearch.RemoveBBox();
+    DeletePartition(part);
+  }
+  // Second sweep: the big_parts list, where weak parts are always destroyed.
+  ColPartition_IT big_it(big_parts);
+  for (big_it.mark_cycle_pt(); !big_it.cycled_list(); big_it.forward()) {
+    if (TestWeakIntersectedPart(im_box, part_list, big_it.data())) {
+      // Once marked, the blobs will be swept up by TidyBlobs.
+      DeletePartition(big_it.extract());
+    }
+  }
+}
+
+// Looks for good (CHAIN or STRONG_CHAIN) text partitions around *box.
+// Returns true, leaving *box alone, as soon as one overlaps *box itself.
+// Otherwise returns false, and if the search of *box padded by kNoisePadding
+// produced no good text at all, *box is replaced by that padded box.
+static bool ScanForOverlappingText(ColPartitionGrid* part_grid, TBOX* box) {
+  TBOX padded_box(*box);
+  padded_box.pad(kNoisePadding, kNoisePadding);
+  ColPartitionGridSearch rectsearch(part_grid);
+  rectsearch.StartRectSearch(padded_box);
+  bool padded_rect_is_clear = true;
+  for (ColPartition* part = rectsearch.NextRectSearch(); part != nullptr;
+       part = rectsearch.NextRectSearch()) {
+    const auto flow = part->flow();
+    if (flow != BTFT_CHAIN && flow != BTFT_STRONG_CHAIN) continue;
+    // Good text was returned by the padded search, so the box must not
+    // be expanded afterwards, whether or not this text overlaps *box.
+    padded_rect_is_clear = false;
+    if (box->overlap(part->bounding_box())) {
+      return true;
+    }
+  }
+  if (padded_rect_is_clear) {
+    *box = padded_box;
+  }
+  return false;
+}
+
+// Renders the boxes of image parts from the supplied list onto the image_pix,
+// except where they interfere with existing strong text in the part_grid,
+// and then deletes them. Does nothing at all if image_pix is nullptr.
+// Box coordinates are rotated by rerotate to match the image.
+static void MarkAndDeleteImageParts(const FCOORD& rerotate,
+                                    ColPartitionGrid* part_grid,
+                                    ColPartition_LIST* image_parts,
+                                    Pix* image_pix) {
+  if (image_pix == nullptr) return;
+  const int imageheight = pixGetHeight(image_pix);
+  ColPartition_IT part_it(image_parts);
+  while (!part_it.empty()) {
+    ColPartition* part = part_it.extract();
+    const BlobRegionType type = part->blob_type();
+    const bool is_image = type == BRT_RECTIMAGE || type == BRT_POLYIMAGE;
+    TBOX mark_box = part->bounding_box();
+    // The scan runs for every part, whatever its type, because it also
+    // pads mark_box when no good text is found near it.
+    const bool hits_text = ScanForOverlappingText(part_grid, &mark_box);
+    if (is_image || !hits_text) {
+      // Rotate to image coords and set the pixels; y is flipped for the pix.
+      mark_box.rotate(rerotate);
+      pixRasterop(image_pix, mark_box.left(), imageheight - mark_box.top(),
+                  mark_box.width(), mark_box.height(), PIX_SET, nullptr, 0, 0);
+    }
+    DeletePartition(part);
+    part_it.forward();
+  }
+}
+
+// Locates all the image partitions in the part_grid that were found by a
+// previous call to FindImagePartitions, together with noise partitions,
+// marks them in the image_mask, removes them from the grid, and deletes
+// them, so that FindImagePartitions can be called again to produce less
+// broken-up and less overlapping image partitions. rerotation specifies how
+// to rotate the partition coords to match the image_mask.
+void ImageFind::TransferImagePartsToImageMask(const FCOORD& rerotation,
+                                              ColPartitionGrid* part_grid,
+                                              Pix* image_mask) {
+  // Move every noise or image partition from the grid to a temporary list.
+  ColPartition_LIST transfer_list;
+  ColPartition_IT transfer_it(&transfer_list);
+  ColPartitionGridSearch gsearch(part_grid);
+  gsearch.StartFullSearch();
+  for (ColPartition* part = gsearch.NextFullSearch(); part != nullptr;
+       part = gsearch.NextFullSearch()) {
+    const BlobRegionType type = part->blob_type();
+    if (type != BRT_NOISE && type != BRT_RECTIMAGE && type != BRT_POLYIMAGE) {
+      continue;
+    }
+    transfer_it.add_after_then_move(part);
+    gsearch.RemoveBBox();
+  }
+  // Render the listed partitions to the image mask and delete them.
+  MarkAndDeleteImageParts(rerotation, part_grid, &transfer_list, image_mask);
+}
+
+// Removes from part_grid and deletes every BRT_RECTIMAGE partition narrower
+// or shorter than kMinImageFindSize. This is a separate phase after creating
+// the image partitions, as small images are needed to join larger ones.
+static void DeleteSmallImages(ColPartitionGrid* part_grid) {
+  // A null grid has nothing to delete; any real grid must be scanned.
+  if (part_grid == nullptr) return;
+  ColPartitionGridSearch gsearch(part_grid);
+  gsearch.StartFullSearch();
+  ColPartition* part;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    // Only delete rectangular images, since if it became a poly image, it
+    // is more evidence that it is somehow important.
+    if (part->blob_type() != BRT_RECTIMAGE) continue;
+    const TBOX& part_box = part->bounding_box();
+    if (part_box.width() < kMinImageFindSize ||
+        part_box.height() < kMinImageFindSize) {
+      // It is too small to keep. Just make it disappear.
+      gsearch.RemoveBBox();
+      DeletePartition(part);
+    }
+  }
+}
+
+// Runs a CC analysis on the image_pix mask image, and creates
+// image partitions from them, cutting out strong text, and merging with
+// nearby image regions such that they don't interfere with text.
+// Rotation and rerotation specify how to rotate image coords to match
+// the blob and partition coords and back again.
+// The input/output part_grid owns all the created partitions, and
+// the partitions own all the fake blobs that belong in the partitions.
+// Since the other blobs in the other partitions will be owned by the block,
+// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
+// situation and collect the image blobs.
+void ImageFind::FindImagePartitions(Pix* image_pix, const FCOORD& rotation,
+                                    const FCOORD& rerotation, TO_BLOCK* block,
+                                    TabFind* tab_grid, DebugPixa* pixa_debug,
+                                    ColPartitionGrid* part_grid,
+                                    ColPartition_LIST* big_parts) {
+  int imageheight = pixGetHeight(image_pix);
+  Boxa* boxa;
+  Pixa* pixa;
+  ConnCompAndRectangularize(image_pix, pixa_debug, &boxa, &pixa);
+  // Iterate the connected components in the image regions mask.
+  int nboxes = 0;  // Stays 0, skipping the loop, if either output is null.
+  if (boxa != nullptr && pixa != nullptr) nboxes = boxaGetCount(boxa);
+  for (int i = 0; i < nboxes; ++i) {
+    l_int32 x, y, width, height;
+    boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height);
+    Pix* pix = pixaGetPix(pixa, i, L_CLONE);
+    TBOX im_box(x, imageheight -y - height, x + width, imageheight - y);
+    im_box.rotate(rotation);  // Now matches all partitions and blobs.
+    ColPartitionGridSearch rectsearch(part_grid);
+    rectsearch.SetUniqueMode(true);
+    ColPartition_LIST part_list;  // Receives the pieces of this component.
+    DivideImageIntoParts(im_box, rotation, rerotation, pix,
+                         &rectsearch, &part_list);
+    if (textord_tabfind_show_images && pixa_debug != nullptr) {
+      pixa_debug->AddPix(pix, "ImageComponent");
+      tprintf("Component has %d parts\n", part_list.length());
+    }
+    pixDestroy(&pix);  // Drops the L_CLONE handle obtained from pixaGetPix.
+    if (!part_list.empty()) {
+      ColPartition_IT part_it(&part_list);
+      if (part_list.singleton()) {
+        // We didn't have to chop it into a polygon to fit around text, so
+        // try expanding it to merge fragmented image parts, as long as it
+        // doesn't touch strong text.
+        ColPartition* part = part_it.extract();
+        TBOX text_box(im_box);
+        MaximalImageBoundingBox(part_grid, &text_box);
+        while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part));
+        part_it.set_to_list(&part_list);
+        part_it.add_after_then_move(part);  // Put the merged result back.
+        im_box = part->bounding_box();
+      }
+      EliminateWeakParts(im_box, part_grid, big_parts, &part_list);
+      // Move the parts from part_list into the grid, linking each one
+      // to the part that follows it in the list as a partner.
+      for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
+        ColPartition* image_part = part_it.extract();
+        im_box = image_part->bounding_box();  // NOTE(review): never read.
+        part_grid->InsertBBox(true, true, image_part);
+        if (!part_it.at_last()) {
+          ColPartition* neighbour = part_it.data_relative(1);
+          image_part->AddPartner(false, neighbour);
+          neighbour->AddPartner(true, image_part);
+        }
+      }
+    }
+  }
+  boxaDestroy(&boxa);
+  pixaDestroy(&pixa);
+  DeleteSmallImages(part_grid);
+#ifndef GRAPHICS_DISABLED
+  if (textord_tabfind_show_images) {
+    ScrollView* images_win_ = part_grid->MakeWindow(1000, 400, "With Images");
+    part_grid->DisplayBoxes(images_win_);
+  }
+#endif
+}
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/imagefind.h b/tesseract/src/textord/imagefind.h
new file mode 100644
index 00000000..57be6990
--- /dev/null
+++ b/tesseract/src/textord/imagefind.h
@@ -0,0 +1,159 @@
+///////////////////////////////////////////////////////////////////////
+// File:        imagefind.h
+// Description: Class to find image and drawing regions in an image
+//              and create a corresponding list of empty blobs.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_IMAGEFIND_H_
+#define TESSERACT_TEXTORD_IMAGEFIND_H_
+
+#include "debugpixa.h"
+
+#include <cstdint>
+
+struct Boxa;
+struct Pix;
+struct Pixa;
+
+namespace tesseract {
+
+class ColPartitionGrid;
+class ColPartition_LIST;
+class TabFind;
+class TBOX;
+class FCOORD;
+class TO_BLOCK;
+class BLOBNBOX_LIST;
+
+// The ImageFind class is a simple static function wrapper class that
+// exposes the FindImages function and some useful helper functions.
+// Every member is static; the class carries no state.
+class ImageFind {
+ public:
+  // Finds image regions within the BINARY source pix (page image) and returns
+  // the image regions as a mask image.
+  // The returned pix may be nullptr, meaning no images found.
+  // If not nullptr, it must be destroyed (pixDestroy) by the caller.
+  // If textord_tabfind_show_images, debug images are appended to pixa_debug.
+  static Pix* FindImages(Pix* pix, DebugPixa* pixa_debug);
+
+  // Generates a Boxa, Pixa pair from the input binary (image mask) pix,
+  // analogous to pixConnComp, except that connected components which are nearly
+  // rectangular are replaced with solid rectangles.
+  // The returned boxa, pixa may be nullptr, meaning no images found.
+  // If not nullptr, they must be destroyed by the caller.
+  // Resolution of pix should match the source image (Tesseract::pix_binary_)
+  // so the output coordinate systems match.
+  static void ConnCompAndRectangularize(Pix* pix, DebugPixa* pixa_debug,
+                                        Boxa** boxa, Pixa** pixa);
+
+  // Returns true if there is a rectangle in the source pix, such that all
+  // pixel rows and column slices outside of it have less than
+  // min_fraction of the pixels black, and within max_skew_gradient fraction
+  // of the pixels on the inside, there are at least max_fraction of the
+  // pixels black. In other words, the inside of the rectangle looks roughly
+  // rectangular, and the outside of it looks like extra bits.
+  // On return, the rectangle is defined by x_start, y_start, x_end and y_end.
+  // Note: the algorithm is iterative, allowing it to slice off pixels from
+  // one edge, allowing it to then slice off more pixels from another edge.
+  static bool pixNearlyRectangular(Pix* pix,
+                                   double min_fraction, double max_fraction,
+                                   double max_skew_gradient,
+                                   int* x_start, int* y_start,
+                                   int* x_end, int* y_end);
+
+  // Given an input pix, and a bounding rectangle, the sides of the rectangle
+  // are shrunk inwards until they bound any black pixels found within the
+  // original rectangle. Returns false if the rectangle contains no black
+  // pixels at all.
+  // x_start, y_start, x_end and y_end are in/out: they carry the original
+  // rectangle in and the shrunk rectangle out.
+  static bool BoundsWithinRect(Pix* pix, int* x_start, int* y_start,
+                               int* x_end, int* y_end);
+
+  // Given a point in 3-D (RGB) space, returns the squared Euclidean distance
+  // of the point from the given line, defined by a pair of points in the 3-D
+  // (RGB) space, line1 and line2.
+  static double ColorDistanceFromLine(const uint8_t* line1, const uint8_t* line2,
+                                      const uint8_t* point);
+
+  // Returns the leptonica combined code for the given RGB triplet.
+  static uint32_t ComposeRGB(uint32_t r, uint32_t g, uint32_t b);
+
+  // Returns the input value clipped to a uint8_t.
+  static uint8_t ClipToByte(double pixel);
+
+  // Computes the light and dark extremes of color in the given rectangle of
+  // the given pix, which is factor smaller than the coordinate system in rect.
+  // The light and dark points are taken to be the upper and lower 8th-ile of
+  // the most deviant of R, G and B. The values of the other 2 channels are
+  // computed by linear fit against the most deviant.
+  // The colors of the two points are returned in color1 and color2, with the
+  // alpha channel set to a scaled mean rms of the fits.
+  // If color_map1 is not null then it and color_map2 get rect pasted in them
+  // with the two calculated colors, and rms_map gets a pasted rect of the rms.
+  // color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
+  static void ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor,
+                                     Pix* color_map1, Pix* color_map2,
+                                     Pix* rms_map,
+                                     uint8_t* color1, uint8_t* color2);
+
+  // Returns true if there are no black pixels in between the boxes.
+  // The im_box must represent the bounding box of the pix in tesseract
+  // coordinates, which may be negative, due to rotations to make the textlines
+  // horizontal. The boxes are rotated by rotation, which should undo such
+  // rotations, before mapping them onto the pix.
+  static bool BlankImageInBetween(const TBOX& box1, const TBOX& box2,
+                                  const TBOX& im_box, const FCOORD& rotation,
+                                  Pix* pix);
+
+  // Returns the number of pixels in box in the pix.
+  // The im_box must represent the bounding box of the pix in tesseract
+  // coordinates, which may be negative, due to rotations to make the textlines
+  // horizontal. The box is rotated by rotation, which should undo such
+  // rotations, before mapping it onto the pix.
+  static int CountPixelsInRotatedBox(TBOX box, const TBOX& im_box,
+                                     const FCOORD& rotation, Pix* pix);
+
+
+  // Locates all the image partitions in the part_grid, that were found by a
+  // previous call to FindImagePartitions, marks them in the image_mask,
+  // removes them from the grid, and deletes them. This makes it possible to
+  // call FindImagePartitions again to produce less broken-up and less
+  // overlapping image partitions.
+  // rerotation specifies how to rotate the partition coords to match
+  // the image_mask, since this function is used after orientation correction.
+  static void TransferImagePartsToImageMask(const FCOORD& rerotation,
+                                            ColPartitionGrid* part_grid,
+                                            Pix* image_mask);
+
+  // Runs a CC analysis on the image_pix mask image, and creates
+  // image partitions from them, cutting out strong text, and merging with
+  // nearby image regions such that they don't interfere with text.
+  // Rotation and rerotation specify how to rotate image coords to match
+  // the blob and partition coords and back again.
+  // The input/output part_grid owns all the created partitions, and
+  // the partitions own all the fake blobs that belong in the partitions.
+  // Since the other blobs in the other partitions will be owned by the block,
+  // ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
+  // situation and collect the image blobs.
+  static void FindImagePartitions(Pix* image_pix, const FCOORD& rotation,
+                                  const FCOORD& rerotation, TO_BLOCK* block,
+                                  TabFind* tab_grid, DebugPixa* pixa_debug,
+                                  ColPartitionGrid* part_grid,
+                                  ColPartition_LIST* big_parts);
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_TEXTORD_IMAGEFIND_H_
diff --git a/tesseract/src/textord/linefind.cpp b/tesseract/src/textord/linefind.cpp
new file mode 100644
index 00000000..d3763f31
--- /dev/null
+++ b/tesseract/src/textord/linefind.cpp
@@ -0,0 +1,769 @@
+///////////////////////////////////////////////////////////////////////
+// File:        linefind.cpp
+// Description: Class to find vertical lines in an image and create
+//              a corresponding list of empty blobs.
+// Author:      Ray Smith
+// Created:     Thu Mar 20 09:49:01 PDT 2008
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "linefind.h"
+#include "alignedblob.h"
+#include "tabvector.h"
+#include "blobbox.h"
+#include "edgblob.h"
+#if defined(USE_OPENCL)
+#include "openclwrapper.h" // for OpenclDevice
+#endif
+
+#include "allheaders.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+/// Thin lines may be at most resolution / kThinLineFraction pixels wide.
+const int kThinLineFraction = 20;
+/// Lines must be at least resolution / kMinLineLengthFraction pixels long.
+const int kMinLineLengthFraction = 4;
+/// Spacing of cracks across the page to break up tall vertical lines.
+const int kCrackSpacing = 100;
+/// Grid size used by line finder. Not very critical.
+const int kLineFindGridSize = 50;
+// Min width of a line in pixels to be considered thick.
+const int kMinThickLineWidth = 12;
+// Max size of line residue. (The pixels that fail the long thin opening, and
+// therefore don't make it to the candidate line mask, but are nevertheless
+// part of the line.)
+const int kMaxLineResidue = 6;
+// Min length in inches of a line segment that exceeds kMinThickLineWidth in
+// thickness. (Such lines shouldn't break by simple image degradation.)
+// Multiplied by the resolution to obtain a length in pixels.
+const double kThickLengthMultiple = 0.75;
+// Max fraction of line box area that can be occupied by non-line pixels.
+const double kMaxNonLineDensity = 0.25;
+// Max height of a music stave in inches.
+// Multiplied by the resolution to obtain a height in pixels.
+const double kMaxStaveHeight = 1.0;
+// Minimum fraction of pixels in a music rectangle connected to the staves.
+const double kMinMusicPixelFraction = 0.75;
+
+// Clears from line_pix the rectangle of every blob in line_bblobs that did
+// not end up marked as TT_VLINE, i.e. was not used in a line.
+// horizontal_lines says whether the blobs came from the horizontal line set,
+// in which case their boxes have x and y swapped and must be swapped back.
+static void RemoveUnusedLineSegments(bool horizontal_lines,
+                                     BLOBNBOX_LIST* line_bblobs,
+                                     Pix* line_pix) {
+  const int pix_height = pixGetHeight(line_pix);
+  BLOBNBOX_IT blob_it(line_bblobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* candidate = blob_it.data();
+    // Blobs that became part of a line are kept in the mask.
+    if (candidate->left_tab_type() == TT_VLINE) continue;
+    const TBOX& tess_box = candidate->bounding_box();
+    int lept_x;
+    int lept_y;
+    int lept_w;
+    int lept_h;
+    if (horizontal_lines) {
+      // Horizontal lines are stored in tess format with x and y flipped (so
+      // that FindVerticalAlignment can be reused). Undo the flip, then convert
+      // to Leptonica using height - flipped x (ie the right edge).
+      // See GetLineBoxes for more explanation.
+      lept_x = tess_box.bottom();
+      lept_y = pix_height - tess_box.right();
+      lept_w = tess_box.height();
+      lept_h = tess_box.width();
+    } else {
+      // Vertical lines only need flipping upside-down: Leptonica's y is the
+      // distance from the top of the image to the top of the box.
+      lept_x = tess_box.left();
+      lept_y = pix_height - tess_box.top();
+      lept_w = tess_box.width();
+      lept_h = tess_box.height();
+    }
+    Box* erase_box = boxCreate(lept_x, lept_y, lept_w, lept_h);
+    pixClearInRect(line_pix, erase_box);
+    boxDestroy(&erase_box);
+  }
+}
+
+// Removes line_pix from src_pix (in place), together with the line residue:
+// components that touch the lines but are absent from non_line_pix.
+// non_line_pix is assumed to have been prepared to the required accuracy.
+// The resolution argument is not used by this helper.
+static void SubtractLinesAndResidue(Pix* line_pix, Pix* non_line_pix,
+                                    int resolution, Pix* src_pix) {
+  // Step 1: take out the line pixels themselves.
+  pixSubtract(src_pix, src_pix, line_pix);
+  // Step 2: whatever remains that is not a known non-line is residue.
+  Pix* leftover_pix = pixSubtract(nullptr, src_pix, non_line_pix);
+  // Step 3: thicken the lines so that they make contact with the residue,
+  // then flood from the thickened lines through the residue.
+  Pix* seed_pix = pixDilateBrick(nullptr, line_pix, 3, 3);
+  pixSeedfillBinary(seed_pix, seed_pix, leftover_pix, 8);
+  // Step 4: everything reached by the flood is removed from the source.
+  pixSubtract(src_pix, src_pix, seed_pix);
+  pixDestroy(&seed_pix);
+  pixDestroy(&leftover_pix);
+}
+
+// Estimates the maximum stroke width in the binary pix as twice the largest
+// value of its distance function (8 bit output, measured to the background).
+static int MaxStrokeWidth(Pix* pix) {
+  Pix* distance_map = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG);
+  const int num_cols = pixGetWidth(distance_map);
+  const int num_rows = pixGetHeight(distance_map);
+  const int words_per_line = pixGetWpl(distance_map);
+  l_uint32* row = pixGetData(distance_map);
+  // Scan every byte of the distance image for the peak value.
+  int peak = 0;
+  for (int r = 0; r < num_rows; ++r, row += words_per_line) {
+    for (int c = 0; c < num_cols; ++c) {
+      peak = std::max(peak, static_cast<int>(GET_DATA_BYTE(row, c)));
+    }
+  }
+  pixDestroy(&distance_map);
+  return 2 * peak;
+}
+
+// Returns the number of components in the intersection_pix touched by line_box.
+// The count is taken from an 8-connected component analysis of the part of
+// intersection_pix that lies inside line_box.
+// Returns 0 if intersection_pix is nullptr or if the component analysis
+// yields no Boxa.
+static int NumTouchingIntersections(Box* line_box, Pix* intersection_pix) {
+  if (intersection_pix == nullptr) return 0;
+  Pix* rect_pix = pixClipRectangle(intersection_pix, line_box, nullptr);
+  Boxa* boxa = pixConnComp(rect_pix, nullptr, 8);
+  pixDestroy(&rect_pix);
+  // This function returns a count, so report "none" as 0 rather than false.
+  if (boxa == nullptr) return 0;
+  int result = boxaGetCount(boxa);
+  boxaDestroy(&boxa);
+  return result;
+}
+
+// Returns the number of black pixels found in the box made by adding the line
+// width to both sides of the line bounding box. (Increasing the smallest
+// dimension of the bounding box.)
+// line_width:  amount by which to grow the box on each side.
+// line_box:    bounding box of the line, in Leptonica coordinates.
+// nonline_pix: mask in which the pixels are counted.
+// The grown box is clamped to the bounds of nonline_pix. Returns 0 if the
+// pixels cannot be counted.
+static int CountPixelsAdjacentToLine(int line_width, Box* line_box,
+                                     Pix* nonline_pix) {
+  l_int32 x, y, box_width, box_height;
+  boxGetGeometry(line_box, &x, &y, &box_width, &box_height);
+  if (box_width > box_height) {
+    // horizontal line.
+    int bottom = std::min(pixGetHeight(nonline_pix), y + box_height + line_width);
+    y = std::max(0, y - line_width);
+    box_height = bottom - y;
+  } else {
+    // Vertical line.
+    int right = std::min(pixGetWidth(nonline_pix), x + box_width + line_width);
+    x = std::max(0, x - line_width);
+    box_width = right - x;
+  }
+  Box* box = boxCreate(x, y, box_width, box_height);
+  Pix* rect_pix = pixClipRectangle(nonline_pix, box, nullptr);
+  boxDestroy(&box);
+  // Start from a defined value so that the return does not depend on
+  // pixCountPixels writing its output when the clip produced nothing.
+  l_int32 result = 0;
+  pixCountPixels(rect_pix, &result, nullptr);
+  pixDestroy(&rect_pix);
+  return result;
+}
+
+// Helper erases false-positive line segments from the input/output line_pix.
+// 1. Since thick lines shouldn't really break up, we can eliminate some false
+//    positives by marking segments that are at least kMinThickLineWidth
+//    thickness, yet have a length less than min_thick_length.
+// 2. Lines that don't have at least 2 intersections with other lines and have
+//    a lot of neighbouring non-lines are probably not lines (perhaps arabic
+//    or Hindi words, or underlines.)
+// Bad line components are erased from line_pix.
+// resolution:       scales kThickLengthMultiple to a length in pixels.
+// nonline_pix:      mask of non-line pixels used for the density test.
+// intersection_pix: mask of line intersections; may be nullptr, in which
+//                   case every component gets the density test.
+// Returns the number of remaining connected components.
+static int FilterFalsePositives(int resolution, Pix* nonline_pix,
+                                Pix* intersection_pix, Pix* line_pix) {
+  int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple);
+  Pixa* pixa = nullptr;
+  Boxa* boxa = pixConnComp(line_pix, &pixa, 8);
+  // Iterate over the boxes to remove false positives.
+  int nboxes = boxaGetCount(boxa);
+  int remaining_boxes = nboxes;
+  for (int i = 0; i < nboxes; ++i) {
+    Box* box = boxaGetBox(boxa, i, L_CLONE);
+    l_int32 x, y, box_width, box_height;
+    boxGetGeometry(box, &x, &y, &box_width, &box_height);
+    Pix* comp_pix = pixaGetPix(pixa, i, L_CLONE);
+    int max_width = MaxStrokeWidth(comp_pix);
+    pixDestroy(&comp_pix);
+    bool bad_line = false;
+    // If the length is too short to stand-alone as a line, and the box width
+    // is thick enough, and the stroke width is thick enough it is bad.
+    if (box_width >= kMinThickLineWidth && box_height >= kMinThickLineWidth &&
+        box_width < min_thick_length && box_height < min_thick_length &&
+        max_width > kMinThickLineWidth) {
+      // Too thick for the length.
+      bad_line = true;
+    }
+    if (!bad_line &&
+        (intersection_pix == nullptr ||
+        NumTouchingIntersections(box, intersection_pix) < 2)) {
+      // Test non-line density near the line.
+      int nonline_count = CountPixelsAdjacentToLine(max_width, box,
+                                                    nonline_pix);
+      // Compute the box area in floating point: the int product
+      // box_height * box_width could overflow for a very large box.
+      const double box_area = static_cast<double>(box_height) * box_width;
+      if (nonline_count > box_area * kMaxNonLineDensity)
+        bad_line = true;
+    }
+    if (bad_line) {
+      // Not a good line.
+      pixClearInRect(line_pix, box);
+      --remaining_boxes;
+    }
+    boxDestroy(&box);
+  }
+  pixaDestroy(&pixa);
+  boxaDestroy(&boxa);
+  return remaining_boxes;
+}
+
+// Finds vertical and horizontal line objects in the given pix.
+// Uses the given resolution to determine size thresholds instead of any
+// that may be present in the pix.
+// The output vertical_x and vertical_y contain a sum of the output vectors,
+// thereby giving the mean vertical direction.
+// If pix_music_mask != nullptr, and music is detected, a mask of the staves
+// and anything that is connected (bars, notes etc.) will be returned in
+// pix_music_mask, the mask subtracted from pix, and the lines will not
+// appear in v_lines or h_lines.
+// The output vectors are owned by the list and Frozen (cannot refit) by
+// having no boxes, as there is no need to refit or merge separator lines.
+// The detected lines are removed from the pix.
+// If debug is true, the intermediate masks and the final pix are collected
+// and written to "vhlinefinding.pdf".
+// If pix, vertical_x or vertical_y is nullptr, an error is printed and the
+// function returns without doing anything.
+void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix* pix,
+                                    int* vertical_x, int* vertical_y,
+                                    Pix** pix_music_mask,
+                                    TabVector_LIST* v_lines,
+                                    TabVector_LIST* h_lines) {
+  if (pix == nullptr || vertical_x == nullptr || vertical_y == nullptr) {
+    tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n");
+    return;
+  }
+  // All of these masks are owned by this function and destroyed at the end.
+  Pix* pix_vline = nullptr;
+  Pix* pix_non_vline = nullptr;
+  Pix* pix_hline = nullptr;
+  Pix* pix_non_hline = nullptr;
+  Pix* pix_intersections = nullptr;
+  // The debug collection only exists in debug mode; every later use of it is
+  // guarded by a nullptr test.
+  Pixa* pixa_display = debug ? pixaCreate(0) : nullptr;
+  GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline,
+               &pix_non_hline, &pix_intersections, pix_music_mask,
+               pixa_display);
+  // Find lines, convert to TabVector_LIST and remove those that are used.
+  // FindAndRemoveVLines destroys pix_vline if it finds no vectors.
+  // NOTE(review): if pix_vline is already nullptr here, FindAndRemoveVLines
+  // returns without writing *vertical_x / *vertical_y, so the values passed
+  // to FindAndRemoveHLines below are whatever the caller supplied - confirm
+  // callers initialize them.
+  FindAndRemoveVLines(resolution, pix_intersections, vertical_x, vertical_y,
+                      &pix_vline, pix_non_vline, pix, v_lines);
+  if (pix_hline != nullptr) {
+    // Recompute intersections and re-filter false positive h-lines.
+    // Without any vertical lines there can be no intersections, so the
+    // intersection mask is dropped and the filter runs without one.
+    if (pix_vline != nullptr)
+      pixAnd(pix_intersections, pix_vline, pix_hline);
+    else
+      pixDestroy(&pix_intersections);
+    // A zero return means that no horizontal components survived.
+    if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections,
+                              pix_hline)) {
+      pixDestroy(&pix_hline);
+    }
+  }
+  // Does nothing if pix_hline is nullptr; destroys pix_hline if it finds no
+  // vectors.
+  FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y,
+                      &pix_hline, pix_non_hline, pix, h_lines);
+  if (pixa_display != nullptr && pix_vline != nullptr)
+    pixaAddPix(pixa_display, pix_vline, L_CLONE);
+  if (pixa_display != nullptr && pix_hline != nullptr)
+    pixaAddPix(pixa_display, pix_hline, L_CLONE);
+  if (pix_vline != nullptr && pix_hline != nullptr) {
+    // Remove joins (intersections) where lines cross, and the residue.
+    // Recalculate the intersections, since some lines have been deleted.
+    pixAnd(pix_intersections, pix_vline, pix_hline);
+    // Fatten up the intersections and seed-fill to get the intersection
+    // residue.
+    Pix* pix_join_residue = pixDilateBrick(nullptr, pix_intersections, 5, 5);
+    pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8);
+    // Now remove the intersection residue.
+    pixSubtract(pix, pix, pix_join_residue);
+    pixDestroy(&pix_join_residue);
+  }
+  // Remove any detected music.
+  // The music mask itself is not destroyed here; it stays with the caller.
+  if (pix_music_mask != nullptr && *pix_music_mask != nullptr) {
+    if (pixa_display != nullptr)
+      pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
+    pixSubtract(pix, pix, *pix_music_mask);
+  }
+  if (pixa_display != nullptr)
+    pixaAddPix(pixa_display, pix, L_CLONE);
+
+  // Release the working masks. Any that were already destroyed above are
+  // handed over here as nullptr.
+  pixDestroy(&pix_vline);
+  pixDestroy(&pix_non_vline);
+  pixDestroy(&pix_hline);
+  pixDestroy(&pix_non_hline);
+  pixDestroy(&pix_intersections);
+  if (pixa_display != nullptr) {
+    pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding",
+                     "vhlinefinding.pdf");
+    pixaDestroy(&pixa_display);
+  }
+}
+
+// Turns every box of the Boxa into a C_BLOB and appends the survivors to
+// blobs. Severely overlapping outlines and those that are children of a
+// bigger one are discarded on the way by outlines_to_blobs.
+// The output C_BLOBs are owned by the blobs list. Their C_OUTLINEs carry no
+// outline data - just empty bounding boxes.
+// image_width and image_height give the page extent handed to
+// outlines_to_blobs. The Boxa is consumed and destroyed.
+void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height,
+                                    Boxa** boxes, C_BLOB_LIST* blobs) {
+  C_OUTLINE_LIST outlines;
+  C_OUTLINE_IT outline_it(&outlines);
+  // One bounding-box-only outline per leptonica box.
+  const int box_count = boxaGetCount(*boxes);
+  for (int index = 0; index < box_count; ++index) {
+    l_int32 left, top, box_w, box_h;
+    boxaGetBoxGeometry(*boxes, index, &left, &top, &box_w, &box_h);
+    // This is a bit of a hack: there is no real outline, only a bounding
+    // box, but with some very small changes to coutln.cpp, it works nicely.
+    const ICOORD corner_tl(left, top);
+    const ICOORD corner_br(left + box_w, top + box_h);
+    CRACKEDGE start_edge;
+    start_edge.pos = corner_tl;
+    outline_it.add_after_then_move(
+        new C_OUTLINE(&start_edge, corner_tl, corner_br, 0));
+  }
+  // outlines_to_blobs converts the outlines to blobs and finds overlapping
+  // and contained objects. The blob list left in the block has all the bad
+  // ones filtered out and deleted.
+  BLOCK scratch_block;
+  const ICOORD page_origin(0, 0);
+  const ICOORD page_extent(image_width, image_height);
+  outlines_to_blobs(&scratch_block, page_origin, page_extent, &outlines);
+  // Hand the created blobs over to the output list.
+  C_BLOB_IT out_it(blobs);
+  out_it.add_list_after(scratch_block.blob_list());
+  // The boxes have served their purpose.
+  boxaDestroy(boxes);
+}
+
+// Finds vertical line objects in pix_vline and removes them from src_pix.
+// Size thresholds come from the given resolution rather than from any
+// resolution that may be present in the pix.
+// On output vertical_x and vertical_y contain a sum of the output vectors,
+// thereby giving the mean vertical direction.
+// The output vectors are owned by the list and Frozen (cannot refit) by
+// having no boxes, as there is no need to refit or merge separator lines.
+// If no good lines are found, pix_vline is destroyed.
+// None of the input pointers may be nullptr, and if *pix_vline is nullptr then
+// the function does nothing.
+void LineFinder::FindAndRemoveVLines(int resolution,
+                                     Pix* pix_intersections,
+                                     int* vertical_x, int* vertical_y,
+                                     Pix** pix_vline, Pix* pix_non_vline,
+                                     Pix* src_pix, TabVector_LIST* vectors) {
+  if (pix_vline == nullptr || *pix_vline == nullptr) return;
+  C_BLOB_LIST line_cblobs;
+  BLOBNBOX_LIST line_bblobs;
+  GetLineBoxes(false, *pix_vline, pix_intersections,
+               &line_cblobs, &line_bblobs);
+  // The search area is the whole of the source image.
+  const ICOORD page_bl(0, 0);
+  const ICOORD page_tr(pixGetWidth(src_pix), pixGetHeight(src_pix));
+  FindLineVectors(page_bl, page_tr, &line_bblobs, vertical_x, vertical_y,
+                  vectors);
+  if (vectors->empty()) {
+    // Nothing usable: drop the candidate mask altogether.
+    pixDestroy(pix_vline);
+    return;
+  }
+  RemoveUnusedLineSegments(false, &line_bblobs, *pix_vline);
+  SubtractLinesAndResidue(*pix_vline, pix_non_vline, resolution, src_pix);
+  ICOORD mean_vertical;
+  mean_vertical.set_with_shrink(*vertical_x, *vertical_y);
+  TabVector::MergeSimilarTabVectors(mean_vertical, vectors, nullptr);
+}
+
+// Finds horizontal line objects in pix_hline and removes them from src_pix.
+// Size thresholds come from the given resolution rather than from any
+// resolution that may be present in the pix.
+// vertical_x and vertical_y are taken by value: FindLineVectors updates the
+// local copies only, so nothing is passed back to the caller through them.
+// The output vectors are owned by the list and Frozen (cannot refit) by
+// having no boxes, as there is no need to refit or merge separator lines.
+// If no good lines are found, pix_hline is destroyed.
+// None of the input pointers may be nullptr, and if *pix_hline is nullptr then
+// the function does nothing.
+void LineFinder::FindAndRemoveHLines(int resolution,
+                                     Pix* pix_intersections,
+                                     int vertical_x, int vertical_y,
+                                     Pix** pix_hline, Pix* pix_non_hline,
+                                     Pix* src_pix, TabVector_LIST* vectors) {
+  if (pix_hline == nullptr || *pix_hline == nullptr) return;
+  C_BLOB_LIST line_cblobs;
+  BLOBNBOX_LIST line_bblobs;
+  GetLineBoxes(true, *pix_hline, pix_intersections, &line_cblobs, &line_bblobs);
+  const int src_width = pixGetWidth(src_pix);
+  const int src_height = pixGetHeight(src_pix);
+  // The top-right corner takes (height, width): x and y are swapped for the
+  // horizontal case.
+  const ICOORD page_bl(0, 0);
+  const ICOORD page_tr(src_height, src_width);
+  FindLineVectors(page_bl, page_tr, &line_bblobs, &vertical_x, &vertical_y,
+                  vectors);
+  if (vectors->empty()) {
+    // Nothing usable: drop the candidate mask altogether.
+    pixDestroy(pix_hline);
+    return;
+  }
+  RemoveUnusedLineSegments(true, &line_bblobs, *pix_hline);
+  SubtractLinesAndResidue(*pix_hline, pix_non_hline, resolution, src_pix);
+  ICOORD mean_vertical;
+  mean_vertical.set_with_shrink(vertical_x, vertical_y);
+  TabVector::MergeSimilarTabVectors(mean_vertical, vectors, nullptr);
+  // x and y were flipped for horizontal lines, so that FindLineVectors can
+  // work just with the vertical case. Flip every vector back.
+  // See GetLineBoxes for more on the flip.
+  TabVector_IT flip_it(vectors);
+  for (flip_it.mark_cycle_pt(); !flip_it.cycled_list(); flip_it.forward()) {
+    flip_it.data()->XYFlip();
+  }
+}
+
+// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
+// are the bounds of the image on which the input line_bblobs were found.
+// The blobs of line_bblobs stay in that list, but their tab type and rule
+// fields are set here.
+// vertical_x and vertical_y are reset to (0, 1) and then updated by each
+// alignment search. If line_bblobs is empty they are left untouched.
+// Each vector found is frozen and appended to vectors.
+void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
+                                 BLOBNBOX_LIST* line_bblobs,
+                                 int* vertical_x, int* vertical_y,
+                                 TabVector_LIST* vectors) {
+  BLOBNBOX_IT blob_it(line_bblobs);
+  bool grid_is_empty = true;
+  // Load every blob into a grid, marking each as a candidate for alignment
+  // and bounding its rules by the image edges.
+  AlignedBlob blob_grid(kLineFindGridSize, bleft, tright);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* candidate = blob_it.data();
+    candidate->set_left_tab_type(TT_MAYBE_ALIGNED);
+    candidate->set_left_rule(bleft.x());
+    candidate->set_right_rule(tright.x());
+    candidate->set_left_crossing_rule(bleft.x());
+    candidate->set_right_crossing_rule(tright.x());
+    blob_grid.InsertBBox(false, true, candidate);
+    grid_is_empty = false;
+  }
+  if (grid_is_empty) {
+    return;
+  }
+
+  // Walk the entire grid, starting an alignment search from every blob that
+  // is still marked as a candidate.
+  BlobGridSearch full_search(&blob_grid);
+  TabVector_IT out_it(vectors);
+  *vertical_x = 0;
+  *vertical_y = 1;
+  full_search.StartFullSearch();
+  for (BLOBNBOX* seed = full_search.NextFullSearch(); seed != nullptr;
+       seed = full_search.NextFullSearch()) {
+    if (seed->left_tab_type() != TT_MAYBE_ALIGNED) continue;
+    const TBOX& seed_box = seed->bounding_box();
+    if (AlignedBlob::WithinTestRegion(2, seed_box.left(), seed_box.bottom())) {
+      tprintf("Finding line vector starting at bbox (%d,%d)\n",
+              seed_box.left(), seed_box.bottom());
+    }
+    AlignedBlobParams align_params(*vertical_x, *vertical_y, seed_box.width());
+    TabVector* found = blob_grid.FindVerticalAlignment(align_params, seed,
+                                                       vertical_x, vertical_y);
+    if (found == nullptr) continue;
+    found->Freeze();
+    out_it.add_to_end(found);
+  }
+}
+
+// Returns a Pix music mask if music is detected, otherwise nullptr.
+// Any vertical line that has at least 5 intersections in sufficient density
+// is taken to be a bar. Bars are used as a seed and the entire touching
+// component (within pix_closed) is added to the output music mask.
+// If any music remains after filtering, the mask is subtracted from
+// pix_vline and pix_hline, and v_empty / h_empty report whether those masks
+// are now blank. v_empty and h_empty are not written otherwise.
+static Pix* FilterMusic(int resolution, Pix* pix_closed,
+                        Pix* pix_vline, Pix* pix_hline,
+                        l_int32* v_empty, l_int32* h_empty) {
+  const int stave_limit = static_cast<int>(resolution * kMaxStaveHeight);
+  Pix* crossings_pix = pixAnd(nullptr, pix_vline, pix_hline);
+  Boxa* vline_boxes = pixConnComp(pix_vline, nullptr, 8);
+  Pix* music_mask = nullptr;
+  // Pass 1: look for music bars among the vertical line components.
+  const int vline_count = boxaGetCount(vline_boxes);
+  for (int v = 0; v < vline_count; ++v) {
+    Box* vline_box = boxaGetBox(vline_boxes, v, L_CLONE);
+    l_int32 left, top, wide, tall;
+    boxGetGeometry(vline_box, &left, &top, &wide, &tall);
+    const int joins = NumTouchingIntersections(vline_box, crossings_pix);
+    // The join density must be at least 5 per stave_limit,
+    // ie (joins-1)/tall >= (5-1)/stave_limit.
+    const bool is_bar = joins >= 5 && (joins - 1) * stave_limit >= 4 * tall;
+    if (is_bar) {
+      // The mask is only allocated once the first bar turns up.
+      if (music_mask == nullptr) {
+        music_mask = pixCreate(pixGetWidth(pix_vline), pixGetHeight(pix_vline),
+                               1);
+      }
+      pixSetInRect(music_mask, vline_box);
+    }
+    boxDestroy(&vline_box);
+  }
+  boxaDestroy(&vline_boxes);
+  pixDestroy(&crossings_pix);
+  if (music_mask == nullptr) {
+    // No bars, no music.
+    return nullptr;
+  }
+
+  // So far the mask holds just the bars. Seed-fill from them through
+  // pix_closed to pick up all the intersecting staves.
+  pixSeedfillBinary(music_mask, music_mask, pix_closed, 8);
+  // Pass 2: filter out false positives. CCs in the music_mask should be the
+  // vast majority of the pixels in their bounding boxes, as we expect just a
+  // tiny amount of text, a few phrase marks, and crescendo etc left.
+  Boxa* music_boxes = pixConnComp(music_mask, nullptr, 8);
+  const int music_count = boxaGetCount(music_boxes);
+  for (int m = 0; m < music_count; ++m) {
+    Box* music_box = boxaGetBox(music_boxes, m, L_CLONE);
+    Pix* clipped = pixClipRectangle(music_mask, music_box, nullptr);
+    l_int32 music_pixels;
+    pixCountPixels(clipped, &music_pixels, nullptr);
+    pixDestroy(&clipped);
+    clipped = pixClipRectangle(pix_closed, music_box, nullptr);
+    l_int32 all_pixels;
+    pixCountPixels(clipped, &all_pixels, nullptr);
+    pixDestroy(&clipped);
+    if (music_pixels < kMinMusicPixelFraction * all_pixels) {
+      // False positive. Take it back out of the music mask.
+      pixClearInRect(music_mask, music_box);
+    }
+    boxDestroy(&music_box);
+  }
+  l_int32 mask_is_blank;
+  boxaDestroy(&music_boxes);
+  pixZero(music_mask, &mask_is_blank);
+  if (!mask_is_blank) {
+    pixSubtract(pix_vline, pix_vline, music_mask);
+    pixSubtract(pix_hline, pix_hline, music_mask);
+    // We may have deleted all the lines
+    pixZero(pix_vline, v_empty);
+    pixZero(pix_hline, h_empty);
+  } else {
+    pixDestroy(&music_mask);
+  }
+  return music_mask;
+}
+
+// Most of the heavy lifting of line finding. Given src_pix and its separate
+// resolution, returns image masks:
+// pix_vline           candidate vertical lines.
+// pix_non_vline       pixels that didn't look like vertical lines.
+// pix_hline           candidate horizontal lines.
+// pix_non_hline       pixels that didn't look like horizontal lines.
+// pix_intersections   pixels where vertical and horizontal lines meet.
+// pix_music_mask      candidate music staves.
+// This function promises to initialize all the output (2nd level) pointers,
+// but any of the returns that are empty will be nullptr on output.
+// None of the input (1st level) pointers may be nullptr except pix_music_mask,
+// which will disable music detection, and pixa_display, which is for debug.
+void LineFinder::GetLineMasks(int resolution, Pix* src_pix,
+                              Pix** pix_vline, Pix** pix_non_vline,
+                              Pix** pix_hline, Pix** pix_non_hline,
+                              Pix** pix_intersections, Pix** pix_music_mask,
+                              Pixa* pixa_display) {
+  Pix* pix_closed = nullptr;
+  Pix* pix_hollow = nullptr;
+
+  int max_line_width = resolution / kThinLineFraction;
+  int min_line_length = resolution / kMinLineLengthFraction;
+  if (pixa_display != nullptr) {
+    tprintf("Image resolution = %d, max line width = %d, min length=%d\n",
+            resolution, max_line_width, min_line_length);
+  }
+  int closing_brick = max_line_width / 3;
+
+// only use opencl if compiled w/ OpenCL and selected device is opencl
+#ifdef USE_OPENCL
+  if (OpenclDevice::selectedDeviceIsOpenCL()) {
+    // OpenCL pixGetLines Operation
+    int clStatus = OpenclDevice::initMorphCLAllocations(pixGetWpl(src_pix),
+                                                        pixGetHeight(src_pix),
+                                                        src_pix);  // NOTE(review): clStatus is never read.
+    bool getpixclosed = pix_music_mask != nullptr;
+    OpenclDevice::pixGetLinesCL(nullptr, src_pix, pix_vline, pix_hline,
+                                &pix_closed, getpixclosed, closing_brick,
+                                closing_brick, max_line_width, max_line_width,
+                                min_line_length, min_line_length);
+  } else {
+#endif
+  // Close up small holes, making it less likely that false alarms are found
+  // in thickened text (as it will become more solid) and also smoothing over
+  // some line breaks and nicks in the edges of the lines.
+  pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick);
+  if (pixa_display != nullptr)
+    pixaAddPix(pixa_display, pix_closed, L_CLONE);
+  // Open up with a big box to detect solid areas, which can then be subtracted.
+  // This is very generous and will leave in even quite wide lines.
+  Pix* pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width,
+                                max_line_width);
+  if (pixa_display != nullptr)
+    pixaAddPix(pixa_display, pix_solid, L_CLONE);
+  pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid);
+
+  pixDestroy(&pix_solid);
+
+  // Now open up in both directions independently to find lines of at least
+  // 1 inch/kMinLineLengthFraction in length.
+  if (pixa_display != nullptr)
+    pixaAddPix(pixa_display, pix_hollow, L_CLONE);
+  *pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length);
+  *pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1);
+
+  pixDestroy(&pix_hollow);
+#ifdef USE_OPENCL
+  }
+#endif
+
+  // Lines are sufficiently rare, that it is worth checking for a zero image.
+  l_int32 v_empty = 0;
+  l_int32 h_empty = 0;
+  pixZero(*pix_vline, &v_empty);
+  pixZero(*pix_hline, &h_empty);
+  if (pix_music_mask != nullptr) {
+    if (!v_empty && !h_empty) {
+      *pix_music_mask = FilterMusic(resolution, pix_closed,
+                                    *pix_vline, *pix_hline,
+                                    &v_empty, &h_empty);
+    } else {
+      *pix_music_mask = nullptr;
+    }
+  }
+  pixDestroy(&pix_closed);  // Not used again below this point.
+  Pix* pix_nonlines = nullptr;
+  *pix_intersections = nullptr;
+  Pix* extra_non_hlines = nullptr;
+  if (!v_empty) {
+    // Subtract both line candidates from the source to get definite non-lines.
+    pix_nonlines = pixSubtract(nullptr, src_pix, *pix_vline);
+    if (!h_empty) {
+      pixSubtract(pix_nonlines, pix_nonlines, *pix_hline);
+      // Intersections are a useful indicator for likelihood of being a line.
+      *pix_intersections = pixAnd(nullptr, *pix_vline, *pix_hline);
+      // Candidate vlines are not hlines (apart from the intersections)
+      // and vice versa.
+      extra_non_hlines = pixSubtract(nullptr, *pix_vline, *pix_intersections);
+    }
+    *pix_non_vline = pixErodeBrick(nullptr, pix_nonlines, kMaxLineResidue, 1);
+    pixSeedfillBinary(*pix_non_vline, *pix_non_vline, pix_nonlines, 8);
+    if (!h_empty) {
+      // Candidate hlines are not vlines.
+      pixOr(*pix_non_vline, *pix_non_vline, *pix_hline);
+      pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections);
+    }
+    if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections,
+                              *pix_vline))
+      pixDestroy(pix_vline);  // No candidates left.
+  } else {
+    // No vertical lines.
+    pixDestroy(pix_vline);
+    *pix_non_vline = nullptr;
+    if (!h_empty) {
+      pix_nonlines = pixSubtract(nullptr, src_pix, *pix_hline);
+    }
+  }
+  if (h_empty) {
+    pixDestroy(pix_hline);
+    *pix_non_hline = nullptr;
+    if (v_empty) {
+      return;  // No line candidates of either kind remain.
+    }
+  } else {
+    *pix_non_hline = pixErodeBrick(nullptr, pix_nonlines, 1, kMaxLineResidue);
+    pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8);
+    if (extra_non_hlines != nullptr) {
+      pixOr(*pix_non_hline, *pix_non_hline, extra_non_hlines);
+      pixDestroy(&extra_non_hlines);
+    }
+    if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections,
+                              *pix_hline))
+      pixDestroy(pix_hline);  // No candidates left.
+  }
+  if (pixa_display != nullptr) {
+    if (*pix_vline != nullptr) pixaAddPix(pixa_display, *pix_vline, L_CLONE);
+    if (*pix_hline != nullptr) pixaAddPix(pixa_display, *pix_hline, L_CLONE);
+    if (pix_nonlines != nullptr) pixaAddPix(pixa_display, pix_nonlines, L_CLONE);
+    if (*pix_non_vline != nullptr)
+      pixaAddPix(pixa_display, *pix_non_vline, L_CLONE);
+    if (*pix_non_hline != nullptr)
+      pixaAddPix(pixa_display, *pix_non_hline, L_CLONE);
+    if (*pix_intersections != nullptr)
+      pixaAddPix(pixa_display, *pix_intersections, L_CLONE);
+    if (pix_music_mask != nullptr && *pix_music_mask != nullptr)
+      pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
+  }
+  pixDestroy(&pix_nonlines);  // Local scratch image, not one of the outputs.
+}
+
+// Builds the candidate line segments as C_BLOBs and BLOBNBOXes, one per
+// connected component of the cracked pix_lines. Stores in each BLOBNBOX, as
+// its line_crossings, the number of intersections that the segment touches.
+void LineFinder::GetLineBoxes(bool horizontal_lines,
+                              Pix* pix_lines, Pix* pix_intersections,
+                              C_BLOB_LIST* line_cblobs,
+                              BLOBNBOX_LIST* line_bblobs) {
+  // Break every line with a single pixel crack at an arbitrary spacing, so
+  // that the bounding boxes of the pieces give the direction accurately
+  // enough without needing outlines.
+  const int wpl = pixGetWpl(pix_lines);
+  const int width = pixGetWidth(pix_lines);
+  const int height = pixGetHeight(pix_lines);
+  l_uint32* const pixels = pixGetData(pix_lines);
+  if (!horizontal_lines) {
+    // Vertical lines: blank out a whole image row per crack.
+    for (int row = kCrackSpacing; row < height; row += kCrackSpacing) {
+      memset(pixels + wpl * row, 0, wpl * sizeof(*pixels));
+    }
+  } else {
+    // Horizontal lines: clear the crack columns in each image row in turn.
+    l_uint32* line = pixels;
+    for (int row = 0; row < height; ++row) {
+      for (int col = kCrackSpacing; col < width; col += kCrackSpacing) {
+        CLEAR_DATA_BIT(line, col);
+      }
+      line += wpl;
+    }
+  }
+  // Get the connected components as C_BLOBs, then wrap each in a BLOBNBOX.
+  Boxa* components = pixConnComp(pix_lines, nullptr, 8);
+  ConvertBoxaToBlobs(width, height, &components, line_cblobs);
+  C_BLOB_IT blob_it(line_cblobs);
+  BLOBNBOX_IT bbox_it(line_bblobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    auto* bblob = new BLOBNBOX(blob_it.data());
+    bbox_it.add_to_end(bblob);
+    const TBOX& bbox = bblob->bounding_box();
+    // Count the intersections touched by this line segment.
+    Box* box = boxCreate(bbox.left(), bbox.bottom(),
+                         bbox.width(), bbox.height());
+    const int crossings = NumTouchingIntersections(box, pix_intersections);
+    boxDestroy(&box);
+    bblob->set_line_crossings(crossings);
+    // Transform the bounding box for the line finder. The box holds Leptonica
+    // coords in a Tesseract box, so bbox.bottom(), being the MIN y coord, is
+    // actually the top; both cases therefore flip y against the image height.
+    const int flipped_min = height - bbox.top();
+    const int flipped_max = height - bbox.bottom();
+    if (horizontal_lines) {
+      // Also swap x and y so the tab-stop detection code can be re-used. To
+      // get back to Leptonica coords later, height - box.right() is the top.
+      TBOX swapped(flipped_min, bbox.left(), flipped_max, bbox.right());
+      bblob->set_bounding_box(swapped);
+    } else {
+      TBOX flipped(bbox.left(), flipped_min, bbox.right(), flipped_max);
+      bblob->set_bounding_box(flipped);
+    }
+  }
+}
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/linefind.h b/tesseract/src/textord/linefind.h
new file mode 100644
index 00000000..93b58e1f
--- /dev/null
+++ b/tesseract/src/textord/linefind.h
@@ -0,0 +1,149 @@
+///////////////////////////////////////////////////////////////////////
+// File:        linefind.h
+// Description: Class to find vertical lines in an image and create
+//              a corresponding list of empty blobs.
+// Author:      Ray Smith
+// Created:     Thu Mar 20 09:49:01 PDT 2008
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_LINEFIND_H_
+#define TESSERACT_TEXTORD_LINEFIND_H_
+
+struct Boxa;
+struct Pix;
+struct Pixa;
+
+namespace tesseract {
+
+class TabVector_LIST;
+class C_BLOB_LIST;
+class BLOBNBOX_LIST;
+class ICOORD;
+
+/**
+ * The LineFinder class is a simple static function wrapper class that mainly
+ * exposes the FindAndRemoveLines function.
+ */
+class LineFinder {
+ public:
+  /**
+   * Finds vertical and horizontal line objects in the given pix and removes
+   * them.
+   *
+   * Uses the given resolution to determine size thresholds instead of any
+   * that may be present in the pix.
+   *
+   * The output vertical_x and vertical_y contain a sum of the output vectors,
+   * thereby giving the mean vertical direction.
+   *
+   * If pix_music_mask != nullptr, and music is detected, a mask of the staves
+   * and anything that is connected (bars, notes etc.) will be returned in
+   * pix_music_mask, the mask subtracted from pix, and the lines will not
+   * appear in v_lines or h_lines.
+   *
+   * The output vectors are owned by the list and Frozen (cannot refit) by
+   * having no boxes, as there is no need to refit or merge separator lines.
+   *
+   * The detected lines are removed from the pix.
+   */
+  static void FindAndRemoveLines(int resolution,  bool debug, Pix* pix,
+                                 int* vertical_x, int* vertical_y,
+                                 Pix** pix_music_mask,
+                                 TabVector_LIST* v_lines,
+                                 TabVector_LIST* h_lines);
+
+  /**
+   * Converts the Boxa array to a list of C_BLOB, getting rid of severely
+   * overlapping outlines and those that are children of a bigger one.
+   *
+   * The output is a list of C_BLOBs that are owned by the list.
+   *
+   * The C_OUTLINEs in the C_BLOBs contain no outline data - just empty
+   * bounding boxes. The Boxa is consumed and destroyed.
+   */
+  static void ConvertBoxaToBlobs(int image_width, int image_height,
+                                 Boxa** boxes, C_BLOB_LIST* blobs);
+
+ private:
+  // Finds vertical line objects in pix_vline and removes them from src_pix.
+  // Uses the given resolution to determine size thresholds instead of any
+  // that may be present in the pix.
+  // The output vertical_x and vertical_y contain a sum of the output vectors,
+  // thereby giving the mean vertical direction.
+  // The output vectors are owned by the list and Frozen (cannot refit) by
+  // having no boxes, as there is no need to refit or merge separator lines.
+  // If no good lines are found, pix_vline is destroyed.
+  static void FindAndRemoveVLines(int resolution,
+                                  Pix* pix_intersections,
+                                  int* vertical_x, int* vertical_y,
+                                  Pix** pix_vline, Pix* pix_non_vline,
+                                  Pix* src_pix, TabVector_LIST* vectors);
+
+
+  // Finds horizontal line objects in pix_hline and removes them from src_pix.
+  // Uses the given resolution to determine size thresholds instead of any
+  // that may be present in the pix.
+  // vertical_x and vertical_y are taken by value, so here they are inputs;
+  // presumably the mean vertical direction from FindAndRemoveVLines (confirm).
+  // The output vectors are owned by the list and Frozen (cannot refit) by
+  // having no boxes, as there is no need to refit or merge separator lines.
+  // If no good lines are found, pix_hline is destroyed.
+  static void FindAndRemoveHLines(int resolution,
+                                  Pix* pix_intersections,
+                                  int vertical_x, int vertical_y,
+                                  Pix** pix_hline, Pix* pix_non_hline,
+                                  Pix* src_pix, TabVector_LIST* vectors);
+
+  // Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
+  // are the bounds of the image on which the input line_bblobs were found.
+  // The input line_bblobs list is const really.
+  // The output vertical_x and vertical_y are the total of all the vectors.
+  // The output list of TabVector makes no reference to the input BLOBNBOXes.
+  static void FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
+                              BLOBNBOX_LIST* line_bblobs,
+                              int* vertical_x, int* vertical_y,
+                              TabVector_LIST* vectors);
+
+  // Most of the heavy lifting of line finding.
+  // Given src_pix and its separate resolution,
+  // returns image masks:
+  // pix_vline           candidate vertical lines.
+  // pix_non_vline       pixels that didn't look like vertical lines.
+  // pix_hline           candidate horizontal lines.
+  // pix_non_hline       pixels that didn't look like horizontal lines.
+  // pix_intersections   pixels where vertical and horizontal lines meet.
+  // pix_music_mask      candidate music staves.
+  // This function promises to initialize all the output (2nd level) pointers,
+  // but any of the returns that are empty will be nullptr on output.
+  // None of the input (1st level) pointers may be nullptr except pix_music_mask,
+  // which will disable music detection, and pixa_display, which is for debug.
+  static void GetLineMasks(int resolution, Pix* src_pix,
+                           Pix** pix_vline, Pix** pix_non_vline,
+                           Pix** pix_hline, Pix** pix_non_hline,
+                           Pix** pix_intersections, Pix** pix_music_mask,
+                           Pixa* pixa_display);
+
+  // Returns a list of boxes corresponding to the candidate line segments. Sets
+  // the line_crossings member of the boxes so we can later determine the number
+  // of intersections touched by a full line.
+  static void GetLineBoxes(bool horizontal_lines,
+                           Pix* pix_lines, Pix* pix_intersections,
+                           C_BLOB_LIST* line_cblobs,
+                           BLOBNBOX_LIST* line_bblobs);
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_TEXTORD_LINEFIND_H_
diff --git a/tesseract/src/textord/makerow.cpp b/tesseract/src/textord/makerow.cpp
new file mode 100644
index 00000000..0df8243a
--- /dev/null
+++ b/tesseract/src/textord/makerow.cpp
@@ -0,0 +1,2673 @@
+/**********************************************************************
+ * File:        makerow.cpp  (Formerly makerows.c)
+ * Description: Code to arrange blobs into rows of text.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+ // Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "makerow.h"
+
+#include "blobbox.h"
+#include "ccstruct.h"
+#include "detlinefit.h"
+#include "statistc.h"
+#include "drawtord.h"
+#include "blkocc.h"
+#include "sortflts.h"
+#include "oldbasel.h"
+#include "textord.h"
+#include "tordmain.h"
+#include "underlin.h"
+#include "tprintf.h"
+#include "tovars.h"
+
+#include <algorithm>
+#include <vector>       // for std::vector
+
+namespace tesseract {
+
+BOOL_VAR(textord_heavy_nr, false, "Vigorously remove noise");
+BOOL_VAR(textord_show_initial_rows, false, "Display row accumulation");
+BOOL_VAR(textord_show_parallel_rows, false, "Display page correlated rows");
+BOOL_VAR(textord_show_expanded_rows, false, "Display rows after expanding");
+BOOL_VAR(textord_show_final_rows, false, "Display rows after final fitting");
+BOOL_VAR(textord_show_final_blobs, false, "Display blob bounds after pre-ass");
+BOOL_VAR(textord_test_landscape, false, "Tests refer to land/port");
+BOOL_VAR(textord_parallel_baselines, true, "Force parallel baselines");
+BOOL_VAR(textord_straight_baselines, false, "Force straight baselines");
+BOOL_VAR(textord_old_baselines, true, "Use old baseline algorithm");
+BOOL_VAR(textord_old_xheight, false, "Use old xheight algorithm");
+BOOL_VAR(textord_fix_xheight_bug, true, "Use spline baseline");  // NOTE(review): help text does not describe the name; confirm.
+BOOL_VAR(textord_fix_makerow_bug, true, "Prevent multiple baselines");
+BOOL_VAR(textord_debug_xheights, false, "Test xheight algorithms");
+static BOOL_VAR(textord_biased_skewcalc, true, "Bias skew estimates with line length");  // The static parameters are only used in this file.
+static BOOL_VAR(textord_interpolating_skew, true, "Interpolate across gaps");
+static INT_VAR(textord_skewsmooth_offset, 4, "For smooth factor");
+static INT_VAR(textord_skewsmooth_offset2, 1, "For smooth factor");
+INT_VAR(textord_test_x, -INT32_MAX, "coord of test pt");
+INT_VAR(textord_test_y, -INT32_MAX, "coord of test pt");
+INT_VAR(textord_min_blobs_in_row, 4, "Min blobs before gradient counted");
+INT_VAR(textord_spline_minblobs, 8, "Min blobs in each spline segment");
+INT_VAR(textord_spline_medianwin, 6, "Size of window for spline segmentation");
+static INT_VAR(textord_max_blob_overlaps, 4,
+               "Max number of blobs a big blob can overlap");
+INT_VAR(textord_min_xheight, 10, "Min credible pixel xheight");
+double_VAR(textord_spline_shift_fraction, 0.02,
+           "Fraction of line spacing for quad");
+double_VAR(textord_spline_outlier_fraction, 0.1,
+           "Fraction of line spacing for outlier");
+double_VAR(textord_skew_ile, 0.5, "Ile of gradients for page skew");
+double_VAR(textord_skew_lag, 0.02, "Lag for skew on row accumulation");
+double_VAR(textord_linespace_iqrlimit, 0.2, "Max iqr/median for linespace");
+double_VAR(textord_width_limit, 8, "Max width of blobs to make rows");
+double_VAR(textord_chop_width, 1.5, "Max width before chopping");
+static double_VAR(textord_expansion_factor, 1.0,
+                  "Factor to expand rows by in expand_rows");
+static double_VAR(textord_overlap_x, 0.375, "Fraction of linespace for good overlap");
+double_VAR(textord_minxh, 0.25, "fraction of linesize for min xheight");
+double_VAR(textord_min_linesize, 1.25, "* blob height for initial linesize");
+double_VAR(textord_excess_blobsize, 1.3,
+           "New row made if blob makes row this big");
+double_VAR(textord_occupancy_threshold, 0.4, "Fraction of neighbourhood");
+double_VAR(textord_underline_width, 2.0, "Multiple of line_size for underline");
+double_VAR(textord_min_blob_height_fraction, 0.75,
+           "Min blob height/top to include blob top into xheight stats");
+double_VAR(textord_xheight_mode_fraction, 0.4,
+           "Min pile height to make xheight");
+double_VAR(textord_ascheight_mode_fraction, 0.08,
+           "Min pile height to make ascheight");
+static double_VAR(textord_descheight_mode_fraction, 0.08,
+                  "Min pile height to make descheight");
+double_VAR(textord_ascx_ratio_min, 1.25, "Min cap/xheight");
+double_VAR(textord_ascx_ratio_max, 1.8, "Max cap/xheight");
+double_VAR(textord_descx_ratio_min, 0.25, "Min desc/xheight");
+double_VAR(textord_descx_ratio_max, 0.6, "Max desc/xheight");
+double_VAR(textord_xheight_error_margin, 0.1, "Accepted variation");
+INT_VAR(textord_lms_line_trials, 12, "Number of linew fits to do");  // NOTE(review): "linew" in the help text looks like a typo.
+BOOL_VAR(textord_new_initial_xheight, true, "Use test xheight mechanism");
+BOOL_VAR(textord_debug_blob, false, "Print test blob information");
+
+#define MAX_HEIGHT_MODES  12
+
+const int kMinLeaderCount = 5;
+
+// Helper that moves every blob in the list, sorted with blob_x_order, into
+// one new TO_ROW added via row_it. Returns the mean blob height (0 if none).
+static float MakeRowFromBlobs(float line_size,
+                              BLOBNBOX_IT* blob_it, TO_ROW_IT* row_it) {
+  blob_it->sort(blob_x_order);
+  blob_it->move_to_first();
+  TO_ROW* single_row = nullptr;
+  int num_blobs = 0;
+  float height_sum = 0.0f;
+  while (!blob_it->empty()) {
+    BLOBNBOX* blob = blob_it->extract();
+    const int top = blob->bounding_box().top();
+    const int bottom = blob->bounding_box().bottom();
+    if (single_row != nullptr) {
+      single_row->add_blob(blob, top, bottom, line_size);
+    } else {
+      single_row = new TO_ROW(blob, top, bottom, line_size);
+      row_it->add_before_then_move(single_row);
+    }
+    height_sum += top - bottom;
+    ++num_blobs;
+    blob_it->forward();
+  }
+  return num_blobs == 0 ? height_sum : height_sum / num_blobs;
+}
+
+// Helper that builds a row out of the child outlines of a single blob.
+// Returns the mean size of the blobs created, or 0 if there are no children.
+static float MakeRowFromSubBlobs(TO_BLOCK* block, C_BLOB* blob,
+                                 TO_ROW_IT* row_it) {
+  // The new blobs are collected in the block's small_blobs list.
+  BLOBNBOX_IT small_it(&block->small_blobs);
+  // Step from the blob's first outline down to its children.
+  C_OUTLINE_IT child_it(blob->out_list());
+  child_it.set_to_list(child_it.data()->child());
+  if (child_it.empty()) {
+    return 0.0f;
+  }
+  for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) {
+    // Each child is deep copied into a blob of its own, with its direction
+    // corrected as needed.
+    auto* child_blob = new C_BLOB(C_OUTLINE::deep_copy(child_it.data()));
+    child_blob->CheckInverseFlagAndDirection();
+    small_it.add_after_then_move(new BLOBNBOX(child_blob));
+  }
+  // Make the row from the contents of the small_blobs list.
+  return MakeRowFromBlobs(block->line_size, &small_it, row_it);
+}
+
+/**
+ * @name make_single_row
+ *
+ * Puts all the blobs of the block into a single row. If sub-blobs are
+ * allowed and there is only a single blob, an extra row is made first from
+ * its child outlines (if any), in case it is a container of the real blobs.
+ */
+float make_single_row(ICOORD page_tr, bool allow_sub_blobs,
+                      TO_BLOCK* block, TO_BLOCK_LIST* blocks) {
+  BLOBNBOX_IT blob_it(&block->blobs);
+  TO_ROW_IT row_it(block->get_rows());
+
+  // Pull the small, noise and large blobs into the main blob list.
+  blob_it.add_list_after(&block->small_blobs);
+  blob_it.add_list_after(&block->noise_blobs);
+  blob_it.add_list_after(&block->large_blobs);
+  if (block->blobs.empty()) {
+    // Nothing to work with: make a fake blob, owned by its blobnbox.
+    C_BLOB* fake = C_BLOB::FakeBlob(block->block->pdblk.bounding_box());
+    blob_it.add_after_then_move(new BLOBNBOX(fake));
+  } else if (allow_sub_blobs && block->blobs.singleton()) {
+    blob_it.move_to_first();
+    const float sub_size =
+        MakeRowFromSubBlobs(block, blob_it.data()->cblob(), &row_it);
+    // Let the sub-blobs raise the line size, but never lower it.
+    if (block->line_size < sub_size)
+      block->line_size = sub_size;
+  }
+  MakeRowFromBlobs(block->line_size, &blob_it, &row_it);
+  // Fit an LMS line to each of the rows.
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward())
+    fit_lms_line(row_it.data());
+  // The skew comes from the fitted lines.
+  float gradient;
+  float fit_error;
+  compute_page_skew(blocks, gradient, fit_error);
+  return gradient;
+}
+
+/**
+ * @name make_rows
+ *
+ * Arrange the blobs of every block into rows; returns the global page skew.
+ */
+float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) {
+  TO_BLOCK_IT block_it(port_blocks);
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
+    make_initial_textrows(page_tr, block_it.data(), FCOORD(1.0f, 0.0f),
+                          !textord_test_landscape);
+  }
+  // Compute the skew and noise globally over all the blocks.
+  float page_skew;
+  float page_noise;
+  compute_page_skew(port_blocks, page_skew, page_noise);
+  // Second pass: clean up the rows of each block using the global skew.
+  block_it.set_to_list(port_blocks);
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
+    TO_BLOCK* block = block_it.data();
+    cleanup_rows_making(page_tr, block, page_skew, FCOORD(1.0f, 0.0f),
+                        block->block->pdblk.bounding_box().left(),
+                        !textord_test_landscape);
+  }
+  return page_skew;
+}
+
+/**
+ * @name make_initial_textrows
+ *
+ * Arrange the good blobs into rows of text and fit an LMS line to each row.
+ */
+void make_initial_textrows(                  //find lines
+        ICOORD page_tr,   //used to create the debug window
+        TO_BLOCK* block,  //block to do
+        FCOORD rotation,  //for drawing
+        bool testing_on   //correct orientation
+) {
+  const bool show_rows = textord_show_initial_rows && testing_on;
+  TO_ROW_IT row_it = block->get_rows();
+
+#ifndef GRAPHICS_DISABLED
+  if (show_rows && to_win == nullptr) {
+    create_to_win(page_tr);
+  }
+#endif
+  // Guess the skew.
+  assign_blobs_to_rows(block, nullptr, 0, true, true, show_rows);
+  // Fit a line to every row, starting from the first.
+  row_it.move_to_first();
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward())
+    fit_lms_line(row_it.data());
+#ifndef GRAPHICS_DISABLED
+  if (show_rows) {
+    // Plot the rows, cycling through the colours from RED to MAGENTA.
+    ScrollView::Color colour = ScrollView::RED;
+    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+      plot_to_row(row_it.data(), colour, rotation);
+      colour = static_cast<ScrollView::Color>(colour + 1);
+      if (colour > ScrollView::MAGENTA)
+        colour = ScrollView::RED;
+    }
+  }
+#endif
+}
+
+
+/**
+ * @name fit_lms_line
+ *
+ * Fit an LMS line to the bottom-centre points of a row's blobs.
+ */
+void fit_lms_line(TO_ROW *row) {
+  tesseract::DetLineFit line_fitter;
+  BLOBNBOX_IT blob_it(row->blob_list());
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    const TBOX& box = blob_it.data()->bounding_box();
+    const int mid_x = (box.left() + box.right()) / 2;
+    line_fitter.Add(ICOORD(mid_x, box.bottom()));
+  }
+  float gradient, intercept;
+  const double fit_error = line_fitter.Fit(&gradient, &intercept);
+  row->set_line(gradient, intercept, fit_error);
+}
+
+
+/**
+ * @name compute_page_skew
+ *
+ * Compute the skew over a full page from the gradients of the rows of all
+ * the text blocks. The error is picked the same way from the row errors.
+ */
+void compute_page_skew(                        //get page gradient
+                       TO_BLOCK_LIST *blocks,  //list of blocks
+                       float &page_m,          //output: chosen gradient
+                       float &page_err         //output: chosen error
+                      ) {
+  int32_t row_count;             //total rows
+  int32_t blob_count;            //total_blobs
+  int32_t row_err;               //integer error
+  int32_t row_index;             //next free entry
+  TO_ROW *row;                   //current row
+  TO_BLOCK_IT block_it = blocks; //iterator
+
+  row_count = 0;
+  blob_count = 0;
+  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
+       block_it.forward ()) {
+    POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
+    if (pb != nullptr && !pb->IsText())
+      continue;  // Pretend non-text blocks don't exist.
+    row_count += block_it.data ()->get_rows ()->length ();
+    //count up the blobs in those rows
+    TO_ROW_IT row_it(block_it.data()->get_rows());
+    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
+      blob_count += row_it.data ()->blob_list ()->length ();
+  }
+  if (row_count == 0) {
+    page_m = 0.0f;
+    page_err = 0.0f;
+    return;
+  }
+  // The biased pass fills at most one entry per blob and the other passes one
+  // per row, so size for the larger: blob_count alone can be too small.
+  const int32_t max_entries = std::max(blob_count, row_count);
+  std::vector<float> gradients(max_entries), errors(max_entries);
+
+  row_index = 0;
+  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
+       block_it.forward ()) {
+    POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
+    if (pb != nullptr && !pb->IsText())
+      continue;  // Pretend non-text blocks don't exist.
+    TO_ROW_IT row_it(block_it.data ()->get_rows());
+    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+      row = row_it.data ();
+      blob_count = row->blob_list ()->length ();
+      row_err = static_cast<int32_t>(ceil (row->line_error ()));
+      if (row_err <= 0)
+        row_err = 1;
+      if (textord_biased_skewcalc) {
+        // NOTE(review): blob_count is divided by row_err twice; intended?
+        blob_count /= row_err;
+        for (blob_count /= row_err; blob_count > 0; blob_count--) {
+          gradients[row_index] = row->line_m ();
+          errors[row_index] = row->line_error ();
+          row_index++;
+        }
+      }
+      else if (blob_count >= textord_min_blobs_in_row) {
+        gradients[row_index] = row->line_m ();
+        errors[row_index] = row->line_error ();
+        row_index++;
+      }
+    }
+  }
+  if (row_index == 0) {
+                                 //desperate: take every row, one entry each
+    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
+         block_it.forward ()) {
+      POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
+      if (pb != nullptr && !pb->IsText())
+        continue;  // Pretend non-text blocks don't exist.
+      TO_ROW_IT row_it(block_it.data()->get_rows());
+      for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
+           row_it.forward ()) {
+        row = row_it.data ();
+        gradients[row_index] = row->line_m ();
+        errors[row_index] = row->line_error ();
+        row_index++;
+      }
+    }
+  }
+  row_count = row_index;
+  row_index = choose_nth_item (static_cast<int32_t>(row_count * textord_skew_ile),
+    &gradients[0], row_count);
+  page_m = gradients[row_index];
+  row_index = choose_nth_item (static_cast<int32_t>(row_count * textord_skew_ile),
+    &errors[0], row_count);
+  page_err = errors[row_index];
+}
+
+const double kNoiseSize = 0.5;  // Blobs shorter than this fraction of the row's estimated xheight are noise candidates.
+const int kMinSize = 8;  // Min blob height in pixels for a blob to count towards the xheight estimate.
+
+/**
+ * Decide whether the blob "dot" plausibly is the dot sitting on top of the
+ * blob "i" (an i or a !). Not meant for any other diacritical.
+ *
+ * The candidate i must be more than twice as high as the dot and overlap it
+ * horizontally by at least half the i width or by the full dot width. A tall
+ * thin i is then accepted outright. Otherwise the outlines of the i are
+ * walked looking for a tall enough vertical run on both the left and the
+ * right side of the dot.
+ */
+static bool dot_of_i(BLOBNBOX* dot, BLOBNBOX* i, TO_ROW* row) {
+  const TBOX& dotbox = dot->bounding_box();
+  const TBOX& ibox = i->bounding_box();
+
+  // Reject when the i is too short relative to the dot.
+  if (ibox.height() <= 2 * dotbox.height())
+    return false;
+  // Reject when the horizontal overlap is insufficient.
+  const int x_overlap = std::min(dotbox.right(), ibox.right()) -
+                        std::max(dotbox.left(), ibox.left());
+  if (x_overlap * 2 < ibox.width() && x_overlap < dotbox.width())
+    return false;
+
+  // A tall and thin i (or !) is good enough on its own.
+  if (ibox.height() > ibox.width() * 2)
+    return true;
+
+  // The i may be joined to a neighbour, so inspect its outline for tall
+  // pieces just to the left and just to the right of the dot centre.
+  const double kHeightFraction = 0.6;
+  const double target_height =
+      (static_cast<double>(std::min(dotbox.bottom(), ibox.top())) -
+       (row->line_m() * dotbox.left() + row->line_c())) *
+      kHeightFraction;
+  const int middle = (dotbox.left() + dotbox.right()) / 2;
+  const int left_min = dotbox.left() - dotbox.width();
+  const int right_max = dotbox.right() + dotbox.width();
+
+  // Vertical extent of the run currently being tracked on each side.
+  int left_lo = 0, left_hi = 0;
+  int right_lo = 0, right_hi = 0;
+  // Whether we are currently inside a run / have already found a tall one.
+  bool in_left = false, in_right = false;
+  bool found_left = false, found_right = false;
+
+  C_OUTLINE_IT o_it = i->cblob()->out_list();
+  for (o_it.mark_cycle_pt(); !o_it.cycled_list(); o_it.forward()) {
+    C_OUTLINE* outline = o_it.data();
+    const int path_len = outline->pathlength();
+    ICOORD pt = outline->start_pos();
+    for (int s = 0; s < path_len; ++s) {
+      const int x = pt.x();
+      const int y = pt.y();
+
+      // Left side of the dot.
+      if (!found_left && x >= left_min && x < middle) {
+        if (!in_left) {
+          in_left = true;
+          left_hi = left_lo = y;
+        } else {
+          if (y > left_hi) left_hi = y;
+          if (y < left_lo) left_lo = y;
+        }
+      } else if (in_left) {
+        // The run just ended: was it tall enough?
+        in_left = false;
+        if (left_hi - left_lo > target_height) {
+          if (found_right)
+            return true;
+          found_left = true;
+        }
+      }
+
+      // Right side of the dot.
+      if (!found_right && x <= right_max && x > middle) {
+        if (!in_right) {
+          in_right = true;
+          right_hi = right_lo = y;
+        } else {
+          if (y > right_hi) right_hi = y;
+          if (y < right_lo) right_lo = y;
+        }
+      } else if (in_right) {
+        // The run just ended: was it tall enough?
+        in_right = false;
+        if (right_hi - right_lo > target_height) {
+          if (found_left)
+            return true;
+          found_right = true;
+        }
+      }
+
+      pt += outline->step(s);
+    }
+  }
+  return false;
+}
+
+// For every row of the block: estimate the xheight as the median height of
+// the blobs that are at least kMinSize high, then delete every blob shorter
+// than kNoiseSize times that estimate, unless dot_of_i accepts it as the dot
+// of the previous kept blob or of the next blob in the row.
+void vigorous_noise_removal(TO_BLOCK* block) {
+  TO_ROW_IT row_it = block->get_rows();
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    TO_ROW* row = row_it.data();
+    BLOBNBOX_IT b_it = row->blob_list();
+
+    // The tallest blob bounds the height histogram.
+    int tallest = 0;
+    for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
+      const int h = b_it.data()->bounding_box().height();
+      if (h > tallest)
+        tallest = h;
+    }
+
+    // Histogram of the heights that are big enough to be an xheight.
+    STATS hstats(0, tallest + 1);
+    for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
+      const int h = b_it.data()->bounding_box().height();
+      if (h >= kMinSize)
+        hstats.add(h, 1);
+    }
+    float xheight = hstats.median();
+
+    // Throw away the small stuff.
+    BLOBNBOX* last_kept = nullptr;
+    for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
+      BLOBNBOX* blob = b_it.data();
+      const TBOX& box = blob->bounding_box();
+      if (!(box.height() < kNoiseSize * xheight)) {
+        last_kept = blob;  // Big enough: remember it as a possible i.
+        continue;
+      }
+      // Small, so it goes unless it looks like the dot of a neighbour.
+      if (last_kept != nullptr && dot_of_i(blob, last_kept, row))
+        continue;  // Looks OK.
+      if (!b_it.at_last() && dot_of_i(blob, b_it.data_relative(1), row))
+        continue;  // Looks OK.
+      // It might be noise so get rid of it.
+      delete blob->cblob();
+      delete b_it.extract();
+    }
+  }
+}
+
+/**
+ * cleanup_rows_making
+ *
+ * Remove overlapping rows and fit all the blobs to what's left.
+ *
+ * Runs fit_parallel_rows, delete_non_dropout_rows and expand_rows, then
+ * hands the blob lists of all rows back to block->blobs and re-assigns
+ * blobs to rows in three rounds: the normal blobs, then with large_blobs
+ * added, then with noise_blobs and small_blobs added.
+ */
+void cleanup_rows_making(                   //find lines
+        ICOORD page_tr,    //top right
+        TO_BLOCK* block,   //block to do
+        float gradient,    //gradient to fit
+        FCOORD rotation,   //for drawing
+        int32_t block_edge,  //edge of block
+        bool testing_on  //correct orientation
+) {
+  BLOBNBOX_IT blob_it(&block->blobs);
+  TO_ROW_IT row_it(block->get_rows());
+  // Debug drawing of the parallel rows is wanted.
+  const bool show_rows = textord_show_parallel_rows && testing_on;
+
+#ifndef GRAPHICS_DISABLED
+  if (show_rows && to_win == nullptr) {
+    create_to_win(page_tr);
+  }
+#endif
+
+  // Get the row coordinates, drop the poor rows and grow the survivors.
+  fit_parallel_rows(block, gradient, rotation, block_edge, show_rows);
+  delete_non_dropout_rows(block, gradient, rotation, block_edge, show_rows);
+  expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);
+
+  // Give the blobs of every row back to the block.
+  blob_it.set_to_list(&block->blobs);
+  row_it.set_to_list(block->get_rows());
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    blob_it.add_list_after(row_it.data()->blob_list());
+  }
+
+  // Round 1: the new rows must now be genuine.
+  assign_blobs_to_rows(block, &gradient, 1, false, false, false);
+
+  // Round 2: it is safe to use the big blobs now.
+  blob_it.set_to_list(&block->blobs);
+  blob_it.add_list_after(&block->large_blobs);
+  assign_blobs_to_rows(block, &gradient, 2, true, true, false);
+
+  // Round 3: throw all the remaining blobs in.
+  blob_it.set_to_list(&block->blobs);
+  blob_it.add_list_after(&block->noise_blobs);
+  blob_it.add_list_after(&block->small_blobs);
+  assign_blobs_to_rows(block, &gradient, 3, false, false, false);
+}
+
+/**
+ * delete_non_dropout_rows
+ *
+ * Delete the rows that find_best_dropout_row rejects.
+ *
+ * The blobs of the block are projected onto the deskewed y axis
+ * (compute_line_occupation), the projection is thresholded
+ * (compute_occupation_threshold) and turned into a per scan line distance to
+ * the nearest dropout (compute_dropout_distances). Each row is then looked
+ * up at its floored intercept and removed if find_best_dropout_row says so.
+ * The blob lists of removed rows, and finally of all remaining rows, are
+ * passed to block->blobs via add_list_after.
+ */
+void delete_non_dropout_rows(                   //find lines
+        TO_BLOCK* block,   //block to do
+        float gradient,    //global skew
+        FCOORD rotation,   //deskew vector
+        int32_t block_edge,  //left edge
+        bool testing_on   //correct orientation
+) {
+  TO_ROW_IT row_it = block->get_rows();
+  BLOBNBOX_IT blob_it = &block->blobs;
+
+  if (row_it.length() == 0)
+    return;  // Empty block.
+
+  // Vertical range to cover: the deskewed block, widened to hold every row.
+  const TBOX block_box = deskew_block_coords(block, gradient);
+  const int32_t xleft = block->block->pdblk.bounding_box().left();
+  const int32_t ybottom = block->block->pdblk.bounding_box().bottom();
+  int32_t min_y = block_box.bottom() - 1;
+  int32_t max_y = block_box.top() + 1;
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    const int32_t row_line =
+        static_cast<int32_t>(floor(row_it.data()->intercept()));
+    if (row_line <= min_y)
+      min_y = row_line - 1;
+    if (row_line >= max_y)
+      max_y = row_line + 1;
+  }
+  const int32_t line_count = max_y - min_y + 1;
+  if (line_count <= 0)
+    return;  // Empty block.
+
+  std::vector<int32_t> deltas(line_count);      // change in occupation
+  std::vector<int32_t> occupation(line_count);  // of pixel coords
+
+  compute_line_occupation(block, gradient, min_y, max_y,
+                          &occupation[0], &deltas[0]);
+  const int32_t low_window = static_cast<int32_t>(
+      ceil(block->line_spacing * (tesseract::CCStruct::kDescenderFraction +
+                                  tesseract::CCStruct::kAscenderFraction)));
+  const int32_t high_window = static_cast<int32_t>(
+      ceil(block->line_spacing * (tesseract::CCStruct::kXHeightFraction +
+                                  tesseract::CCStruct::kAscenderFraction)));
+  compute_occupation_threshold(low_window, high_window, max_y - min_y + 1,
+                               &occupation[0], &deltas[0]);
+#ifndef GRAPHICS_DISABLED
+  if (testing_on) {
+    draw_occupation(xleft, ybottom, min_y, max_y, &occupation[0], &deltas[0]);
+  }
+#endif
+  compute_dropout_distances(&occupation[0], &deltas[0], line_count);
+
+  // Test every row against the dropout distance at its own scan line.
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    TO_ROW *row = row_it.data();
+    const int32_t line_index = static_cast<int32_t>(floor(row->intercept()));
+    const int32_t distance = deltas[line_index - min_y];
+    if (!find_best_dropout_row(row, distance, block->line_spacing / 2,
+                               line_index, &row_it, testing_on)) {
+      continue;  // The row stays.
+    }
+#ifndef GRAPHICS_DISABLED
+    if (testing_on)
+      plot_parallel_row(row, gradient, block_edge,
+                        ScrollView::WHITE, rotation);
+#endif
+    blob_it.add_list_after(row_it.data()->blob_list());
+    delete row_it.extract();  // Too far away.
+  }
+
+  // Hand over the blob lists of the rows that remain.
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    blob_it.add_list_after(row_it.data()->blob_list());
+  }
+}
+
+
+/**
+ * @name find_best_dropout_row
+ *
+ * Delete this row if it has a neighbour with better dropout characteristics.
+ * true is returned if the row should be deleted.
+ *
+ * The sign of distance picks the search direction: a negative distance
+ * inspects the rows after the current one in row_it (row_inc = 1), any other
+ * distance the rows before it (row_inc = -1). true is returned when
+ *  - the absolute distance exceeds dist_limit, or
+ *  - a neighbour's floored intercept lies strictly between line_index and
+ *    line_index + 2 * distance, or
+ *  - a neighbour's floored intercept equals one of those two values and the
+ *    neighbour's believability is at least that of this row.
+ * Further neighbours are examined only while the last one examined had a
+ * floored intercept equal to one of those two values (and row_offset is
+ * below the list length). Otherwise false is returned.
+ *
+ * row_it is only queried here (at_first, at_last, data_relative, length).
+ * With testing_on set the decision is traced through tprintf.
+ */
+bool find_best_dropout_row(                    //find neighbours
+        TO_ROW* row,        //row to test
+        int32_t distance,     //dropout dist
+        float dist_limit,   //threshold distance
+        int32_t line_index,   //index of row
+        TO_ROW_IT* row_it,  //current position
+        bool testing_on    //correct orientation
+) {
+  int32_t next_index;              // of neighbouring row
+  int32_t row_offset;              //from current row
+  int32_t abs_dist;                //absolute distance
+  int8_t row_inc;                  //increment to row_index
+  TO_ROW *next_row;              //neighbouring row under test
+
+  if (testing_on)
+    tprintf ("Row at %g(%g), dropout dist=%d,",
+      row->intercept (), row->parallel_c (), distance);
+  if (distance < 0) {
+    row_inc = 1;
+    abs_dist = -distance;
+  }
+  else {
+    row_inc = -1;
+    abs_dist = distance;
+  }
+  if (abs_dist > dist_limit) {
+    if (testing_on) {
+      tprintf (" too far - deleting\n");
+    }
+    return true;
+  }
+  if ((distance < 0 && !row_it->at_last ())
+  || (distance >= 0 && !row_it->at_first ())) {
+    row_offset = row_inc;
+    do {
+      next_row = row_it->data_relative (row_offset);
+      next_index = static_cast<int32_t>(floor (next_row->intercept ()));
+      if ((distance < 0
+        && next_index < line_index
+        && next_index > line_index + distance + distance)
+        || (distance >= 0
+        && next_index > line_index
+      && next_index < line_index + distance + distance)) {
+        if (testing_on) {
+          tprintf (" nearer neighbour (%d) at %g\n",
+            line_index + distance - next_index,
+            next_row->intercept ());
+        }
+        return true;             //other is nearer
+      }
+      else if (next_index == line_index
+      || next_index == line_index + distance + distance) {
+        if (row->believability () <= next_row->believability ()) {
+          if (testing_on) {
+            tprintf (" equal but more believable at %g (%g/%g)\n",
+              next_row->intercept (),
+              row->believability (),
+              next_row->believability ());
+          }
+          return true;           //other is more believable
+        }
+      }
+      row_offset += row_inc;
+    }
+    while ((next_index == line_index
+      || next_index == line_index + distance + distance)
+      && row_offset < row_it->length ());
+    if (testing_on)
+      tprintf (" keeping\n");
+  }
+  return false;
+}
+
+
+/**
+ * @name deskew_block_coords
+ *
+ * Return the union of the bounding boxes of all blobs in all rows of the
+ * block after each box has been rotated by the unit vector
+ * (1, -gradient) / sqrt(gradient^2 + 1), i.e. as if the block had been
+ * deskewed. The blobs themselves are not modified.
+ */
+TBOX deskew_block_coords(                  //block box
+                        TO_BLOCK *block,  //block to do
+                        float gradient    //global skew
+                       ) {
+  // Unit vector that undoes the skew.
+  const float length = sqrt (gradient * gradient + 1);
+  FCOORD rotation(1 / length, -gradient / length);
+
+  TBOX result;  // accumulated block bounds
+  TO_ROW_IT row_it = block->get_rows();
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    BLOBNBOX_IT blob_it(row_it.data()->blob_list());
+    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
+         blob_it.forward()) {
+      // Rotate a copy of the box and merge it into the result.
+      TBOX blob_box = blob_it.data()->bounding_box();
+      blob_box.rotate(rotation);
+      result += blob_box;
+    }
+  }
+  return result;
+}
+
+
+/**
+ * @name compute_line_occupation
+ *
+ * Project the deskewed blob boxes of all rows onto the y axis.
+ *
+ * deltas receives the 1st derivative: for each blob its (deskewed) box width
+ * is added at the scan line of the box bottom and subtracted at the scan line
+ * of the box top, both relative to min_y. occupation receives the running
+ * sum of deltas. Both arrays must hold max_y - min_y + 1 entries.
+ */
+void compute_line_occupation(                    //project blobs
+                             TO_BLOCK *block,    //block to do
+                             float gradient,     //global skew
+                             int32_t min_y,      //min coord in block
+                             int32_t max_y,      //in block
+                             int32_t *occupation,  //output projection
+                             int32_t *deltas     //derivative
+                            ) {
+  const int32_t line_count = max_y - min_y + 1;
+  // Unit vector that undoes the skew.
+  const float length = sqrt (gradient * gradient + 1);
+  FCOORD rotation(1 / length, -gradient / length);
+
+  // Start from an empty derivative.
+  for (int32_t line = 0; line < line_count; line++)
+    deltas[line] = 0;
+
+  TO_ROW_IT row_it = block->get_rows();
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    BLOBNBOX_IT blob_it(row_it.data()->blob_list());
+    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
+         blob_it.forward()) {
+      TBOX blob_box = blob_it.data()->bounding_box();
+      blob_box.rotate(rotation);  // de-skew it
+      const int32_t width = blob_box.right() - blob_box.left();
+      // Occupation rises at the bottom edge of the box...
+      int index = blob_box.bottom() - min_y;
+      ASSERT_HOST(index >= 0 && index < line_count);
+      deltas[index] += width;
+      // ...and falls again at the top edge.
+      index = blob_box.top() - min_y;
+      ASSERT_HOST(index >= 0 && index < line_count);
+      deltas[index] -= width;
+    }
+  }
+
+  // Integrate the derivative to get the occupation itself.
+  int32_t running = deltas[0];
+  occupation[0] = running;
+  for (int32_t line = 1; line < line_count; line++) {
+    running += deltas[line];
+    occupation[line] = running;
+  }
+}
+
+
+/**
+ * compute_occupation_threshold
+ *
+ * Compute thresholds for textline or not for the occupation array.
+ *
+ * Every threshold has the form (sum - min_occ) / divisor + min_occ, where
+ * sum and min_occ are the sum and the minimum of occupation over a window
+ * and divisor is ceil((low_window + high_window) /
+ * textord_occupancy_threshold), all in integer arithmetic.
+ *
+ * If low_window + high_window is smaller than line_count the window holds
+ * low_window + high_window entries and slides along the array one entry at
+ * a time: the first low_window thresholds all use the initial window, then
+ * one threshold is written per window position, and the entries left over
+ * at the end repeat the value of the final window. While sliding, the
+ * minimum is updated incrementally and rescanned only when the entry that
+ * held it drops out of the window.
+ *
+ * Otherwise a single window covers the whole array and every threshold gets
+ * the same value.
+ *
+ * NOTE(review): divisor is 0 if low_window + high_window is 0, which would
+ * divide by zero - presumably callers never pass that; confirm.
+ */
+void compute_occupation_threshold(                    //project blobs
+                                  int32_t low_window,   //below result point
+                                  int32_t high_window,  //above result point
+                                  int32_t line_count,   //array sizes
+                                  int32_t *occupation,  //input projection
+                                  int32_t *thresholds   //output thresholds
+                                 ) {
+  int32_t line_index;              //of thresholds line
+  int32_t low_index;               //in occupation
+  int32_t high_index;              //in occupation
+  int32_t sum;                     //current window sum
+  int32_t divisor;                 //to get thresholds
+  int32_t min_index;               //of min occ
+  int32_t min_occ;                 //min in locality
+  int32_t test_index;              //for finding min
+
+  divisor =
+    static_cast<int32_t>(ceil ((low_window + high_window) / textord_occupancy_threshold));
+  if (low_window + high_window < line_count) {
+    for (sum = 0, high_index = 0; high_index < low_window; high_index++)
+      sum += occupation[high_index];
+    for (low_index = 0; low_index < high_window; low_index++, high_index++)
+      sum += occupation[high_index];
+    min_occ = occupation[0];
+    min_index = 0;
+    for (test_index = 1; test_index < high_index; test_index++) {
+      if (occupation[test_index] <= min_occ) {
+        min_occ = occupation[test_index];
+        min_index = test_index;  //find min in region
+      }
+    }
+    for (line_index = 0; line_index < low_window; line_index++)
+      thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
+    //slide the window one entry at a time until its top reaches the end
+    for (low_index = 0; high_index < line_count; low_index++, high_index++) {
+      sum -= occupation[low_index];
+      sum += occupation[high_index];
+      if (occupation[high_index] <= min_occ) {
+                                 //find min in region
+        min_occ = occupation[high_index];
+        min_index = high_index;
+      }
+                                 //lost min from region
+      if (min_index <= low_index) {
+        min_occ = occupation[low_index + 1];
+        min_index = low_index + 1;
+        for (test_index = low_index + 2; test_index <= high_index;
+        test_index++) {
+          if (occupation[test_index] <= min_occ) {
+            min_occ = occupation[test_index];
+                                 //find min in region
+            min_index = test_index;
+          }
+        }
+      }
+      thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
+    }
+  }
+  else {
+    min_occ = occupation[0];
+    min_index = 0;
+    for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
+      if (occupation[low_index] < min_occ) {
+        min_occ = occupation[low_index];
+        min_index = low_index;
+      }
+      sum += occupation[low_index];
+    }
+    line_index = 0;
+  }
+  for (; line_index < line_count; line_index++)
+    thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
+  //same out to end
+}
+
+
+/**
+ * @name compute_dropout_distances
+ *
+ * Replace the thresholds, in place, by a signed distance per scan line.
+ *
+ * The array is scanned upwards. Each entry is first overwritten with a
+ * counter that decreases by one per line (starting at -line_count - 1). A
+ * scan stops at a line whose occupation is at least its threshold while the
+ * line below it was under its (original) threshold. From there the entries
+ * below are back-filled with 1, 2, 3, ... for as long as that positive value
+ * is smaller than the magnitude of the counter already stored, and the
+ * counter restarts so that the stopping line itself receives 0.
+ *
+ * line_count must be at least 1.
+ */
+void compute_dropout_distances(                    //project blobs
+                               int32_t *occupation,  //input projection
+                               int32_t *thresholds,  //in: thresholds, out: distances
+                               int32_t line_count    //array sizes
+                              ) {
+  int32_t distance = -line_count;  // signed distance from the previous stop
+  int32_t line_index = 0;          // next line to overwrite
+
+  for (;;) {
+    // Walk upwards, stamping the falling counter over the thresholds.
+    int32_t prev_threshold;  // threshold of the line just overwritten
+    do {
+      --distance;
+      prev_threshold = thresholds[line_index];
+      thresholds[line_index] = distance;
+      ++line_index;
+    } while (line_index < line_count &&
+             (occupation[line_index] < thresholds[line_index] ||
+              occupation[line_index - 1] >= prev_threshold));
+
+    if (line_index >= line_count)
+      break;  // Ran off the top: nothing above to measure against.
+
+    // Back-fill with the (smaller) distance to the stop just found.
+    int32_t next_dist = 1;
+    for (int32_t back_index = line_index - 1;
+         next_dist < -distance && back_index >= 0; --back_index) {
+      thresholds[back_index] = next_dist;
+      ++next_dist;
+      ++distance;
+    }
+    distance = 1;
+  }
+}
+
+
+/**
+ * @name expand_rows
+ *
+ * Expand each row to the least of its allowed size and touching its
+ * neighbours. If the expansion would entirely swallow a neighbouring row
+ * then do so.
+ *
+ * Preparation: adjust_row_limits, compute_row_stats (before the blob
+ * assignment if textord_new_initial_xheight is set, after fit_parallel_rows
+ * otherwise), assign_blobs_to_rows and fit_parallel_rows. Returns early if
+ * the block has no rows.
+ *
+ * The rows are then visited from the last one backwards until the iterator
+ * wraps round to the last row again. For each row the allowed limits are
+ *   y_bottom = intercept - line_size * textord_expansion_factor *
+ *              kDescenderFraction
+ *   y_top    = intercept + line_size * textord_expansion_factor *
+ *              (kXHeightFraction + kAscenderFraction)
+ * Downwards (towards data_relative(1)) and upwards (towards
+ * data_relative(-1)) a neighbour that reaches into the allowed zone is
+ *  - swallowed if it lies entirely inside the zone: its blob list is added
+ *    to this row's blob list, the neighbour is extracted and deleted, and
+ *    the following neighbour is tested in turn;
+ *  - used to truncate the limit if it does not reach this row's current
+ *    limit;
+ *  - otherwise it blocks the expansion completely.
+ * Finally the new limits are stored with set_limits.
+ *
+ * Debug output and plotting happen only when both
+ * textord_show_expanded_rows and testing_on are set.
+ */
+void expand_rows(                   //find lines
+        ICOORD page_tr,    //top right
+        TO_BLOCK* block,   //block to do
+        float gradient,    //gradient to fit
+        FCOORD rotation,   //for drawing
+        int32_t block_edge,  //edge of block
+        bool testing_on   //correct orientation
+) {
+  bool swallowed_row;           //eaten a neighbour
+  float y_max, y_min;            //new row limits
+  float y_bottom, y_top;         //allowed limits
+  TO_ROW *test_row;              //neighbouring row
+  TO_ROW *row;                   //current row
+                                 //iterators
+  BLOBNBOX_IT blob_it = &block->blobs;
+  TO_ROW_IT row_it = block->get_rows ();
+
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_expanded_rows && testing_on) {
+    if (to_win == nullptr)
+      create_to_win(page_tr);
+  }
+#endif
+
+  adjust_row_limits(block);  //shift min,max.
+  if (textord_new_initial_xheight) {
+    if (block->get_rows ()->length () == 0)
+      return;
+    compute_row_stats(block, textord_show_expanded_rows && testing_on);
+  }
+  assign_blobs_to_rows (block, &gradient, 4, true, false, false);
+  //get real membership
+  if (block->get_rows ()->length () == 0)
+    return;
+  fit_parallel_rows(block,
+                    gradient,
+                    rotation,
+                    block_edge,
+                    textord_show_expanded_rows && testing_on);
+  if (!textord_new_initial_xheight)
+    compute_row_stats(block, textord_show_expanded_rows && testing_on);
+  row_it.move_to_last ();
+  do {
+    row = row_it.data ();
+    y_max = row->max_y ();       //get current limits
+    y_min = row->min_y ();
+    y_bottom = row->intercept () - block->line_size * textord_expansion_factor *
+      tesseract::CCStruct::kDescenderFraction;
+    y_top = row->intercept () + block->line_size * textord_expansion_factor *
+        (tesseract::CCStruct::kXHeightFraction +
+         tesseract::CCStruct::kAscenderFraction);
+    if (y_min > y_bottom) {      //expansion allowed
+      if (textord_show_expanded_rows && testing_on)
+        tprintf("Expanding bottom of row at %f from %f to %f\n",
+                row->intercept(), y_min, y_bottom);
+                                 //expandable
+      swallowed_row = true;
+      while (swallowed_row && !row_it.at_last ()) {
+        swallowed_row = false;
+                                 //get next one
+        test_row = row_it.data_relative (1);
+                                 //overlaps space
+        if (test_row->max_y () > y_bottom) {
+          if (test_row->min_y () > y_bottom) {
+            if (textord_show_expanded_rows && testing_on)
+              tprintf("Eating row below at %f\n", test_row->intercept());
+            row_it.forward ();
+#ifndef GRAPHICS_DISABLED
+            if (textord_show_expanded_rows && testing_on)
+              plot_parallel_row(test_row,
+                                gradient,
+                                block_edge,
+                                ScrollView::WHITE,
+                                rotation);
+#endif
+            blob_it.set_to_list (row->blob_list ());
+            blob_it.add_list_after (test_row->blob_list ());
+                                 //swallow complete row
+            delete row_it.extract ();
+            row_it.backward ();
+            swallowed_row = true;
+          }
+          else if (test_row->max_y () < y_min) {
+                                 //shorter limit
+            y_bottom = test_row->max_y ();
+            if (textord_show_expanded_rows && testing_on)
+              tprintf("Truncating limit to %f due to touching row at %f\n",
+                      y_bottom, test_row->intercept());
+          }
+          else {
+            y_bottom = y_min;    //can't expand it
+            if (textord_show_expanded_rows && testing_on)
+              tprintf("Not expanding limit beyond %f due to touching row at %f\n",
+                      y_bottom, test_row->intercept());
+          }
+        }
+      }
+      y_min = y_bottom;          //expand it
+    }
+    if (y_max < y_top) {         //expansion allowed
+      if (textord_show_expanded_rows && testing_on)
+        tprintf("Expanding top of row at %f from %f to %f\n",
+                row->intercept(), y_max, y_top);
+      swallowed_row = true;
+      while (swallowed_row && !row_it.at_first ()) {
+        swallowed_row = false;
+                                 //get one above
+        test_row = row_it.data_relative (-1);
+        if (test_row->min_y () < y_top) {
+          if (test_row->max_y () < y_top) {
+            if (textord_show_expanded_rows && testing_on)
+              tprintf("Eating row above at %f\n", test_row->intercept());
+            row_it.backward ();
+            blob_it.set_to_list (row->blob_list ());
+#ifndef GRAPHICS_DISABLED
+            if (textord_show_expanded_rows && testing_on)
+              plot_parallel_row(test_row,
+                                gradient,
+                                block_edge,
+                                ScrollView::WHITE,
+                                rotation);
+#endif
+            blob_it.add_list_after (test_row->blob_list ());
+                                 //swallow complete row
+            delete row_it.extract ();
+            row_it.forward ();
+            swallowed_row = true;
+          }
+          else if (test_row->min_y () < y_max) {
+                                 //shorter limit
+            y_top = test_row->min_y ();
+            if (textord_show_expanded_rows && testing_on)
+              tprintf("Truncating limit to %f due to touching row at %f\n",
+                      y_top, test_row->intercept());
+          }
+          else {
+            y_top = y_max;       //can't expand it
+            if (textord_show_expanded_rows && testing_on)
+              tprintf("Not expanding limit beyond %f due to touching row at %f\n",
+                      y_top, test_row->intercept());
+          }
+        }
+      }
+      y_max = y_top;             //expand it
+    }
+                                 //new limits
+    row->set_limits (y_min, y_max);
+    row_it.backward ();
+  }
+  while (!row_it.at_last ());     //stop once wrapped round to the start row
+}
+
+
+/**
+ * adjust_row_limits
+ *
+ * Reset the limits of every row of the block around its intercept so that
+ * the current row size (max_y - min_y) is split according to the default
+ * fractions: kDescenderFraction below the intercept and
+ * kXHeightFraction + kAscenderFraction above it. Also clears the merged
+ * flag of each row. Traces through tprintf if textord_show_expanded_rows.
+ */
+void adjust_row_limits(                 //tidy limits
+                       TO_BLOCK *block  //block to do
+                      ) {
+  // The fraction sums do not change per row; auto keeps the constants' type.
+  const auto above_fraction = tesseract::CCStruct::kXHeightFraction +
+                              tesseract::CCStruct::kAscenderFraction;
+  const auto total_fraction =
+      above_fraction + tesseract::CCStruct::kDescenderFraction;
+
+  TO_ROW_IT row_it = block->get_rows();
+  if (textord_show_expanded_rows) {
+    tprintf("Adjusting row limits for block(%d,%d)\n",
+            block->block->pdblk.bounding_box().left(),
+            block->block->pdblk.bounding_box().top());
+  }
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    TO_ROW *row = row_it.data();
+    const float row_size = row->max_y() - row->min_y();
+    if (textord_show_expanded_rows) {
+      tprintf("Row at %f has min %f, max %f, size %f\n",
+              row->intercept(), row->min_y(), row->max_y(), row_size);
+    }
+    // Size of one "unit" fraction of the row.
+    const float unit = row_size / total_fraction;
+    const float ymax = unit * above_fraction;
+    const float ymin = -unit * tesseract::CCStruct::kDescenderFraction;
+    row->set_limits(row->intercept() + ymin, row->intercept() + ymax);
+    row->merged = false;
+  }
+}
+
+
+/**
+ * @name compute_row_stats
+ *
+ * Compute the linespacing and offset.
+ *
+ * Walking the rows from the last one backwards, each row's spacing is set to
+ * the intercept difference to the row visited after it. The median, lower
+ * and upper quartile spacings are picked with choose_nth_item. The median
+ * row becomes block->key_row and defines baseline_offset. If there are more
+ * than 2 spacings and the inter quartile range is below
+ * textord_linespace_iqrlimit times the median spacing, line_size,
+ * line_spacing and max_blob_size of the block are updated as well.
+ *
+ * The block must have at least one row.
+ */
+void compute_row_stats(                  //find lines
+        TO_BLOCK* block,  //block to do
+        bool testing_on  //correct orientation
+) {
+  TO_ROW_IT row_it = block->get_rows();
+  // Room for one entry per row; filled with the rows that get a spacing.
+  std::vector<TO_ROW*> rows(static_cast<int16_t>(row_it.length()));
+  int16_t rowcount = 0;
+
+  // Start at the bottom and measure the gap to each following row.
+  TO_ROW *prev_row = nullptr;
+  row_it.move_to_last();
+  do {
+    TO_ROW *row = row_it.data();
+    if (prev_row != nullptr) {
+      rows[rowcount++] = prev_row;
+      prev_row->spacing = row->intercept() - prev_row->intercept();
+      if (testing_on) {
+        tprintf ("Row at %g yields spacing of %g\n",
+          row->intercept (), prev_row->spacing);
+      }
+    }
+    prev_row = row;
+    row_it.backward();
+  } while (!row_it.at_last());
+
+  // Defaults in case there is only a single row.
+  block->key_row = prev_row;
+  block->baseline_offset =
+    fmod (prev_row->parallel_c (), block->line_spacing);
+  if (testing_on) {
+    tprintf ("Blob based spacing=(%g,%g), offset=%g",
+      block->line_size, block->line_spacing, block->baseline_offset);
+  }
+
+  if (rowcount > 0) {
+    // Upper quartile minus lower quartile gives the inter quartile range.
+    int32_t row_index = choose_nth_item(rowcount * 3 / 4, &rows[0], rowcount,
+      sizeof (TO_ROW *), row_spacing_order);
+    float iqr = rows[row_index]->spacing;
+    row_index = choose_nth_item(rowcount / 4, &rows[0], rowcount,
+      sizeof (TO_ROW *), row_spacing_order);
+    iqr -= rows[row_index]->spacing;
+    // The median row is the key row.
+    row_index = choose_nth_item(rowcount / 2, &rows[0], rowcount,
+      sizeof (TO_ROW *), row_spacing_order);
+    TO_ROW *median_row = rows[row_index];
+    block->key_row = median_row;
+    if (testing_on)
+      tprintf (" row based=%g(%g)", median_row->spacing, iqr);
+
+    if (rowcount > 2 &&
+        iqr < median_row->spacing * textord_linespace_iqrlimit) {
+      // The spacings are consistent enough to be trusted.
+      if (textord_new_initial_xheight) {
+        // Use the spacing, but never more than the current line spacing.
+        if (median_row->spacing < block->line_spacing)
+          block->line_size = median_row->spacing;
+        else
+          block->line_size = block->line_spacing;
+      } else if (median_row->spacing < block->line_spacing &&
+                 median_row->spacing > block->line_size) {
+        // Within range, so spacing = size.
+        block->line_size = median_row->spacing;
+      } else if (median_row->spacing > block->line_spacing) {
+        // Too big so use max.
+        block->line_size = block->line_spacing;
+      }
+      if (block->line_size < textord_min_xheight)
+        block->line_size = static_cast<float>(textord_min_xheight);
+      block->line_spacing = median_row->spacing;
+      block->max_blob_size =
+        block->line_spacing * textord_excess_blobsize;
+    }
+    block->baseline_offset = fmod (median_row->intercept (),
+      block->line_spacing);
+  }
+  if (testing_on) {
+    tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n",
+      block->line_size, block->line_spacing, block->baseline_offset);
+  }
+}
+
+
+/**
+ * @name compute_block_xheight
+ *
+ * Compute the xheight of the individual rows, then correlate them
+ * and interpret ascenderless lines, correcting xheights.
+ *
+ * First we compute our best guess of the x-height of each row independently
+ * with compute_row_xheight(), which looks for a pair of commonly occurring
+ * heights that could be x-height and ascender height. This function also
+ * attempts to find descenders of lowercase letters (i.e. not the small
+ * descenders that could appear in upper case letters as Q,J).
+ *
+ * After this computation each row falls into one of the following categories:
+ * ROW_ASCENDERS_FOUND: we found xheight and ascender modes, so this must be
+ *                      a regular row; we'll use its xheight to compute
+ *                      xheight and ascrise estimates for the block
+ * ROW_DESCENDERS_FOUND: no ascenders, so we do not have a high confidence in
+ *                       the xheight of this row (don't use it for estimating
+ *                       block xheight), but this row can't contain all caps
+ * ROW_UNKNOWN: a row with no ascenders/descenders, could be all lowercase
+ *              (or mostly lowercase for fonts with very few ascenders),
+ *              all upper case or small caps
+ * ROW_INVALID: no meaningful xheight could be found for this row
+ *
+ * We then run correct_row_xheight() and use the computed xheight and ascrise
+ * averages to correct xheight values of the rows in ROW_DESCENDERS_FOUND,
+ * ROW_UNKNOWN and ROW_INVALID categories.
+ * gradient is only forwarded to compute_row_xheight() and fill_heights().
+ */
+void Textord::compute_block_xheight(TO_BLOCK *block, float gradient) {
+  TO_ROW *row;                          // current row
+  float asc_frac_xheight = CCStruct::kAscenderFraction /
+      CCStruct::kXHeightFraction;
+  float desc_frac_xheight = CCStruct::kDescenderFraction /
+      CCStruct::kXHeightFraction;
+  int32_t min_height, max_height;         // limits on xheight
+  TO_ROW_IT row_it = block->get_rows();
+  if (row_it.empty()) return;  // no rows
+
+  // Compute the best guess of xheight of each row individually.
+  // Use xheight and ascrise values of the rows where ascenders were found.
+  get_min_max_xheight(block->line_size, &min_height, &max_height);
+  STATS row_asc_xheights(min_height, max_height + 1);
+  STATS row_asc_ascrise(static_cast<int>(min_height * asc_frac_xheight),
+                        static_cast<int>(max_height * asc_frac_xheight) + 1);
+  int min_desc_height = static_cast<int>(min_height * desc_frac_xheight);
+  int max_desc_height = static_cast<int>(max_height * desc_frac_xheight);
+  STATS row_asc_descdrop(min_desc_height, max_desc_height + 1);
+  STATS row_desc_xheights(min_height, max_height + 1);
+  STATS row_desc_descdrop(min_desc_height, max_desc_height + 1);
+  STATS row_cap_xheights(min_height, max_height + 1);
+  STATS row_cap_floating_xheights(min_height, max_height + 1);
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    row = row_it.data();
+    // Compute the xheight of this row if it has not been computed before.
+    if (row->xheight <= 0.0) {
+      compute_row_xheight(row, block->block->classify_rotation(),
+                          gradient, block->line_size);
+    }
+    ROW_CATEGORY row_category = get_row_category(row);
+    if (row_category == ROW_ASCENDERS_FOUND) {
+      row_asc_xheights.add(static_cast<int32_t>(row->xheight),
+                           row->xheight_evidence);
+      row_asc_ascrise.add(static_cast<int32_t>(row->ascrise),
+                          row->xheight_evidence);
+      row_asc_descdrop.add(static_cast<int32_t>(-row->descdrop),
+                           row->xheight_evidence);
+    } else if (row_category == ROW_DESCENDERS_FOUND) {
+      row_desc_xheights.add(static_cast<int32_t>(row->xheight),
+                            row->xheight_evidence);
+      row_desc_descdrop.add(static_cast<int32_t>(-row->descdrop),
+                            row->xheight_evidence);
+    } else if (row_category == ROW_UNKNOWN) {
+      fill_heights(row, gradient, min_height, max_height,
+                   &row_cap_xheights, &row_cap_floating_xheights);
+    }
+  }
+
+  float xheight = 0.0;
+  float ascrise = 0.0;
+  float descdrop = 0.0;
+  // Compute our best guess of xheight of this block.
+  if (row_asc_xheights.get_total() > 0) {
+    // Determine xheight from rows where ascenders were found.
+    xheight = row_asc_xheights.median();
+    ascrise = row_asc_ascrise.median();
+    descdrop = -row_asc_descdrop.median();
+  } else if (row_desc_xheights.get_total() > 0) {
+    // Determine xheight from rows where descenders were found.
+    xheight = row_desc_xheights.median();
+    descdrop = -row_desc_descdrop.median();
+  } else if (row_cap_xheights.get_total() > 0) {
+    // All the rows in the block were (a/de)scenderless.
+    // Try to search for two modes in row_cap_xheights that could
+    // be the xheight and the capheight (e.g. some of the rows
+    // were lowercase, but did not have enough (a/de)scenders).
+    // If such two modes can not be found, this block is most
+    // likely all caps (or all small caps, in which case the code
+    // still works as intended).
+    compute_xheight_from_modes(&row_cap_xheights, &row_cap_floating_xheights,
+                               textord_single_height_mode &&
+                               block->block->classify_rotation().y() == 0.0,
+                               min_height, max_height, &(xheight), &(ascrise));
+    if (ascrise == 0) {  // assume only caps in the whole block
+      xheight = row_cap_xheights.median() * CCStruct::kXHeightCapRatio;
+    }
+  } else {  // default block sizes
+    xheight = block->line_size * CCStruct::kXHeightFraction;
+  }
+  // Correct xheight, ascrise and descdrop if necessary.
+  bool corrected_xheight = false;
+  if (xheight < textord_min_xheight) {
+    xheight = static_cast<float>(textord_min_xheight);
+    corrected_xheight = true;
+  }
+  if (corrected_xheight || ascrise <= 0.0) {
+    ascrise = xheight * asc_frac_xheight;
+  }
+  if (corrected_xheight || descdrop >= 0.0) {
+    descdrop = -(xheight * desc_frac_xheight);
+  }
+  block->xheight = xheight;
+
+  if (textord_debug_xheights) {
+    tprintf("Block average xheight=%.4f, ascrise=%.4f, descdrop=%.4f\n",
+            xheight, ascrise, descdrop);
+  }
+  // Correct xheight, ascrise, descdrop of rows based on block averages.
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    correct_row_xheight(row_it.data(), xheight, ascrise, descdrop);
+  }
+}
+
+/**
+ * @name compute_row_xheight
+ *
+ * Estimates xheight, ascrise and descdrop of one row from the heights of
+ * its blobs. row->xheight_evidence receives the value returned by
+ * compute_xheight_from_modes(), i.e. the number of blobs in this row
+ * that have the chosen xheight.
+ */
+void Textord::compute_row_xheight(TO_ROW *row,          // row to do
+                                  const FCOORD& rotation,
+                                  float gradient,       // global skew
+                                  int block_line_size) {
+  // Repeated characters have to be marked before the heights are gathered;
+  // the same marks are used again later when make_words forms words.
+  if (!row->rep_chars_marked())
+    mark_repeated_chars(row);
+
+  // Collect the candidate heights within the legal xheight range.
+  int lowest, highest;
+  get_min_max_xheight(block_line_size, &lowest, &highest);
+  STATS heights(lowest, highest + 1);
+  STATS floating_heights(lowest, highest + 1);
+  fill_heights(row, gradient, lowest, highest, &heights, &floating_heights);
+  // Force a single mode in single-height mode when rotation.y() is zero.
+  const bool cap_only = textord_single_height_mode && rotation.y() == 0.0;
+  row->ascrise = 0.0f;
+  row->xheight = 0.0f;
+  row->xheight_evidence = compute_xheight_from_modes(
+      &heights, &floating_heights, cap_only, lowest, highest,
+      &row->xheight, &row->ascrise);
+
+  // A descender drop is only looked for once an xheight has been found.
+  if (row->xheight > 0.0) {
+    row->descdrop = static_cast<float>(compute_row_descdrop(
+        row, gradient, row->xheight_evidence, &heights));
+  } else {
+    row->descdrop = 0.0f;
+  }
+}
+
+/**
+ * @name fill_heights
+ * Add the top above the baseline of each candidate blob of the row to
+ * heights, and also to floating_heights when the blob's own height is small
+ * relative to that top. Only the first blob of a repeated run is considered.
+ */
+void fill_heights(TO_ROW *row, float gradient, int min_height,
+                  int max_height, STATS *heights, STATS *floating_heights) {
+  float xcentre;                 // centre of blob
+  float top;                     // blob top, made relative to the baseline
+  float height;                  // height of blob
+  BLOBNBOX *blob;                // current blob
+  int repeated_set;
+  BLOBNBOX_IT blob_it = row->blob_list();
+  if (blob_it.empty()) return;  // no blobs in this row
+  bool has_rep_chars =
+    row->rep_chars_marked() && row->num_repeated_sets() > 0;
+  do {
+    blob = blob_it.data();
+    if (!blob->joined_to_prev()) {
+      xcentre = (blob->bounding_box().left() +
+                 blob->bounding_box().right()) / 2.0f;
+      top = blob->bounding_box().top();
+      height = blob->bounding_box().height();
+      if (textord_fix_xheight_bug)
+        top -= row->baseline.y(xcentre);
+      else
+        top -= gradient * xcentre + row->parallel_c();
+      if (top >= min_height && top <= max_height) {
+        heights->add(static_cast<int32_t>(floor(top + 0.5)), 1);
+        if (height / top < textord_min_blob_height_fraction) {
+          floating_heights->add(static_cast<int32_t>(floor(top + 0.5)), 1);
+        }
+      }
+    }
+    // Skip repeated chars, since they are likely to skew the height stats.
+    if (has_rep_chars && blob->repeated_set() != 0) {
+      repeated_set = blob->repeated_set();
+      blob_it.forward();
+      while (!blob_it.at_first() &&
+             blob_it.data()->repeated_set() == repeated_set) {
+        blob_it.forward();
+        if (textord_debug_xheights)
+          tprintf("Skipping repeated char when computing xheight\n");
+      }
+    } else {
+      blob_it.forward();
+    }
+  } while (!blob_it.at_first());
+}
+
+/**
+ * @name compute_xheight_from_modes
+ * Given a STATS object heights, looks for the two most frequently occurring
+ * heights that look like xheight and xheight + ascrise. If found, sets
+ * *xheight and *ascrise accordingly, otherwise sets *xheight to the most
+ * frequent height once floating blobs are discounted and *ascrise to 0.
+ * Returns best_count, the blob count found for the chosen xheight, or 0
+ * (outputs untouched) if the mode of heights has no entries. The caller must
+ * zero *xheight beforehand: a zero value selects the single-mode fallback.
+ * For each mode considered as an xheight, the count of floating blobs
+ * (floating_heights) is subtracted from the count of blobs of this height:
+ * blobs that sit far above the baseline could be valid ascenders, but it is
+ * highly unlikely that such a character's height is an xheight
+ * (e.g. -, ', =, ^, `, ", etc).
+ * If cap_only, then force finding of only the top mode.
+ */
+int compute_xheight_from_modes(
+    STATS *heights, STATS *floating_heights, bool cap_only, int min_height,
+    int max_height, float *xheight, float *ascrise) {
+  int blob_index = heights->mode();  // find mode
+  int blob_count = heights->pile_count(blob_index);  // get count of mode
+  if (textord_debug_xheights) {
+    tprintf("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n",
+            min_height, max_height, blob_index, blob_count,
+            heights->get_total());
+    heights->print();
+    floating_heights->print();
+  }
+  if (blob_count == 0) return 0;
+  int modes[MAX_HEIGHT_MODES];  // biggest piles
+  bool in_best_pile = false;
+  int prev_size = -INT32_MAX;
+  int best_count = 0;
+  int mode_count = compute_height_modes(heights, min_height, max_height,
+                                        modes, MAX_HEIGHT_MODES);
+  if (cap_only && mode_count > 1)
+    mode_count = 1;
+  int x;
+  if (textord_debug_xheights) {
+    tprintf("found %d modes: ", mode_count);
+    for (x = 0; x < mode_count; x++) tprintf("%d ", modes[x]);
+    tprintf("\n");
+  }
+
+  for (x = 0; x < mode_count - 1; x++) {
+    if (modes[x] != prev_size + 1)
+      in_best_pile = false;    // had empty height
+    int modes_x_count = heights->pile_count(modes[x]) -
+      floating_heights->pile_count(modes[x]);
+    if ((modes_x_count >= blob_count * textord_xheight_mode_fraction) &&
+        (in_best_pile || modes_x_count > best_count)) {
+      for (int asc = x + 1; asc < mode_count; asc++) {
+        float ratio =
+          static_cast<float>(modes[asc]) / static_cast<float>(modes[x]);
+        if (textord_ascx_ratio_min < ratio &&
+            ratio < textord_ascx_ratio_max &&
+            (heights->pile_count(modes[asc]) >=
+             blob_count * textord_ascheight_mode_fraction)) {
+          if (modes_x_count > best_count) {
+            in_best_pile = true;
+            best_count = modes_x_count;
+          }
+          if (textord_debug_xheights) {
+            tprintf("X=%d, asc=%d, count=%d, ratio=%g\n",
+                    modes[x], modes[asc]-modes[x], modes_x_count, ratio);
+          }
+          prev_size = modes[x];
+          *xheight = static_cast<float>(modes[x]);
+          *ascrise = static_cast<float>(modes[asc] - modes[x]);
+        }
+      }
+    }
+  }
+  if (*xheight == 0) {  // single mode
+    // Remove counts of the "floating" blobs (the ones whose height is too
+    // small in relation to the top of their bounding box) from heights
+    // before computing the single-mode xheight, then restore them, since
+    // floating blobs might be useful for determining potential ascenders
+    // in compute_row_descdrop().
+    // NOTE(review): both loops stop before max_height itself - confirm.
+    if (floating_heights->get_total() > 0) {
+      for (x = min_height; x < max_height; ++x) {
+        heights->add(x, -(floating_heights->pile_count(x)));
+      }
+      blob_index = heights->mode();  // find the modified mode
+      for (x = min_height; x < max_height; ++x) {
+        heights->add(x, floating_heights->pile_count(x));
+      }
+    }
+    *xheight = static_cast<float>(blob_index);
+    *ascrise = 0.0f;
+    best_count = heights->pile_count(blob_index);
+    if (textord_debug_xheights)
+      tprintf("Single mode xheight set to %g\n", *xheight);
+  } else if (textord_debug_xheights) {
+    tprintf("Multi-mode xheight set to %g, asc=%g\n", *xheight, *ascrise);
+  }
+  return best_count;
+}
+
+/**
+ * @name compute_row_descdrop
+ *
+ * Estimates the descender drop of the row. Only "significant" drops are
+ * considered, i.e. those within [textord_descx_ratio_min,
+ * textord_descx_ratio_max] times row->xheight, which excludes the small
+ * descenders of upper case letters such as Q and J. The most common such
+ * drop is returned (negated) provided that its count plus the number of
+ * potential ascenders in asc_heights reaches the expected fraction of
+ * xheight_blob_count; otherwise 0 is returned.
+ * row->xheight is used as a divisor, so it must be set and non-zero.
+ */
+int32_t compute_row_descdrop(TO_ROW *row, float gradient,
+                           int xheight_blob_count, STATS *asc_heights) {
+  // Restrict the ascender buckets to plausible ascender/xheight ratios.
+  int asc_first = asc_heights->min_bucket();
+  if ((asc_first / row->xheight) < textord_ascx_ratio_min) {
+    asc_first = static_cast<int>(
+        floor(row->xheight * textord_ascx_ratio_min + 0.5));
+  }
+  int asc_last = asc_heights->max_bucket();
+  if ((asc_last / row->xheight) > textord_ascx_ratio_max) {
+    asc_last = static_cast<int>(floor(row->xheight * textord_ascx_ratio_max));
+  }
+  int num_potential_asc = 0;
+  for (int bucket = asc_first; bucket <= asc_last; ++bucket) {
+    num_potential_asc += asc_heights->pile_count(bucket);
+  }
+
+  // Histogram of how far each blob bottom lies below the parallel baseline.
+  const auto min_drop =
+    static_cast<int32_t>(floor(row->xheight * textord_descx_ratio_min + 0.5));
+  const auto max_drop =
+    static_cast<int32_t>(floor(row->xheight * textord_descx_ratio_max));
+  STATS drops(min_drop, max_drop + 1);
+  BLOBNBOX_IT it = row->blob_list();
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX *blob = it.data();
+    if (blob->joined_to_prev()) continue;
+    const float xcentre = (blob->bounding_box().left() +
+                           blob->bounding_box().right()) / 2.0f;
+    const float drop = (gradient * xcentre + row->parallel_c() -
+                        blob->bounding_box().bottom());
+    if (drop >= min_drop && drop <= max_drop)
+      drops.add(static_cast<int>(floor(drop + 0.5)), 1);
+  }
+
+  // The mode only counts if ascenders plus descenders are frequent enough.
+  const int mode_drop = drops.mode();
+  int mode_count = drops.pile_count(mode_drop);
+  const float total_fraction =
+    (textord_descheight_mode_fraction + textord_ascheight_mode_fraction);
+  if (static_cast<float>(mode_count + num_potential_asc) <
+      xheight_blob_count * total_fraction) {
+    mode_count = 0;
+  }
+  const int descdrop = mode_count > 0 ? -mode_drop : 0;
+  if (textord_debug_xheights) {
+    tprintf("Descdrop: %d (potential ascenders %d, descenders %d)\n",
+            descdrop, num_potential_asc, mode_count);
+    drops.print();
+  }
+  return descdrop;
+}
+
+
+/**
+ * @name compute_height_modes
+ * Scan the piles min_height..max_height of heights and keep in modes the
+ * heights of the (at most maxmodes) fullest non-empty piles, in ascending
+ * height order. Returns the number of entries written to modes.
+ */
+int32_t compute_height_modes(STATS *heights,    // stats to search
+                           int32_t min_height,  // bottom of range
+                           int32_t max_height,  // top of range
+                           int32_t *modes,      // output array
+                           int32_t maxmodes) {  // size of modes
+  int32_t pile_count;              // no in source pile
+  int32_t src_count;               // no of source entries
+  int32_t src_index;               // current entry
+  int32_t least_count;             // height of smallest
+  int32_t least_index;             // index of least
+  int32_t dest_count;              // index in modes
+
+  src_count = max_height + 1 - min_height;
+  dest_count = 0;
+  least_count = INT32_MAX;
+  least_index = -1;
+  for (src_index = 0; src_index < src_count; src_index++) {
+    pile_count = heights->pile_count(min_height + src_index);
+    if (pile_count > 0) {
+      if (dest_count < maxmodes) {
+        if (pile_count < least_count) {
+          // find smallest in array
+          least_count = pile_count;
+          least_index = dest_count;
+        }
+        modes[dest_count++] = min_height + src_index;
+      } else if (pile_count >= least_count) {
+        while (least_index < maxmodes - 1) {
+          modes[least_index] = modes[least_index + 1];
+          // shuffle up
+          least_index++;
+        }
+        // new one on end
+        modes[maxmodes - 1] = min_height + src_index;
+        if (pile_count == least_count) {
+          // new smallest
+          least_index = maxmodes - 1;
+        } else {
+          least_count = heights->pile_count(modes[0]);
+          least_index = 0;
+          for (dest_count = 1; dest_count < maxmodes; dest_count++) {
+            pile_count = heights->pile_count(modes[dest_count]);
+            if (pile_count < least_count) {
+              // find smallest
+              least_count = pile_count;
+              least_index = dest_count;
+            }
+          }
+        }
+      }
+    }
+  }
+  return dest_count;
+}
+
+
+/**
+ * @name correct_row_xheight
+ * Adjust xheight, ascrise and descdrop of the row, depending on its row
+ * category, using the block averages passed in as xheight/ascrise/descdrop.
+ * May also set row->all_caps.
+ */
+void correct_row_xheight(TO_ROW *row, float xheight,
+                         float ascrise, float descdrop) {
+  ROW_CATEGORY row_category = get_row_category(row);
+  if (textord_debug_xheights) {
+    tprintf("correcting row xheight: row->xheight %.4f"
+            ", row->acrise %.4f row->descdrop %.4f\n",
+            row->xheight, row->ascrise, row->descdrop);
+  }
+  bool normal_xheight =
+    within_error_margin(row->xheight, xheight, textord_xheight_error_margin);
+  bool cap_xheight =
+    within_error_margin(row->xheight, xheight + ascrise,
+                        textord_xheight_error_margin);
+  // Use the average xheight/ascrise for the following cases:
+  // -- the xheight of the row could not be determined at all
+  // -- the row has descenders (e.g. "many groups", "ISBN 12345 p.3")
+  //    and its xheight is close to either cap height or average xheight
+  // -- the row does not have ascenders or descenders, but its xheight
+  //    is close to the average block xheight (e.g. row with "www.mmm.com")
+  if (row_category == ROW_ASCENDERS_FOUND) {
+    if (row->descdrop >= 0.0) {
+      row->descdrop = row->xheight * (descdrop / xheight);
+    }
+  } else if (row_category == ROW_INVALID ||
+             (row_category == ROW_DESCENDERS_FOUND &&
+              (normal_xheight || cap_xheight)) ||
+              (row_category == ROW_UNKNOWN && normal_xheight)) {
+    if (textord_debug_xheights) tprintf("using average xheight\n");
+    row->xheight = xheight;
+    row->ascrise = ascrise;
+    row->descdrop = descdrop;
+  } else if (row_category == ROW_DESCENDERS_FOUND) {
+    // Assume this is a row with mostly lowercase letters and its xheight
+    // is computed correctly (unfortunately there is no way to distinguish
+    // this from the case when descenders are found, but the most common
+    // height is capheight).
+    if (textord_debug_xheights) tprintf("lowercase, corrected ascrise\n");
+    row->ascrise = row->xheight * (ascrise / xheight);
+  } else if (row_category == ROW_UNKNOWN) {
+  // Otherwise assume this row is an all-caps or small-caps row
+  // and adjust xheight and ascrise of the row.
+
+    row->all_caps = true;
+    if (cap_xheight) { // regular all caps
+      if (textord_debug_xheights) tprintf("all caps\n");
+      row->xheight = xheight;
+      row->ascrise = ascrise;
+      row->descdrop = descdrop;
+    } else {  // small caps or caps with an odd xheight
+      if (textord_debug_xheights) {
+        if (row->xheight < xheight + ascrise && row->xheight > xheight) {
+          tprintf("small caps\n");
+        } else {
+          tprintf("all caps with irregular xheight\n");
+        }
+      }
+      row->ascrise = row->xheight * (ascrise / (xheight + ascrise));
+      row->xheight -= row->ascrise;
+      row->descdrop = row->xheight * (descdrop / xheight);
+    }
+  }
+  if (textord_debug_xheights) {
+    tprintf("corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop"
+            " = %.4f\n", row->xheight, row->ascrise, row->descdrop);
+  }
+}
+
+// Counts the blobs of height >= min_height that box.major_overlap() accepts.
+static int CountOverlaps(const TBOX& box, int min_height,
+                         BLOBNBOX_LIST* blobs) {
+  int count = 0;
+  BLOBNBOX_IT it(blobs);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    const TBOX& candidate = it.data()->bounding_box();
+    // Blobs that are too short never count, whatever their overlap.
+    if (candidate.height() < min_height) continue;
+    if (box.major_overlap(candidate)) ++count;
+  }
+  return count;
+}
+
+/**
+ * @name separate_underlines
+ *
+ * Move wide blobs out of the rows of the block. A wide blob accepted by
+ * test_underline() goes to block->underlines; otherwise, if it overlaps more
+ * than textord_max_blob_overlaps blobs of its row, to block->large_blobs.
+ */
+void separate_underlines(TO_BLOCK* block,  // block to do
+                         float gradient,   // skew angle
+                         FCOORD rotation,  // inverse landscape
+                         bool testing_on) {  // enables debug output
+  BLOBNBOX *blob;                // current blob
+  C_BLOB *rotated_blob;          // rotated blob
+  TO_ROW *row;                   // current row
+  float length;                  // of g_vec
+  TBOX blob_box;
+  FCOORD blob_rotation;          // inverse of rotation
+  FCOORD g_vec;                  // skew rotation
+  BLOBNBOX_IT blob_it;           // iterator over the blobs of a row
+  BLOBNBOX_IT under_it = &block->underlines;
+  BLOBNBOX_IT large_it = &block->large_blobs;
+  TO_ROW_IT row_it = block->get_rows();
+  int min_blob_height = static_cast<int>(textord_min_blob_height_fraction *
+                                         block->line_size + 0.5);
+
+  length = sqrt(1 + gradient * gradient);  // length of vector
+  g_vec = FCOORD(1 / length, -gradient / length);
+  blob_rotation = FCOORD(rotation.x(), -rotation.y());
+  blob_rotation.rotate(g_vec);  // undoing everything
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    row = row_it.data();
+                                 // get blobs
+    blob_it.set_to_list(row->blob_list());
+    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
+         blob_it.forward()) {
+      blob = blob_it.data();
+      blob_box = blob->bounding_box();
+      if (blob_box.width() > block->line_size * textord_underline_width) {
+        ASSERT_HOST(blob->cblob() != nullptr);
+        rotated_blob = crotate_cblob(blob->cblob(), blob_rotation);
+        if (test_underline(
+            testing_on && textord_show_final_rows,
+            rotated_blob, static_cast<int16_t>(row->intercept()),
+            static_cast<int16_t>(
+                block->line_size *
+                (tesseract::CCStruct::kXHeightFraction +
+                 tesseract::CCStruct::kAscenderFraction / 2.0f)))) {
+          under_it.add_after_then_move(blob_it.extract());
+          if (testing_on && textord_show_final_rows) {
+            tprintf("Underlined blob at:");
+            rotated_blob->bounding_box().print();
+            tprintf("Was:");
+            blob_box.print();
+          }
+        } else {
+          // Count once, while the blob is still in the row, so that the debug
+          // output below reports the same value the decision was based on.
+          const int overlaps =
+              CountOverlaps(blob_box, min_blob_height, row->blob_list());
+          if (overlaps > textord_max_blob_overlaps) {
+            large_it.add_after_then_move(blob_it.extract());
+            if (testing_on && textord_show_final_rows) {
+              tprintf("Large blob overlaps %d blobs at:", overlaps);
+              blob_box.print();
+            }
+          }
+        }
+        delete rotated_blob;
+      }
+    }
+  }
+}
+
+
+/**
+ * @name pre_associate_blobs
+ * Merge each blob with the following blobs in its row while they have a major
+ * x-overlap, then call chop() on it with a width based on textord_chop_width.
+ */
+void pre_associate_blobs(                  //make rough chars
+        ICOORD page_tr,   //top right, used to create the debug window
+        TO_BLOCK* block,  //block to do
+        FCOORD rotation,  //inverse landscape
+        bool testing_on  //draw debug boxes
+) {
+#ifndef GRAPHICS_DISABLED
+  ScrollView::Color colour;                 //of boxes
+#endif
+  BLOBNBOX *blob;                //current blob
+  BLOBNBOX *nextblob;            //next in list
+  TBOX blob_box;
+  FCOORD blob_rotation;          //inverse of rotation
+  BLOBNBOX_IT blob_it;           //iterator
+  BLOBNBOX_IT start_it;          //iterator
+  TO_ROW_IT row_it = block->get_rows ();
+
+#ifndef GRAPHICS_DISABLED
+  colour = ScrollView::RED;
+#endif
+
+  blob_rotation = FCOORD (rotation.x (), -rotation.y ());
+  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+                                 //get blobs
+    blob_it.set_to_list (row_it.data ()->blob_list ());
+    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+    blob_it.forward ()) {
+      blob = blob_it.data ();
+      blob_box = blob->bounding_box ();
+      start_it = blob_it;        //save start point
+      //                      if (testing_on && textord_show_final_blobs)
+      //                      {
+      //                              tprintf("Blob at (%d,%d)->(%d,%d), addr=%x, count=%d\n",
+      //                                      blob_box.left(),blob_box.bottom(),
+      //                                      blob_box.right(),blob_box.top(),
+      //                                      (void*)blob,blob_it.length());
+      //                      }
+      bool overlap;
+      do {
+        overlap = false;
+        if (!blob_it.at_last ()) {
+          nextblob = blob_it.data_relative(1);
+          overlap = blob_box.major_x_overlap(nextblob->bounding_box());
+          if (overlap) {
+            blob->merge(nextblob); // merge new blob
+            blob_box = blob->bounding_box(); // get bigger box
+            blob_it.forward();
+          }
+        }
+      }
+      while (overlap);
+      blob->chop (&start_it, &blob_it,
+        blob_rotation,
+        block->line_size * tesseract::CCStruct::kXHeightFraction *
+        textord_chop_width);
+      //attempt chop
+    }
+#ifndef GRAPHICS_DISABLED
+    if (testing_on && textord_show_final_blobs) {
+      if (to_win == nullptr)
+        create_to_win(page_tr);
+      to_win->Pen(colour);
+      for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+      blob_it.forward ()) {
+        blob = blob_it.data ();
+        blob_box = blob->bounding_box ();
+        blob_box.rotate (rotation);
+        if (!blob->joined_to_prev ()) {
+          to_win->Rectangle (blob_box.left (), blob_box.bottom (),
+            blob_box.right (), blob_box.top ());
+        }
+      }
+      colour = static_cast<ScrollView::Color>(colour + 1);
+      if (colour > ScrollView::MAGENTA)
+        colour = ScrollView::RED;
+    }
+#endif
+  }
+}
+
+
+/**
+ * @name fit_parallel_rows
+ *
+ * Delete the rows of the block that hold no blobs, re-fit each remaining
+ * row to the given gradient and finally re-sort the rows by row_y_order.
+ */
+void fit_parallel_rows(                   //re-fit rows
+        TO_BLOCK* block,   //block to do
+        float gradient,    //gradient to fit
+        FCOORD rotation,   //for drawing
+        int32_t block_edge,  //edge of block
+        bool testing_on   //plot fitted rows
+) {
+  TO_ROW_IT rows = block->get_rows ();
+
+  rows.move_to_first ();
+  for (rows.mark_cycle_pt (); !rows.cycled_list (); rows.forward ()) {
+    TO_ROW *current = rows.data ();
+    if (!current->blob_list ()->empty ()) {
+      fit_parallel_lms (gradient, current);
+      continue;
+    }
+    delete rows.extract ();      //nothing in it
+  }
+#ifndef GRAPHICS_DISABLED
+  if (testing_on) {
+    //one pen colour per row, wrapping from MAGENTA back to RED
+    ScrollView::Color pen = ScrollView::RED;
+    for (rows.mark_cycle_pt (); !rows.cycled_list (); rows.forward ()) {
+      plot_parallel_row (rows.data (), gradient, block_edge, pen, rotation);
+      pen = static_cast<ScrollView::Color>(pen + 1);
+      if (pen > ScrollView::MAGENTA)
+        pen = ScrollView::RED;
+    }
+  }
+#endif
+  rows.sort (row_y_order);       //may have gone out of order
+}
+
+
+/**
+ * @name fit_parallel_lms
+ *
+ * Fit a line to the blobs of a row.
+ * One point per blob that is not joined to its predecessor is used: the
+ * horizontal centre of the bounding box at its bottom edge. The fit that is
+ * constrained to the given gradient is stored as the row's parallel line.
+ * The row's own line is the same fit, unless textord_straight_baselines is
+ * set and more than textord_lms_line_trials points were collected, in which
+ * case an unconstrained fit is stored instead.
+ */
+void fit_parallel_lms(float gradient, TO_ROW *row) {
+  tesseract::DetLineFit lms;
+  int point_count = 0;  // no of blobs fed to the fit
+
+  BLOBNBOX_IT blob_it = row->blob_list();
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX *blob = blob_it.data();
+    if (blob->joined_to_prev()) {
+      continue;  // only the first piece of a joined blob contributes
+    }
+    const TBOX &box = blob->bounding_box();
+    lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
+    ++point_count;
+  }
+
+  float c;  // intercept of the fitted line
+  double error = lms.ConstrainedFit(gradient, &c);
+  row->set_parallel_line(gradient, c, error);
+  if (textord_straight_baselines && point_count > textord_lms_line_trials) {
+    // Enough points: let the gradient float as well.
+    error = lms.Fit(&gradient, &c);
+  }
+  // set the other too
+  row->set_line(gradient, c, error);
+}
+
+
+/**
+ * @name make_spline_rows
+ *
+ * Remove the empty rows of the block and give every other row a baseline
+ * spline through make_baseline_spline(). When textord_old_baselines is set,
+ * make_old_baselines() is run afterwards. With graphics compiled in and
+ * testing_on set, the baselines are plotted before the old-baseline pass
+ * (when that pass runs) and once more at the very end.
+ */
+void Textord::make_spline_rows(TO_BLOCK *block,  // block to do
+                               float gradient,   // gradient to fit
+                               bool testing_on) {
+  TO_ROW_IT row_it = block->get_rows();
+
+  row_it.move_to_first();
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    TO_ROW *row = row_it.data();
+    if (!row->blob_list()->empty()) {
+      make_baseline_spline(row, block);
+    } else {
+      delete row_it.extract();  // nothing in it
+    }
+  }
+
+  if (textord_old_baselines) {
+#ifndef GRAPHICS_DISABLED
+    if (testing_on) {
+      // Show the splines as they are before the old-baseline pass.
+      ScrollView::Color colour = ScrollView::RED;  // of row
+      for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+        row_it.data()->baseline.plot(to_win, colour);
+        colour = colour >= ScrollView::MAGENTA
+                     ? ScrollView::RED
+                     : static_cast<ScrollView::Color>(colour + 1);
+      }
+    }
+#endif
+    make_old_baselines(block, testing_on, gradient);
+  }
+
+#ifndef GRAPHICS_DISABLED
+  if (testing_on) {
+    // Show the final baselines.
+    ScrollView::Color colour = ScrollView::RED;  // of row
+    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+      row_it.data()->baseline.plot(to_win, colour);
+      colour = colour >= ScrollView::MAGENTA
+                   ? ScrollView::RED
+                   : static_cast<ScrollView::Color>(colour + 1);
+    }
+  }
+#endif
+}
+
+/**
+ * @name make_baseline_spline
+ *
+ * Build the baseline QSPLINE of a row.
+ * The row is first divided up by segment_baseline(). If that reports that a
+ * curve is needed and neither textord_straight_baselines nor
+ * textord_parallel_baselines is set, linear_spline_baseline() supplies the
+ * coefficients. Otherwise the baseline is a single segment that uses the
+ * row's already fitted line (line_m, line_c).
+ */
+void make_baseline_spline(TO_ROW *row,  // row to fit
+                          TO_BLOCK *block) {
+  int32_t segments;  // no of segments
+  // Spline boundaries: one entry more than there are blobs in the row.
+  auto *xstarts = new int32_t[row->blob_list()->length() + 1];
+  double *coeffs;  // quadratic coeffs, three per segment
+
+  const bool curve_needed = segment_baseline(row, block, segments, xstarts);
+  if (curve_needed && !textord_straight_baselines &&
+      !textord_parallel_baselines) {
+    coeffs = linear_spline_baseline(row, block, segments, xstarts);
+  } else {
+    // Collapse to one straight segment that ends where the last one ended.
+    xstarts[1] = xstarts[segments];
+    segments = 1;
+    coeffs = new double[3];
+    coeffs[0] = 0;
+    coeffs[1] = row->line_m();
+    coeffs[2] = row->line_c();
+  }
+
+  row->baseline = QSPLINE(segments, xstarts, coeffs);
+  delete[] coeffs;
+  delete[] xstarts;
+}
+
+
+/**
+ * @name segment_baseline
+ *
+ * Divide the baseline up into segments which require a different
+ * quadratic fitted to them.
+ * A window of textord_spline_medianwin blobs is slid along the row; the
+ * middle entry of the sorted vertical shifts from the row's fitted line
+ * (line_m, line_c) classifies the window as above (1), on (0) or below (-1)
+ * the line, and a change of class starts a new segment.
+ *
+ * @param row       row whose blobs are examined
+ * @param block     supplies line_size, which scales the shift threshold
+ * @param segments  output: number of segments found
+ * @param xstarts   output: x coords of the segment boundaries; entries
+ *                  0..segments are written
+ * @return true if enough blobs were far enough away to need a quadratic.
+ */
+bool
+segment_baseline(               //split baseline
+        TO_ROW* row,                    //row to fit
+        TO_BLOCK* block,                //block it came from
+        int32_t& segments,                //no of segments
+        int32_t* xstarts                  //coords of segments
+) {
+  bool needs_curve;             //needs curved line
+  int blobcount;                 //no of blobs
+  int blobindex;                 //current blob
+  int last_state;                //above, on , below
+  int state;                     //of current blob
+  float yshift;                  //from baseline
+  TBOX box;                       //blob box
+  TBOX new_box;                   //new_it box
+  float middle;                  //xcentre of blob
+                                 //blobs
+  BLOBNBOX_IT blob_it = row->blob_list ();
+  BLOBNBOX_IT new_it = blob_it;  //front end
+  SORTED_FLOATS yshifts;         //shifts from baseline
+
+  needs_curve = false;
+  box = box_next_pre_chopped (&blob_it);
+  xstarts[0] = box.left ();
+  segments = 1;
+  blobcount = row->blob_list ()->length ();
+  if (textord_oldbl_debug)
+    tprintf ("Segmenting baseline of %d blobs at (%d,%d)\n",
+      blobcount, box.left (), box.bottom ());
+  // Too few blobs to fill a window: report a single segment that ends at
+  // the right edge of the last blob.
+  if (blobcount <= textord_spline_medianwin
+  || blobcount < textord_spline_minblobs) {
+    blob_it.move_to_last ();
+    box = blob_it.data ()->bounding_box ();
+    xstarts[1] = box.right ();
+    return false;
+  }
+  last_state = 0;
+  new_it.mark_cycle_pt ();
+  // Prime the window with the shifts of the first textord_spline_medianwin
+  // boxes delivered by new_it.
+  for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) {
+    new_box = box_next_pre_chopped (&new_it);
+    middle = (new_box.left () + new_box.right ()) / 2.0;
+    yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
+                                 //record shift
+    yshifts.add (yshift, blobindex);
+    // new_it already wrapped round the list: give up with one segment.
+    if (new_it.cycled_list ()) {
+      xstarts[1] = new_box.right ();
+      return false;
+    }
+  }
+  // Move blob_it on by half a window so that it trails new_it.
+  // From here on blobcount counts the boxes seen since the last boundary.
+  for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++)
+    box = box_next_pre_chopped (&blob_it);
+  do {
+    new_box = box_next_pre_chopped (&new_it);
+                                 //get middle one
+    yshift = yshifts[textord_spline_medianwin / 2];
+    if (yshift > textord_spline_shift_fraction * block->line_size)
+      state = 1;
+    else if (-yshift > textord_spline_shift_fraction * block->line_size)
+      state = -1;
+    else
+      state = 0;
+    if (state != 0)
+      needs_curve = true;
+    //              tprintf("State=%d, prev=%d, shift=%g\n",
+    //                      state,last_state,yshift);
+    // A change of state starts a new segment, but only once the current
+    // one holds more than textord_spline_minblobs boxes.
+    if (state != last_state && blobcount > textord_spline_minblobs) {
+      xstarts[segments++] = box.left ();
+      blobcount = 0;
+    }
+    last_state = state;
+    // Slide the window: drop the oldest shift and add the newest one.
+    yshifts.remove (blobindex - textord_spline_medianwin);
+    box = box_next_pre_chopped (&blob_it);
+    middle = (new_box.left () + new_box.right ()) / 2.0;
+    yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
+    yshifts.add (yshift, blobindex);
+    blobindex++;
+    blobcount++;
+  }
+  while (!new_it.cycled_list ());
+  // Close the final segment; if it is too short (and is not the only one)
+  // it is folded into the previous segment by overwriting that boundary.
+  if (blobcount > textord_spline_minblobs || segments == 1) {
+    xstarts[segments] = new_box.right ();
+  }
+  else {
+    xstarts[--segments] = new_box.right ();
+  }
+  if (textord_oldbl_debug)
+    tprintf ("Made %d segments on row at (%d,%d)\n",
+      segments, box.right (), box.bottom ());
+  return needs_curve;
+}
+
+
+/**
+ * @name linear_spline_baseline
+ *
+ * Divide the row into segments of about textord_spline_medianwin blobs and
+ * fit a straight line (DetLineFit) to the blobs of each segment. Two
+ * iterators, offset from each other by half a segment, take turns in
+ * supplying the blobs for successive segments.
+ *
+ * @param row       row to fit
+ * @param block     block it came from (not used in this function)
+ * @param segments  output: number of segments produced (at least 1)
+ * @param xstarts   output: x coords of the segment boundaries
+ * @return array of 3 * segments doubles allocated with new[]; for every
+ *         segment the entries are 0, the fitted gradient and the fitted
+ *         intercept.
+ */
+double *
+linear_spline_baseline (         //split baseline
+TO_ROW * row,                    //row to fit
+TO_BLOCK * block,                //block it came from
+int32_t & segments,              //no of segments
+int32_t xstarts[]                //coords of segments
+) {
+  int blobcount;                 //no of blobs
+  int blobindex;                 //current blob
+  int index1, index2;            //blob numbers
+  int blobs_per_segment;         //blobs in each
+  TBOX box;                      //blob box
+  TBOX new_box;                  //new_it box
+                                 //blobs
+  BLOBNBOX_IT blob_it = row->blob_list ();
+  BLOBNBOX_IT new_it = blob_it;  //front end
+  float b, c;                    //fitted curve
+  tesseract::DetLineFit lms;
+  int32_t segment;               //current segment
+
+  // Count the boxes that box_next_pre_chopped delivers in one pass round
+  // the list; blob_it is back at the first element afterwards.
+  box = box_next_pre_chopped (&blob_it);
+  xstarts[0] = box.left ();
+  blobcount = 1;
+  while (!blob_it.at_first ()) {
+    blobcount++;
+    box = box_next_pre_chopped (&blob_it);
+  }
+  segments = blobcount / textord_spline_medianwin;
+  if (segments < 1)
+    segments = 1;
+  blobs_per_segment = blobcount / segments;
+  // quadratic coeffs
+  auto *coeffs = new double[segments * 3];
+  if (textord_oldbl_debug)
+    tprintf
+      ("Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n",
+      blobcount, box.left (), box.bottom (), segments, blobs_per_segment);
+  segment = 1;
+  // Start new_it half a segment ahead of blob_it.
+  for (index2 = 0; index2 < blobs_per_segment / 2; index2++)
+    box_next_pre_chopped(&new_it);
+  index1 = 0;
+  blobindex = index2;
+  do {
+    // Odd numbered segment: points come from blob_it. The last segment
+    // also takes every box that is still left over.
+    blobindex += blobs_per_segment;
+    lms.Clear();
+    while (index1 < blobindex || (segment == segments && index1 < blobcount)) {
+      box = box_next_pre_chopped (&blob_it);
+      int middle = (box.left() + box.right()) / 2;
+      lms.Add(ICOORD(middle, box.bottom()));
+      index1++;
+      if (index1 == blobindex - blobs_per_segment / 2
+      || index1 == blobcount - 1) {
+        xstarts[segment] = box.left ();
+      }
+    }
+    lms.Fit(&b, &c);
+    // The quadratic term is always 0, so each segment is a straight line.
+    coeffs[segment * 3 - 3] = 0;
+    coeffs[segment * 3 - 2] = b;
+    coeffs[segment * 3 - 1] = c;
+    segment++;
+    if (segment > segments)
+      break;
+
+    // Even numbered segment: points come from new_it.
+    blobindex += blobs_per_segment;
+    lms.Clear();
+    while (index2 < blobindex || (segment == segments && index2 < blobcount)) {
+      new_box = box_next_pre_chopped (&new_it);
+      int middle = (new_box.left() + new_box.right()) / 2;
+      lms.Add(ICOORD (middle, new_box.bottom()));
+      index2++;
+      if (index2 == blobindex - blobs_per_segment / 2
+      || index2 == blobcount - 1) {
+        xstarts[segment] = new_box.left ();
+      }
+    }
+    lms.Fit(&b, &c);
+    coeffs[segment * 3 - 3] = 0;
+    coeffs[segment * 3 - 2] = b;
+    coeffs[segment * 3 - 1] = c;
+    segment++;
+  }
+  while (segment <= segments);
+  return coeffs;
+}
+
+
+/**
+ * @name assign_blobs_to_rows
+ *
+ * Make enough rows to allocate all the given blobs to one.
+ * If a block skew is given, use that, else attempt to track it.
+ *
+ * The blobs of the block are visited in blob_x_order. Each blob's top and
+ * bottom are corrected by the current skew estimate and compared with the
+ * existing rows; the blob is then added to a row (ASSIGN), put into a newly
+ * made row (NEW_ROW) or left in block->blobs (REJECT). Rows that end up
+ * empty are deleted at the end.
+ *
+ * @param block          block to do
+ * @param gradient       block skew, or nullptr to track the skew from the
+ *                       blobs as they are assigned
+ * @param pass           only used in the debug message
+ * @param reject_misses  if false, a NEW_ROW verdict from
+ *                       most_overlapping_row() is turned into ASSIGN
+ * @param make_new_rows  allow rows to be created for unmatched blobs
+ * @param drawing_skew   draw the smoothed skew (graphics builds only)
+ */
+void assign_blobs_to_rows(                      //find lines
+        TO_BLOCK* block,      //block to do
+        float* gradient,      //block skew
+        int pass,             //identification
+        bool reject_misses,  //chuck big ones out
+        bool make_new_rows,  //add rows for unmatched
+        bool drawing_skew    //draw smoothed skew
+) {
+  OVERLAP_STATE overlap_result;  //what to do with it
+  float ycoord;                  //current y
+  float top, bottom;             //of blob
+  float g_length = 1.0f;         //from gradient
+  int16_t row_count;               //no of rows
+  int16_t left_x;                  //left edge
+  int16_t last_x;                  //previous edge
+  float block_skew;              //y delta
+  float smooth_factor;           //for new coords
+  float near_dist;               //dist to nearest row
+  ICOORD testpt;                 //testing only
+  BLOBNBOX *blob;                //current blob
+  TO_ROW *row;                   //current row
+  TO_ROW *dest_row = nullptr;       //row to put blob in
+                                 //iterators
+  BLOBNBOX_IT blob_it = &block->blobs;
+  TO_ROW_IT row_it = block->get_rows ();
+
+  // Vertical middle of the block; only used as the origin of the skew plot.
+  ycoord =
+    (block->block->pdblk.bounding_box ().bottom () +
+    block->block->pdblk.bounding_box ().top ()) / 2.0f;
+  if (gradient != nullptr)
+    g_length = sqrt (1 + *gradient * *gradient);
+#ifndef GRAPHICS_DISABLED
+  if (drawing_skew)
+    to_win->SetCursor(block->block->pdblk.bounding_box ().left (), ycoord);
+#endif
+  testpt = ICOORD (textord_test_x, textord_test_y);
+  blob_it.sort (blob_x_order);
+  smooth_factor = 1.0;
+  block_skew = 0.0f;
+  row_count = row_it.length ();  //might have rows
+  // Left reference for skew interpolation: the leftmost blob if there is
+  // one, else the left edge of the block.
+  if (!blob_it.empty ()) {
+    left_x = blob_it.data ()->bounding_box ().left ();
+  }
+  else {
+    left_x = block->block->pdblk.bounding_box ().left ();
+  }
+  last_x = left_x;
+  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
+    blob = blob_it.data ();
+    if (gradient != nullptr) {
+      // Skew is given: compute the y correction directly from the gradient.
+      block_skew = (1 - 1 / g_length) * blob->bounding_box ().bottom ()
+        + *gradient / g_length * blob->bounding_box ().left ();
+    }
+    else if (blob->bounding_box ().left () - last_x > block->line_size / 2
+      && last_x - left_x > block->line_size * 2
+    && textord_interpolating_skew) {
+      // Large horizontal gap since the previous blob: scale the tracked
+      // skew in proportion to the distance from the left reference.
+      //                      tprintf("Interpolating skew from %g",block_skew);
+      block_skew *= static_cast<float>(blob->bounding_box ().left () - left_x)
+        / (last_x - left_x);
+      //                      tprintf("to %g\n",block_skew);
+    }
+    last_x = blob->bounding_box ().left ();
+    // Skew-corrected vertical extent of the blob.
+    top = blob->bounding_box ().top () - block_skew;
+    bottom = blob->bounding_box ().bottom () - block_skew;
+#ifndef GRAPHICS_DISABLED
+    if (drawing_skew)
+      to_win->DrawTo(blob->bounding_box ().left (), ycoord + block_skew);
+#endif
+    if (!row_it.empty ()) {
+      // Walk from the first row to the first one whose min_y is not above
+      // the blob's top (or to the last row if there is none).
+      for (row_it.move_to_first ();
+        !row_it.at_last () && row_it.data ()->min_y () > top;
+        row_it.forward ());
+      row = row_it.data ();
+      if (row->min_y () <= top && row->max_y () >= bottom) {
+      //any overlap
+        dest_row = row;
+        overlap_result = most_overlapping_row (&row_it, dest_row,
+          top, bottom,
+          block->line_size,
+          blob->bounding_box ().
+          contains (testpt));
+        if (overlap_result == NEW_ROW && !reject_misses)
+          overlap_result = ASSIGN;
+      }
+      else {
+        // No vertical overlap with the row found. Unless new rows may be
+        // made, try to attach the blob to a neighbouring row that is close
+        // enough; otherwise the verdict stays NEW_ROW.
+        overlap_result = NEW_ROW;
+        if (!make_new_rows) {
+          near_dist = row_it.data_relative (-1)->min_y () - top;
+                                 //below bottom
+          if (bottom < row->min_y ()) {
+            if (row->min_y () - bottom <=
+              (block->line_spacing -
+            block->line_size) * tesseract::CCStruct::kDescenderFraction) {
+                                 //done it
+              overlap_result = ASSIGN;
+              dest_row = row;
+            }
+          }
+          else if (near_dist > 0
+          && near_dist < bottom - row->max_y ()) {
+            // The previous row in the list is the nearer of the two.
+            row_it.backward ();
+            dest_row = row_it.data ();
+            if (dest_row->min_y () - bottom <=
+              (block->line_spacing -
+            block->line_size) * tesseract::CCStruct::kDescenderFraction) {
+                                 //done it
+              overlap_result = ASSIGN;
+            }
+          }
+          else {
+            if (top - row->max_y () <=
+              (block->line_spacing -
+              block->line_size) * (textord_overlap_x +
+            tesseract::CCStruct::kAscenderFraction)) {
+                                 //done it
+              overlap_result = ASSIGN;
+              dest_row = row;
+            }
+          }
+        }
+      }
+      if (overlap_result == ASSIGN)
+        dest_row->add_blob (blob_it.extract (), top, bottom,
+          block->line_size);
+      if (overlap_result == NEW_ROW) {
+        // A new row is only made if allowed and the blob is not too tall.
+        if (make_new_rows && top - bottom < block->max_blob_size) {
+          dest_row =
+            new TO_ROW (blob_it.extract (), top, bottom,
+            block->line_size);
+          row_count++;
+          if (bottom > row_it.data ()->min_y ())
+            row_it.add_before_then_move (dest_row);
+          //insert in right place
+          else
+            row_it.add_after_then_move (dest_row);
+          smooth_factor =
+            1.0 / (row_count * textord_skew_lag +
+            textord_skewsmooth_offset);
+        }
+        else
+          overlap_result = REJECT;
+      }
+    }
+    else if (make_new_rows && top - bottom < block->max_blob_size) {
+      // No rows exist yet: this blob starts the first one.
+      overlap_result = NEW_ROW;
+      dest_row =
+        new TO_ROW(blob_it.extract(), top, bottom, block->line_size);
+      row_count++;
+      row_it.add_after_then_move(dest_row);
+      smooth_factor = 1.0 / (row_count * textord_skew_lag +
+                             textord_skewsmooth_offset2);
+    }
+    else
+      overlap_result = REJECT;
+    if (blob->bounding_box ().contains(testpt) && textord_debug_blob) {
+      if (overlap_result != REJECT) {
+        tprintf("Test blob assigned to row at (%g,%g) on pass %d\n",
+          dest_row->min_y(), dest_row->max_y(), pass);
+      }
+      else {
+        tprintf("Test blob assigned to no row on pass %d\n", pass);
+      }
+    }
+    if (overlap_result != REJECT) {
+      // The row at row_it may have changed its limits: move it backward or
+      // forward in the list until min_y is again non-increasing along it.
+      while (!row_it.at_first() &&
+             row_it.data()->min_y() > row_it.data_relative(-1)->min_y()) {
+        row = row_it.extract();
+        row_it.backward();
+        row_it.add_before_then_move(row);
+      }
+      while (!row_it.at_last() &&
+             row_it.data ()->min_y() < row_it.data_relative (1)->min_y()) {
+        row = row_it.extract();
+        row_it.forward();
+                                 // Keep rows in order.
+        row_it.add_after_then_move(row);
+      }
+      // Update the tracked skew, but only from a blob that is alone in its
+      // row or does not mostly overlap in x the blob in front of the last
+      // one in the row's list.
+      BLOBNBOX_IT added_blob_it(dest_row->blob_list());
+      added_blob_it.move_to_last();
+      TBOX prev_box = added_blob_it.data_relative(-1)->bounding_box();
+      if (dest_row->blob_list()->singleton() ||
+          !prev_box.major_x_overlap(blob->bounding_box())) {
+        block_skew = (1 - smooth_factor) * block_skew
+            + smooth_factor * (blob->bounding_box().bottom() -
+            dest_row->initial_min_y());
+      }
+    }
+  }
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    if (row_it.data()->blob_list()->empty())
+      delete row_it.extract();  // Discard empty rows.
+  }
+}
+
+
+/**
+ * @name most_overlapping_row
+ *
+ * Return the row which most overlaps the blob.
+ * Starting at the row row_it points to, the following rows are examined
+ * for as long as they overlap the blob vertically. Two overlapping rows
+ * whose combined extent is not larger than rowsize are merged on the way.
+ *
+ * @param row_it        iterator at the first overlapping row; on return it
+ *                      points to the chosen row
+ * @param best_row      output: the chosen row
+ * @param top           top of blob
+ * @param bottom        bottom of blob
+ * @param rowsize       max row size
+ * @param testing_blob  enables debug output (with textord_debug_blob)
+ * @return ASSIGN, REJECT (the blob overlaps two rows by at least
+ *         rowsize - 1 each) or NEW_ROW (too much of the blob lies outside
+ *         the best row).
+ */
+OVERLAP_STATE most_overlapping_row(                    //find best row
+        TO_ROW_IT* row_it,  //iterator
+        TO_ROW*& best_row,  //output row
+        float top,          //top of blob
+        float bottom,       //bottom of blob
+        float rowsize,      //max row size
+        bool testing_blob  //test stuff
+) {
+  OVERLAP_STATE result;          //result of tests
+  float overlap;                 //of blob & row
+  float bestover;                //nearest row
+  float merge_top, merge_bottom; //size of merged row
+  // NOTE(review): testpt is never used in this function.
+  ICOORD testpt;                 //testing only
+  TO_ROW *row;                   //current row
+  TO_ROW *test_row;              //for multiple overlaps
+  BLOBNBOX_IT blob_it;           //for merging rows
+
+  result = ASSIGN;
+  row = row_it->data ();
+  // Overlap with the first row: blob height less whatever sticks out
+  // above or below the row.
+  bestover = top - bottom;
+  if (top > row->max_y ())
+    bestover -= top - row->max_y ();
+  if (bottom < row->min_y ())
+                                 //compute overlap
+    bestover -= row->min_y () - bottom;
+  if (testing_blob && textord_debug_blob) {
+    tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f\n",
+            bottom, top, row->min_y(), row->max_y(), rowsize, bestover);
+  }
+  test_row = row;
+  do {
+    if (!row_it->at_last ()) {
+      row_it->forward ();
+      test_row = row_it->data ();
+      if (test_row->min_y () <= top && test_row->max_y () >= bottom) {
+        merge_top =
+          test_row->max_y () >
+          row->max_y ()? test_row->max_y () : row->max_y ();
+        merge_bottom =
+          test_row->min_y () <
+          row->min_y ()? test_row->min_y () : row->min_y ();
+        if (merge_top - merge_bottom <= rowsize) {
+          // The two rows together still fit into one row height: move the
+          // blobs of row into test_row and delete the element in front of
+          // test_row.
+          if (testing_blob && textord_debug_blob) {
+            tprintf ("Merging rows at (%g,%g), (%g,%g)\n",
+              row->min_y (), row->max_y (),
+              test_row->min_y (), test_row->max_y ());
+          }
+          test_row->set_limits (merge_bottom, merge_top);
+          blob_it.set_to_list (test_row->blob_list ());
+          blob_it.add_list_after (row->blob_list ());
+          blob_it.sort (blob_x_order);
+          row_it->backward ();
+          delete row_it->extract ();
+          row_it->forward ();
+          bestover = -1.0f;      //force replacement
+        }
+        overlap = top - bottom;
+        if (top > test_row->max_y ())
+          overlap -= top - test_row->max_y ();
+        if (bottom < test_row->min_y ())
+          overlap -= test_row->min_y () - bottom;
+        // The blob covers (almost) a whole row height in two rows.
+        if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
+          result = REJECT;
+        }
+        if (overlap > bestover) {
+          bestover = overlap;    //find biggest overlap
+          row = test_row;
+        }
+        if (testing_blob && textord_debug_blob) {
+          tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f->%f\n",
+                  bottom, top, test_row->min_y(), test_row->max_y(),
+                  rowsize, overlap, bestover);
+        }
+      }
+    }
+  }
+  while (!row_it->at_last ()
+    && test_row->min_y () <= top && test_row->max_y () >= bottom);
+  while (row_it->data () != row)
+    row_it->backward ();         //make it point to row
+                                 //doesn't overlap much
+  // More than rowsize * textord_overlap_x of the blob lies outside the best
+  // row; with textord_fix_makerow_bug set the overlap itself must also be
+  // below that amount.
+  if (top - bottom - bestover > rowsize * textord_overlap_x &&
+      (!textord_fix_makerow_bug || bestover < rowsize * textord_overlap_x)
+    && result == ASSIGN)
+    result = NEW_ROW;            //doesn't overlap enough
+  best_row = row;
+  return result;
+}
+
+
+/**
+ * @name blob_x_order
+ *
+ * Sort function to sort blobs in x from page left.
+ * Both arguments point to a BLOBNBOX pointer. The result is -1, 0 or 1
+ * depending on whether the left edge of the first blob's bounding box is
+ * less than, equal to or greater than that of the second.
+ */
+int blob_x_order(                    // sort function
+                 const void *item1,  // items to compare
+                 const void *item2) {
+  const auto *lhs = *static_cast<const BLOBNBOX *const *>(item1);
+  const auto *rhs = *static_cast<const BLOBNBOX *const *>(item2);
+  const auto lhs_left = lhs->bounding_box().left();
+  const auto rhs_left = rhs->bounding_box().left();
+
+  // Ascending order: (greater) - (less) gives 1, 0 or -1.
+  return (lhs_left > rhs_left) - (lhs_left < rhs_left);
+}
+
+
+/**
+ * @name row_y_order
+ *
+ * Sort function to sort rows in y from page top.
+ * Both arguments point to a TO_ROW pointer. The row with the larger
+ * parallel_c() sorts first: the result is -1 if the first row's value is
+ * greater, 1 if it is smaller and 0 otherwise.
+ */
+int row_y_order(                    // sort function
+                const void *item1,  // items to compare
+                const void *item2) {
+  const auto *upper = *static_cast<const TO_ROW *const *>(item1);
+  const auto *lower = *static_cast<const TO_ROW *const *>(item2);
+  const auto upper_c = upper->parallel_c();
+  const auto lower_c = lower->parallel_c();
+
+  // Descending order: (less) - (greater) gives 1, 0 or -1.
+  return (upper_c < lower_c) - (upper_c > lower_c);
+}
+
+
+/**
+ * @name row_spacing_order
+ *
+ * Qsort style function to compare 2 TO_ROWS based on their spacing value.
+ * Both arguments point to a TO_ROW pointer. The result is -1, 0 or 1 for a
+ * first row whose spacing is smaller than, equal to or larger than that of
+ * the second row.
+ */
+int row_spacing_order(                    // sort function
+                      const void *item1,  // items to compare
+                      const void *item2) {
+  const auto *first = *static_cast<const TO_ROW *const *>(item1);
+  const auto *second = *static_cast<const TO_ROW *const *>(item2);
+
+  if (first->spacing < second->spacing) {
+    return -1;
+  }
+  if (first->spacing > second->spacing) {
+    return 1;
+  }
+  return 0;
+}
+
+/**
+ * @name mark_repeated_chars
+ *
+ * Mark blobs marked with BTFT_LEADER in repeated sets using the
+ * repeated_set member of BLOBNBOX.
+ * Runs of at least kMinLeaderCount blobs get a set number, counted from 1
+ * in list order; the number of sets found is stored in the row with
+ * set_num_repeated_sets().
+ */
+void mark_repeated_chars(TO_ROW *row) {
+  BLOBNBOX_IT box_it(row->blob_list());            // Iterator.
+  int num_repeated_sets = 0;
+  if (!box_it.empty()) {
+    do {
+      BLOBNBOX* bblob = box_it.data();
+      int repeat_length = 1;
+      // A run can only start at a leader blob that is not joined to its
+      // predecessor and has a cblob.
+      if (bblob->flow() == BTFT_LEADER &&
+          !bblob->joined_to_prev() && bblob->cblob() != nullptr) {
+        // Scan forward with a copy of the iterator to measure the run;
+        // the scan stops at the first non-leader blob or when the iterator
+        // wraps round to the start of the list.
+        BLOBNBOX_IT test_it(box_it);
+        for (test_it.forward(); !test_it.at_first();) {
+          bblob = test_it.data();
+          if (bblob->flow() != BTFT_LEADER)
+            break;
+          test_it.forward();
+          bblob = test_it.data();
+          // A joined blob or one without a cblob right after a leader
+          // cancels the whole run.
+          if (bblob->joined_to_prev() || bblob->cblob() == nullptr) {
+            repeat_length = 0;
+            break;
+          }
+          ++repeat_length;
+        }
+      }
+      if (repeat_length >= kMinLeaderCount) {
+        // Long enough: label every blob of the run and step over it.
+        num_repeated_sets++;
+        for (; repeat_length > 0; box_it.forward(), --repeat_length) {
+          bblob = box_it.data();
+          bblob->set_repeated_set(num_repeated_sets);
+        }
+     } else {
+        // NOTE(review): bblob may have been reassigned by the scan above,
+        // in which case the blob cleared here is not box_it.data() -
+        // confirm that this is intended.
+        bblob->set_repeated_set(0);
+        box_it.forward();
+      }
+    } while (!box_it.at_first());  // until all done
+  }
+  row->set_num_repeated_sets(num_repeated_sets);
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/makerow.h b/tesseract/src/textord/makerow.h
new file mode 100644
index 00000000..c9e1e5e6
--- /dev/null
+++ b/tesseract/src/textord/makerow.h
@@ -0,0 +1,291 @@
+/**********************************************************************
+ * File:        makerow.h  (Formerly makerows.h)
+ * Description: Code to arrange blobs into rows of text.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           MAKEROW_H
+#define           MAKEROW_H
+
+#include          "params.h"
+#include          "ocrblock.h"
+#include          "blobs.h"
+#include          "blobbox.h"
+#include          "statistc.h"
+
+namespace tesseract {
+
+enum OVERLAP_STATE               //result type of most_overlapping_row()
+{
+  ASSIGN,                        //assign it to row
+  REJECT,                        //reject it - dual overlap
+  NEW_ROW                        //NOTE(review): presumably "make a new row for it" - confirm
+};
+
+enum ROW_CATEGORY {              // classification produced by get_row_category()
+  ROW_ASCENDERS_FOUND,           // xheight > 0 and ascrise > 0
+  ROW_DESCENDERS_FOUND,          // xheight > 0, no ascrise, descdrop != 0
+  ROW_UNKNOWN,                   // xheight > 0, no ascrise, descdrop == 0
+  ROW_INVALID,                   // xheight <= 0
+};
+
+extern BOOL_VAR_H(textord_heavy_nr, false, "Vigorously remove noise");  // extern parameter declarations: (name, value, description)
+extern BOOL_VAR_H (textord_show_initial_rows, false,
+"Display row accumulation");
+extern BOOL_VAR_H (textord_show_parallel_rows, false,
+"Display page correlated rows");
+extern BOOL_VAR_H (textord_show_expanded_rows, false,
+"Display rows after expanding");
+extern BOOL_VAR_H (textord_show_final_rows, false,
+"Display rows after final fitting");
+extern BOOL_VAR_H (textord_show_final_blobs, false,
+"Display blob bounds after pre-ass");
+extern BOOL_VAR_H (textord_test_landscape, false, "Tests refer to land/port");
+extern BOOL_VAR_H (textord_parallel_baselines, true,
+"Force parallel baselines");
+extern BOOL_VAR_H (textord_straight_baselines, false,
+"Force straight baselines");
+extern BOOL_VAR_H (textord_quadratic_baselines, false,
+"Use quadratic splines");
+extern BOOL_VAR_H (textord_old_baselines, true, "Use old baseline algorithm");
+extern BOOL_VAR_H (textord_old_xheight, true, "Use old xheight algorithm");
+extern BOOL_VAR_H (textord_fix_xheight_bug, true, "Use spline baseline");  // NOTE(review): description does not match the name - confirm
+extern BOOL_VAR_H (textord_fix_makerow_bug, true,
+"Prevent multiple baselines");
+extern BOOL_VAR_H (textord_cblob_blockocc, true,
+"Use new projection for underlines");
+extern BOOL_VAR_H (textord_debug_xheights, false, "Test xheight algorithms");
+extern INT_VAR_H (textord_test_x, -INT32_MAX, "coord of test pt");
+extern INT_VAR_H (textord_test_y, -INT32_MAX, "coord of test pt");
+extern INT_VAR_H (textord_min_blobs_in_row, 4,
+"Min blobs before gradient counted");
+extern INT_VAR_H (textord_spline_minblobs, 8,
+"Min blobs in each spline segment");
+extern INT_VAR_H (textord_spline_medianwin, 6,
+"Size of window for spline segmentation");
+extern INT_VAR_H (textord_min_xheight, 10, "Min credible pixel xheight");
+extern double_VAR_H (textord_spline_shift_fraction, 0.02,
+"Fraction of line spacing for quad");
+extern double_VAR_H (textord_spline_outlier_fraction, 0.1,
+"Fraction of line spacing for outlier");
+extern double_VAR_H (textord_skew_ile, 0.5, "Ile of gradients for page skew");
+extern double_VAR_H (textord_skew_lag, 0.75,
+"Lag for skew on row accumulation");
+extern double_VAR_H (textord_linespace_iqrlimit, 0.2,
+"Max iqr/median for linespace");
+extern double_VAR_H (textord_width_limit, 8,
+"Max width of blobs to make rows");
+extern double_VAR_H (textord_chop_width, 1.5, "Max width before chopping");
+extern double_VAR_H (textord_minxh, 0.25,
+"fraction of linesize for min xheight");
+extern double_VAR_H (textord_min_linesize, 1.25,
+"* blob height for initial linesize");
+extern double_VAR_H (textord_excess_blobsize, 1.3,
+"New row made if blob makes row this big");
+extern double_VAR_H (textord_occupancy_threshold, 0.4,
+"Fraction of neighbourhood");
+extern double_VAR_H (textord_underline_width, 2.0,
+"Multiple of line_size for underline");
+extern double_VAR_H(textord_min_blob_height_fraction, 0.75,
+"Min blob height/top to include blob top into xheight stats");
+extern double_VAR_H (textord_xheight_mode_fraction, 0.4,
+"Min pile height to make xheight");
+extern double_VAR_H (textord_ascheight_mode_fraction, 0.15,
+"Min pile height to make ascheight");
+extern double_VAR_H (textord_ascx_ratio_min, 1.2, "Min cap/xheight");
+extern double_VAR_H (textord_ascx_ratio_max, 1.7, "Max cap/xheight");
+extern double_VAR_H (textord_descx_ratio_min, 0.15, "Min desc/xheight");
+extern double_VAR_H (textord_descx_ratio_max, 0.6, "Max desc/xheight");
+extern double_VAR_H (textord_xheight_error_margin, 0.1, "Accepted variation");
+extern INT_VAR_H (textord_lms_line_trials, 12, "Number of linew fits to do");  // NOTE(review): "linew" in the description looks like a typo for "line"
+extern BOOL_VAR_H (textord_new_initial_xheight, true,
+"Use test xheight mechanism");
+extern BOOL_VAR_H(textord_debug_blob, false, "Print test blob information");
+
+inline void get_min_max_xheight(int block_linesize, int *min_height, int *max_height) {
+  int32_t lower = static_cast<int32_t>(floor(block_linesize * textord_minxh));
+  if (textord_min_xheight > lower) lower = textord_min_xheight;  // absolute floor
+  *min_height = lower;
+  *max_height = static_cast<int32_t>(ceil(3.0 * block_linesize));  // three line sizes
+}
+
+inline ROW_CATEGORY get_row_category(const TO_ROW *row) {
+  if (row->xheight <= 0) return ROW_INVALID;          // no usable x-height
+  if (row->ascrise > 0) return ROW_ASCENDERS_FOUND;   // ascenders take precedence
+  return row->descdrop != 0 ? ROW_DESCENDERS_FOUND : ROW_UNKNOWN;
+}
+
+inline bool within_error_margin(float test, float num, float margin) {  // num assumed positive
+  return num * (1 - margin) <= test && test <= num * (1 + margin);  // inclusive band around num
+}
+
+void fill_heights(TO_ROW *row, float gradient, int min_height,
+                  int max_height, STATS *heights, STATS *floating_heights);
+
+float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK* block,
+                      TO_BLOCK_LIST* blocks);
+float make_rows(ICOORD page_tr,              // top right
+                TO_BLOCK_LIST *port_blocks);
+void make_initial_textrows(ICOORD page_tr,
+                           TO_BLOCK* block,  // block to do
+                           FCOORD rotation,  // for drawing
+                           bool testing_on);  // correct orientation
+void fit_lms_line(TO_ROW *row);
+void compute_page_skew(TO_BLOCK_LIST *blocks,  // list of blocks
+                       float &page_m,          // average gradient
+                       float &page_err);       // average error
+void vigorous_noise_removal(TO_BLOCK* block);
+void cleanup_rows_making(ICOORD page_tr,     // top right
+                         TO_BLOCK* block,    // block to do
+                         float gradient,     // gradient to fit
+                         FCOORD rotation,    // for drawing
+                         int32_t block_edge,   // edge of block
+                         bool testing_on);  // correct orientation
+void delete_non_dropout_rows(                   //find lines
+        TO_BLOCK* block,   //block to do
+        float gradient,    //global skew
+        FCOORD rotation,   //deskew vector
+        int32_t block_edge,  //left edge
+        bool testing_on   //correct orientation
+);
+bool find_best_dropout_row(                    //find neighbours
+        TO_ROW* row,        //row to test
+        int32_t distance,     //dropout dist
+        float dist_limit,   //threshold distance
+        int32_t line_index,   //index of row
+        TO_ROW_IT* row_it,  //current position
+        bool testing_on    //correct orientation
+);
+TBOX deskew_block_coords(                  //block box
+                        TO_BLOCK *block,  //block to do
+                        float gradient    //global skew
+                       );
+void compute_line_occupation(                    //project blobs
+                             TO_BLOCK *block,    //block to do
+                             float gradient,     //global skew
+                             int32_t min_y,        //min coord in block
+                             int32_t max_y,        //in block
+                             int32_t *occupation,  //output projection
+                             int32_t *deltas       //derivative
+                            );
+void compute_occupation_threshold(                    //project blobs
+                                  int32_t low_window,   //below result point
+                                  int32_t high_window,  //above result point
+                                  int32_t line_count,   //array sizes
+                                  int32_t *occupation,  //input projection
+                                  int32_t *thresholds   //output thresholds
+                                 );
+void compute_dropout_distances(                    //project blobs
+                               int32_t *occupation,  //input projection
+                               int32_t *thresholds,  //output thresholds
+                               int32_t line_count    //array sizes
+                              );
+void expand_rows(                   //find lines
+        ICOORD page_tr,    //top right
+        TO_BLOCK* block,   //block to do
+        float gradient,    //gradient to fit
+        FCOORD rotation,   //for drawing
+        int32_t block_edge,  //edge of block
+        bool testing_on   //correct orientation
+);
+void adjust_row_limits(                 //tidy limits
+                       TO_BLOCK *block  //block to do
+                      );
+void compute_row_stats(                  //find lines
+        TO_BLOCK* block,  //block to do
+        bool testing_on  //correct orientation
+);
+float median_block_xheight(                  //find lines
+                           TO_BLOCK *block,  //block to do
+                           float gradient    //global skew
+                          );
+
+int compute_xheight_from_modes(
+    STATS *heights, STATS *floating_heights, bool cap_only, int min_height,
+    int max_height, float *xheight, float *ascrise);
+
+int32_t compute_row_descdrop(TO_ROW *row,     // row to do
+                           float gradient,  // global skew
+                           int xheight_blob_count,
+                           STATS *heights);
+int32_t compute_height_modes(STATS *heights,    // stats to search
+                           int32_t min_height,  // bottom of range
+                           int32_t max_height,  // top of range
+                           int32_t *modes,      // output array
+                           int32_t maxmodes);   // size of modes
+void correct_row_xheight(TO_ROW *row,    // row to fix
+                         float xheight,  // average values
+                         float ascrise,
+                         float descdrop);
+void separate_underlines(TO_BLOCK* block,  // block to do
+                         float gradient,   // skew angle
+                         FCOORD rotation,  // inverse landscape
+                         bool testing_on);  // correct orientation
+void pre_associate_blobs(ICOORD page_tr,   // top right
+                         TO_BLOCK* block,  // block to do
+                         FCOORD rotation,  // inverse landscape
+                         bool testing_on);  // correct orientation
+void fit_parallel_rows(TO_BLOCK* block,   // block to do
+                       float gradient,    // gradient to fit
+                       FCOORD rotation,   // for drawing
+                       int32_t block_edge,  // edge of block
+                       bool testing_on);  // correct orientation
+void fit_parallel_lms(float gradient,  // forced gradient
+                      TO_ROW *row);      // row to fit
+void make_baseline_spline(TO_ROW *row,     // row to fit
+                          TO_BLOCK *block);  // block it came from
+bool segment_baseline(         //split baseline
+        TO_ROW* row,                    //row to fit
+        TO_BLOCK* block,                //block it came from
+        int32_t& segments,                //no of segments
+        int32_t* xstarts                  //coords of segments
+);
+double *linear_spline_baseline ( //split baseline
+TO_ROW * row,                    //row to fit
+TO_BLOCK * block,                //block it came from
+int32_t & segments,                //no of segments
+int32_t xstarts[]                  //coords of segments
+);
+void assign_blobs_to_rows(                      //find lines
+        TO_BLOCK* block,      //block to do
+        float* gradient,      //block skew
+        int pass,             //identification
+        bool reject_misses,  //chuck big ones out
+        bool make_new_rows,  //add rows for unmatched
+        bool drawing_skew    //draw smoothed skew
+);
+                                 //find best row
+OVERLAP_STATE most_overlapping_row(TO_ROW_IT* row_it,  //iterator
+                                   TO_ROW*& best_row,  //output row
+                                   float top,          //top of blob
+                                   float bottom,       //bottom of blob
+                                   float rowsize,      //max row size
+                                   bool testing_blob  //test stuff
+                                 );
+int blob_x_order(                    //sort function
+                 const void *item1,  //items to compare
+                 const void *item2);
+int row_y_order(                    //sort function
+                const void *item1,  //items to compare
+                const void *item2);
+int row_spacing_order(                    //sort function
+                      const void *item1,  //items to compare
+                      const void *item2);
+
+void mark_repeated_chars(TO_ROW *row);  // number the runs of BTFT_LEADER blobs in row
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/oldbasel.cpp b/tesseract/src/textord/oldbasel.cpp
new file mode 100644
index 00000000..f8dadc33
--- /dev/null
+++ b/tesseract/src/textord/oldbasel.cpp
@@ -0,0 +1,1698 @@
+/**********************************************************************
+ * File:        oldbasel.cpp  (Formerly oldbl.c)
+ * Description: A re-implementation of the old baseline algorithm.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+ // Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "oldbasel.h"
+
+#include "ccstruct.h"
+#include "statistc.h"
+#include "quadlsq.h"
+#include "detlinefit.h"
+#include "makerow.h"
+#include "drawtord.h"
+#include "textord.h"
+#include "tprintf.h"
+
+#include <vector>       // for std::vector
+
+#include <algorithm>
+
+namespace tesseract {
+
+static BOOL_VAR (textord_really_old_xheight, false,
+"Use original wiseowl xheight");
+BOOL_VAR (textord_oldbl_debug, false, "Debug old baseline generation");
+static BOOL_VAR (textord_debug_baselines, false, "Debug baseline generation");
+static BOOL_VAR (textord_oldbl_paradef, true, "Use para default mechanism");
+static BOOL_VAR (textord_oldbl_split_splines, true, "Split stepped splines");
+static BOOL_VAR (textord_oldbl_merge_parts, true, "Merge suspect partitions");
+static BOOL_VAR (oldbl_corrfix, true, "Improve correlation of heights");
+static BOOL_VAR (oldbl_xhfix, false,
+"Fix bug in modes threshold for xheights");
+static BOOL_VAR(textord_ocropus_mode, false, "Make baselines for ocropus");
+static double_VAR (oldbl_xhfract, 0.4, "Fraction of est allowed in calc");
+static INT_VAR (oldbl_holed_losscount, 10,
+"Max lost before fallback line used");
+static double_VAR (oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot");
+static double_VAR (textord_oldbl_jumplimit, 0.15,
+"X fraction for new partition");
+
+#define TURNLIMIT          1     /*min size for turning point */
+#define X_HEIGHT_FRACTION  0.7   /*x-height/caps height */
+#define DESCENDER_FRACTION 0.5   /*descender/x-height */
+#define MIN_ASC_FRACTION   0.20  /*min size of ascenders */
+#define MIN_DESC_FRACTION  0.25  /*min size of descenders */
+#define MINASCRISE         2.0   /*min ascender/desc step */
+#define MAXHEIGHTVARIANCE  0.15  /*accepted variation in x-height */
+#define MAXHEIGHT          300   /*max blob height */
+#define MAXOVERLAP         0.1   /*max 10% missed overlap */
+#define MAXBADRUN          2     /*max non best for failed */
+#define HEIGHTBUCKETS      200   /* Num of buckets */
+#define MODENUM            10    /*NOTE(review): presumably no of height modes kept - confirm */
+#define MAXPARTS      6          /*size of the partsizes[] array in find_textlines */
+#define SPLINESIZE      23       /*find_textlines sizes xstarts[] as SPLINESIZE + 1 */
+
+#define ABS(x) ((x)<0 ? (-(x)) : (x))  /*absolute value; the argument is evaluated twice */
+
+/**********************************************************************
+ * make_old_baselines
+ *
+ * Top level function to make baselines the old way: fit each row of the
+ * block with find_textlines, then run correlate_lines over the block.
+ **********************************************************************/
+
+void Textord::make_old_baselines(TO_BLOCK* block,   // block to do
+                                 bool testing_on,  // correct orientation
+                                 float gradient) {
+  QSPLINE *prev_baseline = nullptr;  // baseline of previous good row
+  TO_ROW_IT row_it = block->get_rows();
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    TO_ROW *row = row_it.data();
+    find_textlines(block, row, 2, nullptr);
+    // Retry seeded with the previous row's baseline if the fit failed.
+    if (row->xheight <= 0 && prev_baseline != nullptr)
+      find_textlines(block, row, 2, prev_baseline);
+    if (row->xheight > 0) {  // was a good one
+      prev_baseline = &row->baseline;
+      continue;
+    }
+    prev_baseline = nullptr;
+    BLOBNBOX_IT blob_it(row->blob_list());
+    // A row without blobs has no position to report and data() is not valid.
+    if (textord_debug_baselines && !blob_it.empty()) {
+      const TBOX &first_box = blob_it.data()->bounding_box();
+      tprintf("Row baseline generation failed on row at (%d,%d)\n",
+        first_box.left(), first_box.bottom());
+    }
+  }
+  correlate_lines(block, gradient);
+  block->block->set_xheight(block->xheight);
+}
+
+
+/**********************************************************************
+ * correlate_lines
+ *
+ * Repair badly fitted rows of a block from their neighbours, then set
+ * the block xheight: from correlate_with_stats when an old xheight mode
+ * is on, else via compute_block_xheight. An empty block gets line_size.
+ **********************************************************************/
+
+void Textord::correlate_lines(TO_BLOCK *block, float gradient) {
+  TO_ROW_IT row_it = block->get_rows();
+  const int rowcount = row_it.length();
+  if (rowcount == 0) {
+    // No rows: fall back to the block's line size as its xheight.
+    block->xheight = block->line_size;
+    return;
+  }
+
+  // Gather the row pointers into a flat array for the index-based helpers.
+  std::vector<TO_ROW *> rows;
+  rows.reserve(rowcount);
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    rows.push_back(row_it.data());
+  }
+  TO_ROW **row_array = &rows[0];
+
+  // Try to fix bad lines using their neighbours.
+  correlate_neighbours(block, row_array, rowcount);
+
+  if (!textord_really_old_xheight && !textord_old_xheight) {
+    compute_block_xheight(block, gradient);
+    return;
+  }
+
+  // Old xheight modes: use the correlated value, with two fallbacks.
+  block->xheight = static_cast<float>(correlate_with_stats(row_array, rowcount, block));
+  if (block->xheight <= 0)
+    block->xheight = block->line_size * tesseract::CCStruct::kXHeightFraction;
+  if (block->xheight < textord_min_xheight)
+    block->xheight = (float) textord_min_xheight;
+}
+
+
+/**********************************************************************
+ * correlate_neighbours
+ *
+ * Try to fix rows that had a bad spline fit by using neighbours.
+ **********************************************************************/
+
+void Textord::correlate_neighbours(TO_BLOCK *block,  // block rows are in.
+                                   TO_ROW **rows,    // rows of block.
+                                   int rowcount) {   // no of rows to do.
+  // Pass 1: refit each row whose xheight is negative (failed fit), seeding
+  // find_textlines with the baseline of a nearby row that did fit.
+  for (int rowindex = 0; rowindex < rowcount; rowindex++) {
+    TO_ROW *row = rows[rowindex];
+    if (!(row->xheight < 0)) {
+      continue;                  // nothing to repair
+    }
+    // Nearest usable row above: not failed, and overlapping this baseline.
+    // NOTE(review): the upward scan starts two rows up (rowindex - 2) while
+    // the downward scan starts at rowindex + 1 - confirm this is intended.
+    int upperrow = rowindex - 2;
+    while (upperrow >= 0 &&
+           (rows[upperrow]->xheight < 0.0 ||
+            !row->baseline.overlap(&rows[upperrow]->baseline, MAXOVERLAP)))
+      upperrow--;
+    // Nearest usable row below, by the same criteria.
+    int lowerrow = rowindex + 1;
+    while (lowerrow < rowcount &&
+           (rows[lowerrow]->xheight < 0.0 ||
+            !row->baseline.overlap(&rows[lowerrow]->baseline, MAXOVERLAP)))
+      lowerrow++;
+
+    // Retry at degree 2 from the row above, then from the row below...
+    if (upperrow >= 0)
+      find_textlines(block, row, 2, &rows[upperrow]->baseline);
+    if (row->xheight < 0 && lowerrow < rowcount)
+      find_textlines(block, row, 2, &rows[lowerrow]->baseline);
+    // ...and finally at degree 1 from whichever neighbour exists.
+    if (row->xheight < 0) {
+      if (upperrow >= 0)
+        find_textlines(block, row, 1, &rows[upperrow]->baseline);
+      else if (lowerrow < rowcount)
+        find_textlines(block, row, 1, &rows[lowerrow]->baseline);
+    }
+  }
+
+  // Pass 2: rows that still failed keep a negative xheight; make do with
+  // its magnitude.
+  for (int rowindex = 0; rowindex < rowcount; rowindex++) {
+    TO_ROW *row = rows[rowindex];
+    if (row->xheight < 0)
+      row->xheight = -row->xheight;
+  }
+}
+
+
+/**********************************************************************
+ * correlate_with_stats
+ *
+ * Average the x-height, ascender and descender figures over the rows of a
+ * block, fill them in on rows lacking them and return the block x-height.
+ **********************************************************************/
+
+int Textord::correlate_with_stats(TO_ROW **rows,  // rows of block.
+                                  int rowcount,   // no of rows to do.
+                                  TO_BLOCK* block) {
+  TO_ROW *row;                   /*current row */
+  int rowindex;                  /*no of row */
+  float lineheight;              /*mean x-height */
+  float ascheight;               /*average ascenders */
+  float minascheight;            /*min allowed ascheight */
+  int xcount;                    /*no of samples for xheight */
+  float fullheight;              /*mean top height */
+  int fullcount;                 /*no of samples */
+  float descheight;              /*mean descender drop */
+  float mindescheight;           /*min allowed descheight */
+  int desccount;                 /*no of samples */
+
+                                 /*no samples */
+  xcount = fullcount = desccount = 0;
+  lineheight = ascheight = fullheight = descheight = 0.0;
+  for (rowindex = 0; rowindex < rowcount; rowindex++) {
+    row = rows[rowindex];        /*current row */
+    if (row->ascrise > 0.0) {    /*got ascenders? */
+      lineheight += row->xheight;/*average x-heights */
+      ascheight += row->ascrise; /*average ascenders */
+      xcount++;
+    }
+    else {
+      fullheight += row->xheight;/*assume full height */
+      fullcount++;
+    }
+    if (row->descdrop < 0.0) {   /*got descenders? */
+                                 /*average descenders */
+      descheight += row->descdrop;
+      desccount++;
+    }
+  }
+
+  if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) {
+    lineheight /= xcount;        /*average x-height */
+                                 /*average caps height */
+    fullheight = lineheight + ascheight / xcount;
+                                 /*must be decent size */
+    if (fullheight < lineheight * (1 + MIN_ASC_FRACTION))
+      fullheight = lineheight * (1 + MIN_ASC_FRACTION);
+  }
+  else {
+    fullheight /= fullcount;     /*average max height; fullcount > 0 whenever rowcount > 0 */
+                                 /*guess x-height */
+    lineheight = fullheight * X_HEIGHT_FRACTION;
+  }
+  if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2))
+    descheight /= desccount;     /*average descenders */
+  else
+                                 /*guess descenders */
+    descheight = -lineheight * DESCENDER_FRACTION;
+
+  if (lineheight > 0.0f)
+    block->block->set_cell_over_xheight((fullheight - descheight) / lineheight);
+
+  minascheight = lineheight * MIN_ASC_FRACTION;
+  mindescheight = -lineheight * MIN_DESC_FRACTION;
+  for (rowindex = 0; rowindex < rowcount; rowindex++) {
+    row = rows[rowindex];        /*do each row */
+    row->all_caps = false;
+    if (row->ascrise / row->xheight < MIN_ASC_FRACTION) {
+    /*no ascenders, or too small relative to the row xheight */
+      if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
+      && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) {
+        row->ascrise = fullheight - lineheight;
+                                 /*set to average */
+        row->xheight = lineheight;
+
+      }
+      else if (row->xheight >= fullheight * (1 - MAXHEIGHTVARIANCE)
+      && row->xheight <= fullheight * (1 + MAXHEIGHTVARIANCE)) {
+        row->ascrise = row->xheight - lineheight;
+                                 /*row height matches the full height: treat as all caps */
+        row->xheight = lineheight;
+        row->all_caps = true;
+      }
+      else {
+        row->ascrise = (fullheight - lineheight) * row->xheight
+          / fullheight;
+                                 /*scale it */
+        row->xheight -= row->ascrise;
+        row->all_caps = true;
+      }
+      if (row->ascrise < minascheight)
+        row->ascrise =
+          row->xheight * ((1.0 - X_HEIGHT_FRACTION) / X_HEIGHT_FRACTION);
+    }
+    if (row->descdrop > mindescheight) {
+      if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
+        && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE))
+                                 /*set to average */
+          row->descdrop = descheight;
+      else
+        row->descdrop = -row->xheight * DESCENDER_FRACTION;
+    }
+  }
+  return static_cast<int>(lineheight);       //block xheight, truncated to int
+}
+
+
+/**********************************************************************
+ * find_textlines
+ *
+ * Compute the baseline (and x-height figures) for the given row.
+ **********************************************************************/
+
+void Textord::find_textlines(TO_BLOCK *block,  // block row is in
+                             TO_ROW *row,      // row to do
+                             int degree,       // required approximation
+                             QSPLINE *spline) {  // starting spline
+  int partcount;                 /*no of partitions of */
+  bool holed_line = false;      //lost too many blobs
+  int bestpart;                  /*biggest partition */
+  int partsizes[MAXPARTS];       /*no in each partition */
+  int lineheight;                /*guessed x-height */
+  float jumplimit;               /*allowed delta change */
+  int blobcount;                 /*no of blobs on line */
+  int pointcount;                /*no of coords */
+  int xstarts[SPLINESIZE + 1];   //segment boundaries
+  int segments;                  //no of segments
+
+                                 //no of blobs in row
+  blobcount = row->blob_list ()->length ();
+  // partition no of each blob
+  std::vector<char> partids(blobcount);
+  // useful sample points
+  std::vector<int> xcoords(blobcount);
+  // useful sample points
+  std::vector<int> ycoords(blobcount);
+  // edges of blob rectangles
+  std::vector<TBOX> blobcoords(blobcount);
+  // diffs from 1st approx
+  std::vector<float> ydiffs(blobcount);
+  // Buffers are passed as .data(): unlike &v[0] it is valid when blobcount is 0.
+  lineheight = get_blob_coords(row, static_cast<int>(block->line_size), blobcoords.data(),
+    holed_line, blobcount);
+                                 /*limit for line change */
+  jumplimit = lineheight * textord_oldbl_jumplimit;
+  if (jumplimit < MINASCRISE)
+    jumplimit = MINASCRISE;
+
+  if (textord_oldbl_debug) {
+    tprintf
+      ("\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n",
+      block->line_size, lineheight, jumplimit);
+  }
+  if (holed_line)
+    make_holed_baseline(blobcoords.data(), blobcount, spline, &row->baseline,
+      row->line_m ());
+  else
+    make_first_baseline(blobcoords.data(), blobcount,
+      xcoords.data(), ycoords.data(), spline, &row->baseline, jumplimit);
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_final_rows)
+    row->baseline.plot (to_win, ScrollView::GOLDENROD);
+#endif
+  if (blobcount > 1) {
+    bestpart = partition_line(blobcoords.data(), blobcount,
+      &partcount, partids.data(), partsizes,
+      &row->baseline, jumplimit, ydiffs.data());
+    pointcount = partition_coords(blobcoords.data(), blobcount,
+      partids.data(), bestpart, xcoords.data(), ycoords.data());
+    segments = segment_spline(blobcoords.data(), blobcount,
+      xcoords.data(), ycoords.data(), degree, pointcount, xstarts);
+    if (!holed_line) {
+      do {
+        row->baseline = QSPLINE(xstarts, segments,
+          xcoords.data(), ycoords.data(), pointcount, degree);
+      }
+      while (textord_oldbl_split_splines
+        && split_stepped_spline (&row->baseline, jumplimit / 2,
+        xcoords.data(), xstarts, segments));
+    }
+    find_lesser_parts(row, blobcoords.data(), blobcount,
+                      partids.data(), partsizes, partcount, bestpart);
+
+  }
+  else {
+    row->xheight = -1.0f;        /*failed: fewer than two usable blobs */
+    row->descdrop = 0.0f;
+    row->ascrise = 0.0f;
+  }
+  row->baseline.extrapolate (row->line_m (),
+    block->block->pdblk.bounding_box ().left (),
+    block->block->pdblk.bounding_box ().right ());
+
+  if (textord_really_old_xheight) {
+    old_first_xheight (row, blobcoords.data(), lineheight,
+      blobcount, &row->baseline, jumplimit);
+  } else if (textord_old_xheight) {
+    make_first_xheight (row, blobcoords.data(), lineheight, static_cast<int>(block->line_size),
+                        blobcount, &row->baseline, jumplimit);
+  } else {
+    compute_row_xheight(row, block->block->classify_rotation(),
+                        row->line_m(), block->line_size);
+  }
+}
+
+/**********************************************************************
+ * get_blob_coords
+ *
+ * Fill the blobcoords array with the coordinates of the blobs
+ * in the row. The return value is the first guess at the line height.
+ *
+ * Boxes come from box_next_pre_chopped. A box is kept if it is the
+ * first one, is taller than a quarter of lineheight, arrives on the
+ * final loop iteration, or is dot-like (height and width within a
+ * factor of oldbl_dot_error_size of each other). holed_line is set
+ * when more than oldbl_holed_losscount consecutive boxes were dropped,
+ * and outcount receives the number of boxes kept. If the row has no
+ * blobs, 0 is returned and both outputs are left unchanged.
+ **********************************************************************/
+
+int get_blob_coords(                    //get boxes
+        TO_ROW* row,        //row to use
+        int32_t lineheight,   //block level
+        TBOX* blobcoords,    //output boxes
+        bool& holed_line,  //lost a lot of blobs
+        int& outcount       //no of real blobs
+) {
+  BLOBNBOX_IT blob_it = row->blob_list();
+  if (blob_it.empty())
+    return 0;                    // no blobs at all
+
+  STATS heightstat(0, MAXHEIGHT);  // heights of the sufficiently tall boxes
+  int kept = 0;                  // boxes accepted into blobcoords so far
+  int current_run = 0;           // consecutive rejected boxes
+  int longest_run = 0;           // longest such run seen
+
+  blob_it.mark_cycle_pt();
+  do {
+    // Each candidate lands in the next free slot and is overwritten by the
+    // following one unless it gets accepted.
+    TBOX &box = blobcoords[kept];
+    box = box_next_pre_chopped(&blob_it);
+    const bool tall = box.height() > lineheight * 0.25;
+    if (tall)
+      heightstat.add(box.height(), 1);
+
+    // Always keep the first box, tall boxes, and the final iteration's box.
+    bool accept = kept == 0 || tall || blob_it.cycled_list();
+    if (!accept) {
+      // Small boxes survive only if roughly square, i.e. they count as a dot.
+      accept = box.height() < box.width() * oldbl_dot_error_size &&
+               box.width() < box.height() * oldbl_dot_error_size;
+    }
+    if (accept) {
+      kept++;
+      current_run = 0;
+    } else {
+      current_run++;             // lost it
+      if (current_run > longest_run)
+        longest_run = current_run;
+    }
+  } while (!blob_it.cycled_list());
+
+  holed_line = longest_run > oldbl_holed_losscount;
+  outcount = kept;
+
+  // With more than one height sample, guess the x-height from ile(0.25).
+  if (heightstat.get_total() > 1)
+    return static_cast<int>(heightstat.ile(0.25));
+  return blobcoords[0].height();
+}
+
+
+/**********************************************************************
+ * make_first_baseline
+ *
+ * Make the first estimate at a baseline, either by shifting
+ * a supplied previous spline, or by doing a piecewise linear
+ * approximation using all the blobs.
+ **********************************************************************/
+
+void
+make_first_baseline (            //first baseline estimate: shifted spline or linear fit
+TBOX blobcoords[],                /*in: blob bounding boxes; first/last give the row's x extent */
+int blobcount,                   /*in: no of blobcoords */
+int xcoords[],                   /*out: blob x centres (written only when fitting) */
+int ycoords[],                   /*out: blob bottoms (written only when fitting) */
+QSPLINE * spline,                /*in: previous spline to reuse, may be nullptr */
+QSPLINE * baseline,              /*out: estimate; not written if textord_oldbl_paradef returns early */
+float jumplimit                  /*in: largest residual change still treated as smooth */
+) {
+  int leftedge;                  /*left edge of line */
+  int rightedge;                 /*right edge of line */
+  int blobindex;                 /*current blob */
+  int segment;                   /*current segment */
+  float prevy, thisy, nexty;     /*residuals of 3 consecutive blobs; prevy is reused for turns later */
+  float y1, y2, y3;              /*last 3 smooth residuals */
+  float maxmax, minmin;          /*extremes of the smooth residuals */
+  int x2 = 0;                    /*right edge of old y3 */
+  int ycount;                    /*no of smooth residuals, later no of turns */
+  float yturns[SPLINESIZE];      /*y coords of turn pts */
+  int xturns[SPLINESIZE];        /*xcoords of turn pts */
+  int xstarts[SPLINESIZE + 1];   /*segment boundaries handed to the QSPLINE fit */
+  int segments;                  //no of segments
+  ICOORD shift;                  //shift of spline
+
+  prevy = 0;
+                                 /*left edge of row: left of the first box */
+  leftedge = blobcoords[0].left ();
+                                 /*right edge of row: right of the last box */
+  rightedge = blobcoords[blobcount - 1].right ();
+  if (spline == nullptr             /*no given spline */
+    || spline->segments < 3      /*or trivial */
+                                 /*or its 2nd/last xcoords are more than MAXOVERLAP of the row width inside the row edges */
+    || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge)
+    || spline->xcoords[spline->segments - 1] < rightedge
+  - MAXOVERLAP * (rightedge - leftedge)) {
+    if (textord_oldbl_paradef)
+      return;                    //leave *baseline exactly as the caller passed it
+    xstarts[0] = blobcoords[0].left () - 1;
+    for (blobindex = 0; blobindex < blobcount; blobindex++) {
+      xcoords[blobindex] = (blobcoords[blobindex].left ()
+        + blobcoords[blobindex].right ()) / 2;
+      ycoords[blobindex] = blobcoords[blobindex].bottom ();
+    }
+    xstarts[1] = blobcoords[blobcount - 1].right () + 1;
+    segments = 1;                /*no of segments */
+
+                                 /*degree 1 fit in one segment over all the blobs */
+    *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
+
+    if (blobcount >= 3) {
+      y1 = y2 = y3 = 0.0f;
+      ycount = 0;
+      segment = 0;               /*no of turning points recorded so far */
+      maxmax = minmin = 0.0f;
+      thisy = ycoords[0] - baseline->y (xcoords[0]);
+      nexty = ycoords[1] - baseline->y (xcoords[1]);
+      for (blobindex = 2; blobindex < blobcount; blobindex++) {
+        prevy = thisy;           /*shift the 3 residuals along by one blob */
+        thisy = nexty;
+        nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]);
+                                 /*middle blob is smooth: within jumplimit of both neighbours */
+        if (ABS (thisy - prevy) < jumplimit && ABS (thisy - nexty) < jumplimit) {
+          y1 = y2;               /*shift window */
+          y2 = y3;
+          y3 = thisy;            /*middle point */
+          ycount++;
+                                 /*y2 is a local max of the smooth residuals */
+          if (ycount >= 3 && ((y1 < y2 && y2 >= y3)
+                                 /*or a local min */
+          || (y1 > y2 && y2 <= y3))) {
+            if (segment < SPLINESIZE - 2) {
+                                 /*record the turning pt at x2, silently dropped once the arrays are nearly full */
+              xturns[segment] = x2;
+              yturns[segment] = y2;
+              segment++;         /*no of spline segs */
+            }
+          }
+          if (ycount == 1) {
+            maxmax = minmin = y3;/*initialise limits */
+          }
+          else {
+            if (y3 > maxmax)
+              maxmax = y3;       /*biggest max */
+            if (y3 < minmin)
+              minmin = y3;       /*smallest min */
+          }
+                                 /*x for a turning pt, should y3 prove to be one on the next smooth blob */
+          x2 = blobcoords[blobindex - 1].right ();
+        }
+      }
+
+      jumplimit *= 1.2f;         /*the tests below use the enlarged limit */
+                                 /*must be wavy: smooth residuals span more than the limit */
+      if (maxmax - minmin > jumplimit) {
+        ycount = segment;        /*no of turning points to examine */
+        for (blobindex = 0, segment = 1; blobindex < ycount;
+        blobindex++) {
+          if (yturns[blobindex] > minmin + jumplimit
+          || yturns[blobindex] < maxmax - jumplimit) {
+                                 /*significant peak */
+            if (segment == 1
+              || yturns[blobindex] > prevy + jumplimit
+            || yturns[blobindex] < prevy - jumplimit) {
+                                 /*first one, or different to previous: start a new segment at this turn */
+              xstarts[segment] = xturns[blobindex];
+              segment++;
+              prevy = yturns[blobindex];
+            }
+                                 /*bigger max */
+            else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy)
+                                 /*smaller min */
+            || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) {
+              xstarts[segment - 1] = xturns[blobindex];
+                                 /*improved previous: the last boundary moves to this turn */
+              prevy = yturns[blobindex];
+            }
+          }
+        }
+        xstarts[segment] = blobcoords[blobcount - 1].right () + 1;
+        segments = segment;      /*no of segments */
+                                 /*degree 1 refit, now with one piece per segment */
+        *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
+      }
+    }
+  }
+  else {
+    *baseline = *spline;         /*copy it, then shift vertically so that at the first box's right edge it passes through that box's bottom */
+    shift = ICOORD (0, static_cast<int16_t>(blobcoords[0].bottom ()
+      - spline->y (blobcoords[0].right ())));
+    baseline->move (shift);
+  }
+}
+
+
+/**********************************************************************
+ * make_holed_baseline
+ *
+ * Make the first estimate at a baseline by fitting a straight line of
+ * the given gradient to the blob bottoms. If a usable previous spline
+ * is supplied, use a copy of it shifted to meet that line mid-row.
+ **********************************************************************/
+
+void
+make_holed_baseline (            //straight-line or shifted-spline estimate
+TBOX blobcoords[],                /*blob bounding boxes */
+int blobcount,                   /*no of blobcoords */
+QSPLINE * spline,                /*previous spline to reuse, may be nullptr */
+QSPLINE * baseline,              /*output spline */
+float gradient                   //fixed slope for the fitted line
+) {
+  // Horizontal extent of the row: left of the first box to right of
+  // the last box.
+  const int leftedge = blobcoords[0].left ();
+  const int rightedge = blobcoords[blobcount - 1].right();
+
+  // Fit the intercept c of a line with the given gradient to the
+  // bottom-centre point of every box.
+  tesseract::DetLineFit lms;
+  for (int i = 0; i < blobcount; i++) {
+    const TBOX& box = blobcoords[i];
+    lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
+  }
+  float c;                       //line parameter
+  lms.ConstrainedFit(gradient, &c);
+
+  // Default result: one segment spanning the row, built from the
+  // coefficients {0, gradient, c}.
+  int32_t xstarts[2] = { leftedge, rightedge };
+  double coeffs[3] = { 0, gradient, c };
+  *baseline = QSPLINE (1, xstarts, coeffs);
+
+  // The supplied spline replaces that line only if it exists, has at
+  // least 3 segments, its xcoords[1] is no further right than leftedge
+  // + margin and its xcoords[segments - 1] no further left than rightedge - margin.
+  const auto margin = MAXOVERLAP * (rightedge - leftedge);
+  const bool reuse_spline = spline != nullptr
+    && spline->segments >= 3
+    && spline->xcoords[1] <= leftedge + margin
+    && spline->xcoords[spline->segments - 1] >= rightedge - margin;
+  if (!reuse_spline)
+    return;
+
+  // Copy the spline, then move it vertically so that it meets the
+  // fitted line at the horizontal centre of the row.
+  *baseline = *spline;           /*copy it */
+  const float x = (leftedge + rightedge) / 2.0;
+  ICOORD shift (0, static_cast<int16_t>(gradient * x + c - spline->y (x)));
+  baseline->move (shift);
+}
+
+
+/**********************************************************************
+ * partition_line
+ *
+ * Partition a row of blobs into different groups of continuous
+ * y position. jumplimit specifies the max allowable limit on a jump
+ * before a new partition is started.
+ * The return value is the biggest partition
+ **********************************************************************/
+
+int
+partition_line (                 //partition blobs
+TBOX blobcoords[],                //bounding boxes
+int blobcount,                   /*no of blobs on row */
+int *numparts,                   /*out: number of partitions */
+char partids[],                  /*out: partition no of each blob */
+int partsizes[],                 /*out: no in each partition */
+QSPLINE * spline,                /*curve to fit to */
+float jumplimit,                 /*allowed delta change */
+float ydiffs[]                   /*out: diff from spline */
+) {
+  float partdiffs[MAXPARTS];     /*step between parts */
+
+  for (int part = 0; part < MAXPARTS; part++)
+    partsizes[part] = 0;         /*zero them all */
+
+  // get_ydiffs fills ydiffs and returns the index of the blob that
+  // both sweeps below start from.
+  const int startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs);
+  *numparts = 1;                 /*1 partition */
+
+  // State carried from one choose_partition call to the next within
+  // a sweep.
+  int lastpart = -1;             /*-1 marks the first point */
+  float drift = 0.0f;
+  float last_delta = 0.0f;
+
+  // Assigns one blob to a partition, continuing from the sweep state,
+  // and records the choice in partids and partsizes.
+  auto assign_blob = [&](int blobindex) {
+    if (textord_oldbl_debug) {
+      tprintf ("%d(%d,%d), ", blobindex,
+        blobcoords[blobindex].left (),
+        blobcoords[blobindex].bottom ());
+    }
+    lastpart = choose_partition(ydiffs[blobindex], partdiffs, lastpart,
+                                jumplimit, &drift, &last_delta, numparts);
+                                 /*record partition */
+    partids[blobindex] = lastpart;
+    partsizes[lastpart]++;       /*another in it */
+  };
+
+  // Forward sweep: from the start blob to the end of the row.
+  for (int blobindex = startx; blobindex < blobcount; blobindex++)
+    assign_blob (blobindex);
+
+  // Backward sweep: reset the state and walk from the start blob
+  // down to the first blob of the row. The start blob is assigned a
+  // second time, so its forward-sweep count is taken back first.
+  lastpart = -1;
+  drift = 0.0f;
+  last_delta = 0.0f;
+  partsizes[0]--;                /*doing 1st pt again */
+  for (int blobindex = startx; blobindex >= 0; blobindex--)
+    assign_blob (blobindex);
+
+  // Pick the largest partition; on equal sizes the higher numbered
+  // partition wins because of the >= comparison.
+  int biggestpart = 0;
+  for (int part = 1; part < *numparts; part++) {
+    if (partsizes[part] >= partsizes[biggestpart])
+      biggestpart = part;        /*new biggest */
+  }
+
+  // Optionally let merge_oldbl_parts move runs from other partitions
+  // into the biggest one.
+  if (textord_oldbl_merge_parts) {
+    merge_oldbl_parts(blobcoords, blobcount, partids, partsizes,
+                      biggestpart, jumplimit);
+  }
+  return biggestpart;            /*biggest partition */
+}
+
+
+/**********************************************************************
+ * merge_oldbl_parts
+ *
+ * For any adjacent group of blobs in a different part, put them in the
+ * main part if they fit closely to neighbours in the main part.
+ **********************************************************************/
+
+void
+merge_oldbl_parts (              //merge stray runs into main part
+TBOX blobcoords[],                //bounding boxes
+int blobcount,                   /*no of blobs on row */
+char partids[],                  /*in/out: partition no of each blob */
+int partsizes[],                 /*in/out: no in each partition */
+int biggestpart,                 //major partition
+float jumplimit                  /*allowed delta change */
+) {
+  // Walks the row as runs of consecutive blobs sharing a partition id.
+  // When a run outside biggestpart that is longer than MAXBADRUN ends,
+  // a line is fitted through the run and compared with the nearest
+  // biggestpart blob(s) on either side; if one of them lies within
+  // jumplimit of that line the whole run is relabelled as biggestpart.
+  // A run that extends to the last blob is never examined, because the
+  // test only fires when the partition id changes.
+  QLSQ stats;                    //line stuff
+
+  // Bottom-centre point of one blob's box.
+  auto blob_point = [&](int index) {
+    return FCOORD ((blobcoords[index].left ()
+      + blobcoords[index].right ()) / 2.0,
+      blobcoords[index].bottom ());
+  };
+
+  int prevpart = biggestpart;    //partition of the current run
+  int runlength = 0;             //no of blobs in the current run
+  int startx = 0;                //index of first blob of the run
+  for (int blobindex = 0; blobindex < blobcount; blobindex++) {
+    if (partids[blobindex] == prevpart) {
+      runlength++;
+      continue;
+    }
+
+    // Partition change: blobs [startx, blobindex) formed one run.
+    // Only runs outside biggestpart and longer than MAXBADRUN are tested.
+    if (prevpart != biggestpart && runlength > MAXBADRUN) {
+      stats.clear ();
+      for (int i = startx; i < blobindex; i++) {
+        FCOORD coord = blob_point (i);
+        stats.add (coord.x (), coord.y ());
+      }
+      stats.fit (1);
+      const float m = stats.get_b ();
+      const float c = stats.get_c ();
+      if (textord_oldbl_debug)
+        tprintf ("Fitted line y=%g x + %g\n", m, c);
+
+      // True if the given blob lies within jumplimit of the fitted line.
+      auto fits_line = [&](int index) {
+        FCOORD coord = blob_point (index);
+        const float diff = m * coord.x () + c - coord.y ();
+        if (textord_oldbl_debug)
+          tprintf
+            ("Diff of common blob to suspect part=%g at (%g,%g)\n",
+            diff, coord.x (), coord.y ());
+        return diff < jumplimit && -diff < jumplimit;
+      };
+
+      // Search outwards from both ends of the run, one blob further per
+      // step, and stop at the first distance where a biggestpart blob is
+      // found on either side (both sides are tested at that distance).
+      bool found_one = false;    //found a biggestpart blob
+      bool close_one = false;    //found was close enough
+      for (int offset = 1; !found_one
+        && (startx - offset >= 0
+        || blobindex + offset <= blobcount); offset++) {
+        const int before = startx - offset;
+        const int after = blobindex + offset - 1;
+        if (before >= 0 && partids[before] == biggestpart) {
+          found_one = true;
+          if (fits_line (before))
+            close_one = true;
+        }
+        if (after < blobcount && partids[after] == biggestpart) {
+          found_one = true;
+          if (fits_line (after))
+            close_one = true;
+        }
+      }
+
+      if (close_one) {
+        if (textord_oldbl_debug)
+          tprintf
+            ("Merged %d blobs back into part %d from %d starting at (%d,%d)\n",
+            runlength, biggestpart, prevpart,
+            blobcoords[startx].left (),
+            blobcoords[startx].bottom ());
+        // Switch sides. The run's blobs are now counted in biggestpart
+        // as well as removed from prevpart, so partsizes keeps matching
+        // partids (previously only the subtraction was done).
+        partsizes[prevpart] -= runlength;
+        partsizes[biggestpart] += runlength;
+        for (int i = startx; i < blobindex; i++)
+          partids[i] = biggestpart;
+      }
+    }
+
+    prevpart = partids[blobindex];
+    runlength = 1;
+    startx = blobindex;
+  }
+}
+
+
+/**********************************************************************
+ * get_ydiffs
+ *
+ * Get the differences between the blobs and the spline,
+ * putting them in ydiffs.  The return value is the index
+ * of the blob in the middle of the "best behaved" region
+ **********************************************************************/
+
+int
+get_ydiffs (                     //evaluate differences
+TBOX blobcoords[],                //bounding boxes
+int blobcount,                   /*no of blobs */
+QSPLINE * spline,                /*approximating spline */
+float ydiffs[]                   /*output: one diff per blob */
+) {
+  float window_sum = 0.0f;       /*sum of |diff| over the last 3 blobs */
+  float best_sum = static_cast<float>(INT32_MAX);
+  int best_index = 0;            /*middle blob of the best window */
+  float step_total = 0.0f;       /*sum of spline steps */
+  int prev_x = blobcoords[0].left ();
+
+  for (int i = 0; i < blobcount; i++) {
+    const TBOX& box = blobcoords[i];
+    const int centre_x = (box.left () + box.right ()) >> 1;
+                                 //step functions in spline
+    step_total += spline->step (prev_x, centre_x);
+    prev_x = centre_x;
+
+    // Box bottom relative to the spline, with the accumulated steps
+    // added back. Kept as three separate float updates so the value is
+    // rounded to float at each stage.
+    float offset = box.bottom ();
+    offset -= spline->y (centre_x);
+    offset += step_total;
+    ydiffs[i] = offset;          /*store difference */
+
+    // Slide the window: drop the diff from three blobs back, then add
+    // the new one.
+    if (i > 2)
+      window_sum -= ABS (ydiffs[i - 3]);
+    window_sum += ABS (offset);
+
+    // From i == 2 the window holds three diffs; remember the smallest
+    // sum and the blob in the middle of that window.
+    if (i >= 2 && window_sum < best_sum) {
+      best_sum = window_sum;
+      best_index = i - 1;
+    }
+  }
+  return best_index;
+}
+
+
+/**********************************************************************
+ * choose_partition
+ *
+ * Choose a partition for the point and return the index.
+ **********************************************************************/
+
+int
+choose_partition (               //select partition
+float diff,             /*diff from spline */
+float partdiffs[],               /*in/out: reference diff of each part */
+int lastpart,                    /*last assigned partition; <0 for first point */
+float jumplimit,                 /*new part threshold */
+float* drift,                    /*in/out: running drift estimate */
+float* lastdelta,                /*in/out: delta of the previous point */
+int *partcount                   /*in/out: no of partitions */
+) {
+  // First point of a sweep: it defines partition 0 and resets the
+  // drift state.
+  if (lastpart < 0) {
+    partdiffs[0] = diff;
+    lastpart = 0;                /*first point */
+    *drift = 0.0f;
+    *lastdelta = 0.0f;
+  }
+
+  // Half the threshold is used for the stay-or-jump and drift tests.
+  const float half_jump = jumplimit / 2;
+                                 /*adjusted diff from part */
+  float delta = diff - partdiffs[lastpart] - *drift;
+  if (textord_oldbl_debug) {
+    tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, *drift);
+  }
+
+  int chosen = lastpart;         /*best was last one, unless it jumped */
+  if (ABS (delta) > half_jump) {
+    // Too far from the last partition: find the existing partition
+    // with the smallest adjusted diff, starting from part 0.
+    chosen = 0;
+    float nearest = diff - partdiffs[0] - *drift;
+    for (int part = 1; part < *partcount; part++) {
+      const float candidate = diff - partdiffs[part] - *drift;
+      if (ABS (candidate) < ABS (nearest)) {
+        nearest = candidate;
+        chosen = part;           /*part with nearest jump */
+      }
+    }
+    delta = nearest;
+    // Even the nearest one is beyond jumplimit: start a new partition
+    // at this point if a spare one is left.
+    if (ABS (nearest) > jumplimit && *partcount < MAXPARTS) {
+      chosen = (*partcount)++;
+      partdiffs[chosen] = diff - *drift;
+      delta = 0.0f;
+    }
+  }
+
+  // When the point stays in the same partition and its delta is small
+  // or close to the previous delta, add a third of it to the drift.
+  const bool steady = ABS (delta - *lastdelta) < half_jump
+    || ABS (delta) < half_jump;
+  if (chosen == lastpart && steady)
+    *drift = (3 * *drift + delta) / 3;
+  *lastdelta = delta;
+
+  if (textord_oldbl_debug) {
+    tprintf ("P=%d\n", chosen);
+  }
+
+  return chosen;
+}
+
+/**********************************************************************
+ * partition_coords
+ *
+ * Get the x,y coordinates of all points in the bestpart and put them
+ * in xcoords,ycoords. Return the number of points found.
+ **********************************************************************/
+
+int
+partition_coords (               //find relevant coords
+TBOX blobcoords[],                //bounding boxes
+int blobcount,                   /*no of blobs in row */
+char partids[],                  /*partition no of each blob */
+int bestpart,                    /*partition to extract */
+int xcoords[],                   /*out: x centres of its blobs */
+int ycoords[]                    /*out: bottoms of its blobs */
+) {
+  // Blobs are visited in row order, so the outputs keep that order.
+  int found = 0;                 /*no of points written so far */
+  for (int i = 0; i < blobcount; i++) {
+    if (partids[i] != bestpart)
+      continue;                  /*belongs to another partition */
+    const TBOX& box = blobcoords[i];
+                                 /*centre of blob */
+    xcoords[found] = (box.left () + box.right ()) >> 1;
+    ycoords[found] = box.bottom ();
+    found++;
+  }
+  return found;                  /*no of points found */
+}
+
+
+/**********************************************************************
+ * segment_spline
+ *
+ * Segment the row at midpoints between maxima and minima of the x,y pairs.
+ * The xstarts of the segments are returned and the number found.
+ **********************************************************************/
+
+int
+segment_spline (                 //choose segment boundaries (xstarts) from turning points of y
+TBOX blobcoords[],                //bounding boxes (not referenced in this function)
+int blobcount,                   /*no of blobs in row (not referenced in this function) */
+int xcoords[],                   /*in: x of each point; first and last bound the result */
+int ycoords[],                   /*in: y of each point, parallel to xcoords */
+int degree, int pointcount,      /*degree below 2 forces a single segment; no of points */
+int xstarts[]                    //out: boundaries, entries 0 .. return value inclusive
+) {
+  int ptindex;                   /*index of the point being examined */
+  int segment;                   /*boundary being produced */
+  int lastmin, lastmax;          /*indices of the candidate min and max turn points */
+  int turnpoints[SPLINESIZE];    /*point indices of accepted turning points */
+  int turncount;                 /*no of turning points */
+  int max_x;                     //last point's x plus one; becomes the final boundary
+
+  xstarts[0] = xcoords[0] - 1;   //leftmost defined pt
+  max_x = xcoords[pointcount - 1] + 1;
+  if (degree < 2)
+    pointcount = 0;              //makes the turning point search below a no-op
+  turncount = 0;                 /*no turning points yet */
+  if (pointcount > 3) {
+    ptindex = 1;
+    lastmax = lastmin = 0;       /*start with first one */
+    while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) {
+                                 /*local minimum at ptindex */
+      if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) {
+        if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) {
+          if (turncount == 0 || turnpoints[turncount - 1] != lastmax)
+                                 /*a drop of more than TURNLIMIT confirms lastmax as a turn, unless already recorded */
+            turnpoints[turncount++] = lastmax;
+          lastmin = ptindex;     /*latest minimum */
+        }
+        else if (ycoords[ptindex] < ycoords[lastmin]) {
+          lastmin = ptindex;     /*lower minimum */
+        }
+      }
+
+                                 /*local maximum at ptindex */
+      if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) {
+        if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) {
+          if (turncount == 0 || turnpoints[turncount - 1] != lastmin)
+                                 /*a rise of more than TURNLIMIT confirms lastmin as a turn, unless already recorded */
+            turnpoints[turncount++] = lastmin;
+          lastmax = ptindex;     /*latest maximum */
+        }
+        else if (ycoords[ptindex] > ycoords[lastmax]) {
+          lastmax = ptindex;     /*higher maximum */
+        }
+      }
+      ptindex++;
+    }
+                                 /*ptindex is where the scan stopped: possible final min */
+    if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT
+    && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
+      if (turncount < SPLINESIZE - 1)
+                                 /*2 more turns: lastmax, then the stopping point */
+        turnpoints[turncount++] = lastmax;
+      if (turncount < SPLINESIZE - 1)
+        turnpoints[turncount++] = ptindex;
+    }
+    else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT
+      /*possible final max */
+    && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
+      if (turncount < SPLINESIZE - 1)
+                                 /*2 more turns: lastmin, then the stopping point */
+        turnpoints[turncount++] = lastmin;
+      if (turncount < SPLINESIZE - 1)
+        turnpoints[turncount++] = ptindex;
+    }
+    else if (turncount > 0 && turnpoints[turncount - 1] == lastmin
+    && turncount < SPLINESIZE - 1) {
+      if (ycoords[ptindex] > ycoords[lastmax])    /*last turn was a min: close with the higher of the two */
+        turnpoints[turncount++] = ptindex;
+      else
+        turnpoints[turncount++] = lastmax;
+    }
+    else if (turncount > 0 && turnpoints[turncount - 1] == lastmax
+    && turncount < SPLINESIZE - 1) {
+      if (ycoords[ptindex] < ycoords[lastmin])    /*last turn was a max: close with the lower of the two */
+        turnpoints[turncount++] = ptindex;
+      else
+        turnpoints[turncount++] = lastmin;
+    }
+  }
+
+  if (textord_oldbl_debug && turncount > 0)
+    tprintf ("First turn is %d at (%d,%d)\n",
+      turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]);
+  for (segment = 1; segment < turncount; segment++) {
+                                 /*y midway between the two turns (lastmax is reused as scratch here) */
+    lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2;
+
+    /* works with both rising and falling sections: step ptindex on while the next point has not passed the midway y */
+    if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]])
+                                 /*find rising y centre */
+      for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++);
+    else
+                                 /*find falling y centre */
+      for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++);
+
+                                 /*boundary x: rounded mean of the x at ptindex-1, ptindex and the two turns */
+    xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex]
+      + xcoords[turnpoints[segment - 1]]
+      + xcoords[turnpoints[segment]] + 2) / 4;
+    /*halfway between turns */
+    if (textord_oldbl_debug)
+      tprintf ("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n",
+        segment, turnpoints[segment],
+        xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
+        ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
+  }
+
+  xstarts[segment] = max_x;      /*segment is 1 here when fewer than 2 turns were found */
+  return segment;                /*no of segments; xstarts holds one more entry than this */
+}
+
+
+/**********************************************************************
+ * split_stepped_spline
+ *
+ * Re-segment the spline in cases where there is a big step function.
+ * Return true if any were done.
+ **********************************************************************/
+
+bool
+split_stepped_spline(           //re-segment around large steps between adjacent segments
+        QSPLINE* baseline,              //current spline fit whose steps are measured
+        float jumplimit,                 //largest step size tolerated at a boundary
+        int* xcoords,                   /*x of the fitted points; the scans below assume ascending order - TODO confirm */
+        int* xstarts,                   //in/out: segment boundaries
+        int& segments                    //in/out: no of segments (passed on to insert_spline_point)
+) {
+  bool doneany;                 //return value: true if any boundary was split
+  int segment;                   /*boundary index being tested */
+  int startindex, centreindex, endindex;    //xcoords indices at the start, middle and end boundary of the segment pair
+  float leftcoord, rightcoord;   //target x values for the two new boundaries
+  int leftindex, rightindex;     //xcoords indices chosen next to those targets
+  float step;                    //spline step
+
+  doneany = false;
+  startindex = 0;                //kept across iterations; only ever advances
+  for (segment = 1; segment < segments - 1; segment++) {
+    step = baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0,
+      (xstarts[segment] + xstarts[segment + 1]) / 2.0);
+    if (step < 0)                //step is measured between the midpoints of the two segments; only its size matters
+      step = -step;
+    if (step > jumplimit) {
+      while (xcoords[startindex] < xstarts[segment - 1])    //NOTE(review): no length is passed, so these scans have no upper bound
+        startindex++;
+      centreindex = startindex;
+      while (xcoords[centreindex] < xstarts[segment])
+        centreindex++;
+      endindex = centreindex;
+      while (xcoords[endindex] < xstarts[segment + 1])
+        endindex++;
+      if (segments >= SPLINESIZE) {
+        if (textord_debug_baselines)
+          tprintf ("Too many segments to resegment spline!!\n");
+      }
+      else if (endindex - startindex >= textord_spline_medianwin * 3) {
+        while (centreindex - startindex <    //keep at least 1.5 median windows of points on each side of the centre
+          textord_spline_medianwin * 3 / 2)
+          centreindex++;
+        while (endindex - centreindex <
+          textord_spline_medianwin * 3 / 2)
+          centreindex--;
+        leftindex = (startindex + startindex + centreindex) / 3;    //a third of the way from start to centre, by index
+        rightindex = (centreindex + endindex + endindex) / 3;       //two thirds of the way from centre to end, by index
+        leftcoord =
+          (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;   //same fractions, measured in x
+        rightcoord =
+          (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
+        while (xcoords[leftindex] > leftcoord    //walk leftindex towards leftcoord within the window limits
+          && leftindex - startindex > textord_spline_medianwin)
+          leftindex--;
+        while (xcoords[leftindex] < leftcoord
+          && centreindex - leftindex >
+          textord_spline_medianwin / 2)
+          leftindex++;
+        if (xcoords[leftindex] - leftcoord >     //step back if the previous point is nearer to leftcoord
+          leftcoord - xcoords[leftindex - 1])
+          leftindex--;
+        while (xcoords[rightindex] > rightcoord  //same for rightindex and rightcoord
+          && rightindex - centreindex >
+          textord_spline_medianwin / 2)
+          rightindex--;
+        while (xcoords[rightindex] < rightcoord
+          && endindex - rightindex > textord_spline_medianwin)
+          rightindex++;
+        if (xcoords[rightindex] - rightcoord >
+          rightcoord - xcoords[rightindex - 1])
+          rightindex--;
+        if (textord_debug_baselines)
+          tprintf ("Splitting spline at %d with step %g at (%d,%d)\n",
+            xstarts[segment],
+            baseline->
+            step ((xstarts[segment - 1] +
+            xstarts[segment]) / 2.0,
+            (xstarts[segment] +
+            xstarts[segment + 1]) / 2.0),
+            (xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
+            (xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
+        insert_spline_point (xstarts, segment,    //the two new x values lie midway between neighbouring points
+          (xcoords[leftindex - 1] +
+          xcoords[leftindex]) / 2,
+          (xcoords[rightindex - 1] +
+          xcoords[rightindex]) / 2, segments);
+        doneany = true;
+      }
+      else if (textord_debug_baselines) {
+        tprintf
+          ("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n",
+          startindex, centreindex, endindex,
+          (int32_t) textord_spline_medianwin);
+      }
+    }
+    //              else tprintf("Spline step at %d is %g\n",
+    //                      xstarts[segment],
+    //                      baseline->step((xstarts[segment-1]+xstarts[segment])/2.0,
+    //                      (xstarts[segment]+xstarts[segment+1])/2.0));
+  }
+  return doneany;
+}
+
+
+/**********************************************************************
+ * insert_spline_point
+ *
+ * Replace the boundary held in xstarts[segment] by the pair coord1,
+ * coord2.  Entries segment + 1 .. segments each move up one slot to make
+ * room and segments is incremented.  When segments > segment the highest
+ * element written is xstarts[segments + 1] (count as on entry); no bounds
+ * check is made here.
+ **********************************************************************/
+
+void insert_spline_point(int xstarts[],  //boundaries, edited in place
+                         int segment,    //slot that receives coord1
+                         int coord1,     //new value for xstarts[segment]
+                         int coord2,     //new value for xstarts[segment + 1]
+                         int &segments   //count, incremented on return
+) {
+  //Copy from the top down so nothing is overwritten before it has moved.
+  int dest = segments + 1;
+  while (dest > segment + 1) {
+    xstarts[dest] = xstarts[dest - 1];
+    --dest;
+  }
+  xstarts[segment] = coord1;
+  xstarts[segment + 1] = coord2;
+  ++segments;
+}
+
+
+/**********************************************************************
+ * find_lesser_parts
+ *
+ * For every partition other than bestpart, sum the offset of its blob
+ * bottoms from the row baseline and divide by the partition size.
+ * row->descdrop receives that mean for the largest partition lying at
+ * least MINASCRISE below the baseline (0 if there is none).
+ * row->xheight is set to -1 if more than MAXBADRUN consecutive blobs
+ * fall outside bestpart, otherwise to 1.
+ **********************************************************************/
+
+void find_lesser_parts(TO_ROW *row,        //row to update
+                       TBOX blobcoords[],  //bounding boxes
+                       int blobcount,      //no of blobs
+                       char partids[],     //partition of each blob
+                       int partsizes[],    //size of each partition
+                       int partcount,      //no of partitions
+                       int bestpart        //biggest partition
+) {
+  float partsteps[MAXPARTS];  //offset sum, later mean, per partition
+  for (int part = 0; part < partcount; ++part) {
+    partsteps[part] = 0.0;
+  }
+
+  int longest_run = 0;  //longest stretch of blobs outside bestpart
+  int current_run = 0;  //length of the stretch in progress
+  for (int blob = 0; blob < blobcount; ++blob) {
+    TBOX &box = blobcoords[blob];
+    const int mid_x = (box.left() + box.right()) >> 1;
+    //Widen through unsigned char so that ids above 127 do not go negative.
+    const int part_id =
+        static_cast<int>(static_cast<unsigned char>(partids[blob]));
+    if (part_id == bestpart) {
+      current_run = 0;
+      continue;
+    }
+    if (++current_run > longest_run) {
+      longest_run = current_run;
+    }
+    partsteps[part_id] += box.bottom() - row->baseline.y(mid_x);
+  }
+
+  //Here xheight is only a flag: -1 for failure, 1 for success.
+  row->xheight = longest_run > MAXBADRUN ? -1.0f : 1.0f;
+
+  int up_votes = 0;       //size of the largest partition found above
+  int down_votes = 0;     //size of the largest partition found below
+  float best_drop = 0.0;  //mean offset of that lower partition
+  for (int part = 0; part < partcount; ++part) {
+    if (part == bestpart) {
+      continue;
+    }
+    //An empty partition would otherwise divide by zero.
+    if (partsizes[part] != 0) {
+      partsteps[part] /= partsizes[part];
+    } else {
+      partsteps[part] = 0;
+    }
+    const bool is_above = partsteps[part] >= MINASCRISE;
+    const bool is_below = partsteps[part] <= -MINASCRISE;
+    if (is_above && partsizes[part] > up_votes) {
+      up_votes = partsizes[part];
+    }
+    if (is_below && partsizes[part] > down_votes) {
+      best_drop = partsteps[part];
+      down_votes = partsizes[part];
+    }
+  }
+  //The result of this division is not read again in this function.
+  partsteps[bestpart] /= blobcount;
+  row->descdrop = best_drop;
+}
+
+
+/**********************************************************************
+ * old_first_xheight
+ *
+ * Estimate the x-height and ascender rise of a row from the heights of
+ * its blob tops above the baseline.  A first guess (lineheight) comes
+ * from the lower quartile of the qualifying heights, or from
+ * initialheight when 3 or fewer qualify, or from the first blob when
+ * blobcount is 1 or less.  Tops within jumplimit of that guess are
+ * averaged to give the x-height; tops more than jumplimit above it are
+ * averaged as ascenders.
+ * row->xheight is multiplied by the x-height, so whatever sign it held on
+ * entry is kept; a result of 0 is replaced by -1.  row->ascrise is the
+ * mean ascender height minus the x-height, or 0 if there were none.
+ **********************************************************************/
+
+void old_first_xheight(TO_ROW *row,        //row to update
+                       TBOX blobcoords[],  //blob bounding boxes
+                       int initialheight,  //initial guess
+                       int blobcount,      //blobs in blobcoords
+                       QSPLINE *baseline,  //established baseline
+                       float jumplimit     //min ascender height
+) {
+  STATS heightstat(0, MAXHEIGHT);  //height statistics
+  int lineheight;                  //approx xheight
+
+  if (blobcount <= 1) {
+    //No statistics are gathered: use the first blob's height directly.
+    TBOX &only = blobcoords[0];
+    lineheight = static_cast<int>(
+        only.top() - baseline->y((only.left() + only.right()) / 2) + 0.5);
+  } else {
+    for (int blob = 0; blob < blobcount; ++blob) {
+      TBOX &box = blobcoords[blob];
+      const int mid_x = (box.left() + box.right()) / 2;
+      const int height =
+          static_cast<int>(box.top() - baseline->y(mid_x) + 0.5);
+      //Only heights above both thresholds are counted.
+      if (height > initialheight * oldbl_xhfract &&
+          height > textord_min_xheight) {
+        heightstat.add(height, 1);
+      }
+    }
+    if (heightstat.get_total() <= 3) {
+      lineheight = initialheight;
+    } else {
+      lineheight = static_cast<int>(heightstat.ile(0.25));
+      if (lineheight <= 0) {
+        lineheight = static_cast<int>(heightstat.ile(0.5));
+      }
+    }
+  }
+
+  float asc_sum = 0.0f;  //ascender sum
+  int asc_count = 0;     //no of ascenders
+  float x_sum = 0.0f;    //xheight sum
+  int x_count = 0;       //xheight count
+  for (int blob = 0; blob < blobcount; ++blob) {
+    TBOX &box = blobcoords[blob];
+    const int mid_x = (box.left() + box.right()) / 2;
+    const float rise = box.top() - baseline->y(mid_x);
+    if (rise > lineheight + jumplimit) {
+      asc_sum += rise;  //tall enough to be an ascender
+      ++asc_count;
+    } else if (rise > lineheight - jumplimit) {
+      x_sum += rise;  //close enough to the guess
+      ++x_count;
+    }
+  }
+
+  //With no usable sample fall back on the guess itself.
+  const float mean_x =
+      x_count > 0 ? x_sum / x_count : static_cast<float>(lineheight);
+  row->xheight *= mean_x;
+  row->ascrise = asc_count > 0 ? asc_sum / asc_count - mean_x : 0.0f;
+  if (row->xheight == 0) {
+    row->xheight = -1.0f;
+  }
+}
+
+
+/**********************************************************************
+ * make_first_xheight
+ *
+ * Build a histogram of blob-top heights above the baseline, extract its
+ * leading modes with find_top_modes and let pick_x_height choose
+ * row->xheight and row->ascrise from them.
+ * Only blobs taller than kMinHeight * init_lineheight take part.  When
+ * textord_ocropus_mode is set, a blob whose bottom lies within
+ * kBaselineTouch of the baseline is weighted kGoodStrength instead of 1.
+ * If row->xheight was not positive on entry, the chosen value is negated
+ * before returning.  jumplimit is not used.
+ **********************************************************************/
+
+void make_first_xheight(TO_ROW *row,          //current row
+                        TBOX blobcoords[],    //blob bounding boxes
+                        int lineheight,       //initial guess
+                        int init_lineheight,  //block level guess
+                        int blobcount,        //blobs in blobcoords
+                        QSPLINE *baseline,    //established baseline
+                        float jumplimit       //min ascender height
+) {
+  const int kBaselineTouch = 2;  // This really should change with resolution.
+  const int kGoodStrength = 8;  // Strength of baseline-touching heights.
+  const float kMinHeight = 0.25;  // Min fraction of lineheight to use.
+
+  STATS heightstat(0, HEIGHTBUCKETS);
+  int lefts[HEIGHTBUCKETS] = {0};   //leftmost centre seen per height
+  int rights[HEIGHTBUCKETS] = {0};  //rightmost centre seen per height
+  int modelist[MODENUM];
+
+  //Remember the incoming sign so that it can be restored at the end.
+  const bool flip_sign = !(row->xheight > 0);
+
+  int weighted_count = 0;  //sum of strengths of the blobs that took part
+  for (int blob = 0; blob < blobcount; ++blob) {
+    TBOX &box = blobcoords[blob];
+    const int xcenter = (box.left() + box.right()) / 2;
+    const float base = baseline->y(xcenter);
+    const float bottomdiff = fabs(base - box.bottom());
+    const bool touches = textord_ocropus_mode && bottomdiff <= kBaselineTouch;
+    const int strength = touches ? kGoodStrength : 1;
+    const int height = static_cast<int>(box.top() - base + 0.5);
+    if (!(box.height() > init_lineheight * kMinHeight)) {
+      continue;
+    }
+    if (height > lineheight * oldbl_xhfract &&
+        height > textord_min_xheight) {
+      heightstat.add(height, strength);
+      if (height < HEIGHTBUCKETS) {
+        if (xcenter > rights[height]) {
+          rights[height] = xcenter;
+        }
+        //A stored 0 means that no left extent has been recorded yet.
+        if (xcenter > 0 &&
+            (lefts[height] == 0 || xcenter < lefts[height])) {
+          lefts[height] = xcenter;
+        }
+      }
+    }
+    weighted_count += strength;
+  }
+
+  //The threshold is a tenth of either the weighted or the raw blob count.
+  const bool use_weighted = oldbl_dot_error_size > 1 || oldbl_xhfix;
+  const int mode_threshold =
+      static_cast<int>((use_weighted ? weighted_count : blobcount) * 0.1);
+
+  if (textord_oldbl_debug) {
+    tprintf("blobcount=%d, mode_count=%d, mode_t=%d\n",
+            blobcount, weighted_count, mode_threshold);
+  }
+  find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM);
+  if (textord_oldbl_debug) {
+    for (int m = 0; m < MODENUM; ++m) {
+      tprintf("mode[%d]=%d ", m, modelist[m]);
+    }
+    tprintf("\n");
+  }
+  pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold);
+
+  if (textord_oldbl_debug) {
+    tprintf("Output xheight=%g\n", row->xheight);
+    if (row->xheight < 0) {
+      tprintf("warning: Row Line height < 0; %4.2f\n", row->xheight);
+    }
+  }
+
+  if (flip_sign) {
+    row->xheight = -row->xheight;
+  }
+}
+
+/**********************************************************************
+ * find_top_modes
+ *
+ * Fill modelist with the indices of modenum piles of stats, taken in
+ * descending order of count (equal counts in ascending index order).
+ * Only piles whose count exceeds that of pile 0 are candidates, so an
+ * entry of 0 means that no mode was found.  An entry is also stored as 0
+ * when its count is no more than 1/mode_factor of the running total of
+ * the counts selected so far.
+ **********************************************************************/
+
+const int kMinModeFactorOcropus = 32;
+const int kMinModeFactor = 12;
+
+void find_top_modes(STATS *stats,    //stats to search
+                    int statnum,     //no of piles
+                    int modelist[],  //output: mode indices
+                    int modenum      //no of modes to get
+) {
+  const int mode_factor =
+      textord_ocropus_mode ? kMinModeFactorOcropus : kMinModeFactor;
+  int prev_index = 0;          //pile chosen on the previous pass
+  int prev_count = INT32_MAX;  //its count; no limit on the first pass
+  int running_total = 0;       //sum of the counts chosen so far
+
+  for (int slot = 0; slot < modenum; ++slot) {
+    int best = 0;
+    for (int pile = 0; pile < statnum; ++pile) {
+      const int count = stats->pile_count(pile);
+      if (count <= stats->pile_count(best)) {
+        continue;
+      }
+      //Accept only piles that rank after the one chosen last time.
+      const bool follows_prev =
+          count < prev_count || (count == prev_count && pile > prev_index);
+      if (follows_prev) {
+        best = pile;
+      }
+    }
+    prev_index = best;
+    prev_count = stats->pile_count(best);
+    running_total += prev_count;
+    //Too weak a mode is recorded as 0, but still bounds the next pass.
+    modelist[slot] = prev_count <= running_total / mode_factor ? 0 : best;
+  }
+}
+
+
+/**********************************************************************
+ * pick_x_height
+ *
+ * Choose based on the height modes the best x height value.
+ *
+ * First looks for a pair of non-zero modes (x, y) whose height ratio
+ * modelist[y] / modelist[x] lies strictly between 1.2 and 1.8, where the
+ * pile for mode x holds more than mode_threshold entries.  The first such
+ * pair gives the x-height (mode x) and the ascender height (mode y); each
+ * may then be moved upwards before row->xheight and row->ascrise are set
+ * from them.  If no pair qualifies, modelist[0] is used on its own,
+ * row->ascrise is set to 0 and an x-height of 0 is reported as -1.
+ **********************************************************************/
+
+void pick_x_height(TO_ROW * row,                    //row to do
+                   int modelist[],                  //mode heights, 0 = unused
+                   int lefts[], int rights[],       //x extents per height
+                   STATS * heightstat,              //height histogram
+                   int mode_threshold) {            //min count for mode x
+  int x;
+  int y;
+  int z;
+  float ratio;
+  int found_one_bigger = false;
+  int best_x_height = 0;
+  int best_asc = 0;
+  int num_in_best;
+
+  for (x = 0; x < MODENUM; x++) {
+    for (y = 0; y < MODENUM; y++) {
+      /* Check for two modes */
+      // In ocropus mode the [lefts, rights] ranges of the two heights must
+      // also overlap.
+      if (modelist[x] && modelist[y] &&
+          heightstat->pile_count (modelist[x]) > mode_threshold &&
+          (!textord_ocropus_mode ||
+                  std::min(rights[modelist[x]], rights[modelist[y]]) >
+                   std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
+        ratio = static_cast<float>(modelist[y]) / static_cast<float>(modelist[x]);
+        if (1.2 < ratio && ratio < 1.8) {
+          /* Two modes found */
+          best_x_height = modelist[x];
+          num_in_best = heightstat->pile_count (modelist[x]);
+
+          /* Try to get one higher */
+          // Step the x-height up by 1 while some mode equals the next
+          // height, keeps the ascender ratio in range and holds more than
+          // half the count of mode x.
+          // NOTE(review): the overlap test below re-checks modes x and y,
+          // which already passed above, rather than mode z - confirm.
+          do {
+            found_one_bigger = false;
+            for (z = 0; z < MODENUM; z++) {
+              if (modelist[z] == best_x_height + 1 &&
+                  (!textord_ocropus_mode ||
+                          std::min(rights[modelist[x]], rights[modelist[y]]) >
+                            std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
+                ratio = static_cast<float>(modelist[y]) / static_cast<float>(modelist[z]);
+                if ((1.2 < ratio && ratio < 1.8) &&
+                               /* Should be half of best */
+                    heightstat->pile_count (modelist[z]) >
+                    num_in_best * 0.5) {
+                  best_x_height++;
+                  found_one_bigger = true;
+                  break;
+                }
+              }
+            }
+          }
+          while (found_one_bigger);
+
+          /* try to get a higher ascender */
+
+          best_asc = modelist[y];
+          num_in_best = heightstat->pile_count (modelist[y]);
+
+          /* Try to get one higher */
+          // Move the ascender to any taller mode that keeps the ratio to
+          // the chosen x-height in range and holds more than half the
+          // count of mode y; repeat until no such mode is left.
+          // NOTE(review): same x/y overlap test as above - confirm.
+          do {
+            found_one_bigger = false;
+            for (z = 0; z < MODENUM; z++) {
+              if (modelist[z] > best_asc &&
+                  (!textord_ocropus_mode ||
+                          std::min(rights[modelist[x]], rights[modelist[y]]) >
+                            std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
+                ratio = static_cast<float>(modelist[z]) / static_cast<float>(best_x_height);
+                if ((1.2 < ratio && ratio < 1.8) &&
+                               /* Should be half of best */
+                    heightstat->pile_count (modelist[z]) >
+                    num_in_best * 0.5) {
+                  best_asc = modelist[z];
+                  found_one_bigger = true;
+                  break;
+                }
+              }
+            }
+          }
+          while (found_one_bigger);
+
+          // The first qualifying pair wins; later pairs are not examined.
+          row->xheight = static_cast<float>(best_x_height);
+          row->ascrise = static_cast<float>(best_asc) - best_x_height;
+          return;
+        }
+      }
+    }
+  }
+
+  // No pair of modes qualified: fall back on the first mode alone.
+  best_x_height = modelist[0];   /* Single Mode found */
+  num_in_best = heightstat->pile_count (best_x_height);
+  do {
+                                 /* Try to get one higher */
+    found_one_bigger = false;
+    for (z = 1; z < MODENUM; z++) {
+      /* Should be half of best */
+      if ((modelist[z] == best_x_height + 1) &&
+      (heightstat->pile_count (modelist[z]) > num_in_best * 0.5)) {
+        best_x_height++;
+        found_one_bigger = true;
+        break;
+      }
+    }
+  }
+  while (found_one_bigger);
+
+  row->ascrise = 0.0f;
+  row->xheight = static_cast<float>(best_x_height);
+  // A height of 0 means no mode was available; report it as -1.
+  if (row->xheight == 0)
+    row->xheight = -1.0f;
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/oldbasel.h b/tesseract/src/textord/oldbasel.h
new file mode 100644
index 00000000..0e25df0d
--- /dev/null
+++ b/tesseract/src/textord/oldbasel.h
@@ -0,0 +1,164 @@
+/**********************************************************************
+ * File:        oldbasel.h  (Formerly oldbl.h)
+ * Description: A re-implementation of the old baseline algorithm.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           OLDBASEL_H
+#define           OLDBASEL_H
+
+#include          "params.h"
+#include          "blobbox.h"
+
+namespace tesseract {
+
+// Declarations for the old baseline fitting code.
+
+extern BOOL_VAR_H (textord_oldbl_debug, false,
+"Debug old baseline generation");
+
+int get_blob_coords(                    //get boxes
+        TO_ROW* row,        //row to use
+        int32_t lineheight,   //block level
+        TBOX* blobcoords,    //output boxes
+        bool& holed_line,  //lost a lot of blobs
+        int& outcount       //no of real blobs
+);
+void make_first_baseline (       //initial approximation
+TBOX blobcoords[],                /*blob bounding boxes */
+int blobcount,                   /*no of blobcoords */
+int xcoords[],                   /*coords for spline */
+int ycoords[],                   /*approximator */
+QSPLINE * spline,                /*initial spline */
+QSPLINE * baseline,              /*output spline */
+float jumplimit                  /*guess half descenders */
+);
+void make_holed_baseline (       //initial approximation
+TBOX blobcoords[],                /*blob bounding boxes */
+int blobcount,                   /*no of blobcoords */
+QSPLINE * spline,                /*initial spline */
+QSPLINE * baseline,              /*output spline */
+float gradient                   //of line
+);
+int partition_line (             //partition blobs
+TBOX blobcoords[],                //bounding boxes
+int blobcount,                   /*no of blobs on row */
+int *numparts,                   /*number of partitions */
+char partids[],                  /*partition no of each blob */
+int partsizes[],                 /*no in each partition */
+QSPLINE * spline,                /*curve to fit to */
+float jumplimit,                 /*allowed delta change */
+float ydiffs[]                   /*diff from spline */
+);
+void merge_oldbl_parts (         //partition blobs
+TBOX blobcoords[],                //bounding boxes
+int blobcount,                   /*no of blobs on row */
+char partids[],                  /*partition no of each blob */
+int partsizes[],                 /*no in each partition */
+int biggestpart,                 //major partition
+float jumplimit                  /*allowed delta change */
+);
+int get_ydiffs (                 //evaluate differences
+TBOX blobcoords[],                //bounding boxes
+int blobcount,                   /*no of blobs */
+QSPLINE * spline,                /*approximating spline */
+float ydiffs[]                   /*output */
+);
+int choose_partition (           //select partition
+float diff,             /*diff from spline */
+float partdiffs[],               /*diff on all parts */
+int lastpart,                    /*last assigned partition */
+float jumplimit,                 /*new part threshold */
+float* drift,
+float* last_delta,
+int *partcount                   /*no of partitions */
+);
+int partition_coords (           //find relevant coords
+TBOX blobcoords[],                //bounding boxes
+int blobcount,                   /*no of blobs in row */
+char partids[],                  /*partition no of each blob */
+int bestpart,                    /*best new partition */
+int xcoords[],                   /*points to work on */
+int ycoords[]                    /*points to work on */
+);
+int segment_spline (             //make xstarts
+TBOX blobcoords[],                //bounding boxes
+int blobcount,                   /*no of blobs in row */
+int xcoords[],                   /*points to work on */
+int ycoords[],                   /*points to work on */
+int degree, int pointcount,      /*no of points */
+int xstarts[]                    //result
+);
+bool split_stepped_spline(     //make xstarts
+        QSPLINE* baseline,              //current shot
+        float jumplimit,                 //max step function
+        int* xcoords,                   /*points to work on */
+        int* xstarts,                   //result
+        int& segments                    //no of segments
+);
+// Replaces xstarts[segment] with coord1 and coord2, moving the later
+// entries up one slot and incrementing segments.
+void insert_spline_point (       //add a spline point
+int xstarts[],                   //starts to shuffle
+int segment,                     //insertion pt
+int coord1,                      //coords to add
+int coord2, int &segments        //total segments
+);
+// Sets row->descdrop from the blobs outside bestpart and leaves
+// row->xheight as a -1 (failed) or 1 (success) flag.
+void find_lesser_parts (         //get descenders
+TO_ROW * row,                    //row to process
+TBOX blobcoords[],                //bounding boxes
+int blobcount,                   /*no of blobs */
+char partids[],                  /*partition of each blob */
+int partsizes[],                 /*size of each part */
+int partcount,                   /*no of partitions */
+int bestpart                     /*biggest partition */
+);
+
+// Multiplies row->xheight by a mean x-height measured from the blob tops
+// and sets row->ascrise.
+void old_first_xheight (         //the wiseowl way
+TO_ROW * row,                    /*current row */
+TBOX blobcoords[],                /*blob bounding boxes */
+int initialheight,               //initial guess
+int blobcount,                   /*blobs in blobcoords */
+QSPLINE * baseline,              /*established */
+float jumplimit                  /*min ascender height */
+);
+
+// Histograms the blob-top heights, then uses find_top_modes and
+// pick_x_height to set row->xheight and row->ascrise.
+void make_first_xheight (        //find xheight
+TO_ROW * row,                    /*current row */
+TBOX blobcoords[],                /*blob bounding boxes */
+int lineheight,                  //initial guess
+int init_lineheight,             //block level guess
+int blobcount,                   /*blobs in blobcoords */
+QSPLINE * baseline,              /*established */
+float jumplimit                  /*min ascender height */
+);
+
+// NOTE(review): oldbasel.cpp defines nothing between make_first_xheight
+// and find_top_modes - confirm that this function still has a definition.
+int *make_height_array (         //get array of heights
+TBOX blobcoords[],                /*blob bounding boxes */
+int blobcount,                   /*blobs in blobcoords */
+QSPLINE * baseline               /*established */
+);
+
+// Writes the indices of the modenum leading piles of stats to modelist;
+// an entry of 0 marks a missing or rejected mode.
+void find_top_modes (            //get modes
+STATS * stats,                   //stats to hack
+int statnum,                     //no of piles
+int modelist[], int modenum      //no of modes to get
+);
+
+// Chooses row->xheight and row->ascrise from the modes in modelist.
+void pick_x_height(TO_ROW * row,                    //row to do
+int modelist[],
+int lefts[], int rights[],
+STATS * heightstat,
+int mode_threshold);
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/pithsync.cpp b/tesseract/src/textord/pithsync.cpp
new file mode 100644
index 00000000..462f0b3c
--- /dev/null
+++ b/tesseract/src/textord/pithsync.cpp
@@ -0,0 +1,693 @@
+/**********************************************************************
+ * File:        pithsync.cpp  (Formerly pitsync2.c)
+ * Description: Code to find the optimum fixed pitch segmentation of some blobs.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "pithsync.h"
+
+#include "makerow.h"
+#include "pitsync1.h"
+#include "topitch.h"
+#include "tprintf.h"
+
+#include <cmath>
+#include <cfloat>       // for FLT_MAX
+#include <vector>       // for std::vector
+
+namespace tesseract {
+
+/**********************************************************************
+ * FPCUTPT::setup
+ *
+ * Initialise this cut point as the start of a path at position x: no
+ * predecessor, zero fake and mid-cut counts, and both sq_sum and cost
+ * set to offset squared.
+ *
+ * back_balance and fwd_balance are bit masks built from the projection,
+ * where a bit is set if the pile count exceeds zero_count.  back_balance
+ * shifts left one bit per position with x itself in bit 0, and is
+ * limited to half_pitch + 1 bits.  fwd_balance shifts right one bit per
+ * position with x + half_pitch in the top (lead_flag) bit.
+ * half_pitch is pitch / 2 - 1 clamped to 0..31.
+ * When x is not array_origin both masks are derived from the entry for
+ * x - 1 in cutpts, which must already be set up.
+ **********************************************************************/
+
+void FPCUTPT::setup(                     //constructor
+                    FPCUTPT *cutpts,     //predecessors
+                    int16_t array_origin,  //start coord
+                    STATS *projection,   //vertical occupation
+                    int16_t zero_count,    //official zero
+                    int16_t pitch,         //proposed pitch
+                    int16_t x,             //position
+                    int16_t offset         //dist to gap
+                   ) {
+                                 //half of pitch
+  int16_t half_pitch = pitch / 2 - 1;
+  uint32_t lead_flag;              //top bit of the balance masks
+
+  if (half_pitch > 31)
+    half_pitch = 31;
+  else if (half_pitch < 0)
+    half_pitch = 0;
+  //Shift in the unsigned type: with half_pitch == 31 the value does not
+  //fit in a signed int.
+  lead_flag = static_cast<uint32_t>(1) << half_pitch;
+
+  pred = nullptr;
+  mean_sum = 0;
+  sq_sum = offset * offset;
+  cost = sq_sum;
+  faked = false;
+  terminal = false;
+  fake_count = 0;
+  xpos = x;
+  region_index = 0;
+  mid_cuts = 0;
+  if (x == array_origin) {
+    //First position: nothing lies behind, so build the forward mask
+    //from scratch.
+    //NOTE(review): the loop reads pile_count(ind) for ind = 0..half_pitch
+    //while the branch below reads pile_count(x + half_pitch); confirm
+    //whether array_origin + ind was intended here.
+    back_balance = 0;
+    fwd_balance = 0;
+    for (int32_t ind = 0; ind <= half_pitch; ind++) {
+      fwd_balance >>= 1;
+      if (projection->pile_count (ind) > zero_count)
+        fwd_balance |= lead_flag;
+    }
+  }
+  else {
+    //Slide both masks along by one position from the entry for x - 1.
+    back_balance = cutpts[x - 1 - array_origin].back_balance << 1;
+    back_balance &= lead_flag + (lead_flag - 1);
+    if (projection->pile_count (x) > zero_count)
+      back_balance |= 1;
+    fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1;
+    if (projection->pile_count (x + half_pitch) > zero_count)
+      fwd_balance |= lead_flag;
+  }
+}
+
+
+/**********************************************************************
+ * FPCUTPT::assign
+ *
+ * Fill in this cut point for position x by choosing the cheapest
+ * predecessor among the cutpts entries for positions
+ * x - pitch - pitch_error .. x - pitch + pitch_error (positions before
+ * array_origin are skipped).
+ *
+ * For each usable predecessor (not terminal, fake_count below INT16_MAX)
+ * the candidate cost is
+ *   (mean - pitch)^2 + sq_dist / r_index - mean^2
+ * where r_index is the predecessor's region_index + 1, mean is the
+ * accumulated cut spacing divided by r_index, and sq_dist accumulates the
+ * squared spacings plus the square of (balance penalty + offset).
+ * A candidate replaces the current choice only if it is cheaper and does
+ * not need more faked cuts.  If nothing qualifies, pred stays nullptr,
+ * cost stays FLT_MAX and fake_count stays INT16_MAX.
+ *
+ * The balance masks are slid along from the entry for x - 1, so this
+ * reads cutpts[x - 1 - array_origin] and x must lie beyond array_origin.
+ **********************************************************************/
+
+void FPCUTPT::assign(                         //constructor
+        FPCUTPT* cutpts,         //predecessors
+        int16_t array_origin,      //start coord
+        int16_t x,                 //position
+        bool faking,            //faking this one
+        bool mid_cut,           //cheap cut.
+        int16_t offset,            //dist to gap
+        STATS* projection,       //vertical occupation
+        float projection_scale,  //scaling
+        int16_t zero_count,        //official zero
+        int16_t pitch,             //proposed pitch
+        int16_t pitch_error        //allowed tolerance
+) {
+  int index;                     //test index
+  int balance_index;             //for balance factor
+  int16_t balance_count;           //ding factor
+  int16_t r_index;                 //test cut number
+  FPCUTPT *segpt;                //segment point
+  int32_t dist;                    //from prev segment
+  double sq_dist;                //squared distance
+  double mean;                   //mean pitch
+  double total;                  //total dists
+  double factor;                 //cost function
+                                 //half of pitch
+  int16_t half_pitch = pitch / 2 - 1;
+  uint32_t lead_flag;              //top bit of the balance masks
+
+  if (half_pitch > 31)
+    half_pitch = 31;
+  else if (half_pitch < 0)
+    half_pitch = 0;
+  //Shift in the unsigned type: with half_pitch == 31 the value does not
+  //fit in a signed int.
+  lead_flag = static_cast<uint32_t>(1) << half_pitch;
+
+  //Slide both occupancy masks along by one position.
+  back_balance = cutpts[x - 1 - array_origin].back_balance << 1;
+  back_balance &= lead_flag + (lead_flag - 1);
+  if (projection->pile_count (x) > zero_count)
+    back_balance |= 1;
+  fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1;
+  if (projection->pile_count (x + half_pitch) > zero_count)
+    fwd_balance |= lead_flag;
+
+  //Start with no path; these values remain if no predecessor qualifies.
+  xpos = x;
+  cost = FLT_MAX;
+  pred = nullptr;
+  faked = faking;
+  terminal = false;
+  region_index = 0;
+  fake_count = INT16_MAX;
+  for (index = x - pitch - pitch_error; index <= x - pitch + pitch_error;
+  index++) {
+    if (index >= array_origin) {
+      segpt = &cutpts[index - array_origin];
+      dist = x - segpt->xpos;
+      if (!segpt->terminal && segpt->fake_count < INT16_MAX) {
+        balance_count = 0;
+        if (textord_balance_factor > 0) {
+          if (textord_fast_pitch_test) {
+            //Count the bits in which the two masks disagree.
+            uint32_t diff_bits = back_balance ^ segpt->fwd_balance;
+            while (diff_bits != 0) {
+              balance_count++;
+              diff_bits &= diff_bits - 1;  //clear the lowest set bit
+            }
+          }
+          else {
+            //Walk inwards from both ends of the cell and count the
+            //positions where exactly one side is at or below zero_count.
+            for (balance_index = 0;
+              index + balance_index < x - balance_index;
+              balance_index++) {
+              balance_count +=
+                (projection->pile_count (index + balance_index) <=
+                zero_count) ^ (projection->pile_count (x -
+                balance_index)
+                <= zero_count);
+            }
+          }
+          balance_count =
+            static_cast<int16_t>(balance_count * textord_balance_factor /
+            projection_scale);
+        }
+        r_index = segpt->region_index + 1;
+        total = segpt->mean_sum + dist;
+        balance_count += offset;
+        sq_dist =
+          dist * dist + segpt->sq_sum + balance_count * balance_count;
+        mean = total / r_index;
+        factor = mean - pitch;
+        factor *= factor;
+        factor += sq_dist / (r_index) - mean * mean;
+        if (factor < cost && segpt->fake_count + faked <= fake_count) {
+          cost = factor;         //find least cost
+          pred = segpt;          //save path
+          mean_sum = total;
+          sq_sum = sq_dist;
+          fake_count = segpt->fake_count + faked;
+          mid_cuts = segpt->mid_cuts + mid_cut;
+          region_index = r_index;
+        }
+      }
+    }
+  }
+}
+
+
+/**********************************************************************
+ * FPCUTPT::assign_cheap
+ *
+ * Evaluate the cut at x on the cheap: the only predecessor considered is the
+ * point exactly one pitch to the left. If that point lies before the array,
+ * is terminal or has fake_count INT16_MAX, cost stays FLT_MAX and pred null.
+ **********************************************************************/
+
+void FPCUTPT::assign_cheap(                         //evaluate cut
+                           FPCUTPT *cutpts,         //predecessors
+                           int16_t array_origin,    //start coord
+                           int16_t x,               //position
+                           bool faking,             //faking this one
+                           bool mid_cut,            //cheap cut.
+                           int16_t offset,          //dist to gap
+                           STATS *projection,       //vertical occupation
+                           float projection_scale,  //scaling
+                           int16_t zero_count,      //official zero
+                           int16_t pitch,           //proposed pitch
+                           int16_t pitch_error      //allowed tolerance (unused here)
+                          ) {
+  int index;                     //test index
+  int16_t balance_count;           //ding factor
+  int16_t r_index;                 //test cut number
+  FPCUTPT *segpt;                //segment point
+  int32_t dist;                    //from prev segment
+  double sq_dist;                //squared distance
+  double mean;                   //mean pitch
+  double total;                  //total dists
+  double factor;                 //cost function
+                                 //half of pitch
+  int16_t half_pitch = pitch / 2 - 1;
+  uint32_t lead_flag;              //new flag
+
+  if (half_pitch > 31)
+    half_pitch = 31;
+  else if (half_pitch < 0)
+    half_pitch = 0;
+  lead_flag = static_cast<uint32_t>(1) << half_pitch;  //unsigned: shift may be 31
+
+  back_balance = cutpts[x - 1 - array_origin].back_balance << 1;
+  back_balance &= lead_flag + (lead_flag - 1);  //keep bits 0..half_pitch
+  if (projection->pile_count (x) > zero_count)
+    back_balance |= 1;
+  fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1;
+  if (projection->pile_count (x + half_pitch) > zero_count)
+    fwd_balance |= lead_flag;
+
+  xpos = x;
+  cost = FLT_MAX;                //until a usable predecessor is found
+  pred = nullptr;
+  faked = faking;
+  terminal = false;
+  region_index = 0;
+  fake_count = INT16_MAX;
+  index = x - pitch;             //sole candidate predecessor
+  if (index >= array_origin) {
+    segpt = &cutpts[index - array_origin];
+    dist = x - segpt->xpos;
+    if (!segpt->terminal && segpt->fake_count < INT16_MAX) {
+      balance_count = 0;
+      if (textord_balance_factor > 0) {
+        lead_flag = back_balance ^ segpt->fwd_balance;
+        while (lead_flag != 0) {   //count the differing bits
+          balance_count++;
+          lead_flag &= lead_flag - 1;
+        }
+        balance_count = static_cast<int16_t>(balance_count * textord_balance_factor
+          / projection_scale);
+      }
+      r_index = segpt->region_index + 1;
+      total = segpt->mean_sum + dist;
+      balance_count += offset;
+      sq_dist = dist * dist + segpt->sq_sum + balance_count * balance_count;
+      mean = total / r_index;
+      factor = mean - pitch;
+      factor *= factor;
+      factor += sq_dist / (r_index) - mean * mean;
+      cost = factor;             //only one candidate, so it is the best
+      pred = segpt;              //save path
+      mean_sum = total;
+      sq_sum = sq_dist;
+      fake_count = segpt->fake_count + faked;
+      mid_cuts = segpt->mid_cuts + mid_cut;
+      region_index = r_index;
+    }
+  }
+}
+
+
+/**********************************************************************
+ * check_pitch_sync2
+ *
+ * Construct the lattice of possible segmentation points and choose the
+ * optimal path. Only the optimal path is copied into seg_list.
+ * The return value is a measure of goodness of the sync.
+ **********************************************************************/
+
+double check_pitch_sync2(                          //find segmentation
+                         BLOBNBOX_IT *blob_it,     //blobs to do
+                         int16_t blob_count,         //no of blobs
+                         int16_t pitch,              //pitch estimate
+                         int16_t pitch_error,        //tolerance
+                         STATS *projection,        //vertical
+                         int16_t projection_left,    //edges
+                         int16_t projection_right,
+                         float projection_scale,     //scale factor
+                         int16_t &occupation_count,  //no of occupied cells
+                         FPSEGPT_LIST *seg_list,   //output list
+                         int16_t start,              //start of good range
+                         int16_t end                 //end of good range
+                        ) {
+  bool faking;                  //illegal cut pt
+  bool mid_cut;                 //cheap cut pt.
+  int16_t x;                       //current coord
+  int16_t blob_index;              //blob number
+  int16_t left_edge;               //of word
+  int16_t right_edge;              //of word
+  int16_t array_origin;            //x coord of array
+  int16_t offset;                  //dist to legal area
+  int16_t zero_count;              //projection zero
+  int16_t best_left_x = 0;         //for equals
+  int16_t best_right_x = 0;        //right edge
+  TBOX this_box;                  //bounding box
+  TBOX next_box;                  //box of next blob
+  FPSEGPT *segpt;                //segment point
+  double best_cost;              //best path
+  double mean_sum;               //computes result
+  FPCUTPT *best_end;             //end of best path
+  int16_t best_fake;               //best fake level
+  int16_t best_count;              //no of cuts
+  BLOBNBOX_IT this_it;           //copy iterator
+  FPSEGPT_IT seg_it = seg_list;  //output iterator
+
+  //      tprintf("Computing sync on word of %d blobs with pitch %d\n",
+  //              blob_count, pitch);
+  //      if (blob_count==8 && pitch==27)
+  //              projection->print(stdout,true);
+  zero_count = 0;
+  if (pitch < 3)
+    pitch = 3;                   //nothing ludicrous
+  if ((pitch - 3) / 2 < pitch_error)
+    pitch_error = (pitch - 3) / 2;
+  this_it = *blob_it;
+  this_box = box_next (&this_it);//get box
+  //      left_edge=this_box.left();                                              //left of word
+  //      right_edge=this_box.right();
+  //      for (blob_index=1;blob_index<blob_count;blob_index++)
+  //      {
+  //              this_box=box_next(&this_it);
+  //              if (this_box.right()>right_edge)
+  //                      right_edge=this_box.right();
+  //      }
+  for (left_edge = projection_left; projection->pile_count (left_edge) == 0
+    && left_edge < projection_right; left_edge++);
+  for (right_edge = projection_right; projection->pile_count (right_edge) == 0
+    && right_edge > left_edge; right_edge--);
+  ASSERT_HOST (right_edge >= left_edge);
+  if (pitsync_linear_version >= 4)    //hand over to the projection-only version
+    return check_pitch_sync3 (projection_left, projection_right, zero_count,
+      pitch, pitch_error, projection,
+      projection_scale, occupation_count, seg_list,
+      start, end);
+  array_origin = left_edge - pitch;
+  // array of points
+  std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
+  for (x = array_origin; x < left_edge; x++)
+                                 //free cuts
+    cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
+      zero_count, pitch, x, 0);
+  for (offset = 0; offset <= pitch_error; offset++, x++)
+                                 //not quite free
+    cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
+      zero_count, pitch, x, offset);
+
+  this_it = *blob_it;
+  best_cost = FLT_MAX;
+  best_end = nullptr;
+  this_box = box_next (&this_it);//first box
+  next_box = box_next (&this_it);//second box
+  blob_index = 1;
+  while (x < right_edge - pitch_error) {
+    if (x > this_box.right () + pitch_error && blob_index < blob_count) {
+      this_box = next_box;
+      next_box = box_next (&this_it);
+      blob_index++;
+    }
+    faking = false;
+    mid_cut = false;
+    if (x <= this_box.left ())
+      offset = 0;
+    else if (x <= this_box.left () + pitch_error)
+      offset = x - this_box.left ();
+    else if (x >= this_box.right ())
+      offset = 0;
+    else if (x >= next_box.left () && blob_index < blob_count) {
+      offset = x - next_box.left ();
+      if (this_box.right () - x < offset)
+        offset = this_box.right () - x;
+    }
+    else if (x >= this_box.right () - pitch_error)
+      offset = this_box.right () - x;
+    else if (x - this_box.left () > pitch * pitsync_joined_edge
+    && this_box.right () - x > pitch * pitsync_joined_edge) {
+      mid_cut = true;            //far enough inside a wide blob
+      offset = 0;
+    }
+    else {
+      faking = true;
+      offset = projection->pile_count (x);
+    }
+    cutpts[x - array_origin].assign (&cutpts[0], array_origin, x,
+      faking, mid_cut, offset, projection,
+      projection_scale, zero_count, pitch,
+      pitch_error);
+    x++;
+  }
+
+  best_fake = INT16_MAX;
+  best_cost = INT32_MAX;
+  best_count = INT16_MAX;
+  while (x < right_edge + pitch) {
+    offset = x < right_edge ? right_edge - x : 0;
+    cutpts[x - array_origin].assign (&cutpts[0], array_origin, x,
+      false, false, offset, projection,
+      projection_scale, zero_count, pitch,
+      pitch_error);
+    cutpts[x - array_origin].terminal = true;
+    if (cutpts[x - array_origin].index () +
+    cutpts[x - array_origin].fake_count <= best_count + best_fake) {
+      if (cutpts[x - array_origin].fake_count < best_fake
+        || (cutpts[x - array_origin].fake_count == best_fake
+      && cutpts[x - array_origin].cost_function () < best_cost)) {
+        best_fake = cutpts[x - array_origin].fake_count;
+        best_cost = cutpts[x - array_origin].cost_function ();
+        best_left_x = x;
+        best_right_x = x;
+        best_count = cutpts[x - array_origin].index ();
+      }
+      else if (cutpts[x - array_origin].fake_count == best_fake
+        && x == best_right_x + 1
+      && cutpts[x - array_origin].cost_function () == best_cost) {
+      //exactly equal
+        best_right_x = x;
+      }
+    }
+    x++;
+  }
+  ASSERT_HOST (best_fake < INT16_MAX);
+
+  best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin];
+  if (this_box.right () == textord_test_x
+  && this_box.top () == textord_test_y) {
+    for (x = left_edge - pitch; x < right_edge + pitch; x++) {
+      FPCUTPT *prev = cutpts[x - array_origin].previous ();  //null at a path start
+      tprintf ("x=%d, C=%g, s=%g, sq=%g, prev=%d\n",
+        x, cutpts[x - array_origin].cost_function (),
+        cutpts[x - array_origin].sum (), cutpts[x - array_origin].squares (),
+        prev != nullptr ? prev->position () : -1);
+    }
+  }
+  occupation_count = -1;
+  do {
+    for (x = best_end->position () - pitch + pitch_error;
+      x < best_end->position () - pitch_error
+      && projection->pile_count (x) == 0; x++);
+    if (x < best_end->position () - pitch_error)
+      occupation_count++;
+                                 //copy it
+    segpt = new FPSEGPT (best_end);
+    seg_it.add_before_then_move (segpt);
+    best_end = best_end->previous ();
+  }
+  while (best_end != nullptr);
+  seg_it.move_to_last ();
+  mean_sum = seg_it.data ()->sum ();
+  mean_sum = mean_sum * mean_sum / best_count;
+  if (seg_it.data ()->squares () - mean_sum < 0)
+    tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n",
+      seg_it.data ()->squares (), seg_it.data ()->sum (), best_count);
+  //      tprintf("blob_count=%d, pitch=%d, sync=%g, occ=%d\n",
+  //              blob_count,pitch,seg_it.data()->squares()-mean_sum,
+  //              occupation_count);
+  return seg_it.data ()->squares () - mean_sum;
+}
+
+
+/**********************************************************************
+ * check_pitch_sync3
+ *
+ * Construct the lattice of possible segmentation points from the projection
+ * alone and choose the optimal path. Only that path is copied into seg_list.
+ * The return value is a measure of goodness of the sync.
+ **********************************************************************/
+
+double check_pitch_sync3(                          //find segmentation
+                         int16_t projection_left,    //edges
+                         int16_t projection_right,
+                         int16_t zero_count,         //to be considered 0
+                         int16_t pitch,              //pitch estimate
+                         int16_t pitch_error,        //tolerance
+                         STATS *projection,        //vertical
+                         float projection_scale,   //scale factor
+                         int16_t &occupation_count,  //no of occupied cells
+                         FPSEGPT_LIST *seg_list,   //output list
+                         int16_t start,              //start of good range
+                         int16_t end                 //end of good range
+                        ) {
+  bool faking;                   //illegal cut pt
+  bool mid_cut;                  //cheap cut pt.
+  int16_t left_edge;             //of word
+  int16_t right_edge;            //of word
+  int16_t x;                     //current coord
+  int16_t array_origin;          //x coord of array
+  int16_t offset;                //dist to legal area
+  int16_t projection_offset;     //from scaled projection
+  int16_t prev_zero;             //previous zero dist
+  int16_t next_zero;             //next zero dist
+  int16_t zero_offset;           //scan window
+  int16_t best_left_x = 0;       //for equals
+  int16_t best_right_x = 0;      //right edge
+  FPSEGPT *segpt;                //segment point
+  int minindex;                  //next input position
+  int test_index;                //index to mins
+  double best_cost;              //best path
+  double mean_sum;               //computes result
+  FPCUTPT *best_end;             //end of best path
+  int16_t best_fake;             //best fake level
+  int16_t best_count;            //no of cuts
+  FPSEGPT_IT seg_it = seg_list;  //output iterator
+
+  if (pitch < 3)
+    pitch = 3;                   //nothing ludicrous
+  end = (end - start) % pitch;   //after the clamp: pitch can no longer be 0
+  if ((pitch - 3) / 2 < pitch_error)
+    pitch_error = (pitch - 3) / 2;
+                                 //min dist of zero
+  zero_offset = static_cast<int16_t>(pitch * pitsync_joined_edge);
+  for (left_edge = projection_left; projection->pile_count (left_edge) == 0
+    && left_edge < projection_right; left_edge++);
+  for (right_edge = projection_right; projection->pile_count (right_edge) == 0
+    && right_edge > left_edge; right_edge--);
+  array_origin = left_edge - pitch;
+  // array of points
+  std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
+  // local min results, used as a circular buffer indexed by minindex
+  std::vector<bool> mins(pitch_error * 2 + 1);
+  for (x = array_origin; x < left_edge; x++)
+                                 //free cuts
+    cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
+      zero_count, pitch, x, 0);
+  prev_zero = left_edge - 1;
+  for (offset = 0; offset <= pitch_error; offset++, x++)
+                                 //not quite free
+    cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
+      zero_count, pitch, x, offset);
+
+  best_cost = FLT_MAX;
+  best_end = nullptr;
+  for (offset = -pitch_error, minindex = 0; offset < pitch_error;
+    offset++, minindex++)
+  mins[minindex] = projection->local_min (x + offset);
+  next_zero = x + zero_offset + 1;
+  for (offset = next_zero - 1; offset >= x; offset--) {
+    if (projection->pile_count (offset) <= zero_count) {
+      next_zero = offset;
+      break;
+    }
+  }
+  while (x < right_edge - pitch_error) {
+    mins[minindex] = projection->local_min (x + pitch_error);
+    minindex++;
+    if (minindex > pitch_error * 2)
+      minindex = 0;              //wrap the circular buffer
+    faking = false;
+    mid_cut = false;
+    offset = 0;
+    if (projection->pile_count (x) <= zero_count) {
+      prev_zero = x;
+    }
+    else {
+      for (offset = 1; offset <= pitch_error; offset++)
+        if (projection->pile_count (x + offset) <= zero_count
+        || projection->pile_count (x - offset) <= zero_count)
+          break;
+    }
+    if (offset > pitch_error) {  //no zero within pitch_error of x
+      if (x - prev_zero > zero_offset && next_zero - x > zero_offset) {
+        for (offset = 0; offset <= pitch_error; offset++) {
+          test_index = minindex + pitch_error + offset;
+          if (test_index > pitch_error * 2)
+            test_index -= pitch_error * 2 + 1;
+          if (mins[test_index])
+            break;
+          test_index = minindex + pitch_error - offset;
+          if (test_index > pitch_error * 2)
+            test_index -= pitch_error * 2 + 1;
+          if (mins[test_index])
+            break;
+        }
+      }
+      if (offset > pitch_error) {
+        offset = projection->pile_count (x);
+        faking = true;
+      }
+      else {
+        projection_offset =
+          static_cast<int16_t>(projection->pile_count (x) / projection_scale);
+        if (projection_offset > offset)
+          offset = projection_offset;
+        mid_cut = true;
+      }
+    }
+    if ((start == 0 && end == 0)
+      || !textord_fast_pitch_test
+      || (x - projection_left - start) % pitch <= end)
+      cutpts[x - array_origin].assign(&cutpts[0], array_origin, x,
+        faking, mid_cut, offset, projection,
+        projection_scale, zero_count, pitch,
+        pitch_error);
+    else
+      cutpts[x - array_origin].assign_cheap(&cutpts[0], array_origin, x,
+        faking, mid_cut, offset,
+        projection, projection_scale,
+        zero_count, pitch,
+        pitch_error);
+    x++;
+    if (next_zero < x || next_zero == x + zero_offset)
+      next_zero = x + zero_offset + 1;
+    if (projection->pile_count (x + zero_offset) <= zero_count)
+      next_zero = x + zero_offset;
+  }
+
+  best_fake = INT16_MAX;
+  best_cost = INT32_MAX;
+  best_count = INT16_MAX;
+  while (x < right_edge + pitch) {
+    offset = x < right_edge ? right_edge - x : 0;
+    cutpts[x - array_origin].assign(&cutpts[0], array_origin, x,
+      false, false, offset, projection,
+      projection_scale, zero_count, pitch,
+      pitch_error);
+    cutpts[x - array_origin].terminal = true;
+    if (cutpts[x - array_origin].index () +
+    cutpts[x - array_origin].fake_count <= best_count + best_fake) {
+      if (cutpts[x - array_origin].fake_count < best_fake
+        || (cutpts[x - array_origin].fake_count == best_fake
+      && cutpts[x - array_origin].cost_function () < best_cost)) {
+        best_fake = cutpts[x - array_origin].fake_count;
+        best_cost = cutpts[x - array_origin].cost_function ();
+        best_left_x = x;
+        best_right_x = x;
+        best_count = cutpts[x - array_origin].index ();
+      }
+      else if (cutpts[x - array_origin].fake_count == best_fake
+        && x == best_right_x + 1
+      && cutpts[x - array_origin].cost_function () == best_cost) {
+      //exactly equal
+        best_right_x = x;
+      }
+    }
+    x++;
+  }
+  ASSERT_HOST (best_fake < INT16_MAX);
+
+  best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin];
+  //      for (x=left_edge-pitch;x<right_edge+pitch;x++)
+  //      {
+  //              tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n",
+  //                      x,cutpts[x-array_origin].cost_function(),
+  //                      cutpts[x-array_origin].sum(),
+  //                      cutpts[x-array_origin].squares(),
+  //                      cutpts[x-array_origin].previous()->position());
+  //      }
+  occupation_count = -1;
+  do {
+    for (x = best_end->position () - pitch + pitch_error;
+      x < best_end->position () - pitch_error
+      && projection->pile_count (x) == 0; x++);
+    if (x < best_end->position () - pitch_error)
+      occupation_count++;
+                                 //copy it
+    segpt = new FPSEGPT (best_end);
+    seg_it.add_before_then_move (segpt);
+    best_end = best_end->previous ();
+  }
+  while (best_end != nullptr);
+  seg_it.move_to_last ();
+  mean_sum = seg_it.data ()->sum ();
+  mean_sum = mean_sum * mean_sum / best_count;
+  if (seg_it.data ()->squares () - mean_sum < 0)
+    tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n",
+      seg_it.data ()->squares (), seg_it.data ()->sum (), best_count);
+  return seg_it.data ()->squares () - mean_sum;
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/pithsync.h b/tesseract/src/textord/pithsync.h
new file mode 100644
index 00000000..f6309f19
--- /dev/null
+++ b/tesseract/src/textord/pithsync.h
@@ -0,0 +1,136 @@
+/**********************************************************************
+ * File:        pithsync.h  (Formerly pitsync2.h)
+ * Description: Code to find the optimum fixed pitch segmentation of some blobs.
+ * Author:    Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           PITHSYNC_H
+#define           PITHSYNC_H
+
+#include          "blobbox.h"
+#include          "params.h"
+#include          "statistc.h"
+
+namespace tesseract {
+
+class FPSEGPT_LIST;
+
+// One candidate cut position in the lattice built by check_pitch_sync2/3.
+class FPCUTPT {
+  public:
+    FPCUTPT() = default;
+    void setup (                 //start of cut
+      FPCUTPT cutpts[],          //predecessors
+      int16_t array_origin,        //start coord
+      STATS * projection,        //occupation
+      int16_t zero_count,          //official zero
+      int16_t pitch,               //proposed pitch
+      int16_t x,                   //position
+      int16_t offset);             //dist to gap
+
+    void assign(                //evaluate cut
+            FPCUTPT cutpts[],          //predecessors
+            int16_t array_origin,        //start coord
+            int16_t x,                   //position
+            bool faking,              //faking this one
+            bool mid_cut,             //doing free cut
+            int16_t offset,              //extra cost dist
+            STATS* projection,        //occupation
+            float projection_scale,    //scaling
+            int16_t zero_count,          //official zero
+            int16_t pitch,               //proposed pitch
+            int16_t pitch_error);        //allowed tolerance
+
+    void assign_cheap (          //evaluate cut, single predecessor only
+      FPCUTPT cutpts[],          //predecessors
+      int16_t array_origin,      //start coord
+      int16_t x,                 //position
+      bool faking,               //faking this one
+      bool mid_cut,              //doing free cut
+      int16_t offset,            //extra cost dist
+      STATS * projection,        //occupation
+      float projection_scale,    //scaling
+      int16_t zero_count,        //official zero
+      int16_t pitch,             //proposed pitch
+      int16_t pitch_error);      //allowed tolerance
+
+    int32_t position() const {  // access func
+      return xpos;
+    }
+    double cost_function() const {
+      return cost;
+    }
+    double squares() const {
+      return sq_sum;
+    }
+    double sum() const {
+      return mean_sum;
+    }
+    FPCUTPT *previous() const {  //nullptr when there is no predecessor
+      return pred;
+    }
+    int16_t cheap_cuts() const {  //no of mid cuts
+      return mid_cuts;
+    }
+    int16_t index() const {
+      return region_index;
+    }
+
+    bool faked;                 //faked split point
+    bool terminal;              //successful end
+    int16_t fake_count;            //total fakes to here
+
+  private:
+    int16_t region_index;          //cut serial number
+    int16_t mid_cuts;              //no of cheap cuts
+    int32_t xpos;                  //location
+    uint32_t back_balance;         //proj backwards
+    uint32_t fwd_balance;          //proj forwards
+    FPCUTPT *pred;               //optimal previous
+    double mean_sum;             //sum of dists so far
+    double sq_sum;               //summed squared distances
+    double cost;                 //cost function
+};
+double check_pitch_sync2(                          //find segmentation; returns goodness of sync
+                         BLOBNBOX_IT *blob_it,     //blobs to do
+                         int16_t blob_count,         //no of blobs
+                         int16_t pitch,              //pitch estimate
+                         int16_t pitch_error,        //tolerance
+                         STATS *projection,        //vertical
+                         int16_t projection_left,    //edges
+                         int16_t projection_right,
+                         float projection_scale,     //scale factor
+                         int16_t &occupation_count,  //no of occupied cells
+                         FPSEGPT_LIST *seg_list,   //output list
+                         int16_t start,              //start of good range
+                         int16_t end                 //end of good range
+                        );
+double check_pitch_sync3(                          //find segmentation; returns goodness of sync
+                         int16_t projection_left,    //edges
+                         int16_t projection_right,
+                         int16_t zero_count,         //to be considered 0
+                         int16_t pitch,              //pitch estimate
+                         int16_t pitch_error,        //tolerance
+                         STATS *projection,        //vertical
+                         float projection_scale,   //scale factor
+                         int16_t &occupation_count,  //no of occupied cells
+                         FPSEGPT_LIST *seg_list,   //output list
+                         int16_t start,              //start of good range
+                         int16_t end                 //end of good range
+                        );
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/pitsync1.cpp b/tesseract/src/textord/pitsync1.cpp
new file mode 100644
index 00000000..ca46dc84
--- /dev/null
+++ b/tesseract/src/textord/pitsync1.cpp
@@ -0,0 +1,422 @@
+/**********************************************************************
+ * File:        pitsync1.cpp  (Formerly pitsync.c)
+ * Description: Code to find the optimum fixed pitch segmentation of some blobs.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "pitsync1.h"
+
+#include <cfloat>      // for FLT_MAX
+#include <cmath>
+
+namespace tesseract {
+
+ELISTIZE (FPSEGPT) CLISTIZE (FPSEGPT_LIST)  // presumably the bodies for ELISTIZEH/CLISTIZEH in pitsync1.h
+// Tunable parameters; only pitsync_joined_edge is read by live code in this file.
+INT_VAR(pitsync_linear_version, 6, "Use new fast algorithm");
+double_VAR(pitsync_joined_edge, 0.75, "Dist inside big blob for chopping");
+double_VAR(pitsync_offset_freecut_fraction, 0.25,  // appears here only in commented-out code
+  "Fraction of cut for free cuts");
+INT_VAR(pitsync_fake_depth, 1, "Max advance fake generation");
+
+/**********************************************************************
+ * FPSEGPT::FPSEGPT
+ *
+ * Build an FPSEGPT from an FPCUTPT by copying its position, sums, cost,
+ * fake/terminal state and cheap cut count. The copy has no predecessor.
+ **********************************************************************/
+
+FPSEGPT::FPSEGPT(                //constructor
+                 FPCUTPT *cutpt  //create from new form
+                )
+: faked(cutpt->faked),
+  terminal(cutpt->terminal),
+  fake_count(cutpt->fake_count),
+  mid_cuts(cutpt->cheap_cuts ()),
+  xpos(cutpt->position ()),
+  pred(nullptr),                 //the path is not carried over
+  mean_sum(cutpt->sum ()),
+  sq_sum(cutpt->squares ()),
+  cost(cutpt->cost_function ()) {
+}
+
+
+/**********************************************************************
+ * FPSEGPT::FPSEGPT
+ *
+ * Make a zero-cost, unfaked FPSEGPT at x with no predecessor.
+ **********************************************************************/
+
+FPSEGPT::FPSEGPT (               //constructor
+int16_t x                        //position
+)
+: faked(false),
+  terminal(false),
+  fake_count(0),
+  mid_cuts(0),
+  xpos(x),
+  pred(nullptr),                 //no predecessor
+  mean_sum(0),
+  sq_sum(0),
+  cost(0) {}
+
+
+/**********************************************************************
+ * FPSEGPT::FPSEGPT
+ *
+ * Make an FPSEGPT at x, linked to its least-cost predecessor in prev_list.
+ **********************************************************************/
+
+FPSEGPT::FPSEGPT (               //constructor
+int16_t x,                       //position
+bool faking,                     //faking this one
+int16_t offset,                  //dist to gap
+int16_t region_index,            //segment number
+int16_t pitch,                   //proposed pitch
+int16_t pitch_error,             //allowed tolerance
+FPSEGPT_LIST * prev_list         //previous segment
+)
+: fake_count(0),
+  xpos(x),
+  mean_sum(0.0),
+  sq_sum(0.0)
+{
+  int16_t fewest_fakes = INT16_MAX;  //least fake_count in prev_list
+  FPSEGPT_IT prev_it = prev_list;    //walks the previous segment
+
+  faked = faking;
+  terminal = false;
+  mid_cuts = 0;
+  pred = nullptr;                //no path found yet
+  cost = FLT_MAX;                //candidates are accepted only below this
+  for (prev_it.mark_cycle_pt (); !prev_it.cycled_list (); prev_it.forward ()) {
+    FPSEGPT *candidate = prev_it.data ();
+    if (candidate->fake_count < fewest_fakes)
+      fewest_fakes = candidate->fake_count;
+
+    //a predecessor must be non-terminal and lie within pitch_error of
+    //one pitch to the left of x
+    const int32_t gap = x - candidate->xpos;
+    if (candidate->terminal
+    || gap < pitch - pitch_error || gap > pitch + pitch_error)
+      continue;
+
+    const double path_sum = candidate->mean_sum + gap;
+    //sum of squares
+    const double path_sq = gap * gap + candidate->sq_sum + offset * offset;
+    const double path_mean = path_sum / region_index;
+    //squared miss of the mean against pitch, plus the mean of the
+    //squares minus the squared mean
+    double path_cost = path_mean - pitch;
+    path_cost *= path_cost;
+    path_cost += path_sq / (region_index) - path_mean * path_mean;
+    if (path_cost < cost) {
+      cost = path_cost;          //find least cost
+      pred = candidate;          //save path
+      mean_sum = path_sum;
+      sq_sum = path_sq;
+      fake_count = candidate->fake_count + faked;
+    }
+  }
+  //give up if this path has over one more fake than the best predecessor
+  if (fake_count > fewest_fakes + 1)
+    pred = nullptr;              //fail it
+}
+
+/**********************************************************************
+ * check_pitch_sync
+ *
+ * Construct the lattice of possible segmentation points and choose the
+ * optimal path. Return the optimal path only.
+ * The return value is a measure of goodness of the sync.
+ *
+ * The blob_count blobs starting at blob_it are cut at intervals of pitch
+ * +/- pitch_error. Points on the chosen path are moved into seg_list;
+ * every other lattice point is freed before returning.
+ * The result is squares() - sum() * sum() / best_region_index, taken
+ * from the last point of the chosen path.
+ **********************************************************************/
+
+double check_pitch_sync(                        //find segmentation
+                        BLOBNBOX_IT *blob_it,   //blobs to do
+                        int16_t blob_count,     //no of blobs
+                        int16_t pitch,          //pitch estimate
+                        int16_t pitch_error,    //tolerance
+                        STATS *projection,      //vertical
+                        FPSEGPT_LIST *seg_list  //output list
+                       ) {
+  int16_t x;                     //current coord
+  int16_t min_index;             //blob number
+  int16_t max_index;             //blob number
+  int16_t left_edge;             //of word
+  int16_t right_edge;            //of word
+  int16_t right_max;             //max allowed x
+  int16_t min_x;                 //in this region
+  int16_t max_x;
+  int16_t region_index;
+  int16_t best_region_index = 0; //for best result
+  int16_t offset;                //dist to legal area
+  int16_t left_best_x;           //edge of good region
+  int16_t right_best_x;          //right edge
+  TBOX min_box;                  //bounding box
+  TBOX max_box;                  //bounding box
+  TBOX next_box;                 //box of next blob
+  FPSEGPT *segpt;                //segment point
+  FPSEGPT_LIST *segpts;          //points in a segment
+  double best_cost;              //best path
+  double mean_sum;               //computes result
+  FPSEGPT *best_end;             //end of best path
+  BLOBNBOX_IT min_it;            //copy iterator
+  BLOBNBOX_IT max_it;            //copy iterator
+  FPSEGPT_IT segpt_it;           //iterator
+                                 //output segments
+  FPSEGPT_IT outseg_it = seg_list;
+  FPSEGPT_LIST_CLIST lattice;    //list of lists
+                                 //region iterator
+  FPSEGPT_LIST_C_IT lattice_it = &lattice;
+
+  if (pitch < 3)
+    pitch = 3;                   //nothing ludicrous
+  if ((pitch - 3) / 2 < pitch_error)
+    pitch_error = (pitch - 3) / 2;
+  min_it = *blob_it;
+  min_box = box_next (&min_it);  //get box
+                                 //left of word
+  left_edge = min_box.left () + pitch_error;
+  for (min_index = 1; min_index < blob_count; min_index++) {
+    min_box = box_next (&min_it);
+    //              if (blob_count==8 && pitch==27)
+    //                      tprintf("Box at (%d,%d)->(%d,%d)\n",
+    //                              min_box.left(),min_box.bottom(),
+    //                              min_box.right(),min_box.top());
+  }
+  right_edge = min_box.right (); //end of word
+  max_x = left_edge;
+                                 //min permissible
+  min_x = max_x - pitch + pitch_error * 2 + 1;
+  right_max = right_edge + pitch - pitch_error - 1;
+  segpts = new FPSEGPT_LIST;     //list of points
+  segpt_it.set_to_list (segpts);
+  for (x = min_x; x <= max_x; x++) {
+    segpt = new FPSEGPT (x);     //make a new one
+                                 //put in list
+    segpt_it.add_after_then_move (segpt);
+  }
+                                 //first segment
+  lattice_it.add_before_then_move (segpts);
+  min_index = 0;
+  region_index = 1;
+  best_cost = FLT_MAX;
+  best_end = nullptr;
+  min_it = *blob_it;
+  min_box = box_next (&min_it);  //first box
+  do {
+    left_best_x = -1;
+    right_best_x = -1;
+    segpts = new FPSEGPT_LIST;   //list of points
+    segpt_it.set_to_list (segpts);
+    min_x += pitch - pitch_error;//next limits
+    max_x += pitch + pitch_error;
+    while (min_box.right () < min_x && min_index < blob_count) {
+      min_index++;
+      min_box = box_next (&min_it);
+    }
+    max_it = min_it;
+    max_index = min_index;
+    max_box = min_box;
+    next_box = box_next (&max_it);
+    for (x = min_x; x <= max_x && x <= right_max; x++) {
+      while (x < right_edge && max_index < blob_count
+      && x > max_box.right ()) {
+        max_index++;
+        max_box = next_box;
+        next_box = box_next (&max_it);
+      }
+      if (x <= max_box.left () + pitch_error
+        || x >= max_box.right () - pitch_error || x >= right_edge
+        || (max_index < blob_count - 1 && x >= next_box.left ())
+        || (x - max_box.left () > pitch * pitsync_joined_edge
+      && max_box.right () - x > pitch * pitsync_joined_edge)) {
+      //                      || projection->local_min(x))
+        if (x - max_box.left () > 0
+          && x - max_box.left () <= pitch_error)
+                                 //dist to real break
+          offset = x - max_box.left ();
+        else if (max_box.right () - x > 0
+          && max_box.right () - x <= pitch_error
+          && (max_index >= blob_count - 1
+          || x < next_box.left ()))
+          offset = max_box.right () - x;
+        else
+          offset = 0;
+        //                              offset=pitsync_offset_freecut_fraction*projection->pile_count(x);
+        segpt = new FPSEGPT (x, false, offset, region_index,
+          pitch, pitch_error, lattice_it.data ());
+      }
+      else {
+        offset = projection->pile_count (x);
+        segpt = new FPSEGPT (x, true, offset, region_index,
+          pitch, pitch_error, lattice_it.data ());
+      }
+      if (segpt->previous () != nullptr) {
+        segpt_it.add_after_then_move (segpt);
+        if (x >= right_edge - pitch_error) {
+          segpt->terminal = true;//no more wanted
+          if (segpt->cost_function () < best_cost) {
+            best_cost = segpt->cost_function ();
+            //find least
+            best_end = segpt;
+            best_region_index = region_index;
+            left_best_x = x;
+            right_best_x = x;
+          }
+          else if (segpt->cost_function () == best_cost
+            && right_best_x == x - 1)
+            right_best_x = x;
+        }
+      }
+      else {
+        delete segpt;            //no good
+      }
+    }
+    if (segpts->empty ()) {
+      if (best_end != nullptr) {
+        delete segpts;           //never joins the lattice, so free it here
+        break;                   //already found one
+      }
+      make_illegal_segment (lattice_it.data (), min_box, min_it,
+        region_index, pitch, pitch_error, segpts);
+    }
+    else {
+      if (right_best_x > left_best_x + 1) {
+        left_best_x = (left_best_x + right_best_x + 1) / 2;
+        for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list ()
+          && segpt_it.data ()->position () != left_best_x;
+          segpt_it.forward ());
+        if (segpt_it.data ()->position () == left_best_x)
+                                 //middle of region
+          best_end = segpt_it.data ();
+      }
+    }
+                                 //new segment
+    lattice_it.add_before_then_move (segpts);
+    region_index++;
+  }
+  while (min_x < right_edge);
+  ASSERT_HOST (best_end != nullptr);//must always find some
+
+  for (lattice_it.mark_cycle_pt (); !lattice_it.cycled_list ();
+  lattice_it.forward ()) {
+    segpts = lattice_it.data ();
+    segpt_it.set_to_list (segpts);
+    //              if (blob_count==8 && pitch==27)
+    //              {
+    //                      for (segpt_it.mark_cycle_pt();!segpt_it.cycled_list();segpt_it.forward())
+    //                      {
+    //                              segpt=segpt_it.data();
+    //                              tprintf("At %d, (%x) cost=%g, m=%g, sq=%g, pred=%x\n",
+    //                                      segpt->position(),segpt,segpt->cost_function(),
+    //                                      segpt->sum(),segpt->squares(),segpt->previous());
+    //                      }
+    //                      tprintf("\n");
+    //              }
+    for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list ()
+      && segpt_it.data () != best_end; segpt_it.forward ());
+    if (segpt_it.data () == best_end) {
+                                 //save good one
+      segpt = segpt_it.extract ();
+      outseg_it.add_before_then_move (segpt);
+      best_end = segpt->previous ();
+    }
+  }
+  ASSERT_HOST (best_end == nullptr);
+  ASSERT_HOST (!outseg_it.empty ());
+  outseg_it.move_to_last ();
+  mean_sum = outseg_it.data ()->sum ();
+  mean_sum = mean_sum * mean_sum / best_region_index;
+  if (outseg_it.data ()->squares () - mean_sum < 0)
+    tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n",
+      outseg_it.data ()->squares (), outseg_it.data ()->sum (),
+      best_region_index);
+  lattice.deep_clear ();         //shift the lot
+  return outseg_it.data ()->squares () - mean_sum;
+}
+
+
+/**********************************************************************
+ * make_illegal_segment
+ *
+ * Fake chop points in seg_list, a pitch on from the cheapest of prev_list, when no legal places exist.
+ **********************************************************************/
+
+void make_illegal_segment(                          //find segmentation
+                          FPSEGPT_LIST *prev_list,  //previous segments
+                          TBOX blob_box,            //bounding box
+                          BLOBNBOX_IT blob_it,      //iterator
+                          int16_t region_index,     //number of segment
+                          int16_t pitch,            //pitch estimate
+                          int16_t pitch_error,      //tolerance
+                          FPSEGPT_LIST *seg_list    //output list
+                         ) {
+  int16_t x;                     //current coord
+  int16_t min_x = 0;             //in this region
+  int16_t max_x = 0;
+  int16_t offset;                //dist to edge
+  FPSEGPT *segpt;                //segment point
+  FPSEGPT *prevpt;               //previous point
+  double best_cost;              //best path; double so ties compare exactly
+  FPSEGPT_IT segpt_it = seg_list;//iterator
+                                 //previous points
+  FPSEGPT_IT prevpt_it = prev_list;
+
+  best_cost = FLT_MAX;
+  for (prevpt_it.mark_cycle_pt (); !prevpt_it.cycled_list ();
+  prevpt_it.forward ()) {
+    prevpt = prevpt_it.data ();
+    if (prevpt->cost_function () < best_cost) {
+                                 //find least
+      best_cost = prevpt->cost_function ();
+      min_x = prevpt->position ();
+      max_x = min_x;             //limits on coords
+    }
+    else if (prevpt->cost_function () == best_cost) {
+      max_x = prevpt->position ();  //equal cost stretches the range
+    }
+  }
+  min_x += pitch - pitch_error;  //one pitch on from the cheapest points
+  max_x += pitch + pitch_error;
+  for (x = min_x; x <= max_x; x++) {
+    while (x > blob_box.right ()) {
+      blob_box = box_next (&blob_it);
+    }
+    offset = x - blob_box.left ();  //use the nearer edge of the blob
+    if (blob_box.right () - x < offset)
+      offset = blob_box.right () - x;
+    segpt = new FPSEGPT (x, false, offset,
+      region_index, pitch, pitch_error, prev_list);
+    if (segpt->previous () != nullptr) {
+      ASSERT_HOST (offset >= 0);
+      fprintf (stderr, "made fake at %d\n", x);
+                                 //make one up
+      segpt_it.add_after_then_move (segpt);
+      segpt->faked = true;
+      segpt->fake_count++;
+    }
+    else
+      delete segpt;              //no predecessor, so unusable
+  }
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/pitsync1.h b/tesseract/src/textord/pitsync1.h
new file mode 100644
index 00000000..310a6d8a
--- /dev/null
+++ b/tesseract/src/textord/pitsync1.h
@@ -0,0 +1,125 @@
+/**********************************************************************
+ * File:        pitsync1.h  (Formerly pitsync.h)
+ * Description: Code to find the optimum fixed pitch segmentation of some blobs.
+ * Author:    Ray Smith
+ * Created:   Thu Nov 19 11:48:05 GMT 1992
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           PITSYNC1_H
+#define           PITSYNC1_H
+
+#include          "elst.h"
+#include          "clst.h"
+#include          "blobbox.h"
+#include          "params.h"
+#include          "statistc.h"
+#include          "pithsync.h"
+
+namespace tesseract {
+
+class FPSEGPT_LIST;
+
+class FPSEGPT : public ELIST_LINK  //one candidate cut point on a segmentation path
+{
+  public:
+    FPSEGPT() = default;  //members take the default initializers below
+    FPSEGPT(           //constructor
+            int16_t x);  //position
+    FPSEGPT(                           //constructor
+            int16_t x,                 //position
+            bool faking,               //faking this one
+            int16_t offset,            //extra cost dist
+            int16_t region_index,      //segment number
+            int16_t pitch,             //proposed pitch
+            int16_t pitch_error,       //allowed tolerance
+            FPSEGPT_LIST *prev_list);  //previous segment
+    FPSEGPT(FPCUTPT *cutpt);  //build from new type
+
+    int32_t position() const {  // access func
+      return xpos;
+    }
+    double cost_function() const {  //cost of the path to here
+      return cost;
+    }
+    double squares() const {  //sq_sum of the path to here
+      return sq_sum;
+    }
+    double sum() const {  //mean_sum of the path to here
+      return mean_sum;
+    }
+    FPSEGPT *previous() const {  //nullptr when there is no predecessor
+      return pred;
+    }
+    int16_t cheap_cuts() const {  //no of cheap cuts
+      return mid_cuts;
+    }
+
+    bool faked = false;          //faked split point
+    bool terminal = false;       //successful end
+    int16_t fake_count = 0;      //total fakes to here
+
+  private:
+    int16_t mid_cuts = 0;        //no of cheap cuts
+    int32_t xpos = 0;            //location
+    FPSEGPT *pred = nullptr;     //optimal previous
+    double mean_sum = 0.0;       //mean so far
+    double sq_sum = 0.0;         //summed distances
+    double cost = 0.0;           //cost function
+};
+
+ELISTIZEH (FPSEGPT) CLISTIZEH (FPSEGPT_LIST)  // list types used by the prototypes below
+extern  // defaults and help text here mirror the definitions in pitsync1.cpp
+INT_VAR_H (pitsync_linear_version, 6, "Use new fast algorithm");
+extern
+double_VAR_H (pitsync_joined_edge, 0.75,
+"Dist inside big blob for chopping");
+extern
+double_VAR_H (pitsync_offset_freecut_fraction, 0.25,
+"Fraction of cut for free cuts");
+extern
+INT_VAR_H (pitsync_fake_depth, 1, "Max advance fake generation");
+double check_pitch_sync(                        //find segmentation; returns goodness of sync
+                        BLOBNBOX_IT *blob_it,   //blobs to do
+                        int16_t blob_count,       //no of blobs
+                        int16_t pitch,            //pitch estimate
+                        int16_t pitch_error,      //tolerance
+                        STATS *projection,      //vertical projection
+                        FPSEGPT_LIST *seg_list  //output list
+                       );
+void make_illegal_segment(                          //fake chop points when none are legal
+                          FPSEGPT_LIST *prev_list,  //previous segments
+                          TBOX blob_box,             //bounding box
+                          BLOBNBOX_IT blob_it,      //iterator
+                          int16_t region_index,       //number of segment
+                          int16_t pitch,              //pitch estimate
+                          int16_t pitch_error,        //tolerance
+                          FPSEGPT_LIST *seg_list    //output list
+                         );
+int16_t vertical_torow_projection(                   //project whole row; not defined in pitsync1.cpp
+                                TO_ROW *row,       //row to do
+                                STATS *projection  //output
+                               );
+void vertical_cblob_projection(               //project outlines; not defined in pitsync1.cpp
+                               C_BLOB *blob,  //blob to project
+                               STATS *stats   //output
+                              );
+void vertical_coutline_projection(                     //project outlines; not defined in pitsync1.cpp
+                                  C_OUTLINE *outline,  //outline to project
+                                  STATS *stats         //output
+                                 );
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/scanedg.cpp b/tesseract/src/textord/scanedg.cpp
new file mode 100644
index 00000000..fa0608cb
--- /dev/null
+++ b/tesseract/src/textord/scanedg.cpp
@@ -0,0 +1,405 @@
+/**********************************************************************
+ * File:        scanedg.cpp  (Formerly scanedge.c)
+ * Description: Raster scanning crack based edge extractor.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "scanedg.h"
+
+#include "crakedge.h"
+#include "edgloop.h"
+#include "pdblock.h"
+
+#include "allheaders.h"
+
+#include <memory>  // std::unique_ptr
+
+namespace tesseract {
+
+#define WHITE_PIX     1          /* thresholded colours; block_edges uses WHITE_PIX as its margin fill */
+#define BLACK_PIX     0
+// Flips between WHITE_PIX and BLACK_PIX (maps 1 to 0 and 0 to 1).
+#define FLIP_COLOUR(pix)  (1-(pix))
+
+struct CrackPos {            // State handed to h_edge/v_edge when making an edge.
+  CRACKEDGE** free_cracks;   // Address of the freelist head; edges are popped from it.
+  int x;                     // Position of new edge.
+  int y;                     // h_edge places its edge at y + 1.
+};
+
+static void free_crackedges(CRACKEDGE* start);  // block_edges calls this on its freelist
+// Called by line_edges with the current edge and the one from the row above.
+static void join_edges(CRACKEDGE* edge1, CRACKEDGE* edge2,
+                       CRACKEDGE** free_cracks,
+                       C_OUTLINE_IT* outline_it);
+// Scans one row of thresholded pixels for edges.
+static void line_edges(int16_t x, int16_t y, int16_t xext, uint8_t uppercolour,
+                       uint8_t* bwpos,
+                       CRACKEDGE** prevline, CRACKEDGE** free_cracks,
+                       C_OUTLINE_IT* outline_it);
+// Sets the non-text pixels of one row to margin.
+static void make_margins(PDBLK* block, BLOCK_LINE_IT* line_it,
+                         uint8_t* pixels, uint8_t margin,
+                         int16_t left, int16_t right, int16_t y);
+// Each makes one CRACKEDGE at pos and links it to join.
+static CRACKEDGE* h_edge(int sign, CRACKEDGE* join, CrackPos* pos);
+static CRACKEDGE* v_edge(int sign, CRACKEDGE* join, CrackPos* pos);
+
+/**********************************************************************
+ * block_edges
+ *
+ * Extract edges from the part of t_pix inside a PDBLK, one row at a time.
+ **********************************************************************/
+
+void block_edges(Pix *t_pix,           // thresholded image
+                 PDBLK *block,         // block in image
+                 C_OUTLINE_IT* outline_it) {
+  ICOORD bleft;                  // bottom-left of block box
+  ICOORD tright;                 // top-right of block box
+  BLOCK_LINE_IT line_it = block; // handed to make_margins
+
+  const int width = pixGetWidth(t_pix);
+  const int height = pixGetHeight(t_pix);
+  const int wpl = pixGetWpl(t_pix);
+                                 // lines in progress, indexed by x in block
+  std::unique_ptr<CRACKEDGE*[]> ptrline(new CRACKEDGE*[width + 1]);
+  CRACKEDGE *free_cracks = nullptr;  // freelist shared by every row
+
+  block->bounding_box(bleft, tright);
+  ASSERT_HOST(tright.x() <= width);
+  ASSERT_HOST(tright.y() <= height);
+  const int left = bleft.x();
+  const int block_width = tright.x() - left;
+  for (int x = 0; x <= block_width; ++x)
+    ptrline[x] = nullptr;        // no lines in progress
+
+  std::unique_ptr<uint8_t[]> bwline(new uint8_t[width]);
+  const uint8_t margin = WHITE_PIX;
+
+  for (int y = tright.y() - 1; y >= bleft.y() - 1; y--) {  // ends one row below the block
+    if (y < bleft.y() || y >= tright.y()) {
+      memset(bwline.get(), margin, block_width * sizeof(bwline[0]));
+    } else {
+      // Fetch image row height - 1 - y and store each bit inverted.
+      l_uint32* line = pixGetData(t_pix) + wpl * (height - 1 - y);
+      for (int x = 0; x < block_width; ++x)
+        bwline[x] = GET_DATA_BIT(line, x + left) ^ 1;
+      make_margins(block, &line_it, bwline.get(), margin, left, tright.x(), y);
+    }
+    line_edges(left, y, block_width,
+               margin, bwline.get(), ptrline.get(), &free_cracks, outline_it);
+  }
+
+  free_crackedges(free_cracks);  // really free them
+}
+
+
+
+/**********************************************************************
+ * make_margins
+ *
+ * Set to margin every pixel of row y that lies outside the segments the block reports for that row.
+ **********************************************************************/
+
+static
+void make_margins(                         //get a line
+                  PDBLK *block,            //block in image
+                  BLOCK_LINE_IT *line_it,  //for old style (used when there is no poly_block)
+                  uint8_t *pixels,           //pixels to strip; index 0 is x == left
+                  uint8_t margin,            //white-out pixel
+                  int16_t left,              //block edges
+                  int16_t right,             //exclusive upper x limit
+                  int16_t y                  //line coord
+                 ) {
+  ICOORDELT_IT seg_it;
+  int32_t start;                   //of segment
+  int16_t xext;                    //of segment
+  int xindex;                    //index to pixel
+
+  if (block->poly_block () != nullptr) {
+    std::unique_ptr<PB_LINE_IT> lines(new PB_LINE_IT (block->poly_block ()));
+    const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments(
+        lines->get_line(y));
+    if (!segments->empty ()) {
+      seg_it.set_to_list(segments.get());
+      seg_it.mark_cycle_pt ();
+      start = seg_it.data ()->x ();   //each element holds a start in x()
+      xext = seg_it.data ()->y ();    //and an extent in y()
+      for (xindex = left; xindex < right; xindex++) {
+        if (xindex >= start && !seg_it.cycled_list ()) {
+          xindex = start + xext - 1;  //skip the segment, leaving its pixels alone
+          seg_it.forward ();
+          start = seg_it.data ()->x ();
+          xext = seg_it.data ()->y ();
+        }
+        else
+          pixels[xindex - left] = margin;  //outside every segment
+      }
+    }
+    else {
+      for (xindex = left; xindex < right; xindex++)  //no segments: whole row is margin
+        pixels[xindex - left] = margin;
+    }
+  }
+  else {
+    start = line_it->get_line (y, xext);  //single segment [start, start + xext)
+    for (xindex = left; xindex < start; xindex++)
+      pixels[xindex - left] = margin;     //left of the segment
+    for (xindex = start + xext; xindex < right; xindex++)
+      pixels[xindex - left] = margin;     //right of the segment
+  }
+}
+
+/**********************************************************************
+ * line_edges
+ *
+ * Scan a line for edges and update the edges in progress.
+ * When edges close into loops, send them for approximation.
+ **********************************************************************/
+
+static
+void line_edges(int16_t x,                         // coord of line start
+                int16_t y,                         // coord of line
+                int16_t xext,                      // width of line
+                uint8_t uppercolour,               // start of prev line
+                uint8_t * bwpos,                   // thresholded line
+                CRACKEDGE ** prevline,           // edges in progress
+                CRACKEDGE **free_cracks,         // freelist head, also stored in pos
+                C_OUTLINE_IT* outline_it) {      // handed to join_edges
+  CrackPos pos = {free_cracks, x, y };
+  int xmax;                      // max x coord
+  int prevcolour;                // of previous pixel
+  CRACKEDGE *current;            // current h edge
+  CRACKEDGE *newcurrent;         // new h edge
+
+  xmax = x + xext;               // max allowable coord
+  prevcolour = uppercolour;      // forced plain margin
+  current = nullptr;                // nothing yet
+
+                                 // do each pixel
+  for (; pos.x < xmax; pos.x++, prevline++) {
+    const int colour = *bwpos++; // current pixel
+    if (*prevline != nullptr) {
+                                 // an edge is in progress at this x, so the
+                                 // colour above changes here
+      uppercolour = FLIP_COLOUR(uppercolour);
+      if (colour == prevcolour) {
+        if (colour == uppercolour) {
+                                 // finish a line
+          join_edges(current, *prevline, free_cracks, outline_it);
+          current = nullptr;        // no edge now
+        } else {
+                                 // new horiz edge
+          current = h_edge(uppercolour - colour, *prevline, &pos);
+        }
+        *prevline = nullptr;        // no change this time
+      } else {
+        if (colour == uppercolour)
+          *prevline = v_edge(colour - prevcolour, *prevline, &pos);
+                                 // 8 vs 4 connection
+        else if (colour == WHITE_PIX) {
+          join_edges(current, *prevline, free_cracks, outline_it);
+          current = h_edge(uppercolour - colour, nullptr, &pos);
+          *prevline = v_edge(colour - prevcolour, current, &pos);
+        } else {
+          newcurrent = h_edge(uppercolour - colour, *prevline, &pos);
+          *prevline = v_edge(colour - prevcolour, current, &pos);
+          current = newcurrent;  // right going h edge
+        }
+        prevcolour = colour;     // remember new colour
+      }
+    } else {
+      if (colour != prevcolour) {  // colour changes along this row
+        *prevline = current = v_edge(colour - prevcolour, current, &pos);
+        prevcolour = colour;
+      }
+      if (colour != uppercolour)   // colour differs from the row above
+        current = h_edge(uppercolour - colour, current, &pos);
+      else
+        current = nullptr;          // no edge now
+    }
+  }
+  if (current != nullptr) {
+                                 // out of block
+    if (*prevline != nullptr) {     // got one to join to?
+      join_edges(current, *prevline, free_cracks, outline_it);
+      *prevline = nullptr;          // tidy now
+    } else {
+                                 // fake vertical
+      *prevline = v_edge(FLIP_COLOUR(prevcolour)-prevcolour, current, &pos);
+    }
+  } else if (*prevline != nullptr) {
+                                 // continue fake
+    *prevline = v_edge(FLIP_COLOUR(prevcolour)-prevcolour, *prevline, &pos);
+  }
+}
+
+
+/**********************************************************************
+ * h_edge
+ *
+ * Make a horizontal CRACKEDGE at pos and link it to join, if there is one.
+ **********************************************************************/
+
+static
+CRACKEDGE *h_edge(int sign,                       // sign of edge
+                  CRACKEDGE* join,                // edge to join to
+                  CrackPos* pos) {
+  // Take an edge from the freelist when it has one, else allocate.
+  CRACKEDGE *edge = *pos->free_cracks;
+  if (edge == nullptr) {
+    edge = new CRACKEDGE;
+  } else {
+    *pos->free_cracks = edge->next;   // get one fast
+  }
+
+  // A positive sign starts at x + 1 and steps in -x; otherwise the
+  // edge starts at x and steps in +x.
+  const bool leftward = sign > 0;
+  edge->pos.set_x(leftward ? pos->x + 1 : pos->x);
+  edge->pos.set_y(pos->y + 1);
+  edge->stepx = leftward ? -1 : 1;
+  edge->stepy = 0;                    // edge is horizontal
+  edge->stepdir = leftward ? 0 : 2;
+
+  if (join == nullptr) {
+    edge->next = edge;                // ptrs to other ends
+    edge->prev = edge;
+    return edge;
+  }
+
+  const bool steps_onto_join = edge->pos.y() == join->pos.y()
+      && edge->pos.x() + edge->stepx == join->pos.x();
+  if (steps_onto_join) {
+    edge->prev = join->prev;          // update other ends
+    edge->prev->next = edge;
+    edge->next = join;                // join up
+    join->prev = edge;
+  } else {
+    edge->next = join->next;          // update other ends
+    edge->next->prev = edge;
+    edge->prev = join;                // join up
+    join->next = edge;
+  }
+  return edge;
+}
+
+
+
+
+/**********************************************************************
+ * v_edge
+ *
+ * Create a new vertical CRACKEDGE and join it to the given edge.
+ **********************************************************************/
+
+static
+CRACKEDGE *v_edge(int sign,                       // sign of edge
+                  CRACKEDGE* join,
+                  CrackPos* pos) {
+  // Take an edge from the free list when it has one, else allocate.
+  CRACKEDGE *edge = *pos->free_cracks;
+  if (edge == nullptr) {
+    edge = new CRACKEDGE;
+  } else {
+    *pos->free_cracks = edge->next;  // pop it off the free list
+  }
+  // Vertical edge: placed at x with no step in x.
+  edge->pos.set_x(pos->x);
+  edge->stepx = 0;
+  const bool positive = sign > 0;
+  edge->pos.set_y(positive ? pos->y : pos->y + 1);  // start location
+  edge->stepy = positive ? 1 : -1;
+  edge->stepdir = positive ? 3 : 1;
+
+  if (join == nullptr) {
+    // Nothing to attach to: both links refer back to the edge itself.
+    edge->next = edge;
+    edge->prev = edge;
+    return edge;
+  }
+  const bool ends_at_join = edge->pos.x() == join->pos.x() &&
+                            edge->pos.y() + edge->stepy == join->pos.y();
+  if (ends_at_join) {
+    // The new edge steps onto join's start, so link it in ahead of join.
+    CRACKEDGE *before = join->prev;
+    edge->prev = before;
+    before->next = edge;
+    edge->next = join;
+    join->prev = edge;
+  } else {
+    // Otherwise link the new edge in behind join.
+    CRACKEDGE *after = join->next;
+    edge->next = after;
+    after->prev = edge;
+    edge->prev = join;
+    join->next = edge;
+  }
+  return edge;
+}
+
+
+/**********************************************************************
+ * join_edges
+ *
+ * Join 2 edges together. Send the outline for approximation when a
+ * closed loop is formed.
+ **********************************************************************/
+
+static
+void join_edges(CRACKEDGE *edge1,  // edges to join
+                CRACKEDGE *edge2,   // no specific order
+                CRACKEDGE **free_cracks,  // head of the list of reusable edges
+                C_OUTLINE_IT* outline_it) {  // passed through to complete_edge
+  if (edge1->pos.x() + edge1->stepx != edge2->pos.x()
+  || edge1->pos.y() + edge1->stepy != edge2->pos.y()) {
+    CRACKEDGE *tempedge = edge1;  // edge1 does not step onto edge2's start
+    edge1 = edge2;  // swap around so that edge1 is taken as the leading edge
+    edge2 = tempedge;
+  }
+
+  if (edge1->next == edge2) {
+                                 // already linked: the loop is closed
+    complete_edge(edge1, outline_it);
+                                 // old free list is hung after edge1->prev
+    edge1->prev->next = *free_cracks;
+    *free_cracks = edge1;         // edge1 becomes the new free-list head
+  } else {
+                                 // cross-link the far ends of both chains
+    edge2->prev->next = edge1->next;
+    edge1->next->prev = edge2->prev;
+    edge1->next = edge2;         // then join edge1 directly to edge2
+    edge2->prev = edge1;
+  }
+}
+
+
+/**********************************************************************
+ * free_crackedges
+ *
+ * Really free the CRACKEDGEs by giving them back to delete.
+ **********************************************************************/
+
+static void free_crackedges(CRACKEDGE *start) {
+  // Walk the nullptr-terminated chain, releasing each edge in turn.
+  CRACKEDGE *edge = start;
+  while (edge != nullptr) {
+    CRACKEDGE *following = edge->next;  // read the link before the delete
+    delete edge;
+    edge = following;
+  }
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/scanedg.h b/tesseract/src/textord/scanedg.h
new file mode 100644
index 00000000..96bf6478
--- /dev/null
+++ b/tesseract/src/textord/scanedg.h
@@ -0,0 +1,38 @@
+/**********************************************************************
+ * File:        scanedg.h  (Formerly scanedge.h)
+ * Description: Raster scanning crack based edge extractor.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           SCANEDG_H
+#define           SCANEDG_H
+
+#include          "params.h"
+#include          "scrollview.h"
+
+struct Pix;
+
+namespace tesseract {
+
+class C_OUTLINE_IT;
+class PDBLK;
+
+void block_edges(Pix* t_image,         // thresholded image
+                 PDBLK* block,         // block in image
+                 C_OUTLINE_IT* outline_it);  // presumably receives the outlines found - confirm in scanedg.cpp
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/sortflts.cpp b/tesseract/src/textord/sortflts.cpp
new file mode 100644
index 00000000..01548e9f
--- /dev/null
+++ b/tesseract/src/textord/sortflts.cpp
@@ -0,0 +1,81 @@
+/**********************************************************************
+ * File:        sortflts.cpp  (Formerly sfloats.c)
+ * Description: Code to maintain a sorted list of floats.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include          "sortflts.h"
+
+namespace tesseract {
+
+ELISTIZE (SORTED_FLOAT)
+/**
+ * @name SORTED_FLOATS::add
+ *
+ * Add a new entry to the sorted list of floats.
+ */
+void SORTED_FLOATS::add(  //add new entry
+                        float value,
+                        int32_t key) {
+  SORTED_FLOAT *node = new SORTED_FLOAT(value, key);
+  if (list.empty()) {
+    it.add_after_stay_put(node);  // first element, nothing to order against
+    return;
+  }
+  // Stop at the first entry that is not below value, or at the last entry.
+  it.move_to_first();
+  while (!it.at_last() && it.data()->entry < value) it.forward();
+  if (it.data()->entry < value) {
+    it.add_after_stay_put(node);   // still smaller at the last entry: append
+  } else {
+    it.add_before_stay_put(node);  // goes ahead of the first non-smaller one
+  }
+}
+
+
+/**
+ * @name SORTED_FLOATS::remove
+ *
+ * Remove an entry from the sorted list of floats.
+ */
+
+void SORTED_FLOATS::remove(  //remove the entry
+                           int32_t key) {
+  if (list.empty()) return;  // nothing stored, nothing to remove
+  // Scan one full cycle and drop the first entry found carrying this key.
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    if (it.data()->address != key) continue;
+    SORTED_FLOAT *match = it.extract();
+    delete match;
+    return;
+  }
+}
+
+
+/**
+ * @name SORTED_FLOATS::operator[]
+ *
+ * Return the floating point value of the given index into the list.
+ */
+
+float
+SORTED_FLOATS::operator[] (      //get an entry
+int32_t index) {                   //to list
+  it.move_to_first();  // index is taken relative to the first element
+  const SORTED_FLOAT *item = it.data_relative(index);
+  return item->entry;
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/sortflts.h b/tesseract/src/textord/sortflts.h
new file mode 100644
index 00000000..710a7a3d
--- /dev/null
+++ b/tesseract/src/textord/sortflts.h
@@ -0,0 +1,76 @@
+/**********************************************************************
+ * File:        sortflts.h  (Formerly sfloats.h)
+ * Description: Code to maintain a sorted list of floats.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           SORTFLTS_H
+#define           SORTFLTS_H
+
+#include          "elst.h"
+
+namespace tesseract {
+
+class SORTED_FLOAT : public ELIST_LINK
+{
+  // List element pairing a float value with the integer key given to add().
+  friend class SORTED_FLOATS;
+  public:
+    SORTED_FLOAT() = default;  // members take the in-class defaults below
+    SORTED_FLOAT(              //create one
+                 float value,  //value of entry
+                 int32_t key)  //reference
+      : entry(value), address(key) {
+    }
+
+  private:
+    float entry = 0.0f;          //value of float
+    int32_t address = 0;         //key compared by SORTED_FLOATS::remove
+};
+
+ELISTIZEH (SORTED_FLOAT)
+class SORTED_FLOATS
+{
+  public:
+    /** Builds an empty collection and binds the built-in iterator to it. */
+    SORTED_FLOATS() {
+      it.set_to_list (&list);
+    }
+    /**
+     * Insert a sample, keeping the entries in ascending value order.
+     * @param value sample float
+     * @param key retrieval key, compared against by remove()
+     */
+    void add(float value,
+             int32_t key);
+    /**
+     * Delete the first sample found with the given key, if there is one.
+     * @param key key to delete
+     */
+    void remove(int32_t key);
+    /**
+     * Read a value by position, counted from the first list element.
+     * @param index item to get
+     */
+    float operator[] (int32_t index);
+
+  private:
+    SORTED_FLOAT_LIST list;      //list of floats
+    SORTED_FLOAT_IT it;          //iterator built-in, set to &list in the ctor
+};
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/strokewidth.cpp b/tesseract/src/textord/strokewidth.cpp
new file mode 100644
index 00000000..6543c6ac
--- /dev/null
+++ b/tesseract/src/textord/strokewidth.cpp
@@ -0,0 +1,2030 @@
+///////////////////////////////////////////////////////////////////////
+// File:        strokewidth.cpp
+// Description: Subclass of BBGrid to find uniformity of strokewidth.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "strokewidth.h"
+
+#include <algorithm>
+#include <cmath>
+
+#include "blobbox.h"
+#include "colpartition.h"
+#include "colpartitiongrid.h"
+#include "imagefind.h"
+#include "linlsq.h"
+#include "statistc.h"
+#include "tabfind.h"
+#include "textlineprojection.h"
+#include "tordmain.h"  // For SetBlobStrokeWidth.
+
+namespace tesseract {
+
+#ifndef GRAPHICS_DISABLED
+static INT_VAR(textord_tabfind_show_strokewidths, 0, "Show stroke widths (ScrollView)");
+#else
+static INT_VAR(textord_tabfind_show_strokewidths, 0, "Show stroke widths");
+#endif
+static BOOL_VAR(textord_tabfind_only_strokewidths, false, "Only run stroke widths");
+
+/** Allowed proportional change in stroke width to be the same font. */
+const double kStrokeWidthFractionTolerance = 0.125;
+/**
+ * Allowed constant change in stroke width to be the same font.
+ * Really 1.5 pixels.
+ */
+const double kStrokeWidthTolerance = 1.5;
+// Same but for CJK we are a bit more generous.
+const double kStrokeWidthFractionCJK = 0.25;
+const double kStrokeWidthCJK = 2.0;
+// Radius in grid cells of search for broken CJK. Doesn't need to be very
+// large as the grid size should be about the size of a character anyway.
+const int kCJKRadius = 2;
+// Max distance fraction of size to join close but broken CJK characters.
+const double kCJKBrokenDistanceFraction = 0.25;
+// Max number of components in a broken CJK character.
+const int kCJKMaxComponents = 8;
+// Max aspect ratio of CJK broken characters when put back together.
+const double kCJKAspectRatio = 1.25;
+// Max increase in aspect ratio of CJK broken characters when merged.
+const double kCJKAspectRatioIncrease = 1.0625;
+// Max multiple of the grid size that will be used in computing median CJKsize.
+const int kMaxCJKSizeRatio = 5;
+// Min fraction of blobs broken CJK to iterate and run it again.
+const double kBrokenCJKIterationFraction = 0.125;
+// Multiple of gridsize as x-padding for a search box for diacritic base
+// characters.
+const double kDiacriticXPadRatio = 7.0;
+// Multiple of gridsize as y-padding for a search box for diacritic base
+// characters.
+const double kDiacriticYPadRatio = 1.75;
+// Min multiple of diacritic height that a neighbour must be to be a
+// convincing base character.
+const double kMinDiacriticSizeRatio = 1.0625;
+// Max multiple of a textline's median height as a threshold for the sum of
+// a diacritic's farthest x and y distances (gap + size).
+const double kMaxDiacriticDistanceRatio = 1.25;
+// Max x-gap between a diacritic and its base char as a fraction of the height
+// of the base char (allowing other blobs to fill the gap.)
+const double kMaxDiacriticGapToBaseCharHeight = 1.0;
+// Ratio between longest side of a line and longest side of a character.
+// (neighbor_min > blob_min * kLineTrapShortest &&
+//  neighbor_max < blob_max / kLineTrapLongest)
+// => neighbor is a grapheme and blob is a line.
+const int kLineTrapLongest = 4;
+// Ratio between shortest side of a line and shortest side of a character.
+const int kLineTrapShortest = 2;
+// Max aspect ratio of the total box before CountNeighbourGaps
+// decides immediately based on the aspect ratio.
+const int kMostlyOneDirRatio = 3;
+// Min height/width ratio for a blob to be considered as line residue.
+const double kLineResidueAspectRatio = 8.0;
+// Padding of the line residue search box as a multiple of the blob height.
+const int kLineResiduePadRatio = 3;
+// Min multiple of the tallest neighbour's height for residue to be genuine.
+const double kLineResidueSizeRatio = 1.75;
+// Max long-side/short-side ratio of a blob that is kept for OSD.
+const float kSizeRatioToReject = 2.0;
+// Expansion factor for search box for good neighbours.
+const double kNeighbourSearchFactor = 2.5;
+// Factor of increase of overlap when adding diacritics to make an image noisy.
+const double kNoiseOverlapGrowthFactor = 4.0;
+// Fraction of the image size to add overlap when adding diacritics for an
+// image to qualify as noisy.
+const double kNoiseOverlapAreaFactor = 1.0 / 512;
+
+StrokeWidth::StrokeWidth(int gridsize, const ICOORD& bleft, const ICOORD& tright)
+  : BlobGrid(gridsize, bleft, tright),
+    nontext_map_(nullptr), projection_(nullptr), denorm_(nullptr),
+    grid_box_(bleft, tright), rerotation_(1.0f, 0.0f) {
+  initial_widths_win_ = nullptr;  // every window pointer starts out null
+  widths_win_ = nullptr;
+  leaders_win_ = nullptr;
+  chains_win_ = nullptr;
+  textlines_win_ = nullptr;
+  smoothed_win_ = nullptr;
+  diacritics_win_ = nullptr;
+}
+
+StrokeWidth::~StrokeWidth() {
+  if (widths_win_ != nullptr) {  // only set when the widths display was made
+    #ifndef GRAPHICS_DISABLED
+    delete widths_win_->AwaitEvent(SVET_DESTROY);  // frees the returned event
+    #endif // !GRAPHICS_DISABLED
+    if (textord_tabfind_only_strokewidths)
+      exit(0);  // ends the process here; the deletes below are not reached
+    delete widths_win_;
+  }
+  delete leaders_win_;  // the remaining pointers may still be nullptr
+  delete initial_widths_win_;
+  delete chains_win_;
+  delete textlines_win_;
+  delete smoothed_win_;
+  delete diacritics_win_;
+}
+
+// Sets the neighbours member of the medium-sized blobs in the block.
+// Searches on 4 sides of each blob for similar-sized, similar-strokewidth
+// blobs and sets pointers to the good neighbours.
+void StrokeWidth::SetNeighboursOnMediumBlobs(TO_BLOCK* block) {
+  BLOBNBOX_LIST* medium_blobs = &block->blobs;
+  InsertBlobList(medium_blobs);
+  BLOBNBOX_IT it(medium_blobs);
+  // Preliminary pass: run the neighbour search once for every medium blob.
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward())
+    SetNeighbours(false, false, it.data());
+  Clear();  // the grid is left empty again
+}
+
+// Sets the neighbour/textline writing direction members of the medium
+// and large blobs with optional repair of broken CJK characters first.
+// Repair of broken CJK is needed here because broken CJK characters
+// can fool the textline direction detection algorithm.
+void StrokeWidth::FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode,
+                                                       bool cjk_merge,
+                                                       TO_BLOCK* input_block) {
+  InsertBlobs(input_block);  // load the block's blobs into the grid
+  if (cjk_merge) {
+    // Repeat the repair for as long as FixBrokenCJK returns true.
+    while (FixBrokenCJK(input_block)) {
+    }
+  }
+  FindTextlineFlowDirection(pageseg_mode, false);  // grade by neighbours
+  Clear();  // grid is emptied, ready for rotation or leader finding
+}
+
+// Helper to collect and count horizontal and vertical blobs from a list.
+static void CollectHorizVertBlobs(BLOBNBOX_LIST* input_blobs,
+                                  int* num_vertical_blobs,
+                                  int* num_horizontal_blobs,
+                                  BLOBNBOX_CLIST* vertical_blobs,
+                                  BLOBNBOX_CLIST* horizontal_blobs,
+                                  BLOBNBOX_CLIST* nondescript_blobs) {
+  BLOBNBOX_C_IT vert_out(vertical_blobs);
+  BLOBNBOX_C_IT horz_out(horizontal_blobs);
+  BLOBNBOX_C_IT other_out(nondescript_blobs);
+  BLOBNBOX_IT in_it(input_blobs);
+  for (in_it.mark_cycle_pt(); !in_it.cycled_list(); in_it.forward()) {
+    BLOBNBOX* candidate = in_it.data();
+    const TBOX& bounds = candidate->bounding_box();
+    // Aspect ratio expressed as the larger of h/w and w/h.
+    const float tall = static_cast<float>(bounds.height()) / bounds.width();
+    const float wide = 1.0f / tall;
+    const float aspect = tall < wide ? wide : tall;
+    // Blobs are counted by direction always, but collected only when compact.
+    const bool keep = aspect <= kSizeRatioToReject;
+    BLOBNBOX_C_IT* target = &other_out;
+    if (candidate->UniquelyVertical()) {
+      ++*num_vertical_blobs;
+      target = &vert_out;
+    } else if (candidate->UniquelyHorizontal()) {
+      ++*num_horizontal_blobs;
+      target = &horz_out;
+    }
+    if (keep) target->add_after_then_move(candidate);
+  }
+}
+
+
+// Types all the blobs as vertical or horizontal text or unknown and
+// returns true if the majority are vertical.
+// If the blobs are rotated, it is necessary to call CorrectForRotation
+// after rotating everything, otherwise the work done here will be enough.
+// If osd_blobs is not null, a list of blobs from the dominant textline
+// direction are returned for use in orientation and script detection.
+bool StrokeWidth::TestVerticalTextDirection(double find_vertical_text_ratio,
+                                            TO_BLOCK* block,
+                                            BLOBNBOX_CLIST* osd_blobs) {
+  // Tally and gather the normal and large blobs by apparent direction.
+  int num_vert = 0;
+  int num_horz = 0;
+  BLOBNBOX_CLIST vert_list;
+  BLOBNBOX_CLIST horz_list;
+  BLOBNBOX_CLIST other_list;
+  CollectHorizVertBlobs(&block->blobs, &num_vert, &num_horz,
+                        &vert_list, &horz_list, &other_list);
+  CollectHorizVertBlobs(&block->large_blobs, &num_vert, &num_horz,
+                        &vert_list, &horz_list, &other_list);
+
+  if (textord_debug_tabfind) {
+    tprintf("TextDir hbox=%d vs vbox=%d, %dH, %dV, %dN osd blobs\n",
+            num_horz, num_vert,
+            horz_list.length(), vert_list.length(),
+            other_list.length());
+  }
+
+  const bool want_osd = osd_blobs != nullptr;
+  if (want_osd && num_vert == 0 && num_horz == 0) {
+    // No blob has a definite direction: hand back the nondescript ones.
+    BLOBNBOX_C_IT out_it(osd_blobs);
+    out_it.add_list_after(&other_list);
+    return false;
+  }
+
+  // Vertical wins when it reaches the requested share of directed blobs.
+  const int vert_needed =
+      static_cast<int>((num_vert + num_horz) * find_vertical_text_ratio);
+  const bool is_vertical = num_vert >= vert_needed;
+  if (want_osd) {
+    // Return the collected blobs of whichever direction was chosen.
+    BLOBNBOX_C_IT out_it(osd_blobs);
+    out_it.add_list_after(is_vertical ? &vert_list : &horz_list);
+  }
+  return is_vertical;
+}
+
+// Corrects the data structures for the given rotation.
+void StrokeWidth::CorrectForRotation(const FCOORD& rotation,
+                                     ColPartitionGrid* part_grid) {
+  // Re-initialise this grid with the geometry of part_grid.
+  Init(part_grid->gridsize(), part_grid->bleft(), part_grid->tright());
+  grid_box_ = TBOX(bleft(), tright());
+  rerotation_ = FCOORD(rotation.x(), -rotation.y());  // y component negated
+}
+
+// Finds leader partitions and inserts them into the given part_grid.
+void StrokeWidth::FindLeaderPartitions(TO_BLOCK* block,
+                                       ColPartitionGrid* part_grid) {
+  Clear();
+  ColPartition_LIST found_leaders;
+  FindLeadersAndMarkNoise(block, &found_leaders);  // fills found_leaders
+  InsertBlobList(&block->blobs);
+  // Drain the list: flag blobs beside each leader, then insert it in part_grid.
+  ColPartition_IT leader_it(&found_leaders);
+  while (!leader_it.empty()) {
+    ColPartition* leader = leader_it.extract();
+    leader->ClaimBoxes();
+    MarkLeaderNeighbours(leader, LR_LEFT);
+    MarkLeaderNeighbours(leader, LR_RIGHT);
+    part_grid->InsertBBox(true, true, leader);
+    leader_it.forward();
+  }
+}
+
+// Finds and marks noise those blobs that look like bits of vertical lines
+// that would otherwise screw up layout analysis.
+void StrokeWidth::RemoveLineResidue(ColPartition_LIST* big_part_list) {
+  // Visit every blob in the grid. A tall, thin blob that is much taller than
+  // anything near it is treated as residue of a vertical line and is passed
+  // to ColPartition::MakeBigPartition.
+  BlobGridSearch full_search(this);
+  full_search.StartFullSearch();
+  for (BLOBNBOX* candidate = full_search.NextFullSearch();
+       candidate != nullptr; candidate = full_search.NextFullSearch()) {
+    TBOX cand_box = candidate->bounding_box();
+    const bool line_like =
+        cand_box.height() >= cand_box.width() * kLineResidueAspectRatio;
+    if (!line_like) continue;
+
+    // Look around the candidate, padded by a multiple of its own height.
+    const int pad = cand_box.height() * kLineResiduePadRatio;
+    TBOX neighbourhood = cand_box;
+    neighbourhood.pad(pad, pad);
+    const bool verbose = AlignedBlob::WithinTestRegion(2, cand_box.left(),
+                                                       cand_box.bottom());
+    // Record the height of the tallest other blob in the neighbourhood.
+    int tallest = 0;
+    BlobGridSearch near_search(this);
+    near_search.StartRectSearch(neighbourhood);
+    for (BLOBNBOX* other = near_search.NextRectSearch(); other != nullptr;
+         other = near_search.NextRectSearch()) {
+      if (other == candidate) continue;
+      const int other_height = other->bounding_box().height();
+      if (other_height > tallest) tallest = other_height;
+    }
+    if (verbose) {
+      tprintf("Max neighbour size=%d for candidate line box at:", tallest);
+      cand_box.print();
+    }
+
+    // Keep only candidates that tower over their tallest neighbour.
+    if (tallest * kLineResidueSizeRatio >= cand_box.height()) continue;
+    #ifndef GRAPHICS_DISABLED
+    if (leaders_win_ != nullptr) {
+      // Debugging: outline the blob in pink in the window that is also
+      // used for displaying leader detection.
+      leaders_win_->Pen(ScrollView::PINK);
+      leaders_win_->Rectangle(cand_box.left(), cand_box.bottom(),
+                              cand_box.right(), cand_box.top());
+    }
+    #endif // !GRAPHICS_DISABLED
+    ColPartition::MakeBigPartition(candidate, big_part_list);
+  }
+}
+
+// Types all the blobs as vertical text or horizontal text or unknown and
+// puts them into initial ColPartitions in the supplied part_grid.
+// rerotation determines how to get back to the image coordinates from the
+// blob coordinates (since they may have been rotated for vertical text).
+// block is the single block for the whole page or rectangle to be OCRed.
+// nontext_pix (full-size), is a binary mask used to prevent merges across
+// photo/text boundaries. It is not kept beyond this function.
+// denorm provides a mapping back to the image from the current blob
+// coordinate space.
+// projection provides a measure of textline density over the image and
+// provides functions to assist with diacritic detection. It should be a
+// pointer to a new TextlineProjection, and will be setup here.
+// part_grid is the output grid of textline partitions.
+// Large blobs that cause overlap are put in separate partitions and added
+// to the big_parts list.
+void StrokeWidth::GradeBlobsIntoPartitions(
+    PageSegMode pageseg_mode, const FCOORD& rerotation, TO_BLOCK* block,
+    Pix* nontext_pix, const DENORM* denorm, bool cjk_script,
+    TextlineProjection* projection, BLOBNBOX_LIST* diacritic_blobs,
+    ColPartitionGrid* part_grid, ColPartition_LIST* big_parts) {
+  nontext_map_ = nontext_pix;  // borrowed for this call, reset at the end
+  projection_ = projection;    // borrowed for this call, reset at the end
+  denorm_ = denorm;            // borrowed for this call, reset at the end
+  // Clear and re-insert to take advantage of the tab stops in the blobs.
+  Clear();
+  // Setup the strokewidth grid with the remaining non-noise, non-leader blobs.
+  InsertBlobs(block);
+
+  // Run FixBrokenCJK() again if the page is CJK (one pass, result ignored).
+  if (cjk_script) {
+    FixBrokenCJK(block);
+  }
+  FindTextlineFlowDirection(pageseg_mode, false);
+  projection_->ConstructProjection(block, rerotation, nontext_map_);
+#ifndef GRAPHICS_DISABLED
+  if (textord_tabfind_show_strokewidths) {
+    ScrollView* line_blobs_win = MakeWindow(0, 0, "Initial textline Blobs");
+    projection_->PlotGradedBlobs(&block->blobs, line_blobs_win);
+    projection_->PlotGradedBlobs(&block->small_blobs, line_blobs_win);
+  }
+#endif
+  projection_->MoveNonTextlineBlobs(&block->blobs, &block->noise_blobs);
+  projection_->MoveNonTextlineBlobs(&block->small_blobs, &block->noise_blobs);
+  // Clear and re-insert to take advantage of the removed diacritics.
+  Clear();
+  InsertBlobs(block);
+  FCOORD skew;  // output of FindInitialPartitions; not read in this function
+  FindTextlineFlowDirection(pageseg_mode, true);
+  PartitionFindResult r =
+      FindInitialPartitions(pageseg_mode, rerotation, true, block,
+                            diacritic_blobs, part_grid, big_parts, &skew);
+  if (r == PFR_NOISE) {
+    tprintf("Detected %d diacritics\n", diacritic_blobs->length());  // unguarded
+    // Noise was found, and removed, so rebuild the grid and try once more.
+    Clear();
+    InsertBlobs(block);
+    FindTextlineFlowDirection(pageseg_mode, true);
+    r = FindInitialPartitions(pageseg_mode, rerotation, false, block,  // false now
+                              diacritic_blobs, part_grid, big_parts, &skew);
+  }
+  nontext_map_ = nullptr;  // drop the borrowed pointers again
+  projection_ = nullptr;
+  denorm_ = nullptr;
+}
+
+static void PrintBoxWidths(BLOBNBOX* neighbour) {  // debug print of one blob
+  const TBOX& nbox = neighbour->bounding_box();  // corners are printed first
+  tprintf("Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%.1f\n",
+          nbox.left(), nbox.bottom(), nbox.right(), nbox.top(),
+          neighbour->horz_stroke_width(), neighbour->vert_stroke_width(),
+          2.0 * neighbour->cblob()->area()/neighbour->cblob()->perimeter());  // p-width; cblob() must be non-null
+}
+
+/** Handles a click event in a display window. */
+void StrokeWidth::HandleClick(int x, int y) {
+  BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>::HandleClick(x, y);
+  // Report on the first blob found around the click whose box contains it.
+  FCOORD click_pt(static_cast<float>(x), static_cast<float>(y));
+  BlobGridSearch search(this);
+  search.StartRadSearch(x, y, 1);
+  for (BLOBNBOX* hit = search.NextRadSearch(); hit != nullptr;
+       hit = search.NextRadSearch()) {
+    TBOX hit_box = hit->bounding_box();
+    if (!hit_box.contains(click_pt) || hit->cblob() == nullptr) continue;
+    // Widths of the clicked blob, then of each neighbour that is set.
+    PrintBoxWidths(hit);
+    BLOBNBOX* left = hit->neighbour(BND_LEFT);
+    if (left != nullptr) PrintBoxWidths(left);
+    BLOBNBOX* right = hit->neighbour(BND_RIGHT);
+    if (right != nullptr) PrintBoxWidths(right);
+    BLOBNBOX* above = hit->neighbour(BND_ABOVE);
+    if (above != nullptr) PrintBoxWidths(above);
+    BLOBNBOX* below = hit->neighbour(BND_BELOW);
+    if (below != nullptr) PrintBoxWidths(below);
+    int gap_sizes[BND_COUNT];
+    hit->NeighbourGaps(gap_sizes);
+    tprintf("Left gap=%d, right=%d, above=%d, below=%d, horz=%d, vert=%d\n"
+            "Good=    %d        %d        %d        %d\n",
+            gap_sizes[BND_LEFT], gap_sizes[BND_RIGHT],
+            gap_sizes[BND_ABOVE], gap_sizes[BND_BELOW],
+            hit->horz_possible(),
+            hit->vert_possible(),
+            hit->good_stroke_neighbour(BND_LEFT),
+            hit->good_stroke_neighbour(BND_RIGHT),
+            hit->good_stroke_neighbour(BND_ABOVE),
+            hit->good_stroke_neighbour(BND_BELOW));
+    break;  // only the first matching blob is reported
+  }
+}
+
+// Detects and marks leader dots/dashes.
+//    Leaders are horizontal chains of small or noise blobs that look
+//    monospace according to ColPartition::MarkAsLeaderIfMonospaced().
+// Detected leaders become the only occupants of the block->small_blobs list.
+// Non-leader small blobs get moved to the blobs list.
+// Non-leader noise blobs remain singletons in the noise list.
+// All small and noise blobs in high density regions are marked BTFT_NONTEXT.
+// block is the single block for the whole page or rectangle to be OCRed.
+// leader_parts is the output.
+void StrokeWidth::FindLeadersAndMarkNoise(TO_BLOCK* block,
+                                          ColPartition_LIST* leader_parts) {
+  InsertBlobList(&block->small_blobs);  // grid holds only small + noise blobs
+  InsertBlobList(&block->noise_blobs);
+  BlobGridSearch gsearch(this);
+  BLOBNBOX* bbox;
+  // For every bbox in the grid, set its neighbours.
+  gsearch.StartFullSearch();
+  while ((bbox = gsearch.NextFullSearch()) != nullptr) {
+    SetNeighbours(true, false, bbox);
+  }
+  ColPartition_IT part_it(leader_parts);
+  gsearch.StartFullSearch();  // second pass: chain linked blobs together
+  while ((bbox = gsearch.NextFullSearch()) != nullptr) {
+    if (bbox->flow() == BTFT_NONE) {
+      if (bbox->neighbour(BND_RIGHT) == nullptr &&
+          bbox->neighbour(BND_LEFT) == nullptr)
+        continue;  // no horizontal neighbour, so no chain to build
+      // Put all the linked blobs into a ColPartition.
+      ColPartition* part = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
+      BLOBNBOX* blob;
+      for (blob = bbox; blob != nullptr && blob->flow() == BTFT_NONE;
+           blob = blob->neighbour(BND_RIGHT))  // bbox itself, then rightwards
+        part->AddBox(blob);
+      for (blob = bbox->neighbour(BND_LEFT); blob != nullptr &&
+           blob->flow() == BTFT_NONE;
+           blob = blob->neighbour(BND_LEFT))  // then leftwards from bbox
+        part->AddBox(blob);
+      if (part->MarkAsLeaderIfMonospaced())
+        part_it.add_after_then_move(part);  // kept: appended to leader_parts
+      else
+        delete part;  // rejected chains are discarded
+    }
+  }
+#ifndef GRAPHICS_DISABLED
+  if (textord_tabfind_show_strokewidths) {
+    leaders_win_ = DisplayGoodBlobs("LeaderNeighbours", 0, 0);
+  }
+#endif
+  // Move any non-leaders from the small to the blobs list, as they are
+  // most likely dashes or broken characters.
+  BLOBNBOX_IT blob_it(&block->blobs);
+  BLOBNBOX_IT small_it(&block->small_blobs);
+  for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
+    BLOBNBOX* blob = small_it.data();
+    if (blob->flow() != BTFT_LEADER) {
+      if (blob->flow() == BTFT_NEIGHBOURS)
+        blob->set_flow(BTFT_NONE);
+      blob->ClearNeighbours();  // done for every blob that is moved
+      blob_it.add_to_end(small_it.extract());
+    }
+  }
+  // Move leaders from the noise list to the small list, leaving the small
+  // list exclusively leaders, so they don't get processed further,
+  // and the remaining small blobs all in the noise list.
+  BLOBNBOX_IT noise_it(&block->noise_blobs);
+  for (noise_it.mark_cycle_pt(); !noise_it.cycled_list(); noise_it.forward()) {
+    BLOBNBOX* blob = noise_it.data();
+    if (blob->flow() == BTFT_LEADER || blob->joined_to_prev()) {
+      small_it.add_to_end(noise_it.extract());
+    } else if (blob->flow() == BTFT_NEIGHBOURS) {
+      blob->set_flow(BTFT_NONE);  // stays in the noise list, links reset
+      blob->ClearNeighbours();
+    }
+  }
+  // Clear the grid as we don't want the small stuff hanging around in it.
+  Clear();
+}
+
+/** Inserts the blobs of block->blobs (normal) and block->large_blobs (large)
+ * into this grid via InsertBlobList. Blobs remain owned by the block. */
+void StrokeWidth::InsertBlobs(TO_BLOCK* block) {
+  InsertBlobList(&block->blobs);
+  InsertBlobList(&block->large_blobs);
+}
+
+// Looks on the given side of the leader partition for the nearest blob that
+// overlaps it vertically, and sets the (opposite) flag on that blob: a blob
+// to the left of the leader gets leader_on_right and vice versa.
+void StrokeWidth::MarkLeaderNeighbours(const ColPartition* part,
+                                       LeftOrRight side) {
+  const bool to_left = side == LR_LEFT;
+  const TBOX& leader_box = part->bounding_box();
+  const int start_x = to_left ? leader_box.left() : leader_box.right();
+  // Search to the side of the leader, keeping the blob with the smallest gap.
+  BlobGridSearch side_search(this);
+  side_search.StartSideSearch(start_x, leader_box.bottom(), leader_box.top());
+  BLOBNBOX* nearest = nullptr;
+  int nearest_gap = 0;
+  for (BLOBNBOX* candidate = side_search.NextSideSearch(to_left);
+       candidate != nullptr;
+       candidate = side_search.NextSideSearch(to_left)) {
+    const TBOX& candidate_box = candidate->bounding_box();
+    if (!candidate_box.y_overlap(leader_box)) continue;
+    const int gap = candidate_box.x_gap(leader_box);
+    if (gap > 2 * gridsize())
+      break;  // More than 2 * gridsize() away: give up.
+    if (nearest == nullptr || gap < nearest_gap) {
+      nearest = candidate;
+      nearest_gap = gap;
+    }
+  }
+  if (nearest == nullptr)
+    return;  // No blob close enough on this side.
+  if (to_left)
+    nearest->set_leader_on_right(true);
+  else
+    nearest->set_leader_on_left(true);
+#ifndef GRAPHICS_DISABLED
+  if (leaders_win_ != nullptr) {
+    leaders_win_->Pen(to_left ? ScrollView::RED : ScrollView::GREEN);
+    const TBOX& marked_box = nearest->bounding_box();
+    leaders_win_->Rectangle(marked_box.left(), marked_box.bottom(),
+                            marked_box.right(), marked_box.top());
+  }
+#endif  // !GRAPHICS_DISABLED
+}
+
+// Helper: upper quartile of the heights of the square-ish (CJK-like) blobs.
+static int UpperQuartileCJKSize(int gridsize, BLOBNBOX_LIST* blobs) {
+  STATS height_stats(0, gridsize * kMaxCJKSizeRatio);
+  BLOBNBOX_IT blob_it(blobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    const TBOX& box = blob_it.data()->bounding_box();
+    // Skip boxes that are too wide or too tall to be roughly square.
+    bool too_wide = box.width() > box.height() * kCJKAspectRatio;
+    bool too_tall = box.height() >= box.width() * kCJKAspectRatio;
+    if (!too_wide && !too_tall) height_stats.add(box.height(), 1);
+  }
+  return static_cast<int>(height_stats.ile(0.75f) + 0.5);
+}
+
+// Fixes broken CJK characters in block->blobs by really merging each blob
+// with the near/overlapping blobs found by AccumulateOverlaps: the master
+// takes all the outlines; the others are marked BRT_NOISE and deleted at the
+// end. Returns true if the merge count exceeds kBrokenCJKIterationFraction of
+// the remaining non-empty blobs, ie it may be worth running again.
+bool StrokeWidth::FixBrokenCJK(TO_BLOCK* block) {
+  BLOBNBOX_LIST* blobs = &block->blobs;
+  int median_height = UpperQuartileCJKSize(gridsize(), blobs);
+  int max_dist = static_cast<int>(median_height * kCJKBrokenDistanceFraction);
+  int max_height = static_cast<int>(median_height * kCJKAspectRatio);
+  int num_fixed = 0;  // Number of blobs that absorbed their neighbours.
+  BLOBNBOX_IT blob_it(blobs);
+  // NB: median_height is really the upper quartile of the CJK-like heights.
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    if (blob->cblob() == nullptr || blob->cblob()->out_list()->empty())
+      continue;  // No outlines, so nothing to merge into.
+    TBOX bbox = blob->bounding_box();
+    bool debug = AlignedBlob::WithinTestRegion(3, bbox.left(),
+                                               bbox.bottom());
+    if (debug) {
+      tprintf("Checking for Broken CJK (max size=%d):", max_height);
+      bbox.print();
+    }
+    // List the blobs that overlap or are near enough to merge; bbox grows.
+    BLOBNBOX_CLIST overlapped_blobs;
+    AccumulateOverlaps(blob, debug, max_height, max_dist,
+                       &bbox, &overlapped_blobs);
+    if (!overlapped_blobs.empty()) {
+      // There are overlapping blobs, so qualify them as being satisfactory
+      // before removing them from the grid and replacing them with the union.
+      // The final box must be roughly square.
+      if (bbox.width() > bbox.height() * kCJKAspectRatio ||
+          bbox.height() > bbox.width() * kCJKAspectRatio) {
+        if (debug) {
+          tprintf("Bad final aspectratio:");
+          bbox.print();
+        }
+        continue;
+      }
+      // There can't be too many blobs to merge.
+      if (overlapped_blobs.length() >= kCJKMaxComponents) {
+        if (debug)
+          tprintf("Too many neighbours: %d\n", overlapped_blobs.length());
+        continue;
+      }
+      // The strokewidths must match amongst the join candidates.
+      BLOBNBOX_C_IT n_it(&overlapped_blobs);
+      for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
+        BLOBNBOX* neighbour = nullptr;
+        neighbour = n_it.data();
+        if (!blob->MatchingStrokeWidth(*neighbour, kStrokeWidthFractionCJK,
+                                       kStrokeWidthCJK))
+          break;
+      }
+      if (!n_it.cycled_list()) {  // The loop broke early on a mismatch.
+        if (debug) {
+          tprintf("Bad stroke widths:");
+          PrintBoxWidths(blob);
+        }
+        continue;  // Not good enough.
+      }
+
+      // Merge all the candidates into blob.
+      // We must remove blob from the grid and reinsert it after merging
+      // to maintain the integrity of the grid.
+      RemoveBBox(blob);
+      // Everything else will be calculated later.
+      for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
+        BLOBNBOX* neighbour = n_it.data();
+        RemoveBBox(neighbour);
+        // Mark the soon-to-be-empty blob for deletion.
+        neighbour->set_region_type(BRT_NOISE);
+        blob->really_merge(neighbour);
+        if (rerotation_.x() != 1.0f || rerotation_.y() != 0.0f) {
+          blob->rotate_box(rerotation_);
+        }
+      }
+      InsertBBox(true, true, blob);
+      ++num_fixed;
+      if (debug) {
+        tprintf("Done! Final box:");
+        bbox.print();
+      }
+    }
+  }
+  // Count the remaining blobs, ie those that still have outlines.
+  int num_remaining = 0;
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    if (blob->cblob() != nullptr && !blob->cblob()->out_list()->empty()) {
+      ++num_remaining;
+    }
+  }
+  // Permanently delete all the marked blobs after first removing all
+  // references in the neighbour members.
+  block->DeleteUnownedNoise();
+  return num_fixed > num_remaining * kBrokenCJKIterationFraction;
+}
+
+// Helper deciding whether it is reasonable to merge nbox into bbox when
+// repairing broken CJK. Always outputs the gaps between the two boxes in
+// *x_gap and *y_gap. The merge is acceptable if neither gap exceeds max_dist,
+// the merged box fits in max_size both ways, and its aspect ratio is at most
+// kCJKAspectRatioIncrease times the aspect ratio of bbox, ie it improves or
+// at least does not get worse by much.
+static bool AcceptableCJKMerge(const TBOX& bbox, const TBOX& nbox,
+                               bool debug, int max_size, int max_dist,
+                               int* x_gap, int* y_gap) {
+  *x_gap = bbox.x_gap(nbox);
+  *y_gap = bbox.y_gap(nbox);
+  TBOX both(nbox);
+  both += bbox;
+  if (debug) {
+    tprintf("gaps = %d, %d, merged_box:", *x_gap, *y_gap);
+    both.print();
+  }
+  const bool near_enough = *x_gap <= max_dist && *y_gap <= max_dist;
+  const bool fits = both.width() <= max_size && both.height() <= max_size;
+  if (!near_enough || !fits) return false;
+  // Close enough to call overlapping. Compare the aspect ratios, each one
+  // inverted if below 1 so that it reads as longer side over shorter side.
+  double ratio_before = static_cast<double>(bbox.width()) / bbox.height();
+  if (ratio_before < 1.0) ratio_before = 1.0 / ratio_before;
+  double ratio_after = static_cast<double>(both.width()) / both.height();
+  if (ratio_after < 1.0) ratio_after = 1.0 / ratio_after;
+  return ratio_after <= ratio_before * kCJKAspectRatioIncrease;
+}
+
+// Collects into blobs the grid blobs that AcceptableCJKMerge accepts as
+// overlapping or near the running union box, and grows *bbox to that union.
+// not_this (the blob being repaired) is skipped. If the final *bbox overlaps
+// any recorded nearest rejected blob, blobs is emptied (*bbox stays grown).
+void StrokeWidth::AccumulateOverlaps(const BLOBNBOX* not_this, bool debug,
+                                     int max_size, int max_dist,
+                                     TBOX* bbox, BLOBNBOX_CLIST* blobs) {
+  // While searching, nearests holds the nearest failed blob in each
+  // direction. When we have a nearest in each of the 4 directions, then
+  // the search is over, and at this point the final bbox must not overlap
+  // any of the nearests.
+  BLOBNBOX* nearests[BND_COUNT];
+  for (auto & nearest : nearests) {
+    nearest = nullptr;
+  }
+  int x = (bbox->left() + bbox->right()) / 2;
+  int y = (bbox->bottom() + bbox->top()) / 2;
+  // Run a radial search from the box centre for blobs that are close enough.
+  BlobGridSearch radsearch(this);
+  radsearch.StartRadSearch(x, y, kCJKRadius);
+  BLOBNBOX* neighbour;
+  while ((neighbour = radsearch.NextRadSearch()) != nullptr) {
+    if (neighbour == not_this) continue;
+    TBOX nbox = neighbour->bounding_box();
+    int x_gap, y_gap;  // Always set by AcceptableCJKMerge below.
+    if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, max_dist,
+                           &x_gap, &y_gap)) {
+      // Close enough to call overlapping. Merge boxes.
+      *bbox += nbox;
+      blobs->add_sorted(SortByBoxLeft<BLOBNBOX>, true, neighbour);
+      if (debug) {
+        tprintf("Added:");
+        nbox.print();
+      }
+      // Since we merged, search the nearests, as some might now be mergeable.
+      for (int dir = 0; dir < BND_COUNT; ++dir) {
+        if (nearests[dir] == nullptr) continue;
+        nbox = nearests[dir]->bounding_box();
+        if (AcceptableCJKMerge(*bbox, nbox, debug, max_size,
+                               max_dist, &x_gap, &y_gap)) {
+          // Close enough to call overlapping. Merge boxes.
+          *bbox += nbox;
+          blobs->add_sorted(SortByBoxLeft<BLOBNBOX>, true, nearests[dir]);
+          if (debug) {
+            tprintf("Added:");
+            nbox.print();
+          }
+          nearests[dir] = nullptr;
+          dir = -1;  // Restart the search, as the box has grown again.
+        }
+      }
+    } else if (x_gap < 0 && x_gap <= y_gap) {
+      // A vertical neighbour. Record the nearest.
+      BlobNeighbourDir dir = nbox.top() > bbox->top() ? BND_ABOVE : BND_BELOW;
+      if (nearests[dir] == nullptr ||
+          y_gap < bbox->y_gap(nearests[dir]->bounding_box())) {
+        nearests[dir] = neighbour;
+      }
+    } else if (y_gap < 0 && y_gap <= x_gap) {
+      // A horizontal neighbour. Record the nearest.
+      BlobNeighbourDir dir = nbox.left() > bbox->left() ? BND_RIGHT : BND_LEFT;
+      if (nearests[dir] == nullptr ||
+          x_gap < bbox->x_gap(nearests[dir]->bounding_box())) {
+        nearests[dir] = neighbour;
+      }
+    }
+    // If all nearests are non-null, then we have finished.
+    if (nearests[BND_LEFT] && nearests[BND_RIGHT] &&
+        nearests[BND_ABOVE] && nearests[BND_BELOW])
+      break;
+  }
+  // Final overlap with a nearest is not allowed: it cancels the whole merge.
+  for (auto & nearest : nearests) {
+    if (nearest == nullptr) continue;
+    const TBOX& nbox = nearest->bounding_box();
+    if (debug) {
+      tprintf("Testing for overlap with:");
+      nbox.print();
+    }
+    if (bbox->overlap(nbox)) {
+      blobs->shallow_clear();
+      if (debug)
+        tprintf("Final box overlaps nearest\n");
+      return;
+    }
+  }
+}
+
+// For each blob in this grid, decides whether its textline runs horizontally
+// or vertically, from the gaps to its neighbours and from the evidence of
+// its neighbours up to 3rd order. Non-text tends to end up without a
+// definite direction.
+// The result is the setting of the neighbours and of the vert_possible /
+// horz_possible flags in the BLOBNBOXes currently in this grid.
+// pageseg_mode forces the direction when it allows only one of them, and
+// can veto a direction during smoothing.
+// This function is called more than once if page orientation is uncertain,
+// so display_if_debugging is true on the final call to display the results.
+void StrokeWidth::FindTextlineFlowDirection(PageSegMode pageseg_mode,
+                                            bool display_if_debugging) {
+  BlobGridSearch grid_search(this);
+  // Pass 1: link every blob in the grid to its neighbours.
+  grid_search.StartFullSearch();
+  while (BLOBNBOX* blob = grid_search.NextFullSearch()) {
+    SetNeighbours(false, display_if_debugging, blob);
+  }
+  // Pass 2: where vertical or horizontal wins by a big margin, drop the
+  // neighbours in the losing direction.
+  grid_search.StartFullSearch();
+  while (BLOBNBOX* blob = grid_search.NextFullSearch()) {
+    SimplifyObviousNeighbours(blob);
+  }
+  // Pass 3: try to make each blob only vertical or only horizontal. A
+  // single-direction pageseg_mode dictates the flags; else neighbours decide.
+  grid_search.StartFullSearch();
+  while (BLOBNBOX* blob = grid_search.NextFullSearch()) {
+    const bool vertical_only = FindingVerticalOnly(pageseg_mode);
+    if (vertical_only || FindingHorizontalOnly(pageseg_mode)) {
+      blob->set_vert_possible(vertical_only);
+      blob->set_horz_possible(!vertical_only);
+    } else {
+      SetNeighbourFlows(blob);
+    }
+  }
+#ifndef GRAPHICS_DISABLED
+  // Debug display of the blobs after the initial flow assignment.
+  if ((textord_tabfind_show_strokewidths && display_if_debugging) ||
+      textord_tabfind_show_strokewidths > 1) {
+    initial_widths_win_ = DisplayGoodBlobs("InitialStrokewidths", 400, 0);
+  }
+#endif
+  // Improve the flow direction with the neighbours, in three sweeps of the
+  // grid. The first sweep only touches blobs that are still ambiguous; the
+  // second and third pass reset_all = true so that firm values may be reset
+  // as well, which fixes renegades.
+  for (int sweep = 0; sweep < 3; ++sweep) {
+    const bool reset_all = sweep > 0;
+    grid_search.StartFullSearch();
+    while (BLOBNBOX* blob = grid_search.NextFullSearch()) {
+      SmoothNeighbourTypes(pageseg_mode, reset_all, blob);
+    }
+  }
+#ifndef GRAPHICS_DISABLED
+  // Debug display of the blobs after smoothing.
+  if ((textord_tabfind_show_strokewidths && display_if_debugging) ||
+      textord_tabfind_show_strokewidths > 1) {
+    widths_win_ = DisplayGoodBlobs("ImprovedStrokewidths", 800, 0);
+  }
+#endif
+}
+
+// Sets the neighbours and good_stroke_neighbours members of the blob by
+// running FindGoodNeighbour on all 4 sides. leaders selects the slightly
+// different rule for a good neighbour used when finding leader dots/dashes.
+// With activate_line_trap, a blob with any line-trap hits becomes a line.
+void StrokeWidth::SetNeighbours(bool leaders, bool activate_line_trap,
+                                BLOBNBOX* blob) {
+  int trap_hits = 0;
+  for (int side = 0; side < BND_COUNT; ++side) {
+    trap_hits += FindGoodNeighbour(static_cast<BlobNeighbourDir>(side),
+                                   leaders, blob);
+  }
+  if (trap_hits <= 0 || !activate_line_trap) return;
+  // It looks like a line so isolate it by clearing its neighbours.
+  blob->ClearNeighbours();
+  const TBOX& line_box = blob->bounding_box();
+  const bool wider_than_tall = line_box.width() > line_box.height();
+  blob->set_region_type(wider_than_tall ? BRT_HLINE : BRT_VLINE);
+}
+
+
+// Finds the best neighbour of the blob on the dir side and stores it in the
+// blob with set_neighbour, whether or not a good one was found (nullptr if
+// none qualified). The accompanying good flag requires enough overlap,
+// similar sizes and a matching stroke width.
+// Returns the number of blobs in the search area whose dimensions suggest
+// that this blob is a line separator. Leaders get lenient overlap limits.
+int StrokeWidth::FindGoodNeighbour(BlobNeighbourDir dir, bool leaders,
+                                   BLOBNBOX* blob) {
+  // Search for neighbours on the dir side of the blob.
+  TBOX blob_box = blob->bounding_box();
+  bool debug = AlignedBlob::WithinTestRegion(2, blob_box.left(),
+                                             blob_box.bottom());
+  if (debug) {
+    tprintf("FGN in dir %d for blob:", dir);
+    blob_box.print();
+  }
+  int top = blob_box.top();
+  int bottom = blob_box.bottom();
+  int left = blob_box.left();
+  int right = blob_box.right();
+  int width = right - left;
+  int height = top - bottom;
+
+  // A trap to detect lines tests for the min dimension of neighbours
+  // being larger than a multiple of the min dimension of the line
+  // and the larger dimension being smaller than a fraction of the max
+  // dimension of the line.
+  int line_trap_max = std::max(width, height) / kLineTrapLongest;
+  int line_trap_min = std::min(width, height) * kLineTrapShortest;
+  int line_trap_count = 0;
+
+  int min_good_overlap = (dir == BND_LEFT || dir == BND_RIGHT)
+                       ? height / 2 : width / 2;
+  int min_decent_overlap = (dir == BND_LEFT || dir == BND_RIGHT)
+                       ? height / 3 : width / 3;
+  if (leaders)
+    min_good_overlap = min_decent_overlap = 1;
+
+  int search_pad = static_cast<int>(
+      sqrt(static_cast<double>(width * height)) * kNeighbourSearchFactor);
+  if (gridsize() > search_pad)
+    search_pad = gridsize();
+  TBOX search_box = blob_box;
+  // Pad the search box by search_pad in the direction being searched.
+  switch (dir) {
+  case BND_LEFT:
+    search_box.set_left(search_box.left() - search_pad);
+    break;
+  case BND_RIGHT:
+    search_box.set_right(search_box.right() + search_pad);
+    break;
+  case BND_BELOW:
+    search_box.set_bottom(search_box.bottom() - search_pad);
+    break;
+  case BND_ABOVE:
+    search_box.set_top(search_box.top() + search_pad);
+    break;
+  case BND_COUNT:
+    return 0;
+  }
+
+  BlobGridSearch rectsearch(this);
+  rectsearch.StartRectSearch(search_box);
+  BLOBNBOX* best_neighbour = nullptr;
+  double best_goodness = 0.0;
+  bool best_is_good = false;
+  BLOBNBOX* neighbour;
+  while ((neighbour = rectsearch.NextRectSearch()) != nullptr) {
+    TBOX nbox = neighbour->bounding_box();
+    if (neighbour == blob)
+      continue;
+    int mid_x = (nbox.left() + nbox.right()) / 2;
+    if (mid_x < blob->left_rule() || mid_x > blob->right_rule())
+      continue;  // In a different column.
+    if (debug) {
+      tprintf("Neighbour at:");
+      nbox.print();
+    }
+
+    // Last-minute line detector. There is a small upper limit to the line
+    // width accepted by the morphological line detector.
+    int n_width = nbox.width();
+    int n_height = nbox.height();
+    if (std::min(n_width, n_height) > line_trap_min &&
+            std::max(n_width, n_height) < line_trap_max)
+      ++line_trap_count;
+    // Heavily joined text, such as Arabic may have very different sizes when
+    // looking at the maxes, but the heights may be almost identical, so check
+    // for a difference in height if looking sideways or width vertically.
+    if (TabFind::VeryDifferentSizes(std::max(n_width, n_height),
+                                    std::max(width, height)) &&
+        (((dir == BND_LEFT || dir ==BND_RIGHT) &&
+            TabFind::DifferentSizes(n_height, height)) ||
+         ((dir == BND_BELOW || dir ==BND_ABOVE) &&
+             TabFind::DifferentSizes(n_width, width)))) {
+      if (debug) tprintf("Bad size\n");
+      continue;  // Could be a different font size or non-text.
+    }
+    // Overlap of the blobs: vertical if looking sideways, else horizontal.
+    int overlap;
+    // If the overlap is along the short side of the neighbour, and it
+    // is fully overlapped, then perp_overlap holds the length of the long
+    // side of the neighbour. A measure to include hyphens and dashes as
+    // legitimate neighbours.
+    int perp_overlap;
+    int gap;
+    if (dir == BND_LEFT || dir == BND_RIGHT) {
+      overlap = std::min(static_cast<int>(nbox.top()), top) - std::max(static_cast<int>(nbox.bottom()), bottom);
+      if (overlap == nbox.height() && nbox.width() > nbox.height())
+        perp_overlap = nbox.width();
+      else
+        perp_overlap = overlap;
+      gap = dir == BND_LEFT ? left - nbox.left() : nbox.right() - right;
+      if (gap <= 0) {
+        if (debug) tprintf("On wrong side\n");
+        continue;  // On the wrong side.
+      }
+      gap -= n_width;  // Convert to the gap between the facing edges.
+    } else {
+      overlap = std::min(static_cast<int>(nbox.right()), right) - std::max(static_cast<int>(nbox.left()), left);
+      if (overlap == nbox.width() && nbox.height() > nbox.width())
+        perp_overlap = nbox.height();
+      else
+        perp_overlap = overlap;
+      gap = dir == BND_BELOW ? bottom - nbox.bottom() : nbox.top() - top;
+      if (gap <= 0) {
+        if (debug) tprintf("On wrong side\n");
+        continue;  // On the wrong side.
+      }
+      gap -= n_height;  // Convert to the gap between the facing edges.
+    }
+    if (-gap > overlap) {
+      if (debug) tprintf("Overlaps wrong way\n");
+      continue;  // Overlaps the wrong way.
+    }
+    if (perp_overlap < min_decent_overlap) {
+      if (debug) tprintf("Doesn't overlap enough\n");
+      continue;  // Doesn't overlap enough.
+    }
+    bool bad_sizes = TabFind::DifferentSizes(height, n_height) &&
+                     TabFind::DifferentSizes(width, n_width);
+    bool is_good = overlap >= min_good_overlap && !bad_sizes &&
+                   blob->MatchingStrokeWidth(*neighbour,
+                                             kStrokeWidthFractionTolerance,
+                                             kStrokeWidthTolerance);
+    // Best is a fuzzy combination of gap, overlap and is good.
+    // Basically if you make one thing twice as good without making
+    // anything else twice as bad, then it is better.
+    if (gap < 1) gap = 1;  // Keeps the divisor below positive.
+    double goodness = (1.0 + is_good) * overlap / gap;
+    if (debug) {
+      tprintf("goodness = %g vs best of %g, good=%d, overlap=%d, gap=%d\n",
+              goodness, best_goodness, is_good, overlap, gap);
+    }
+    if (goodness > best_goodness) {
+      best_neighbour = neighbour;
+      best_goodness = goodness;
+      best_is_good = is_good;
+    }
+  }
+  blob->set_neighbour(dir, best_neighbour, best_is_good);
+  return line_trap_count;
+}
+
+// Helper adding the 1st-order neighbours of blob, ie the non-null neighbour
+// in each of the BND_COUNT directions, to the neighbours list using
+// add_sorted with SortByBoxLeft.
+static void ListNeighbours(const BLOBNBOX* blob,
+                           BLOBNBOX_CLIST* neighbours) {
+  for (int side = 0; side < BND_COUNT; ++side) {
+    BLOBNBOX* next_door = blob->neighbour(static_cast<BlobNeighbourDir>(side));
+    if (next_door == nullptr) continue;
+    neighbours->add_sorted(SortByBoxLeft<BLOBNBOX>, true, next_door);
+  }
+}
+
+// Helper listing the 1st-order neighbours of blob together with the
+// 1st-order neighbours of each of those, ie its 2nd-order neighbours.
+static void List2ndNeighbours(const BLOBNBOX* blob,
+                              BLOBNBOX_CLIST* neighbours) {
+  ListNeighbours(blob, neighbours);
+  for (int side = 0; side < BND_COUNT; ++side) {
+    const BLOBNBOX* first_order =
+        blob->neighbour(static_cast<BlobNeighbourDir>(side));
+    if (first_order != nullptr)
+      ListNeighbours(first_order, neighbours);
+  }
+}
+
+// Helper listing the 1st and 2nd order neighbours of blob together with the
+// 1st and 2nd order neighbours of each direct neighbour (3rd order overall).
+static void List3rdNeighbours(const BLOBNBOX* blob,
+                              BLOBNBOX_CLIST* neighbours) {
+  List2ndNeighbours(blob, neighbours);
+  for (int side = 0; side < BND_COUNT; ++side) {
+    const BLOBNBOX* first_order =
+        blob->neighbour(static_cast<BlobNeighbourDir>(side));
+    if (first_order == nullptr) continue;
+    List2ndNeighbours(first_order, neighbours);
+  }
+}
+
+// Helper tallying, over a list of neighbours, the blobs whose gaps clearly
+// favour horizontal text (*pure_h_count) or vertical text (*pure_v_count).
+// Does nothing if the list has kMostlyOneDirRatio blobs or fewer.
+static void CountNeighbourGaps(bool debug, BLOBNBOX_CLIST* neighbours,
+                               int* pure_h_count, int* pure_v_count) {
+  if (neighbours->length() <= kMostlyOneDirRatio) return;
+  BLOBNBOX_C_IT n_it(neighbours);
+  for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
+    BLOBNBOX* neighbour = n_it.data();
+    int h_min, h_max, v_min, v_max;
+    neighbour->MinMaxGapsClipped(&h_min, &h_max, &v_min, &v_max);
+    if (debug)
+      tprintf("Hgaps [%d,%d], vgaps [%d,%d]:", h_min, h_max, v_min, v_max);
+    const bool beside_leader =
+        neighbour->leader_on_left() || neighbour->leader_on_right();
+    if (h_max < v_min || beside_leader) {
+      // Horizontal gaps are clear winners. Count a pure horizontal.
+      ++*pure_h_count;
+      if (debug) tprintf("Horz at:");
+    } else if (v_max < h_min) {
+      // Vertical gaps are clear winners. Count a pure vertical.
+      ++*pure_v_count;
+      if (debug) tprintf("Vert at:");
+    } else if (debug) {
+      tprintf("Neither at:");
+    }
+    if (debug) neighbour->bounding_box().print();
+  }
+}
+
+// Restricts the blob to horizontal-only or vertical-only flow where the
+// gaps of its neighbours, up to 3rd order, give clear evidence. Blobs with a
+// definite individual flow are left alone, and blobs without any neighbours
+// end up with neither direction possible.
+void StrokeWidth::SetNeighbourFlows(BLOBNBOX* blob) {
+  if (blob->DefiniteIndividualFlow())
+    return;
+  const TBOX& blob_box = blob->bounding_box();
+  bool debug = AlignedBlob::WithinTestRegion(2, blob_box.left(),
+                                             blob_box.bottom());
+  if (debug) {
+    tprintf("SetNeighbourFlows (current flow=%d, type=%d) on:",
+            blob->flow(), blob->region_type());
+    blob_box.print();
+  }
+  BLOBNBOX_CLIST nearby;
+  List3rdNeighbours(blob, &nearby);
+  // Votes from neighbours with purely horizontal / purely vertical gaps.
+  int h_votes = 0;
+  int v_votes = 0;
+  CountNeighbourGaps(debug, &nearby, &h_votes, &v_votes);
+  if (debug) {
+    HandleClick(blob_box.left() + 1, blob_box.bottom() + 1);
+    tprintf("SetFlows: h_count=%d, v_count=%d\n", h_votes, v_votes);
+  }
+  if (nearby.empty()) {
+    // Lonely blob. Can't tell its flow direction.
+    blob->set_vert_possible(false);
+    blob->set_horz_possible(false);
+    return;
+  }
+  // Both directions stay possible unless one kind of vote outnumbers the
+  // other by more than 2 to 1, in which case the loser is ruled out.
+  blob->set_vert_possible(true);
+  blob->set_horz_possible(true);
+  if (h_votes > 2 * v_votes) {
+    blob->set_vert_possible(false);
+  } else if (v_votes > 2 * h_votes) {
+    blob->set_horz_possible(false);
+  }
+}
+
+
+// Helper tallying the blobs of a list by flow type: each UniquelyHorizontal
+// blob adds one to *pure_h_count and each UniquelyVertical blob adds one to
+// *pure_v_count. The counts are accumulated, not reset.
+static void CountNeighbourTypes(BLOBNBOX_CLIST* neighbours,
+                                int* pure_h_count, int* pure_v_count) {
+  BLOBNBOX_C_IT n_it(neighbours);
+  for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
+    BLOBNBOX* neighbour = n_it.data();
+    *pure_h_count += neighbour->UniquelyHorizontal() ? 1 : 0;
+    *pure_v_count += neighbour->UniquelyVertical() ? 1 : 0;
+  }
+}
+
+// Removes the neighbours in the direction that clearly loses on a distance
+// margin. Good for isolating vertical text from neighbouring horizontal
+// text.
+void StrokeWidth::SimplifyObviousNeighbours(BLOBNBOX* blob) {
+  const TBOX& box = blob->bounding_box();
+  const int box_width = box.width();
+  const int box_height = box.height();
+  // Case 1: a complex (not stick-like) blob that is strongly elongated is
+  //         likely several characters, blurry and joined together.
+  const bool complex_shape = box_width > 3 * blob->area_stroke_width() &&
+                             box_height > 3 * blob->area_stroke_width();
+  if (complex_shape && box_width > 4 * box_height) {
+    // Horizontal conjoined text.
+    blob->set_neighbour(BND_ABOVE, nullptr, false);
+    blob->set_neighbour(BND_BELOW, nullptr, false);
+    return;
+  }
+  if (complex_shape && box_height > 4 * box_width) {
+    // Vertical conjoined text.
+    blob->set_neighbour(BND_LEFT, nullptr, false);
+    blob->set_neighbour(BND_RIGHT, nullptr, false);
+    return;
+  }
+
+  // Case 2: This blob is likely a single character.
+  const int margin = gridsize() / 2;
+  int h_min, h_max, v_min, v_max;
+  blob->MinMaxGapsClipped(&h_min, &h_max, &v_min, &v_max);
+  const bool by_leader = blob->leader_on_left() || blob->leader_on_right();
+  const bool horz_wins = h_max + margin < v_min && h_max < margin / 2;
+  const bool vert_wins = v_max + margin < h_min && v_max < margin / 2;
+  if (horz_wins || by_leader) {
+    blob->set_neighbour(BND_ABOVE, nullptr, false);
+    blob->set_neighbour(BND_BELOW, nullptr, false);
+  } else if (vert_wins) {
+    blob->set_neighbour(BND_LEFT, nullptr, false);
+    blob->set_neighbour(BND_RIGHT, nullptr, false);
+  }
+}
+
+// Smooths the vertical/horizontal type of the blob using a vote among its
+// 1st and 2nd order neighbours. Only blobs that are still ambiguous (both
+// directions possible) are processed, unless reset_all is true, in which
+// case every blob is reconsidered. The pageseg_mode can veto a direction.
+void StrokeWidth::SmoothNeighbourTypes(PageSegMode pageseg_mode, bool reset_all,
+                                       BLOBNBOX* blob) {
+  const TBOX& blob_box = blob->bounding_box();
+  const bool ambiguous = blob->vert_possible() && blob->horz_possible();
+  if (!ambiguous && !reset_all) {
+    // Nothing to smooth; just report it if the blob is in the test region.
+    if (AlignedBlob::WithinTestRegion(2, blob_box.left(), blob_box.bottom())) {
+      HandleClick(blob_box.left() + 1, blob_box.bottom() + 1);
+      tprintf("Clean on pass 3!\n");
+    }
+    return;
+  }
+  // Count the uniquely horizontal and uniquely vertical blobs among the
+  // 1st and 2nd order neighbours.
+  BLOBNBOX_CLIST nearby;
+  List2ndNeighbours(blob, &nearby);
+  int h_votes = 0;
+  int v_votes = 0;
+  CountNeighbourTypes(&nearby, &h_votes, &v_votes);
+  if (AlignedBlob::WithinTestRegion(2, blob_box.left(), blob_box.bottom())) {
+    HandleClick(blob_box.left() + 1, blob_box.bottom() + 1);
+    tprintf("pure_h=%d, pure_v=%d\n", h_votes, v_votes);
+  }
+  if (h_votes > v_votes && !FindingVerticalOnly(pageseg_mode)) {
+    // Horizontal wins the vote: the blob can only be horizontal.
+    blob->set_vert_possible(false);
+    blob->set_horz_possible(true);
+  } else if (v_votes > h_votes && !FindingHorizontalOnly(pageseg_mode)) {
+    // Vertical wins the vote: the blob can only be vertical.
+    blob->set_horz_possible(false);
+    blob->set_vert_possible(true);
+  }
+}
+
+// Partition creation. Accumulates vertical and horizontal text chains (each
+// direction is skipped when pageseg_mode restricts finding to the other),
+// puts the remaining blobs in as unknowns, and then merges/splits to
+// minimize overlap and smoothes the types with neighbours. rerotation is
+// passed to the smoothing along with the nontext_map_ image.
+// If find_problems is true and diacritic_blobs is not nullptr, detects
+// possible noise pollution by the growth in partition overlap created by
+// merging the diacritics. If excessive, the noise is moved to
+// diacritic_blobs and PFR_NOISE is returned; the job is then incomplete and
+// FindInitialPartitions must be called again after cleaning up the partly
+// done work. Skew detection is still a TODO(rays): skew_angle is not
+// written and PFR_SKEW is not returned by this function.
+PartitionFindResult StrokeWidth::FindInitialPartitions(
+    PageSegMode pageseg_mode, const FCOORD& rerotation, bool find_problems,
+    TO_BLOCK* block, BLOBNBOX_LIST* diacritic_blobs,
+    ColPartitionGrid* part_grid, ColPartition_LIST* big_parts,
+    FCOORD* skew_angle) {
+  if (!FindingHorizontalOnly(pageseg_mode)) FindVerticalTextChains(part_grid);
+  if (!FindingVerticalOnly(pageseg_mode)) FindHorizontalTextChains(part_grid);
+#ifndef GRAPHICS_DISABLED
+  if (textord_tabfind_show_strokewidths) {
+    chains_win_ = MakeWindow(0, 400, "Initial text chains");
+    part_grid->DisplayBoxes(chains_win_);
+    projection_->DisplayProjection();
+  }
+#endif
+  if (find_problems) {
+    // TODO(rays) Do something to find skew, set skew_angle and return if there
+    // is some. Until then this branch is an empty placeholder.
+  }
+  part_grid->SplitOverlappingPartitions(big_parts);
+  EasyMerges(part_grid);
+  RemoveLargeUnusedBlobs(block, part_grid, big_parts);
+  TBOX grid_box(bleft(), tright());
+  while (part_grid->GridSmoothNeighbours(BTFT_CHAIN, nontext_map_, grid_box,
+                                         rerotation));  // Repeat until false.
+  while (part_grid->GridSmoothNeighbours(BTFT_NEIGHBOURS, nontext_map_,
+                                         grid_box, rerotation));
+  int pre_overlap = part_grid->ComputeTotalOverlap(nullptr);  // Baseline.
+  TestDiacritics(part_grid, block);
+  MergeDiacritics(block, part_grid);
+  if (find_problems && diacritic_blobs != nullptr &&
+      DetectAndRemoveNoise(pre_overlap, grid_box, block, part_grid,
+                           diacritic_blobs)) {
+    return PFR_NOISE;
+  }
+#ifndef GRAPHICS_DISABLED
+  if (textord_tabfind_show_strokewidths) {
+    textlines_win_ = MakeWindow(400, 400, "GoodTextline blobs");
+    part_grid->DisplayBoxes(textlines_win_);
+    diacritics_win_ = DisplayDiacritics("Diacritics", 0, 0, block);
+  }
+#endif
+  PartitionRemainingBlobs(pageseg_mode, part_grid);
+  part_grid->SplitOverlappingPartitions(big_parts);
+  EasyMerges(part_grid);
+  while (part_grid->GridSmoothNeighbours(BTFT_CHAIN, nontext_map_, grid_box,
+                                         rerotation));
+  while (part_grid->GridSmoothNeighbours(BTFT_NEIGHBOURS, nontext_map_,
+                                         grid_box, rerotation));
+  // Now eliminate strong stuff in a sea of the opposite.
+  while (part_grid->GridSmoothNeighbours(BTFT_STRONG_CHAIN, nontext_map_,
+                                         grid_box, rerotation));
+#ifndef GRAPHICS_DISABLED
+  if (textord_tabfind_show_strokewidths) {
+    smoothed_win_ = MakeWindow(800, 400, "Smoothed blobs");
+    part_grid->DisplayBoxes(smoothed_win_);
+  }
+#endif
+  return PFR_OK;
+}
+
+// Compares the current total partition overlap with pre_overlap. If it has
+// grown enough to count as noise, every unowned diacritic on
+// block->noise_blobs whose box, padded by gridsize(), gets a hit in the grid
+// of overlapping partitions is moved to diacritic_blobs. Returns true iff so.
+bool StrokeWidth::DetectAndRemoveNoise(int pre_overlap, const TBOX& grid_box,
+                                       TO_BLOCK* block,
+                                       ColPartitionGrid* part_grid,
+                                       BLOBNBOX_LIST* diacritic_blobs) {
+  ColPartitionGrid* overlap_grid = nullptr;
+  const int post_overlap = part_grid->ComputeTotalOverlap(&overlap_grid);
+  // A zero baseline would make any overlap at all look like growth.
+  if (pre_overlap == 0) pre_overlap = 1;
+  BLOBNBOX_IT dest_it(diacritic_blobs);
+  if (overlap_grid == nullptr) return false;
+  // Both the relative growth and the absolute area thresholds must be hit.
+  const bool too_noisy =
+      post_overlap > pre_overlap * kNoiseOverlapGrowthFactor &&
+      post_overlap > grid_box.area() * kNoiseOverlapAreaFactor;
+  if (too_noisy) {
+#ifndef GRAPHICS_DISABLED
+    if (textord_tabfind_show_strokewidths) {
+      ScrollView* noise_win = MakeWindow(1000, 500, "Noise Areas");
+      overlap_grid->DisplayBoxes(noise_win);
+    }
+#endif
+    part_grid->DeleteNonLeaderParts();
+    BLOBNBOX_IT noise_it(&block->noise_blobs);
+    ColPartitionGridSearch part_search(overlap_grid);
+    for (noise_it.mark_cycle_pt(); !noise_it.cycled_list();
+         noise_it.forward()) {
+      BLOBNBOX* candidate = noise_it.data();
+      candidate->ClearNeighbours();
+      const bool unowned_diacritic =
+          candidate->IsDiacritic() && candidate->owner() == nullptr;
+      if (!unowned_diacritic) continue;  // Not a noise candidate.
+      TBOX reach(candidate->bounding_box());
+      reach.pad(gridsize(), gridsize());
+      part_search.StartRectSearch(reach);
+      if (part_search.NextRectSearch() == nullptr) continue;
+      // An overlapping partition is nearby: treat the blob as possible noise.
+      candidate->set_owns_cblob(true);
+      candidate->compute_bounding_box();
+      dest_it.add_after_then_move(noise_it.extract());
+    }
+  }
+  // The overlap grid and its partitions are freed here on either outcome.
+  overlap_grid->DeleteParts();
+  delete overlap_grid;
+  return too_noisy;
+}
+
+// Returns the neighbour of blob in direction dir if it can extend a vertical
+// text chain, or nullptr otherwise. To qualify the neighbour must exist, have
+// no owner, not be uniquely horizontal, and name blob as its own neighbour
+// in the opposite direction.
+static BLOBNBOX* MutualUnusedVNeighbour(const BLOBNBOX* blob,
+                                        BlobNeighbourDir dir) {
+  BLOBNBOX* candidate = blob->neighbour(dir);
+  if (candidate == nullptr) return nullptr;
+  const bool unusable =
+      candidate->owner() != nullptr || candidate->UniquelyHorizontal();
+  if (unusable) return nullptr;
+  const bool mutual = candidate->neighbour(DirOtherWay(dir)) == blob;
+  return mutual ? candidate : nullptr;
+}
+
+// Builds a BRT_VERT_TEXT ColPartition from every chain of mutually linked,
+// unowned blobs that can be reached above and below a uniquely vertical blob.
+void StrokeWidth::FindVerticalTextChains(ColPartitionGrid* part_grid) {
+  // Choose the PageSegMode that forces vertical textlines under the current
+  // rotation, for use when the partitions are completed.
+  const PageSegMode chain_mode =
+      rerotation_.y() == 0.0f ? PSM_SINGLE_BLOCK_VERT_TEXT : PSM_SINGLE_COLUMN;
+  BlobGridSearch grid_search(this);
+  grid_search.StartFullSearch();
+  for (BLOBNBOX* seed = grid_search.NextFullSearch(); seed != nullptr;
+       seed = grid_search.NextFullSearch()) {
+    // Seeds must have no horizontal hope and not already be in a chain.
+    if (seed->owner() != nullptr || !seed->UniquelyVertical()) continue;
+    BLOBNBOX* link = MutualUnusedVNeighbour(seed, BND_ABOVE);
+    if (link == nullptr) continue;
+    // Gather the seed and everything linked to it into one ColPartition.
+    ColPartition* chain = new ColPartition(BRT_VERT_TEXT, ICOORD(0, 1));
+    chain->AddBox(seed);
+    // Follow the BND_ABOVE links first...
+    for (; link != nullptr; link = MutualUnusedVNeighbour(link, BND_ABOVE)) {
+      chain->AddBox(link);
+    }
+    // ...then the BND_BELOW links, starting again from the seed.
+    for (link = MutualUnusedVNeighbour(seed, BND_BELOW); link != nullptr;
+         link = MutualUnusedVNeighbour(link, BND_BELOW)) {
+      chain->AddBox(link);
+    }
+    CompletePartition(chain_mode, chain, part_grid);
+  }
+}
+
+// Returns the neighbour of blob in direction dir if it can extend a
+// horizontal text chain, or nullptr otherwise. To qualify the neighbour must
+// exist, have no owner, not be uniquely vertical, and name blob as its own
+// neighbour in the opposite direction.
+static BLOBNBOX* MutualUnusedHNeighbour(const BLOBNBOX* blob,
+                                        BlobNeighbourDir dir) {
+  BLOBNBOX* candidate = blob->neighbour(dir);
+  if (candidate == nullptr) return nullptr;
+  const bool unusable =
+      candidate->owner() != nullptr || candidate->UniquelyVertical();
+  if (unusable) return nullptr;
+  const bool mutual = candidate->neighbour(DirOtherWay(dir)) == blob;
+  return mutual ? candidate : nullptr;
+}
+
+// Finds horizontal chains of text-like blobs and puts them in ColPartitions.
+// Each chain is seeded by an unowned, uniquely horizontal blob and extended
+// to the right and then to the left through MutualUnusedHNeighbour links.
+void StrokeWidth::FindHorizontalTextChains(ColPartitionGrid* part_grid) {
+  // A PageSegMode that forces horizontal textlines with the current rotation.
+  PageSegMode pageseg_mode =
+      rerotation_.y() == 0.0f ? PSM_SINGLE_COLUMN : PSM_SINGLE_BLOCK_VERT_TEXT;
+  BlobGridSearch gsearch(this);
+  BLOBNBOX* bbox;
+  gsearch.StartFullSearch();
+  while ((bbox = gsearch.NextFullSearch()) != nullptr) {
+    BLOBNBOX* blob;
+    if (bbox->owner() == nullptr && bbox->UniquelyHorizontal() &&
+        (blob = MutualUnusedHNeighbour(bbox, BND_RIGHT)) != nullptr) {
+      // Put all the linked blobs into a ColPartition.
+      ColPartition* part = new ColPartition(BRT_TEXT, ICOORD(0, 1));
+      part->AddBox(bbox);
+      for (; blob != nullptr; blob = MutualUnusedHNeighbour(blob, BND_RIGHT))
+        part->AddBox(blob);
+      // Every step of the leftward walk uses the horizontal-chain test too,
+      // so that uniquely vertical blobs are never pulled into the chain.
+      for (blob = MutualUnusedHNeighbour(bbox, BND_LEFT); blob != nullptr;
+           blob = MutualUnusedHNeighbour(blob, BND_LEFT))
+        part->AddBox(blob);
+      CompletePartition(pageseg_mode, part, part_grid);
+    }
+  }
+}
+
+// Finds diacritics and saves their base character in the blob.
+// The objective is to move all diacritics to the noise_blobs list, so
+// they don't mess up early textline finding/merging, or force splits
+// on textlines that overlap a bit. Blobs that become diacritics must be
+// either part of no ColPartition (nullptr owner) or in a small partition
+// (under 3 boxes, not block owned) whose blobs ALL qualify, in which case
+// the partition is exploded (deleted) back to its blobs.
+void StrokeWidth::TestDiacritics(ColPartitionGrid* part_grid, TO_BLOCK* block) {
+  BlobGrid small_grid(gridsize(), bleft(), tright());
+  small_grid.InsertBlobList(&block->noise_blobs);
+  small_grid.InsertBlobList(&block->blobs);
+  int medium_diacritics = 0;
+  int small_diacritics = 0;
+  BLOBNBOX_IT small_it(&block->noise_blobs);
+  for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
+    BLOBNBOX* blob = small_it.data();
+    if (blob->owner() == nullptr && !blob->IsDiacritic() &&
+        DiacriticBlob(&small_grid, blob)) {
+      ++small_diacritics;
+    }
+  }
+  BLOBNBOX_IT blob_it(&block->blobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    if (blob->IsDiacritic()) {
+      small_it.add_to_end(blob_it.extract());
+      continue;  // Already a diacritic.
+    }
+    ColPartition* part = blob->owner();
+    if (part == nullptr && DiacriticBlob(&small_grid, blob)) {
+      ++medium_diacritics;
+      RemoveBBox(blob);
+      small_it.add_to_end(blob_it.extract());
+    } else if (part != nullptr && !part->block_owned() &&
+        part->boxes_count() < 3) {
+      // We allow blobs in small partitions to become diacritics if ALL the
+      // blobs in the partition qualify as we can then cleanly delete the
+      // partition, turn all the blobs in it to diacritics and they can be
+      // merged into the base character partition more easily than merging
+      // the partitions.
+      BLOBNBOX_C_IT box_it(part->boxes());
+      for (box_it.mark_cycle_pt(); !box_it.cycled_list() &&
+           DiacriticBlob(&small_grid, box_it.data());
+           box_it.forward());  // Empty body: stops at the first failure.
+      if (box_it.cycled_list()) {
+        // They are all good.
+        while (!box_it.empty()) {
+          // Liberate the blob from its partition so it can be treated
+          // as a diacritic and merged explicitly with the base part.
+          // The blob is really owned by the block. The partition "owner"
+          // is nulled to allow the blob to get merged with its base character
+          // partition.
+          BLOBNBOX* box = box_it.extract();
+          box->set_owner(nullptr);
+          box_it.forward();
+          ++medium_diacritics;
+          // We remove the blob from the grid so it isn't found by subsequent
+          // searches where we might not want to include diacritics.
+          RemoveBBox(box);
+        }
+        // We only move the one blob to the small list here, but the others
+        // all get moved by the test at the top of the loop.
+        small_it.add_to_end(blob_it.extract());
+        part_grid->RemoveBBox(part);
+        delete part;
+      }
+    } else if (AlignedBlob::WithinTestRegion(2, blob->bounding_box().left(),
+                                             blob->bounding_box().bottom())) {
+      tprintf("Blob not available to be a diacritic at:");
+      blob->bounding_box().print();
+    }
+  }
+  if (textord_tabfind_show_strokewidths) {
+    tprintf("Found %d small diacritics, %d medium\n",
+            small_diacritics, medium_diacritics);
+  }
+}
+
+// Searches this grid for an appropriately close and sized neighbour of the
+// given [small] blob. If such a blob is found, the diacritic base is saved
+// in the blob and true is returned.
+// The small_grid is a secondary grid that contains the small/noise objects
+// that are not in this grid, but may be useful for determining a connection
+// between blob and its potential base character. (See DiacriticXGapFilled.)
+bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob) {
+  if (BLOBNBOX::UnMergeableType(blob->region_type()) ||
+      blob->region_type() == BRT_VERT_TEXT)
+    return false;
+  TBOX small_box(blob->bounding_box());
+  bool debug = AlignedBlob::WithinTestRegion(2, small_box.left(),
+                                             small_box.bottom());
+  if (debug) {
+    tprintf("Testing blob for diacriticness at:");
+    small_box.print();
+  }
+  int x = (small_box.left() + small_box.right()) / 2;
+  int y = (small_box.bottom() + small_box.top()) / 2;
+  int grid_x, grid_y;  // NOTE(review): not read after GridCoords below.
+  GridCoords(x, y, &grid_x, &grid_y);
+  int height = small_box.height();
+  // Setup a rectangle search to find its nearest base-character neighbour.
+  // We keep 2 different best candidates:
+  // best_x_overlap is a category of base characters that have an overlap in x
+  // (like a acute) in which we look for the least y-gap, computed using the
+  // projection to favor base characters in the same textline.
+  // best_y_overlap is a category of base characters that have no x overlap,
+  // (nominally a y-overlap is preferred but not essential) in which we
+  // look for the least weighted sum of x-gap and y-gap, with x-gap getting
+  // a lower weight to catch quotes at the end of a textline.
+  // NOTE that x-gap and y-gap are measured from the nearest side of the base
+  // character to the FARTHEST side of the diacritic to allow small diacritics
+  // to be a reasonable distance away, but not big diacritics.
+  BLOBNBOX* best_x_overlap = nullptr;
+  BLOBNBOX* best_y_overlap = nullptr;
+  int best_total_dist = 0;
+  int best_y_gap = 0;
+  TBOX best_xbox;
+  // TODO(rays) the search box could be setup using the projection as a guide.
+  TBOX search_box(small_box);
+  int x_pad = IntCastRounded(gridsize() * kDiacriticXPadRatio);
+  int y_pad = IntCastRounded(gridsize() * kDiacriticYPadRatio);
+  search_box.pad(x_pad, y_pad);
+  BlobGridSearch rsearch(this);
+  rsearch.SetUniqueMode(true);
+  int min_height = height * kMinDiacriticSizeRatio;
+  rsearch.StartRectSearch(search_box);
+  BLOBNBOX* neighbour;
+  while ((neighbour = rsearch.NextRectSearch()) != nullptr) {
+    if (BLOBNBOX::UnMergeableType(neighbour->region_type()) ||
+        neighbour == blob || neighbour->owner() == blob->owner())
+      continue;
+    TBOX nbox = neighbour->bounding_box();
+    if (neighbour->owner() == nullptr || neighbour->owner()->IsVerticalType() ||
+        (neighbour->flow() != BTFT_CHAIN &&
+            neighbour->flow() != BTFT_STRONG_CHAIN)) {
+      if (debug) {
+        tprintf("Neighbour not strong enough:");
+        nbox.print();
+      }
+      continue;  // Diacritics must be attached to strong text.
+    }
+    if (nbox.height() < min_height) {
+      if (debug) {
+        tprintf("Neighbour not big enough:");
+        nbox.print();
+      }
+      continue;  // Too small to be the base character.
+    }
+    int x_gap = small_box.x_gap(nbox);
+    int y_gap = small_box.y_gap(nbox);
+    int total_distance = projection_->DistanceOfBoxFromBox(small_box, nbox,
+                                                           true, denorm_,
+                                                           debug);
+    if (debug) tprintf("xgap=%d, y=%d, total dist=%d\n",
+                       x_gap, y_gap, total_distance);
+    if (total_distance >
+        neighbour->owner()->median_height() * kMaxDiacriticDistanceRatio) {
+      if (debug) {
+        tprintf("Neighbour with median size %d too far away:",
+                neighbour->owner()->median_height());
+        neighbour->bounding_box().print();
+      }
+      continue;  // Diacritics must not be too distant.
+    }
+    if (x_gap <= 0) {
+      if (debug) {
+        tprintf("Computing reduced box for :");
+        nbox.print();
+      }
+      int left = small_box.left() - small_box.width();
+      int right = small_box.right() + small_box.width();
+      nbox = neighbour->BoundsWithinLimits(left, right);
+      y_gap = small_box.y_gap(nbox);
+      if (best_x_overlap == nullptr || y_gap < best_y_gap) {
+        best_x_overlap = neighbour;
+        best_xbox = nbox;
+        best_y_gap = y_gap;
+        if (debug) {
+          tprintf("New best:");
+          nbox.print();
+        }
+      } else if (debug) {
+        tprintf("Shrunken box doesn't win:");
+        nbox.print();
+      }
+    } else if (blob->ConfirmNoTabViolation(*neighbour)) {
+      if (best_y_overlap == nullptr || total_distance < best_total_dist) {
+        if (debug) {
+          tprintf("New best y overlap:");
+          nbox.print();
+        }
+        best_y_overlap = neighbour;
+        best_total_dist = total_distance;
+      } else if (debug) {
+        tprintf("New y overlap box doesn't win:");
+        nbox.print();
+      }
+    } else if (debug) {
+      tprintf("Neighbour wrong side of a tab:");
+      nbox.print();
+    }
+  }
+  if (best_x_overlap != nullptr &&
+      (best_y_overlap == nullptr ||
+       best_xbox.major_y_overlap(best_y_overlap->bounding_box()))) {
+    blob->set_diacritic_box(best_xbox);
+    blob->set_base_char_blob(best_x_overlap);
+    if (debug) {
+      tprintf("DiacriticBlob OK! (x-overlap:");
+      small_box.print();
+      best_xbox.print();
+    }
+    return true;
+  }
+  if (best_y_overlap != nullptr &&
+      DiacriticXGapFilled(small_grid, small_box,
+                          best_y_overlap->bounding_box()) &&
+      NoNoiseInBetween(small_box, best_y_overlap->bounding_box())) {
+    blob->set_diacritic_box(best_y_overlap->bounding_box());
+    blob->set_base_char_blob(best_y_overlap);
+    if (debug) {
+      tprintf("DiacriticBlob OK! (y-overlap:");
+      small_box.print();
+      best_y_overlap->bounding_box().print();
+    }
+    return true;
+  }
+  if (debug) {
+    tprintf("DiacriticBlob fails:");
+    small_box.print();
+    tprintf("Best x+y gap = %d, y = %d\n", best_total_dist, best_y_gap);
+    if (best_y_overlap != nullptr) {
+      tprintf("XGapFilled=%d, NoiseBetween=%d\n",
+              DiacriticXGapFilled(small_grid, small_box,
+                                  best_y_overlap->bounding_box()),
+              NoNoiseInBetween(small_box, best_y_overlap->bounding_box()));
+    }
+  }
+  return false;
+}
+
+// Decides whether a diacritic that sits a long way from its base char in x
+// is still connected to it by other marks, as in:
+// Eg: line end.....'
+// The quote is far from the end of the line yet must be its diacritic. To
+// rule out its being part of an image or of another text block, the gap is
+// walked from the base char towards the diacritic, and every stride of at
+// most a fraction of the base char height must hold a blob of grid.
+//                          '<--Diacritic
+// |---------|
+// |         |<-toobig-gap->
+// | Base    |<ok gap>
+// |---------|        x<-----Dot occupying gap
+// Returns true if the gap was filled. The grid is const really.
+bool StrokeWidth::DiacriticXGapFilled(BlobGrid* grid,
+                                      const TBOX& diacritic_box,
+                                      const TBOX& base_box) {
+  const int stride = IntCastRounded(base_box.height() *
+                                    kMaxDiacriticGapToBaseCharHeight);
+  // covered grows from the base char towards the diacritic as marks are found.
+  TBOX covered(base_box);
+  for (int remaining = diacritic_box.x_gap(covered); remaining > stride;
+       remaining = diacritic_box.x_gap(covered)) {
+    // Probe one stride beyond covered, on the side facing the diacritic.
+    TBOX probe(covered);
+    if (diacritic_box.left() > probe.right()) {
+      const int edge = probe.right();
+      probe.set_left(edge);
+      probe.set_right(edge + stride);
+    } else {
+      const int edge = probe.left();
+      probe.set_right(edge);
+      probe.set_left(edge - stride);
+    }
+    BlobGridSearch strip_search(grid);
+    strip_search.StartRectSearch(probe);
+    bool advanced = false;
+    BLOBNBOX* mark;
+    while (!advanced && (mark = strip_search.NextRectSearch()) != nullptr) {
+      const TBOX& mark_box = mark->bounding_box();
+      if (mark_box.x_gap(diacritic_box) >= remaining) continue;
+      // This mark is nearer to the diacritic: absorb its x-extent.
+      if (mark_box.left() < covered.left()) covered.set_left(mark_box.left());
+      if (mark_box.right() > covered.right())
+        covered.set_right(mark_box.right());
+      advanced = true;
+    }
+    if (!advanced) return false;  // Found a big gap.
+  }
+  return true;  // The gap was filled.
+}
+
+// Adds each qualifying diacritic on block->noise_blobs to the ColPartition
+// that owns its base character blob, then forgets the base character link.
+void StrokeWidth::MergeDiacritics(TO_BLOCK* block,
+                                  ColPartitionGrid* part_grid) {
+  BLOBNBOX_IT noise_it(&block->noise_blobs);
+  for (noise_it.mark_cycle_pt(); !noise_it.cycled_list(); noise_it.forward()) {
+    BLOBNBOX* diacritic = noise_it.data();
+    BLOBNBOX* base = diacritic->base_char_blob();
+    if (base == nullptr) continue;  // No base character was recorded.
+    ColPartition* target = base->owner();
+    // Needs a partition-owned, non block-owned base and an unowned diacritic.
+    const bool can_merge = target != nullptr && !target->block_owned() &&
+        diacritic->owner() == nullptr && diacritic->IsDiacritic();
+    if (can_merge) {
+      // Out of the grid while it grows, as its bounding box may change.
+      part_grid->RemoveBBox(target);
+      target->AddBox(diacritic);
+      diacritic->set_region_type(target->blob_type());
+      diacritic->set_flow(target->flow());
+      diacritic->set_owner(target);
+      part_grid->InsertBBox(true, true, target);
+    }
+    // Null every base char link before any blobs get deleted.
+    diacritic->set_base_char_blob(nullptr);
+  }
+}
+
+// Hands every blob on block->large_blobs that no ColPartition owns yet to
+// ColPartition::MakeBigPartition together with big_parts. Genuine characters
+// should have been claimed by a partition by now, so the leftovers are
+// probably drop caps or vertically touching characters.
+// part_grid is not used.
+void StrokeWidth::RemoveLargeUnusedBlobs(TO_BLOCK* block,
+                                         ColPartitionGrid* part_grid,
+                                         ColPartition_LIST* big_parts) {
+  BLOBNBOX_IT big_it(&block->large_blobs);
+  big_it.mark_cycle_pt();
+  while (!big_it.cycled_list()) {
+    BLOBNBOX* candidate = big_it.data();
+    if (candidate->owner() == nullptr) {
+      ColPartition::MakeBigPartition(candidate, big_parts);
+    }
+    big_it.forward();
+  }
+}
+
+// Sweeps the grid cell by cell and turns the blobs that still have no owner
+// into ColPartitions via MakePartitionsFromCellList. A cell's blobs are
+// combined only if every blob seen in the cell is unowned and BTFT_NONTEXT.
+void StrokeWidth::PartitionRemainingBlobs(PageSegMode pageseg_mode,
+                                          ColPartitionGrid* part_grid) {
+  BlobGridSearch grid_search(this);
+  int last_x = -1;
+  int last_y = -1;
+  BLOBNBOX_CLIST pending;
+  BLOBNBOX_C_IT pending_it(&pending);
+  bool all_noise = true;
+  grid_search.StartFullSearch();
+  for (BLOBNBOX* bbox = grid_search.NextFullSearch(); bbox != nullptr;
+       bbox = grid_search.NextFullSearch()) {
+    const int cell_x = grid_search.GridX();
+    const int cell_y = grid_search.GridY();
+    const bool same_cell = cell_x == last_x && cell_y == last_y;
+    if (!same_cell) {
+      // Entered a new cell: flush what was collected from the previous one.
+      MakePartitionsFromCellList(pageseg_mode, all_noise, part_grid,
+                                 &pending);
+      pending_it.set_to_list(&pending);
+      last_x = cell_x;
+      last_y = cell_y;
+      all_noise = true;
+    }
+    const bool unowned = bbox->owner() == nullptr;
+    if (unowned) pending_it.add_to_end(bbox);
+    // Any owned blob or any blob that is not BTFT_NONTEXT spoils the cell.
+    if (!unowned || bbox->flow() != BTFT_NONTEXT) all_noise = false;
+  }
+  // Flush the final cell.
+  MakePartitionsFromCellList(pageseg_mode, all_noise, part_grid, &pending);
+}
+
+// Empties cell_list into new ColPartitions, which are completed and inserted
+// in part_grid. With combine set all the blobs share one partition that takes
+// its type and flow from the first blob; otherwise each blob gets its own.
+void StrokeWidth::MakePartitionsFromCellList(PageSegMode pageseg_mode,
+                                             bool combine,
+                                             ColPartitionGrid* part_grid,
+                                             BLOBNBOX_CLIST* cell_list) {
+  if (cell_list->empty()) return;
+  BLOBNBOX_C_IT it(cell_list);
+  if (!combine) {
+    // One partition per blob.
+    while (!it.empty()) {
+      BLOBNBOX* item = it.extract();
+      ColPartition* solo = new ColPartition(item->region_type(), ICOORD(0, 1));
+      solo->set_flow(item->flow());
+      solo->AddBox(item);
+      CompletePartition(pageseg_mode, solo, part_grid);
+      it.forward();
+    }
+    return;
+  }
+  BLOBNBOX* first = it.extract();
+  ColPartition* merged = new ColPartition(first->region_type(), ICOORD(0, 1));
+  merged->AddBox(first);
+  merged->set_flow(first->flow());
+  for (it.forward(); !it.empty(); it.forward()) merged->AddBox(it.extract());
+  CompletePartition(pageseg_mode, merged, part_grid);
+}
+
+// Finishes a newly assembled ColPartition: computes its limits, scores it with
+// the projection, sets its types, claims its boxes and inserts it in part_grid.
+void StrokeWidth::CompletePartition(PageSegMode pageseg_mode,
+                                    ColPartition* part,
+                                    ColPartitionGrid* part_grid) {
+  part->ComputeLimits();
+  const TBOX bounds = part->bounding_box();
+  const bool debug =
+      AlignedBlob::WithinTestRegion(2, bounds.left(), bounds.bottom());
+  int score = projection_->EvaluateColPartition(*part, denorm_, debug);
+  // pageseg_mode overrules a score of the wrong sign; singletons go to 0.
+  if (score > 0 && FindingVerticalOnly(pageseg_mode)) {
+    score = (part->boxes_count() == 1) ? 0 : -2;
+  } else if (score < 0 && FindingHorizontalOnly(pageseg_mode)) {
+    score = (part->boxes_count() == 1) ? 0 : 2;
+  }
+  part->SetRegionAndFlowTypesFromProjectionValue(score);
+  part->ClaimBoxes();
+  part_grid->InsertBBox(true, true, part);
+}
+
+// Merges partitions where the merge appears harmless (see ConfirmEasyMerge).
+void StrokeWidth::EasyMerges(ColPartitionGrid* part_grid) {
+  using std::placeholders::_1;
+  using std::placeholders::_2;
+  auto box_cb = std::bind(&StrokeWidth::OrientationSearchBox, this, _1, _2);
+  auto ok_cb = std::bind(&StrokeWidth::ConfirmEasyMerge, this, _1, _2);
+  part_grid->Merges(box_cb, ok_cb);
+}
+
+// Callback for EasyMerges. Extends box per part's orientation; returns true.
+bool StrokeWidth::OrientationSearchBox(ColPartition* part, TBOX* box) {
+  const bool vertical = part->IsVerticalType();
+  const int pad = vertical ? box->width() : box->height();
+  if (vertical) {  // Grow up and down by the box width.
+    box->set_top(box->top() + pad);
+    box->set_bottom(box->bottom() - pad);
+  } else {  // Grow left and right by the box height.
+    box->set_left(box->left() - pad);
+    box->set_right(box->right() + pad);
+  }
+  return true;
+}
+
+// Merge confirmation callback for EasyMerges. Returns true if p1 and p2,
+// which must both be non-null and non-empty, may be merged.
+bool StrokeWidth::ConfirmEasyMerge(const ColPartition* p1,
+                                   const ColPartition* p2) {
+  ASSERT_HOST(p1 != nullptr && p2 != nullptr);
+  ASSERT_HOST(!p1->IsEmpty() && !p2->IsEmpty());
+  // Don't merge confirmed image with text.
+  if (p1->flow() == BTFT_NONTEXT && p2->flow() >= BTFT_CHAIN) return false;
+  if (p1->flow() >= BTFT_CHAIN && p2->flow() == BTFT_NONTEXT) return false;
+  // For vertical text the overlap must be in the text line.
+  if ((p1->IsVerticalType() || p2->IsVerticalType()) &&
+      p1->HCoreOverlap(*p2) <= 0) {
+    if (!p1->IsSingleton() && !p2->IsSingleton()) return false;
+    if (!p1->bounding_box().major_overlap(p2->bounding_box())) return false;
+  }
+  // Likewise for horizontal text, but an OK diacritic merge also suffices.
+  if ((p1->IsHorizontalType() || p2->IsHorizontalType()) &&
+      p1->VCoreOverlap(*p2) <= 0) {
+    if (!p1->IsSingleton() && !p2->IsSingleton()) return false;
+    if (!p1->bounding_box().major_overlap(p2->bounding_box()) &&
+        !p1->OKDiacriticMerge(*p2, false) &&
+        !p2->OKDiacriticMerge(*p1, false))
+      return false;
+  }
+  if (!p1->ConfirmNoTabViolation(*p2)) return false;
+  if (p1->flow() <= BTFT_NONTEXT && p2->flow() <= BTFT_NONTEXT) return true;
+  return NoNoiseInBetween(p1->bounding_box(), p2->bounding_box());
+}
+
+// True if ImageFind::BlankImageInBetween accepts the gap from box1 to box2.
+bool StrokeWidth::NoNoiseInBetween(const TBOX& box1, const TBOX& box2) const {
+  return ImageFind::BlankImageInBetween(
+      box1, box2, grid_box_, rerotation_, nontext_map_);
+}
+
+#ifndef GRAPHICS_DISABLED
+
+/** Opens a window called window_name at (x, y) and draws the bounding box of
+ * every blob in the grid, colored by BLOBNBOX::TextlineColor.
+ */
+ScrollView* StrokeWidth::DisplayGoodBlobs(const char* window_name,
+                                          int x, int y) {
+  ScrollView* win = MakeWindow(x, y, window_name);
+  win->Brush(ScrollView::NONE);
+
+  BlobGridSearch grid_search(this);
+  grid_search.StartFullSearch();
+  for (BLOBNBOX* bbox = grid_search.NextFullSearch(); bbox != nullptr;
+       bbox = grid_search.NextFullSearch()) {
+    const int goodness = bbox->GoodTextBlob();
+    // Uniquely vertical/horizontal blobs are shown as that text type; the
+    // horizontal test runs last and so wins if both hold.
+    BlobRegionType shown_type = bbox->region_type();
+    if (bbox->UniquelyVertical()) shown_type = BRT_VERT_TEXT;
+    if (bbox->UniquelyHorizontal()) shown_type = BRT_TEXT;
+    // Blobs with no flow yet get one derived from their goodness score.
+    BlobTextFlowType shown_flow = bbox->flow();
+    if (shown_flow == BTFT_NONE) {
+      switch (goodness) {
+        case 0:
+          shown_flow = BTFT_NEIGHBOURS;
+          break;
+        case 1:
+          shown_flow = BTFT_CHAIN;
+          break;
+        default:
+          shown_flow = BTFT_STRONG_CHAIN;
+          break;
+      }
+    }
+    win->Pen(BLOBNBOX::TextlineColor(shown_type, shown_flow));
+    const TBOX& box = bbox->bounding_box();
+    win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
+  }
+  win->Update();
+  return win;
+}
+
+// Draws a vertical line at the x-centre of blob spanning it and its base char.
+static void DrawDiacriticJoiner(const BLOBNBOX* blob, ScrollView* window) {
+  const TBOX& box = blob->bounding_box();
+  const int mid_x = (box.left() + box.right()) / 2;
+  window->Line(mid_x, std::max<int>(box.top(), blob->base_char_top()),
+               mid_x, std::min<int>(box.bottom(), blob->base_char_bottom()));
+}
+
+// Opens a window called window_name at (x, y) and outlines every blob on
+// block->blobs and then on block->noise_blobs. Diacritics are drawn in green
+// together with a joiner line; the other blobs use their BoxColor on the
+// blobs list and white on the noise list. Returns the window.
+ScrollView* StrokeWidth::DisplayDiacritics(const char* window_name,
+                                           int x, int y, TO_BLOCK* block) {
+  ScrollView* win = MakeWindow(x, y, window_name);
+  win->Brush(ScrollView::NONE);
+
+  // Draws the rectangles of one list. plain_white selects the pen that is
+  // used for the blobs that are not diacritics.
+  auto draw_list = [win](BLOBNBOX_LIST* list, bool plain_white) {
+    BLOBNBOX_IT it(list);
+    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+      BLOBNBOX* blob = it.data();
+      // Diacritics are green and get a joiner line.
+      if (blob->IsDiacritic()) {
+        win->Pen(ScrollView::GREEN);
+        DrawDiacriticJoiner(blob, win);
+      } else if (plain_white) {
+        win->Pen(ScrollView::WHITE);
+      } else {
+        win->Pen(blob->BoxColor());
+      }
+      const TBOX& box = blob->bounding_box();
+      win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
+    }
+  };
+  // Main blobs keep their own box color; noise blobs are drawn white.
+  draw_list(&block->blobs, false);
+  draw_list(&block->noise_blobs, true);
+  win->Update();
+  return win;
+}
+
+#endif // !GRAPHICS_DISABLED
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/strokewidth.h b/tesseract/src/textord/strokewidth.h
new file mode 100644
index 00000000..81b07c55
--- /dev/null
+++ b/tesseract/src/textord/strokewidth.h
@@ -0,0 +1,355 @@
+///////////////////////////////////////////////////////////////////////
+// File:        strokewidth.h
+// Description: Subclass of BBGrid to find uniformity of strokewidth.
+// Author:      Ray Smith
+// Created:     Mon Mar 31 16:17:01 PST 2008
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_STROKEWIDTH_H_
+#define TESSERACT_TEXTORD_STROKEWIDTH_H_
+
+#include "blobbox.h"        // BlobNeighbourDir.
+#include "blobgrid.h"         // Base class.
+#include "colpartitiongrid.h"
+#include "textlineprojection.h"
+
+class DENORM;
+class ScrollView;
+class TO_BLOCK;
+
+namespace tesseract {
+
+class ColPartition_LIST;
+class TabFind;
+class TextlineProjection;
+
+// Side selector used in place of a bool for direction-controlling arguments.
+enum LeftOrRight {
+  LR_LEFT,
+  LR_RIGHT
+};
+
+// Return value of FindInitialPartitions. Anything other than PFR_OK means
+// that severe skew or noise was detected and the job is incomplete.
+enum PartitionFindResult {
+  PFR_OK,    // Everything is OK.
+  PFR_SKEW,  // Skew was detected and rotated.
+  PFR_NOISE  // Noise was detected and removed.
+};
+
+/**
+ * The StrokeWidth class holds all the normal and large blobs.
+ * It is used to find good large blobs and move them to the normal blobs
+ * by virtue of having a reasonable strokewidth compatible neighbour.
+ */
+class StrokeWidth : public BlobGrid {
+ public:
+  StrokeWidth(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+  ~StrokeWidth() override;
+
+  // Sets the neighbours member of the medium-sized blobs in the block.
+  // Searches on 4 sides of each blob for similar-sized, similar-strokewidth
+  // blobs and sets pointers to the good neighbours.
+  void SetNeighboursOnMediumBlobs(TO_BLOCK* block);
+
+  // Sets the neighbour/textline writing direction members of the medium
+  // and large blobs with optional repair of broken CJK characters first.
+  // Repair of broken CJK is needed here because broken CJK characters
+  // can fool the textline direction detection algorithm.
+  void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode,
+                                            bool cjk_merge,
+                                            TO_BLOCK* input_block);
+
+  // To save computation, the process of generating partitions is broken
+  // into the following 4 steps:
+  // TestVerticalTextDirection
+  // CorrectForRotation (used only if a rotation is to be applied)
+  // FindLeaderPartitions
+  // GradeBlobsIntoPartitions.
+  // These functions are all required, in sequence, except for
+  // CorrectForRotation, which is not needed if no rotation is applied.
+
+  // Types all the blobs as vertical or horizontal text or unknown and
+  // returns true if the majority are vertical.
+  // If the blobs are rotated, it is necessary to call CorrectForRotation
+  // after rotating everything, otherwise the work done here will be enough.
+  // If osd_blobs is not null, a list of blobs from the dominant textline
+  // direction are returned for use in orientation and script detection.
+  // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio.
+  bool TestVerticalTextDirection(double find_vertical_text_ratio,
+                                 TO_BLOCK* block,
+                                 BLOBNBOX_CLIST* osd_blobs);
+
+  // Corrects the data structures for the given rotation.
+  void CorrectForRotation(const FCOORD& rerotation,
+                          ColPartitionGrid* part_grid);
+
+  // Finds leader partitions and inserts them into the given grid.
+  void FindLeaderPartitions(TO_BLOCK* block,
+                            ColPartitionGrid* part_grid);
+
+  // Finds and marks as noise those blobs that look like bits of vertical lines
+  // that would otherwise screw up layout analysis.
+  void RemoveLineResidue(ColPartition_LIST* big_part_list);
+
+  // Types all the blobs as vertical text or horizontal text or unknown and
+  // puts them into initial ColPartitions in the supplied part_grid.
+  // rerotation determines how to get back to the image coordinates from the
+  // blob coordinates (since they may have been rotated for vertical text).
+  // block is the single block for the whole page or rectangle to be OCRed.
+  // nontext_pix (full-size), is a binary mask used to prevent merges across
+  // photo/text boundaries. It is not kept beyond this function.
+  // denorm provides a mapping back to the image from the current blob
+  // coordinate space.
+  // projection provides a measure of textline density over the image and
+  // provides functions to assist with diacritic detection. It should be a
+  // pointer to a new TextlineProjection, and will be setup here.
+  // part_grid is the output grid of textline partitions.
+  // Large blobs that cause overlap are put in separate partitions and added
+  // to the big_parts list.
+  void GradeBlobsIntoPartitions(PageSegMode pageseg_mode,
+                                const FCOORD& rerotation, TO_BLOCK* block,
+                                Pix* nontext_pix, const DENORM* denorm,
+                                bool cjk_script, TextlineProjection* projection,
+                                BLOBNBOX_LIST* diacritic_blobs,
+                                ColPartitionGrid* part_grid,
+                                ColPartition_LIST* big_parts);
+
+  // Handles a click event in a display window.
+  void HandleClick(int x, int y) override;
+
+ private:
+  // Computes the noise_density_ by summing the number of elements in a
+  // neighbourhood of each grid cell.
+  void ComputeNoiseDensity(TO_BLOCK* block, TabFind* line_grid);
+
+  // Detects and marks leader dots/dashes.
+  //    Leaders are horizontal chains of small or noise blobs that look
+  //    monospace according to ColPartition::MarkAsLeaderIfMonospaced().
+  // Detected leaders become the only occupants of the block->small_blobs list.
+  // Non-leader small blobs get moved to the blobs list.
+  // Non-leader noise blobs remain singletons in the noise list.
+  // All small and noise blobs in high density regions are marked BTFT_NONTEXT.
+  // block is the single block for the whole page or rectangle to be OCRed.
+  // leader_parts is the output.
+  void FindLeadersAndMarkNoise(TO_BLOCK* block,
+                               ColPartition_LIST* leader_parts);
+
+  /** Inserts the block blobs (normal and large) into this grid.
+   * Blobs remain owned by the block. */
+  void InsertBlobs(TO_BLOCK* block);
+
+  // Fix broken CJK characters, using the fake joined blobs mechanism.
+  // Blobs are really merged, ie the master takes all the outlines and the
+  // others are deleted.
+  // Returns true if sufficient blobs are merged that it may be worth running
+  // again, due to a better estimate of character size.
+  bool FixBrokenCJK(TO_BLOCK* block);
+
+  // Collect blobs that overlap or are within max_dist of the input bbox.
+  // Return them in the list of blobs and expand the bbox to be the union
+  // of all the boxes. not_this is excluded from the search, as are blobs
+  // that cause the merged box to exceed max_size in either dimension.
+  void AccumulateOverlaps(const BLOBNBOX* not_this, bool debug,
+                          int max_size, int max_dist,
+                          TBOX* bbox, BLOBNBOX_CLIST* blobs);
+
+  // For each blob in this grid, finds the textline direction to be horizontal
+  // or vertical according to distance to neighbours and 1st and 2nd order
+  // neighbours. Non-text tends to end up without a definite direction.
+  // Result is setting of the neighbours and vert_possible/horz_possible
+  // flags in the BLOBNBOXes currently in this grid.
+  // This function is called more than once if page orientation is uncertain,
+  // so display_if_debugging is true on the final call to display the results.
+  void FindTextlineFlowDirection(PageSegMode pageseg_mode,
+                                 bool display_if_debugging);
+
+  // Sets the neighbours and good_stroke_neighbours members of the blob by
+  // searching close on all 4 sides.
+  // When finding leader dots/dashes, there is a slightly different rule for
+  // what makes a good neighbour.
+  // If activate_line_trap, then line-like objects are found and isolated.
+  void SetNeighbours(bool leaders, bool activate_line_trap, BLOBNBOX* blob);
+
+  // Sets the good_stroke_neighbours member of the blob if it has a
+  // GoodNeighbour on the given side.
+  // Also sets the neighbour in the blob, whether or not a good one is found.
+  // Return value is the number of neighbours in the line trap size range.
+  // Leaders get extra special lenient treatment.
+  int FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, BLOBNBOX* blob);
+
+  // Makes the blob to be only horizontal or vertical where evidence
+  // is clear based on gaps of 2nd order neighbours.
+  void SetNeighbourFlows(BLOBNBOX* blob);
+
+  // Nullify the neighbours in the wrong directions where the direction
+  // is clear-cut based on a distance margin. Good for isolating vertical
+  // text from neighbouring horizontal text.
+  void SimplifyObviousNeighbours(BLOBNBOX* blob);
+
+  // Smoothes the vertical/horizontal type of the blob based on the 2nd-order
+  // neighbours. If reset_all (no such parameter; presumably desperate -- TODO
+  // confirm) is true, all blobs are changed, otherwise only ambiguous ones.
+  void SmoothNeighbourTypes(PageSegMode pageseg_mode, bool desperate,
+                            BLOBNBOX* blob);
+
+  // Checks the left or right side of the given leader partition and sets the
+  // (opposite) leader_on_right or leader_on_left flags for blobs
+  // that are next to the given side of the given leader partition.
+  void MarkLeaderNeighbours(const ColPartition* part, LeftOrRight side);
+
+  // Partition creation. Accumulates vertical and horizontal text chains,
+  // puts the remaining blobs in as unknowns, and then merges/splits to
+  // minimize overlap and smoothes the types with neighbours and the color
+  // image if provided. rerotation is used to rotate the coordinate space
+  // back to the nontext_map_ image.
+  // If find_problems is true, detects possible noise pollution by the amount
+  // of partition overlap that is created by the diacritics. If excessive, the
+  // noise is separated out into diacritic blobs, and PFR_NOISE is returned.
+  // [TODO(rays): if the partition overlap is caused by heavy skew, deskews
+  // the components, saves the skew_angle and returns PFR_SKEW.] If the return
+  // is not PFR_OK, the job is incomplete, and FindInitialPartitions must be
+  // called again after cleaning up the partly done work.
+  PartitionFindResult FindInitialPartitions(PageSegMode pageseg_mode,
+                                            const FCOORD& rerotation,
+                                            bool find_problems, TO_BLOCK* block,
+                                            BLOBNBOX_LIST* diacritic_blobs,
+                                            ColPartitionGrid* part_grid,
+                                            ColPartition_LIST* big_parts,
+                                            FCOORD* skew_angle);
+  // Detects noise by a significant increase in partition overlap from
+  // pre_overlap to now, and removes noise from the union of all the overlapping
+  // partitions, placing the blobs in diacritic_blobs. Returns true if any noise
+  // was found and removed.
+  bool DetectAndRemoveNoise(int pre_overlap, const TBOX& grid_box,
+                            TO_BLOCK* block, ColPartitionGrid* part_grid,
+                            BLOBNBOX_LIST* diacritic_blobs);
+  // Finds vertical chains of text-like blobs and puts them in ColPartitions.
+  void FindVerticalTextChains(ColPartitionGrid* part_grid);
+  // Finds horizontal chains of text-like blobs and puts them in ColPartitions.
+  void FindHorizontalTextChains(ColPartitionGrid* part_grid);
+  // Finds diacritics and saves their base character in the blob.
+  void TestDiacritics(ColPartitionGrid* part_grid, TO_BLOCK* block);
+  // Searches this grid for an appropriately close and sized neighbour of the
+  // given [small] blob. If such a blob is found, the diacritic base is saved
+  // in the blob and true is returned.
+  // The small_grid is a secondary grid that contains the small/noise objects
+  // that are not in this grid, but may be useful for determining a connection
+  // between blob and its potential base character. (See DiacriticXGapFilled.)
+  bool DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob);
+  // Returns true if there is no gap between the base char and the diacritic
+  // bigger than a fraction of the height of the base char:
+  // Eg: line end.....'
+  // The quote is a long way from the end of the line, yet it needs to be a
+  // diacritic. To determine that the quote is not part of an image, or
+  // a different text block, we check for other marks in the gap between
+  // the base char and the diacritic.
+  //                          '<--Diacritic
+  // |---------|
+  // |         |<-toobig-gap->
+  // | Base    |<ok gap>
+  // |---------|        x<-----Dot occupying gap
+  // The grid is const really.
+  bool DiacriticXGapFilled(BlobGrid* grid, const TBOX& diacritic_box,
+                           const TBOX& base_box);
+  // Merges diacritics with the ColPartition of the base character blob.
+  void MergeDiacritics(TO_BLOCK* block, ColPartitionGrid* part_grid);
+  // Any blobs on the large_blobs list of block that are still unowned by a
+  // ColPartition, are probably drop-cap or vertically touching so the blobs
+  // are removed to the big_parts list and treated separately.
+  void RemoveLargeUnusedBlobs(TO_BLOCK* block,
+                              ColPartitionGrid* part_grid,
+                              ColPartition_LIST* big_parts);
+
+  // All remaining unused blobs are put in individual ColPartitions.
+  void PartitionRemainingBlobs(PageSegMode pageseg_mode,
+                               ColPartitionGrid* part_grid);
+
+  // If combine, put all blobs in the cell_list into a single partition,
+  // otherwise put each one into its own partition.
+  void MakePartitionsFromCellList(PageSegMode pageseg_mode, bool combine,
+                                  ColPartitionGrid* part_grid,
+                                  BLOBNBOX_CLIST* cell_list);
+
+  // Helper function to finish setting up a ColPartition and insert into
+  // part_grid.
+  void CompletePartition(PageSegMode pageseg_mode, ColPartition* part,
+                         ColPartitionGrid* part_grid);
+
+  // Helper returns true if we are looking only for vertical textlines,
+  // taking into account any rotation that has been done.
+  bool FindingVerticalOnly(PageSegMode pageseg_mode) const {
+    if (rerotation_.y() == 0.0f) {
+      return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
+    }
+    return !PSM_ORIENTATION_ENABLED(pageseg_mode) &&
+           pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT;
+  }
+  // Helper returns true if we are looking only for horizontal textlines,
+  // taking into account any rotation that has been done.
+  bool FindingHorizontalOnly(PageSegMode pageseg_mode) const {
+    if (rerotation_.y() == 0.0f) {
+      return !PSM_ORIENTATION_ENABLED(pageseg_mode) &&
+             pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT;
+    }
+    return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
+  }
+
+  // Merge partitions where the merge appears harmless.
+  void EasyMerges(ColPartitionGrid* part_grid);
+
+  // Compute a search box based on the orientation of the partition.
+  // Returns true if a suitable box can be calculated.
+  // Callback for EasyMerges.
+  bool OrientationSearchBox(ColPartition* part, TBOX* box);
+
+  // Merge confirmation callback for EasyMerges.
+  bool ConfirmEasyMerge(const ColPartition* p1, const ColPartition* p2);
+
+  // Returns true if there is no significant noise in between the boxes.
+  bool NoNoiseInBetween(const TBOX& box1, const TBOX& box2) const;
+
+  // Displays the blobs colored according to the number of good neighbours
+  // and the vertical/horizontal flow.
+  ScrollView* DisplayGoodBlobs(const char* window_name, int x, int y);
+
+  // Displays blobs colored according to whether or not they are diacritics.
+  ScrollView* DisplayDiacritics(const char* window_name,
+                                int x, int y, TO_BLOCK* block);
+
+ private:
+  // Image map of photo/noise areas on the page. Borrowed pointer (not owned.)
+  Pix* nontext_map_;
+  // Textline projection map. Borrowed pointer.
+  TextlineProjection* projection_;
+  // DENORM used by projection_ to get back to image coords. Borrowed pointer.
+  const DENORM* denorm_;
+  // Bounding box of the grid.
+  TBOX grid_box_;
+  // Rerotation to get back to the original image.
+  FCOORD rerotation_;
+  // Windows for debug display.
+  ScrollView* leaders_win_;
+  ScrollView* initial_widths_win_;
+  ScrollView* widths_win_;
+  ScrollView* chains_win_;
+  ScrollView* diacritics_win_;
+  ScrollView* textlines_win_;
+  ScrollView* smoothed_win_;
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_TEXTORD_STROKEWIDTH_H_
diff --git a/tesseract/src/textord/tabfind.cpp b/tesseract/src/textord/tabfind.cpp
new file mode 100644
index 00000000..c88421c1
--- /dev/null
+++ b/tesseract/src/textord/tabfind.cpp
@@ -0,0 +1,1438 @@
+///////////////////////////////////////////////////////////////////////
+// File:        tabfind.cpp
+// Description: Subclass of BBGrid to find vertically aligned blobs.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "tabfind.h"
+#include "alignedblob.h"
+#include "colpartitiongrid.h"
+#include "detlinefit.h"
+#include "host.h"              // for NearlyEqual
+#include "linefind.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+// Multiple of box size to search for initial gaps.
+const int kTabRadiusFactor = 5;
+// Min and Max multiple of height to search vertically when extrapolating.
+const int kMinVerticalSearch = 3;
+const int kMaxVerticalSearch = 12;
+const int kMaxRaggedSearch = 25;  // Presumably for ragged tabs; TODO confirm.
+// Minimum number of lines in a column width to make it interesting.
+const int kMinLinesInColumn = 10;
+// Minimum width of a column to be interesting.
+const int kMinColumnWidth = 200;
+// Minimum fraction of total column lines for a column to be interesting.
+const double kMinFractionalLinesInColumn = 0.125;
+// Fraction of height used as alignment tolerance for aligned tabs.
+const double kAlignedFraction = 0.03125;
+// Maximum gutter width (in absolute inches) that we care about.
+const double kMaxGutterWidthAbsolute = 2.00;
+// Multiplier of gridsize for min gutter width of TT_MAYBE_RAGGED blobs.
+const int kRaggedGutterMultiple = 5;
+// Min aspect ratio of tall objects to be considered a separator line.
+// (These will be ignored in searching the gutter for obstructions.)
+const double kLineFragmentAspectRatio = 10.0;
+// Min number of points to accept after evaluation.
+const int kMinEvaluatedTabs = 3;
+// Up to 30 degrees is allowed for rotations of diacritic blobs.
+// Keep this value (cos of 30 degrees) slightly larger than kCosSmallAngle in
+// blobbox.cpp so that the assert there never fails.
+const double kCosMaxSkewAngle = 0.866025;
+
+static BOOL_VAR(textord_tabfind_show_initialtabs, false, "Show tab candidates");
+static BOOL_VAR(textord_tabfind_show_finaltabs, false, "Show tab vectors");
+
+// Sets up the grid, adds the vlines list to the vectors and binds width_cb_.
+TabFind::TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright,
+                 TabVector_LIST* vlines, int vertical_x, int vertical_y,
+                 int resolution)
+    : AlignedBlob(gridsize, bleft, tright),
+      resolution_(resolution),
+      image_origin_(0, tright.y() - 1),
+      v_it_(&vectors_) {
+  width_cb_ = nullptr;  // Reset before the setup calls below.
+  v_it_.add_list_after(vlines);
+  SetVerticalSkewAndParallelize(vertical_x, vertical_y);
+  width_cb_ = std::bind(&TabFind::CommonWidth, this, std::placeholders::_1);
+}
+
+// No explicit cleanup is performed here.
+TabFind::~TabFind() = default;
+
+///////////////// PUBLIC functions (mostly used by TabVector). //////////////
+
+// Inserts every blob of the blobs list into the given grid (which need not
+// be this) via InsertBlob, forwarding h_spread and v_spread unchanged.
+// No blob is removed from the source list here.
+// It would seem to make more sense to swap this and grid, but this way
+// around allows grid to not be derived from TabFind, eg a ColPartitionGrid,
+// while the grid that provides the tab stops(this) has to be derived from
+// TabFind.
+// With textord_debug_tabfind set, the accepted/rejected counts are printed.
+void TabFind::InsertBlobsToGrid(bool h_spread, bool v_spread,
+                                BLOBNBOX_LIST* blobs,
+                                BBGrid<BLOBNBOX, BLOBNBOX_CLIST,
+                                       BLOBNBOX_C_IT>* grid) {
+  int accepted = 0;
+  int rejected = 0;
+  BLOBNBOX_IT it(blobs);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    // InsertBlob returns false for a blob that it did not add to the grid.
+    if (InsertBlob(h_spread, v_spread, it.data(), grid)) {
+      ++accepted;
+    } else {
+      ++rejected;
+    }
+  }
+  if (textord_debug_tabfind) {
+    tprintf("Inserted %d blobs into grid, %d rejected.\n",
+            accepted, rejected);
+  }
+}
+
+// Inserts one blob into the given grid (which need not be this).
+// h_spread/v_spread select whether all grid cells covered horizontally/
+// vertically by the box are used, or just the bottom-left one.
+// The blob's rule edges are always refreshed from the tab vectors of this
+// (not grid), even when the blob is then not inserted.
+// Returns false, without inserting, if the blob is joined to its predecessor.
+bool TabFind::InsertBlob(bool h_spread, bool v_spread, BLOBNBOX* blob,
+                         BBGrid<BLOBNBOX, BLOBNBOX_CLIST,
+                                BLOBNBOX_C_IT>* grid) {
+  const TBOX box(blob->bounding_box());
+  blob->set_left_rule(LeftEdgeForBox(box, false, false));
+  blob->set_right_rule(RightEdgeForBox(box, false, false));
+  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
+  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
+  const bool insertable = !blob->joined_to_prev();
+  if (insertable) grid->InsertBBox(h_spread, v_spread, blob);
+  return insertable;
+}
+
+// Refreshes the rule edges of all four blob lists of the block, in the order
+// blobs, small_blobs, noise_blobs, large_blobs.
+void TabFind::SetBlockRuleEdges(TO_BLOCK* block) {
+  BLOBNBOX_LIST* const lists[] = {&block->blobs, &block->small_blobs,
+                                  &block->noise_blobs, &block->large_blobs};
+  for (BLOBNBOX_LIST* list : lists) SetBlobRuleEdges(list);
+}
+
+// Recomputes the left/right rule and crossing-rule x-coords of each blob in
+// the list from the results of LeftEdgeForBox and RightEdgeForBox.
+void TabFind::SetBlobRuleEdges(BLOBNBOX_LIST* blobs) {
+  BLOBNBOX_IT it(blobs);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* const current = it.data();
+    const TBOX bbox(current->bounding_box());
+    current->set_left_rule(LeftEdgeForBox(bbox, false, false));
+    current->set_right_rule(RightEdgeForBox(bbox, false, false));
+    current->set_left_crossing_rule(LeftEdgeForBox(bbox, true, false));
+    current->set_right_crossing_rule(RightEdgeForBox(bbox, true, false));
+  }
+}
+
+// Returns the gutter width of the given TabVector between the given y limits:
+// the smallest positive gap to a blob found by the side search (at most
+// max_gutter_width) minus the magnitude of *required_shift.
+// *required_shift is set to the x-shift to be added to the vector to clear
+// any blobs that cross it (<= 0 for a left tab, >= 0 otherwise).
+// If ignore_unmergeables is true, then blobs of UnMergeableType are
+// ignored as if they don't exist. (Used for text on image.)
+int TabFind::GutterWidth(int bottom_y, int top_y, const TabVector& v,
+                         bool ignore_unmergeables, int max_gutter_width,
+                         int* required_shift) {
+  const bool search_leftwards = v.IsLeftTab();
+  const int x_at_bottom = v.XAtY(bottom_y);
+  const int x_at_top = v.XAtY(top_y);
+  // A left tab starts at its larger x, any other tab at its smaller x.
+  const int start_x = search_leftwards ? std::max(x_at_top, x_at_bottom)
+                                       : std::min(x_at_top, x_at_bottom);
+  BlobGridSearch side_search(this);
+  side_search.StartSideSearch(start_x, bottom_y, top_y);
+  int narrowest_gap = max_gutter_width;
+  *required_shift = 0;
+  for (BLOBNBOX* candidate = side_search.NextSideSearch(search_leftwards);
+       candidate != nullptr;
+       candidate = side_search.NextSideSearch(search_leftwards)) {
+    const TBOX& cbox = candidate->bounding_box();
+    // Skip blobs outside the y range, likely separator line residue (tall
+    // and thin) and, if required, non-text.
+    if (cbox.bottom() >= top_y || cbox.top() <= bottom_y ||
+        (cbox.height() >= gridsize() * 2 &&
+         cbox.height() > cbox.width() * kLineFragmentAspectRatio) ||
+        (ignore_unmergeables &&
+         BLOBNBOX::UnMergeableType(candidate->region_type()))) {
+      continue;
+    }
+    // The vector's x is taken at the blob's mid-y, so that the shift clears
+    // all blobs on the tab-stop without needing exactness at the blob's
+    // top and bottom.
+    const int tab_x = v.XAtY((cbox.bottom() + cbox.top()) / 2);
+    int gap;
+    if (search_leftwards) {
+      gap = tab_x - cbox.right();
+      if (gap < 0)
+        *required_shift = std::min(*required_shift, cbox.left() - tab_x);
+    } else {
+      gap = cbox.left() - tab_x;
+      if (gap < 0)
+        *required_shift = std::max(*required_shift, cbox.right() - tab_x);
+    }
+    if (gap > 0) narrowest_gap = std::min(narrowest_gap, gap);
+  }
+  // Result may be negative, in which case this is a really bad tabstop.
+  return narrowest_gap - abs(*required_shift);
+}
+
+// For bbox on a tab stop at tab_x, finds the width of the gutter on the
+// outside of the column (*gutter_width, at most max_gutter) and the gap to
+// the nearest inner neighbour or crossing tab edge (*neighbour_gap).
+// left selects the side: if true the gutter lies to the left of the blob.
+// mean_height is not used.
+void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height,
+                                         int max_gutter, bool left,
+                                         BLOBNBOX* bbox, int* gutter_width,
+                                         int* neighbour_gap) {
+  const TBOX& box = bbox->bounding_box();
+  // outer_x is the box side facing the gutter, inner_x the opposite side.
+  const int outer_x = left ? box.left() : box.right();
+  const int inner_x = left ? box.right() : box.left();
+  // On ragged edges the gutter side of the box is away from the tabstop, in
+  // which case the allowed gutter width is increased by that distance.
+  const int tab_gap = left ? outer_x - tab_x : tab_x - outer_x;
+  *gutter_width = tab_gap > 0 ? max_gutter + tab_gap : max_gutter;
+  const bool debug = WithinTestRegion(2, box.left(), box.bottom());
+  if (debug) tprintf("Looking in gutter\n");
+  // Find the nearest blob on the outside of the column.
+  BLOBNBOX* outer_blob = AdjacentBlob(bbox, left,
+                                      bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
+                                      *gutter_width, box.top(), box.bottom());
+  if (outer_blob != nullptr) {
+    const TBOX& outer_box = outer_blob->bounding_box();
+    if (left) {
+      *gutter_width = tab_x - outer_box.right();
+    } else {
+      *gutter_width = outer_box.left() - tab_x;
+    }
+  }
+  if (*gutter_width >= max_gutter) {
+    // If there is no box because a tab was in the way, get the tab coord by
+    // probing with a 1-wide copy of the box placed max_gutter from tab_x.
+    TBOX probe(box);
+    if (left) {
+      probe.set_left(tab_x - max_gutter - 1);
+      probe.set_right(tab_x - max_gutter);
+      const int tab_edge = RightEdgeForBox(probe, true, false);
+      if (tab_edge < tab_x - 1) *gutter_width = tab_x - tab_edge;
+    } else {
+      probe.set_left(tab_x + max_gutter);
+      probe.set_right(tab_x + max_gutter + 1);
+      const int tab_edge = LeftEdgeForBox(probe, true, false);
+      if (tab_edge > tab_x + 1) *gutter_width = tab_edge - tab_x;
+    }
+  }
+  *gutter_width = std::min(*gutter_width, max_gutter);
+  // Now look for a neighbour on the inside.
+  if (debug) tprintf("Looking for neighbour\n");
+  BLOBNBOX* inner_blob = AdjacentBlob(bbox, !left,
+                                      bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
+                                      *gutter_width, box.top(), box.bottom());
+  int inner_limit = left ? RightEdgeForBox(box, true, false)
+                         : LeftEdgeForBox(box, true, false);
+  if (inner_blob != nullptr) {
+    const TBOX& inner_box = inner_blob->bounding_box();
+    if (debug) {
+      tprintf("Found neighbour:");
+      inner_box.print();
+    }
+    if (left) {
+      inner_limit = std::min<int>(inner_limit, inner_box.left());
+    } else {
+      inner_limit = std::max<int>(inner_limit, inner_box.right());
+    }
+  }
+  *neighbour_gap = left ? inner_limit - inner_x : inner_x - inner_limit;
+}
+
+// Returns the x-coord of the right edge for the given box: the x, at the
+// box's vertical middle, of the tab vector found by RightTabForBox, or
+// tright_.x() if no such vector exists. See RightTabForBox for details.
+int TabFind::RightEdgeForBox(const TBOX& box, bool crossing, bool extended) {
+  const TabVector* tab = RightTabForBox(box, crossing, extended);
+  if (tab == nullptr) return tright_.x();
+  return tab->XAtY((box.top() + box.bottom()) / 2);
+}
+// As RightEdgeForBox, but for the left side; falls back to bleft_.x().
+int TabFind::LeftEdgeForBox(const TBOX& box, bool crossing, bool extended) {
+  const TabVector* tab = LeftTabForBox(box, crossing, extended);
+  return tab ? tab->XAtY((box.top() + box.bottom()) / 2) : bleft_.x();
+}
+
+// Returns the leftmost overlapping TabVector at or right of the box's right
+// edge (x-centre if crossing), or nullptr. See tabfind.h for the arguments.
+// TabVectors are stored sorted by perpendicular distance of middle from
+// the global mean vertical vector. Since the individual vectors can have
+// differing directions, their XAtY for a given y is not necessarily in the
+// right order. Therefore the search has to be run with a margin.
+// The middle of a vector that passes through (x,y) cannot be higher than
+// halfway from y to the top, or lower than halfway from y to the bottom
+// of the coordinate range; therefore, the search margin is the range of
+// sort keys between these halfway points. Any vector with a sort key greater
+// than the upper margin must be to the right of x at y, and likewise any
+// vector with a sort key less than the lower margin must pass to the left
+// of x at y.
+TabVector* TabFind::RightTabForBox(const TBOX& box, bool crossing,
+                                   bool extended) {
+  if (v_it_.empty())
+    return nullptr;  // No tab vectors at all.
+  int top_y = box.top();
+  int bottom_y = box.bottom();
+  int mid_y = (top_y + bottom_y) / 2;
+  int right = crossing ? (box.left() + box.right()) / 2 : box.right();
+  int min_key, max_key;
+  SetupTabSearch(right, mid_y, &min_key, &max_key);
+  // Position the iterator at the first TabVector with sort_key >= min_key.
+  while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key)
+    v_it_.backward();
+  while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key)
+    v_it_.forward();
+  // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
+  TabVector* best_v = nullptr;
+  int best_x = -1;     // x of best_v at mid_y; valid only once best_v is set.
+  int key_limit = -1;  // Sort key bound; only used once best_v is set.
+  do {
+    TabVector* v = v_it_.data();
+    int x = v->XAtY(mid_y);
+    if (x >= right &&
+        (v->VOverlap(top_y, bottom_y) > 0 ||
+         (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
+      if (best_v == nullptr || x < best_x) {
+        best_v = v;
+        best_x = x;
+        // We can guarantee that no better vector can be found if the
+        // sort key exceeds that of the best by max_key - min_key.
+        key_limit = v->sort_key() + max_key - min_key;
+      }
+    }
+    // Break when the search is done to avoid wrapping the iterator and
+    // thereby potentially slowing the next search.
+    if (v_it_.at_last() ||
+        (best_v != nullptr && v->sort_key() > key_limit))
+      break;  // Prevent restarting list for next call.
+    v_it_.forward();
+  } while (!v_it_.at_first());
+  return best_v;  // nullptr if no vector qualified.
+}
+
+// As RightTabForBox, but finds the left TabVector, scanning v_it_ backwards.
+TabVector* TabFind::LeftTabForBox(const TBOX& box, bool crossing,
+                                  bool extended) {
+  if (v_it_.empty())
+    return nullptr;  // No tab vectors exist, so there is nothing to find.
+  int top_y = box.top();
+  int bottom_y = box.bottom();
+  int mid_y = (top_y + bottom_y) / 2;
+  int left = crossing ? (box.left() + box.right()) / 2 : box.left();
+  int min_key, max_key;  // Sort-key search margin; set by SetupTabSearch.
+  SetupTabSearch(left, mid_y, &min_key, &max_key);
+  // Position the iterator at the last TabVector with sort_key <= max_key.
+  while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key)
+    v_it_.forward();  // Advance while still at or below max_key.
+  while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
+    v_it_.backward();  // Then step back past everything above max_key.
+  }
+  // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
+  TabVector* best_v = nullptr;
+  int best_x = -1;  // XAtY(mid_y) of best_v; unused while best_v is null.
+  int key_limit = -1;  // Sort-key cutoff; unused while best_v is null.
+  do {
+    TabVector* v = v_it_.data();
+    int x = v->XAtY(mid_y);
+    if (x <= left &&
+        (v->VOverlap(top_y, bottom_y) > 0 ||
+         (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
+      if (best_v == nullptr || x > best_x) {
+        best_v = v;
+        best_x = x;
+        // We can guarantee that no better vector can be found if the
+        // sort key is less than that of the best by max_key - min_key.
+        key_limit = v->sort_key() - (max_key - min_key);
+      }
+    }
+    // Break when the search is done to avoid wrapping the iterator and
+    // thereby potentially slowing the next search.
+    if (v_it_.at_first() ||
+        (best_v != nullptr && v->sort_key() < key_limit))
+      break;  // Prevent restarting list for next call.
+    v_it_.backward();
+  } while (!v_it_.at_last());  // Loop guard: last element reached again.
+  return best_v;  // Null if no qualifying vector was found.
+}
+
+// Tests whether width, once divided by kColumnWidthFactor, lies within one
+// unit of any [x, y] range held in column_widths_.
+bool TabFind::CommonWidth(int width) {
+  const int scaled = width / kColumnWidthFactor;
+  ICOORDELT_IT w_it(&column_widths_);
+  for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
+    ICOORDELT* range = w_it.data();
+    const bool too_narrow = scaled < range->x() - 1;
+    if (!too_narrow && scaled <= range->y() + 1) return true;
+  }
+  return false;
+}
+
+// True when either size is greater than twice the other,
+// i.e. the two sizes differ by more than a factor of 2.
+bool TabFind::DifferentSizes(int size1, int size2) {
+  return size2 * 2 < size1 || size1 * 2 < size2;
+}
+
+// True when either size is greater than five times the other,
+// i.e. the two sizes differ by more than a factor of 5.
+bool TabFind::VeryDifferentSizes(int size1, int size2) {
+  return size2 * 5 < size1 || size1 * 5 < size2;
+}
+
+///////////////// PROTECTED functions (used by ColumnFinder). //////////////
+
+// Top-level function to find TabVectors in an input page block.
+// Returns false if Deskew() rejects the detected skew angle, otherwise true.
+// Applies the detected skew angle to deskew the tabs, blobs and part_grid.
+bool TabFind::FindTabVectors(TabVector_LIST* hlines,
+                             BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
+                             int min_gutter_width,
+                             double tabfind_aligned_gap_fraction,
+                             ColPartitionGrid* part_grid,
+                             FCOORD* deskew, FCOORD* reskew) {
+  ScrollView* tab_win = FindInitialTabVectors(image_blobs, min_gutter_width,
+                                              tabfind_aligned_gap_fraction,
+                                              block);
+  ComputeColumnWidths(tab_win, part_grid);  // Fills column_widths_.
+  TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this);
+  SortVectors();  // Restore sort order (and v_it_) after merging.
+  CleanupTabs();
+  if (!Deskew(hlines, image_blobs, block, deskew, reskew))
+    return false;  // Skew angle is too large.
+  part_grid->Deskew(*deskew);  // Apply the same deskew to the partitions.
+  ApplyTabConstraints();
+  #ifndef GRAPHICS_DISABLED
+  if (textord_tabfind_show_finaltabs) {
+    tab_win = MakeWindow(640, 50, "FinalTabs");
+    DisplayBoxes(tab_win);
+    DisplayTabs("FinalTabs", tab_win);
+    tab_win = DisplayTabVectors(tab_win);
+  }
+  #endif // !GRAPHICS_DISABLED
+  return true;
+}
+
+// Top-level function to not find TabVectors in an input page block, but setup
+// for single column mode: fills the grid and sets deskew/reskew to (1, 0).
+void TabFind::DontFindTabVectors(BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
+                                 FCOORD* deskew, FCOORD* reskew) {
+  InsertBlobsToGrid(false, false, image_blobs, this);
+  InsertBlobsToGrid(true, false, &block->blobs, this);
+  deskew->set_x(1.0f);  // (1, 0) for both vectors: no rotation is applied.
+  deskew->set_y(0.0f);
+  reskew->set_x(1.0f);
+  reskew->set_y(0.0f);
+}
+
+// Cleans up the lists of blobs in the block ready for use by TabFind.
+// Large blobs that have an owner are moved to the end of the main blobs list,
+// then block->DeleteUnownedNoise() is called.
+void TabFind::TidyBlobs(TO_BLOCK* block) {
+  BLOBNBOX_IT large_it = &block->large_blobs;
+  BLOBNBOX_IT blob_it = &block->blobs;
+  int b_count = 0;  // Number of large blobs moved; only used for debug output.
+  for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
+    BLOBNBOX* large_blob = large_it.data();
+    if (large_blob->owner() != nullptr) {
+      blob_it.add_to_end(large_it.extract());  // Move it between the lists.
+      ++b_count;
+    }
+  }
+  if (textord_debug_tabfind) {
+    tprintf("Moved %d large blobs to normal list\n",
+            b_count);
+    #ifndef GRAPHICS_DISABLED
+    ScrollView* rej_win = MakeWindow(500, 300, "Image blobs");
+    block->plot_graded_blobs(rej_win);
+    block->plot_noise_blobs(rej_win);
+    rej_win->Update();
+    #endif // !GRAPHICS_DISABLED
+  }
+  block->DeleteUnownedNoise();
+}
+
+// Sets [*min_key, *max_key] to the sort-key search margin used by *TabForBox.
+void TabFind::SetupTabSearch(int x, int y, int* min_key, int* max_key) {
+  int top_key = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2);
+  int bot_key = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2);
+  *min_key = bot_key < top_key ? bot_key : top_key;
+  *max_key = bot_key < top_key ? top_key : bot_key;
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Draws every TabVector in vectors_ on tab_win, updates the window and
+// returns the same window pointer that was passed in.
+ScrollView* TabFind::DisplayTabVectors(ScrollView* tab_win) {
+  TabVector_IT tab_it(&vectors_);
+  for (tab_it.mark_cycle_pt(); !tab_it.cycled_list(); tab_it.forward()) {
+    tab_it.data()->Display(tab_win);
+  }
+  tab_win->Update();
+  return tab_win;
+}
+
+#endif
+
+// PRIVATE CODE.
+//
+// First part of FindTabVectors, which may be used twice if the text
+// is mostly of vertical alignment. Returns FindTabBoxes' window (maybe null).
+ScrollView* TabFind::FindInitialTabVectors(BLOBNBOX_LIST* image_blobs,
+                                           int min_gutter_width,
+                                           double tabfind_aligned_gap_fraction,
+                                           TO_BLOCK* block) {
+#ifndef GRAPHICS_DISABLED
+  if (textord_tabfind_show_initialtabs) {
+    ScrollView* line_win = MakeWindow(0, 0, "VerticalLines");
+    line_win = DisplayTabVectors(line_win);
+  }
+#endif
+  // Prepare the grid.
+  if (image_blobs != nullptr)
+    InsertBlobsToGrid(true, false, image_blobs, this);
+  InsertBlobsToGrid(true, false, &block->blobs, this);
+  ScrollView* initial_win = FindTabBoxes(min_gutter_width,
+                                         tabfind_aligned_gap_fraction);
+  FindAllTabVectors(min_gutter_width);
+
+  TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this);
+  SortVectors();  // Restore sort order (and v_it_) after merging.
+  EvaluateTabs();  // Deletes non-separator tabs that have too few boxes.
+#ifndef GRAPHICS_DISABLED
+  if (textord_tabfind_show_initialtabs && initial_win != nullptr)
+    initial_win = DisplayTabVectors(initial_win);
+#endif
+  MarkVerticalText();
+  return initial_win;
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Draws each blob's bounding box on win in its BoxColor(), then updates win.
+static void DisplayBoxVector(const GenericVector<BLOBNBOX*>& boxes,
+                             ScrollView* win) {
+  for (int index = 0; index < boxes.size(); ++index) {
+    BLOBNBOX* blob = boxes[index];
+    TBOX bounds = blob->bounding_box();
+    const int x_min = bounds.left();
+    const int x_max = bounds.right();
+    const int y_min = bounds.bottom();
+    const int y_max = bounds.top();
+    win->Pen(blob->BoxColor());
+    win->Rectangle(x_min, y_min, x_max, y_max);
+  }
+  win->Update();
+}
+
+#endif // !GRAPHICS_DISABLED
+
+// Tests every box in the grid with TestBoxForTabs and collects the candidate
+// tab-stops in left/right_tab_boxes_. Returns the debug window, or nullptr.
+ScrollView* TabFind::FindTabBoxes(int min_gutter_width,
+                                  double tabfind_aligned_gap_fraction) {
+  left_tab_boxes_.clear();
+  right_tab_boxes_.clear();
+  // For every bbox in the grid, determine whether it uses a tab on an edge.
+  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this);
+  gsearch.StartFullSearch();
+  BLOBNBOX* bbox;
+  while ((bbox = gsearch.NextFullSearch()) != nullptr) {
+    if (TestBoxForTabs(bbox, min_gutter_width, tabfind_aligned_gap_fraction)) {
+      // If it is any kind of tab, insert it into the vectors.
+      if (bbox->left_tab_type() != TT_NONE)
+        left_tab_boxes_.push_back(bbox);
+      if (bbox->right_tab_type() != TT_NONE)
+        right_tab_boxes_.push_back(bbox);  // A box may be in both vectors.
+    }
+  }
+  // Sort left tabs by left and right by right to see the outermost one first
+  // on a ragged tab.
+  left_tab_boxes_.sort(SortByBoxLeft<BLOBNBOX>);
+  right_tab_boxes_.sort(SortRightToLeft<BLOBNBOX>);
+  ScrollView* tab_win = nullptr;  // Stays null unless the debug display is on.
+  #ifndef GRAPHICS_DISABLED
+  if (textord_tabfind_show_initialtabs) {
+    tab_win = MakeWindow(0, 100, "InitialTabs");
+    tab_win->Pen(ScrollView::BLUE);
+    tab_win->Brush(ScrollView::NONE);
+    // Display the left and right tab boxes.
+    DisplayBoxVector(left_tab_boxes_, tab_win);
+    DisplayBoxVector(right_tab_boxes_, tab_win);
+    tab_win = DisplayTabs("Tabs", tab_win);
+  }
+  #endif // !GRAPHICS_DISABLED
+  return tab_win;
+}
+
+bool TabFind::TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width,
+                             double tabfind_aligned_gap_fraction) {
+  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> radsearch(this);
+  TBOX box = bbox->bounding_box();  // The blob being classified.
+  // If there are separator lines, get the column edges.
+  int left_column_edge = bbox->left_rule();
+  int right_column_edge = bbox->right_rule();
+  // The edges of the bounding box of the blob being processed.
+  int left_x = box.left();
+  int right_x = box.right();
+  int top_y = box.top();
+  int bottom_y = box.bottom();
+  int height = box.height();
+  bool debug = WithinTestRegion(3, left_x, top_y);
+  if (debug) {
+    tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n",
+            left_x, top_y, right_x, bottom_y,
+            left_column_edge, right_column_edge);
+  }
+  // Search radius: a multiple of the height, rounded up to units of gridsize_.
+  int radius = (height * kTabRadiusFactor + gridsize_ - 1) / gridsize_;
+  radsearch.StartRadSearch((left_x + right_x)/2, (top_y + bottom_y)/2, radius);
+  // In Vertical Page mode, once we have an estimate of the vertical line
+  // spacing, the minimum amount of gutter space before a possible tab is
+  // increased under the assumption that column partition is always larger
+  // than line spacing.
+  int min_spacing =
+      static_cast<int>(height * tabfind_aligned_gap_fraction);
+  if (min_gutter_width > min_spacing)
+    min_spacing = min_gutter_width;  // Use the larger of the two minima.
+  int min_ragged_gutter = kRaggedGutterMultiple * gridsize();
+  if (min_gutter_width > min_ragged_gutter)
+    min_ragged_gutter = min_gutter_width;  // Likewise for ragged gutters.
+  int target_right = left_x - min_spacing;
+  int target_left = right_x + min_spacing;
+  // We will be evaluating whether the left edge could be a left tab, and
+  // whether the right edge could be a right tab.
+  // A box can be a tab if its bool is_(left/right)_tab remains true, meaning
+  // that no blobs have been found in the gutter during the radial search.
+  // A box can also be a tab if there are objects in the gutter only above
+  // or only below, and there are aligned objects on the opposite side, but
+  // not too many unaligned objects. The maybe_(left/right)_tab_up counts
+  // aligned objects above and negatively counts unaligned objects above,
+  // and is set to -INT32_MAX if a gutter object is found above.
+  // The other 3 maybe ints work similarly for the other sides.
+  // These conditions are very strict, to minimize false positives, and really
+  // only aligned tabs and outermost ragged tab blobs will qualify, so we
+  // also have maybe_ragged_left/right with less stringent rules.
+  // A blob that is maybe_ragged_left/right will be further qualified later,
+  // using the min_ragged_gutter.
+  bool is_left_tab = true;
+  bool is_right_tab = true;
+  bool maybe_ragged_left = true;
+  bool maybe_ragged_right = true;
+  int maybe_left_tab_up = 0;
+  int maybe_right_tab_up = 0;
+  int maybe_left_tab_down = 0;
+  int maybe_right_tab_down = 0;
+  if (bbox->leader_on_left()) {
+    is_left_tab = false;
+    maybe_ragged_left = false;
+    maybe_left_tab_up = -INT32_MAX;
+    maybe_left_tab_down = -INT32_MAX;
+  }
+  if (bbox->leader_on_right()) {
+    is_right_tab = false;
+    maybe_ragged_right = false;
+    maybe_right_tab_up = -INT32_MAX;
+    maybe_right_tab_down = -INT32_MAX;
+  }
+  int alignment_tolerance = static_cast<int>(resolution_ * kAlignedFraction);
+  BLOBNBOX* neighbour = nullptr;
+  while ((neighbour = radsearch.NextRadSearch()) != nullptr) {
+    if (neighbour == bbox)
+      continue;  // Skip the blob under test itself.
+    TBOX nbox = neighbour->bounding_box();
+    int n_left = nbox.left();
+    int n_right = nbox.right();
+    if (debug)
+      tprintf("Neighbour at (%d,%d)->(%d,%d)\n",
+              n_left, nbox.bottom(), n_right, nbox.top());
+    // If the neighbouring blob is the wrong side of a separator line, then it
+    // "doesn't exist" as far as we are concerned.
+    if (n_right > right_column_edge || n_left < left_column_edge ||
+        left_x < neighbour->left_rule() || right_x > neighbour->right_rule())
+      continue;  // Separator line in the way.
+    int n_mid_x = (n_left + n_right) / 2;
+    int n_mid_y = (nbox.top() + nbox.bottom()) / 2;
+    if (n_mid_x <= left_x && n_right >= target_right) {
+      if (debug)
+        tprintf("Not a left tab\n");
+      is_left_tab = false;
+      if (n_mid_y < top_y)
+        maybe_left_tab_down = -INT32_MAX;
+      if (n_mid_y > bottom_y)
+        maybe_left_tab_up = -INT32_MAX;
+    } else if (NearlyEqual(left_x, n_left, alignment_tolerance)) {
+      if (debug)
+        tprintf("Maybe a left tab\n");
+      if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX)
+        ++maybe_left_tab_up;
+      if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX)
+        ++maybe_left_tab_down;
+    } else if (n_left < left_x && n_right >= left_x) {
+      // Overlaps but not aligned so negative points on a maybe.
+      if (debug)
+        tprintf("Maybe Not a left tab\n");
+      if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX)
+        --maybe_left_tab_up;
+      if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX)
+        --maybe_left_tab_down;
+    }
+    if (n_left < left_x && nbox.y_overlap(box) && n_right >= target_right) {
+      maybe_ragged_left = false;
+      if (debug)
+        tprintf("Not a ragged left\n");
+    }
+    if (n_mid_x >= right_x && n_left <= target_left) {
+      if (debug)
+        tprintf("Not a right tab\n");
+      is_right_tab = false;
+      if (n_mid_y < top_y)
+        maybe_right_tab_down = -INT32_MAX;
+      if (n_mid_y > bottom_y)
+        maybe_right_tab_up = -INT32_MAX;
+    } else if (NearlyEqual(right_x, n_right, alignment_tolerance)) {
+      if (debug)
+        tprintf("Maybe a right tab\n");
+      if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX)
+        ++maybe_right_tab_up;
+      if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX)
+        ++maybe_right_tab_down;
+    } else if (n_right > right_x && n_left <= right_x) {
+      // Overlaps but not aligned so negative points on a maybe.
+      if (debug)
+        tprintf("Maybe Not a right tab\n");
+      if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX)
+        --maybe_right_tab_up;
+      if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX)
+        --maybe_right_tab_down;
+    }
+    if (n_right > right_x && nbox.y_overlap(box) && n_left <= target_left) {
+      maybe_ragged_right = false;
+      if (debug)
+        tprintf("Not a ragged right\n");
+    }
+    if (maybe_left_tab_down == -INT32_MAX && maybe_left_tab_up == -INT32_MAX &&
+        maybe_right_tab_down == -INT32_MAX && maybe_right_tab_up == -INT32_MAX)
+      break;  // All four maybe counters are vetoed, so stop searching.
+  }
+  if (is_left_tab || maybe_left_tab_up > 1 || maybe_left_tab_down > 1) {
+    bbox->set_left_tab_type(TT_MAYBE_ALIGNED);
+  } else if (maybe_ragged_left && ConfirmRaggedLeft(bbox, min_ragged_gutter)) {
+    bbox->set_left_tab_type(TT_MAYBE_RAGGED);
+  } else {
+    bbox->set_left_tab_type(TT_NONE);
+  }
+  if (is_right_tab || maybe_right_tab_up > 1 || maybe_right_tab_down > 1) {
+    bbox->set_right_tab_type(TT_MAYBE_ALIGNED);
+  } else if (maybe_ragged_right &&
+             ConfirmRaggedRight(bbox, min_ragged_gutter)) {
+    bbox->set_right_tab_type(TT_MAYBE_RAGGED);
+  } else {
+    bbox->set_right_tab_type(TT_NONE);
+  }
+  if (debug) {
+    tprintf("Left result = %s, Right result=%s\n",
+            bbox->left_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" :
+            (bbox->left_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"),
+            bbox->right_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" :
+            (bbox->right_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"));
+  }
+  return bbox->left_tab_type() != TT_NONE || bbox->right_tab_type() != TT_NONE;
+}
+
+// True if the strip of width min_gutter left of bbox holds no y-overlapper.
+bool TabFind::ConfirmRaggedLeft(BLOBNBOX* bbox, int min_gutter) {
+  TBOX gutter(bbox->bounding_box());
+  const int blob_left = gutter.left();
+  gutter.set_right(blob_left);
+  gutter.set_left(blob_left - min_gutter);
+  return NothingYOverlapsInBox(gutter, bbox->bounding_box());
+}
+
+// True if the strip of width min_gutter right of bbox holds no y-overlapper.
+bool TabFind::ConfirmRaggedRight(BLOBNBOX* bbox, int min_gutter) {
+  TBOX gutter(bbox->bounding_box());
+  const int blob_right = gutter.right();
+  gutter.set_left(blob_right);
+  gutter.set_right(blob_right + min_gutter);
+  return NothingYOverlapsInBox(gutter, bbox->bounding_box());
+}
+
+// Scans the blobs in search_box and returns false as soon as one y-overlaps
+// target_box without having a bounding box equal to target_box; else true.
+bool TabFind::NothingYOverlapsInBox(const TBOX& search_box,
+                                    const TBOX& target_box) {
+  BlobGridSearch rect_search(this);
+  rect_search.StartRectSearch(search_box);
+  for (BLOBNBOX* candidate = rect_search.NextRectSearch();
+       candidate != nullptr; candidate = rect_search.NextRectSearch()) {
+    const TBOX& cand_box = candidate->bounding_box();
+    if (!cand_box.y_overlap(target_box)) continue;
+    if (!(cand_box == target_box)) return false;
+  }
+  return true;
+}
+
+void TabFind::FindAllTabVectors(int min_gutter_width) {
+  // Two-pass tab search. dummy_vectors holds the vectors made in each pass.
+  TabVector_LIST dummy_vectors;
+  // An estimate of the vertical direction, revised as more lines are added.
+  int vertical_x = 0;
+  int vertical_y = 1;
+  // Find an estimate of the vertical direction by finding some tab vectors.
+  // Slowly up the search size until we get some vectors.
+  for (int search_size = kMinVerticalSearch; search_size < kMaxVerticalSearch;
+       search_size += kMinVerticalSearch) {
+    int vector_count = FindTabVectors(search_size, TA_LEFT_ALIGNED,
+                                      min_gutter_width,
+                                      &dummy_vectors,
+                                      &vertical_x, &vertical_y);
+    vector_count += FindTabVectors(search_size, TA_RIGHT_ALIGNED,
+                                   min_gutter_width,
+                                   &dummy_vectors,
+                                   &vertical_x, &vertical_y);
+    if (vector_count > 0)
+      break;  // Some vectors found: the vertical estimate is good enough.
+  }
+  // Get rid of the test vectors and reset the types of the tabs.
+  dummy_vectors.clear();
+  for (int i = 0; i < left_tab_boxes_.size(); ++i) {
+    BLOBNBOX* bbox = left_tab_boxes_[i];
+    if (bbox->left_tab_type() == TT_CONFIRMED)
+      bbox->set_left_tab_type(TT_MAYBE_ALIGNED);
+  }
+  for (int i = 0; i < right_tab_boxes_.size(); ++i) {
+    BLOBNBOX* bbox = right_tab_boxes_[i];
+    if (bbox->right_tab_type() == TT_CONFIRMED)
+      bbox->set_right_tab_type(TT_MAYBE_ALIGNED);
+  }
+  if (textord_debug_tabfind) {
+    tprintf("Beginning real tab search with vertical = %d,%d...\n",
+            vertical_x, vertical_y);
+  }
+  // Now do the real thing, but keep the vectors in the dummy_vectors list
+  // until they are all done, so we don't get the tab vectors confused with
+  // the rule line vectors.
+  FindTabVectors(kMaxVerticalSearch, TA_LEFT_ALIGNED, min_gutter_width,
+                 &dummy_vectors, &vertical_x, &vertical_y);
+  FindTabVectors(kMaxVerticalSearch, TA_RIGHT_ALIGNED, min_gutter_width,
+                 &dummy_vectors, &vertical_x, &vertical_y);
+  FindTabVectors(kMaxRaggedSearch, TA_LEFT_RAGGED, min_gutter_width,
+                 &dummy_vectors, &vertical_x, &vertical_y);
+  FindTabVectors(kMaxRaggedSearch, TA_RIGHT_RAGGED, min_gutter_width,
+                 &dummy_vectors, &vertical_x, &vertical_y);
+  // Now add the vectors to the vectors_ list.
+  TabVector_IT v_it(&vectors_);
+  v_it.add_list_after(&dummy_vectors);
+  // Now use the summed (mean) vertical vector as the direction for everything.
+  SetVerticalSkewAndParallelize(vertical_x, vertical_y);
+}
+
+// Helper for FindAllTabVectors. Runs FindTabVector on each candidate box of
+// the side selected by alignment whose tab type is still TT_MAYBE_ALIGNED,
+// appends every vector found to vectors and returns how many were found.
+int TabFind::FindTabVectors(int search_size_multiple, TabAlignment alignment,
+                            int min_gutter_width, TabVector_LIST* vectors,
+                            int* vertical_x, int* vertical_y) {
+  const bool use_right =
+      alignment == TA_RIGHT_ALIGNED || alignment == TA_RIGHT_RAGGED;
+  const GenericVector<BLOBNBOX*>& candidates =
+      use_right ? right_tab_boxes_ : left_tab_boxes_;
+  TabVector_IT out_it(vectors);
+  int found = 0;
+  for (int index = 0; index < candidates.size(); ++index) {
+    BLOBNBOX* blob = candidates[index];
+    const auto side_type =
+        use_right ? blob->right_tab_type() : blob->left_tab_type();
+    if (side_type != TT_MAYBE_ALIGNED) continue;
+    TabVector* tab = FindTabVector(search_size_multiple, min_gutter_width,
+                                   alignment, blob, vertical_x, vertical_y);
+    if (tab == nullptr) continue;
+    out_it.add_to_end(tab);
+    ++found;
+  }
+  return found;
+}
+
+// Looks for a tabstop vector of the given alignment type that runs
+// through bbox.
+// search_size_multiple is a multiple of the height and controls how far
+// the search extends.
+// *vertical_x and *vertical_y carry the running estimate of the true
+// vertical direction (skew finding) and are updated by the search.
+// Returns nullptr when no decent tabstop is found.
+TabVector* TabFind::FindTabVector(int search_size_multiple,
+                                  int min_gutter_width,
+                                  TabAlignment alignment,
+                                  BLOBNBOX* bbox,
+                                  int* vertical_x, int* vertical_y) {
+  const int blob_height = bbox->bounding_box().height();
+  // The height used for the search never drops below gridsize().
+  const int height = blob_height < gridsize() ? gridsize() : blob_height;
+  AlignedBlobParams params(*vertical_x, *vertical_y, height,
+                           search_size_multiple, min_gutter_width,
+                           resolution_, alignment);
+  return FindVerticalAlignment(params, bbox, vertical_x, vertical_y);
+}
+
+// Stores (vertical_x, vertical_y) in vertical_skew_, re-fits every vector in
+// vectors_ parallel to that skew vector and finally re-sorts the list.
+void TabFind::SetVerticalSkewAndParallelize(int vertical_x, int vertical_y) {
+  // ICOORD is 16 bit, so the vector is shrunk to fit where necessary.
+  vertical_skew_.set_with_shrink(vertical_x, vertical_y);
+  if (textord_debug_tabfind) {
+    tprintf("Vertical skew vector=(%d,%d)\n",
+            vertical_skew_.x(), vertical_skew_.y());
+  }
+  v_it_.set_to_list(&vectors_);
+  // Walk the whole list once, fitting each vector to the new direction.
+  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward())
+    v_it_.data()->Fit(vertical_skew_, true);
+  // The fits may have changed the directions, so the order must be rebuilt.
+  SortVectors();
+}
+
+// Sort vectors_ with TabVector::SortVectorsByKey, then reset v_it_ to the list.
+void TabFind::SortVectors() {
+  vectors_.sort(TabVector::SortVectorsByKey);
+  v_it_.set_to_list(&vectors_);  // v_it_ is the iterator used by *TabForBox.
+}
+
+// Evaluate all current non-separator tab vectors; delete those with few boxes.
+void TabFind::EvaluateTabs() {
+  TabVector_IT rule_it(&vectors_);
+  for (rule_it.mark_cycle_pt(); !rule_it.cycled_list(); rule_it.forward()) {
+    TabVector* tab = rule_it.data();
+    if (!tab->IsSeparator()) {
+      tab->Evaluate(vertical_skew_, this);
+      if (tab->BoxCount() < kMinEvaluatedTabs) {
+        if (textord_debug_tabfind > 2)
+          tab->Print("Too few boxes");
+        delete rule_it.extract();  // Unlink the tab from vectors_ and free it.
+        v_it_.set_to_list(&vectors_);  // Reset v_it_; it may have been on tab.
+      } else if (WithinTestRegion(3, tab->startpt().x(), tab->startpt().y())) {
+        tab->Print("Evaluated tab");
+      }
+    }
+  }
+}
+
+// Trace textlines from one side to the other of each tab vector, saving
+// the most frequent column widths found in a list so that a given width
+// can be tested for being a common width with a simple callback function.
+void TabFind::ComputeColumnWidths(ScrollView* tab_win,
+                                  ColPartitionGrid* part_grid) {
+  #ifndef GRAPHICS_DISABLED
+  if (tab_win != nullptr)
+    tab_win->Pen(ScrollView::WHITE);
+  #endif // !GRAPHICS_DISABLED
+  // Accumulate column sections into a STATS, in units of kColumnWidthFactor.
+  int col_widths_size = (tright_.x() - bleft_.x()) / kColumnWidthFactor;
+  STATS col_widths(0, col_widths_size + 1);
+  ApplyPartitionsToColumnWidths(part_grid, &col_widths);  // Pass 1.
+  #ifndef GRAPHICS_DISABLED
+  if (tab_win != nullptr) {
+    tab_win->Update();
+  }
+  #endif // !GRAPHICS_DISABLED
+  if (textord_debug_tabfind > 1)
+    col_widths.print();
+  // Now make a list of column widths.
+  MakeColumnWidths(col_widths_size, &col_widths);
+  // Turn the column width into a range.
+  ApplyPartitionsToColumnWidths(part_grid, nullptr);  // Pass 2.
+}
+
+// Finds column width and:
+//   if col_widths is not null (pass1):
+//     pair-up tab vectors with existing ColPartitions and accumulate widths.
+//   else (pass2):
+//     find the largest real partition width for each recorded column width,
+//     to be used as the minimum acceptable width.
+void TabFind::ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid,
+                                            STATS* col_widths) {
+  // For every ColPartition in the part_grid, add partners to the tabvectors
+  // and accumulate the column widths.
+  ColPartitionGridSearch gsearch(part_grid);
+  gsearch.StartFullSearch();
+  ColPartition* part;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    BLOBNBOX_C_IT blob_it(part->boxes());
+    if (blob_it.empty())
+      continue;  // Partition has no blobs to measure from.
+    BLOBNBOX* left_blob = blob_it.data();  // First blob in the partition.
+    blob_it.move_to_last();
+    BLOBNBOX* right_blob = blob_it.data();  // Last blob in the partition.
+    TabVector* left_vector = LeftTabForBox(left_blob->bounding_box(),
+                                           true, false);
+    if (left_vector == nullptr || left_vector->IsRightTab())
+      continue;  // No usable tab vector on the left.
+    TabVector* right_vector = RightTabForBox(right_blob->bounding_box(),
+                                             true, false);
+    if (right_vector == nullptr || right_vector->IsLeftTab())
+      continue;  // No usable tab vector on the right.
+
+    int line_left = left_vector->XAtY(left_blob->bounding_box().bottom());
+    int line_right = right_vector->XAtY(right_blob->bounding_box().bottom());
+    // Add to STATS of measurements if the width is significant.
+    int width = line_right - line_left;
+    if (col_widths != nullptr) {
+      AddPartnerVector(left_blob, right_blob, left_vector, right_vector);
+      if (width >= kMinColumnWidth)
+        col_widths->add(width / kColumnWidthFactor, 1);
+    } else {
+      width /= kColumnWidthFactor;  // Same units as column_widths_ entries.
+      ICOORDELT_IT it(&column_widths_);
+      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+        ICOORDELT* w = it.data();
+        if (NearlyEqual<int>(width, w->y(), 1)) {
+          int true_width = part->bounding_box().width() / kColumnWidthFactor;
+          if (true_width <= w->y() && true_width > w->x())
+            w->set_x(true_width);  // Raise x towards the largest real width.
+          break;  // Only the first matching column width is updated.
+        }
+      }
+    }
+  }
+}
+
+// Helper makes the list of common column widths in column_widths_ from the
+// input col_widths. Destroys the content of col_widths by repeatedly
+// finding the mode and erasing the peak.
+void TabFind::MakeColumnWidths(int col_widths_size, STATS* col_widths) {
+  ICOORDELT_IT w_it(&column_widths_);
+  int total_col_count = col_widths->get_total();  // Total before any erasing.
+  while (col_widths->get_total() > 0) {
+    int width = col_widths->mode();
+    int col_count = col_widths->pile_count(width);
+    col_widths->add(width, -col_count);  // Erase the mode's own pile.
+    // Get the entire peak.
+    for (int left = width - 1; left > 0 &&
+         col_widths->pile_count(left) > 0;
+         --left) {
+      int new_count = col_widths->pile_count(left);
+      col_count += new_count;
+      col_widths->add(left, -new_count);
+    }
+    for (int right = width + 1; right < col_widths_size &&
+         col_widths->pile_count(right) > 0;
+         ++right) {
+      int new_count = col_widths->pile_count(right);
+      col_count += new_count;
+      col_widths->add(right, -new_count);
+    }
+    if (col_count > kMinLinesInColumn &&
+        col_count > kMinFractionalLinesInColumn * total_col_count) {
+      auto* w = new ICOORDELT(0, width);  // x starts at 0, y is the mode.
+      w_it.add_after_then_move(w);
+      if (textord_debug_tabfind)
+        tprintf("Column of width %d has %d = %.2f%% lines\n",
+              width * kColumnWidthFactor, col_count,
+              100.0 * col_count / total_col_count);
+    }
+  }
+}
+
+// Sets region type BRT_VERT_TEXT on every grid blob that is UniquelyVertical()
+// and whose current region type is not below BRT_UNKNOWN. Returns nothing.
+void TabFind::MarkVerticalText() {
+  if (textord_debug_tabfind) {
+    tprintf("Checking for vertical lines\n");
+  }
+  BlobGridSearch full_search(this);
+  full_search.StartFullSearch();
+  for (BLOBNBOX* candidate = full_search.NextFullSearch();
+       candidate != nullptr; candidate = full_search.NextFullSearch()) {
+    // Blobs with a region type below BRT_UNKNOWN are left untouched.
+    const bool eligible = candidate->region_type() >= BRT_UNKNOWN;
+    if (eligible && candidate->UniquelyVertical())
+      candidate->set_region_type(BRT_VERT_TEXT);
+  }
+}
+
+int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) {
+  TabVector_IT it(lines);
+  int prev_right = -1;  // Start x of the previous pair's partner; -1 = none.
+  int max_gap = static_cast<int>(kMaxGutterWidthAbsolute * resolution_);
+  STATS gaps(0, max_gap);
+  STATS heights(0, max_gap);  // NOTE(review): holds x-widths despite the name.
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    TabVector* v = it.data();
+    TabVector* partner = v->GetSinglePartner();
+    if (!v->IsLeftTab() || v->IsSeparator() || !partner) continue;
+    heights.add(partner->startpt().x() - v->startpt().x(), 1);
+    if (prev_right > 0 && v->startpt().x() > prev_right) {
+      gaps.add(v->startpt().x() - prev_right, 1);  // Gap to the previous pair.
+    }
+    prev_right = partner->startpt().x();
+  }
+  if (textord_debug_tabfind)
+    tprintf("TabGutter total %d  median_gap %.2f  median_hgt %.2f\n",
+            gaps.get_total(), gaps.median(), heights.median());
+  if (gaps.get_total() < kMinLinesInColumn) return 0;  // Too few gap samples.
+  return static_cast<int>(gaps.median());
+}
+
+// Find the next adjacent (looking to the left or right) blob on this text
+// line, with the constraint that it must vertically significantly overlap
+// the [top_y, bottom_y] range.
+// If ignore_images is true, then blobs with region_type() < BRT_UNKNOWN are
+// treated as if they do not exist. Returns nullptr if no blob qualifies.
+BLOBNBOX* TabFind::AdjacentBlob(const BLOBNBOX* bbox,
+                                bool look_left, bool ignore_images,
+                                double min_overlap_fraction,
+                                int gap_limit, int top_y, int bottom_y) {
+  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> sidesearch(this);
+  const TBOX& box = bbox->bounding_box();
+  int left = box.left();
+  int right = box.right();
+  int mid_x = (left + right) / 2;
+  sidesearch.StartSideSearch(mid_x, bottom_y, top_y);
+  int best_gap = 0;  // Gap of result; only read once result is non-null.
+  bool debug = WithinTestRegion(3, left, bottom_y);
+  BLOBNBOX* result = nullptr;
+  BLOBNBOX* neighbour = nullptr;
+  while ((neighbour = sidesearch.NextSideSearch(look_left)) != nullptr) {
+    if (debug) {
+      tprintf("Adjacent blob: considering box:");
+      neighbour->bounding_box().print();
+    }
+    if (neighbour == bbox ||
+        (ignore_images && neighbour->region_type() < BRT_UNKNOWN))
+      continue;
+    const TBOX& nbox = neighbour->bounding_box();
+    int n_top_y = nbox.top();
+    int n_bottom_y = nbox.bottom();
+    int v_overlap = std::min(n_top_y, top_y) - std::max(n_bottom_y, bottom_y);
+    int height = top_y - bottom_y;
+    int n_height = n_top_y - n_bottom_y;
+    if (v_overlap > min_overlap_fraction * std::min(height, n_height) &&
+        (min_overlap_fraction == 0.0 || !DifferentSizes(height, n_height))) {
+      int n_left = nbox.left();
+      int n_right = nbox.right();
+      int h_gap = std::max(n_left, left) - std::min(n_right, right);
+      int n_mid_x = (n_left + n_right) / 2;
+      if (look_left == (n_mid_x < mid_x) && n_mid_x != mid_x) {
+        if (h_gap > gap_limit) {
+          // Hit a big gap before next tab so return the best found so far.
+          if (debug)
+            tprintf("Giving up due to big gap = %d vs %d\n",
+                    h_gap, gap_limit);
+          return result;
+        }
+        if (h_gap > 0 && (look_left ? neighbour->right_tab_type()
+                          : neighbour->left_tab_type()) >= TT_CONFIRMED) {
+          // Hit a tab facing the wrong way. Stop in case we are crossing
+          // the column boundary.
+          if (debug)
+            tprintf("Collision with like tab of type %d at %d,%d\n",
+                    look_left ? neighbour->right_tab_type()
+                                  : neighbour->left_tab_type(),
+                    n_left, nbox.bottom());
+          return result;
+        }
+        // This is a good fit to the line. Keep this neighbour as the result
+        // if it is the first candidate or has the smallest gap seen so far.
+        if (result == nullptr || h_gap < best_gap) {
+          if (debug)
+            tprintf("Good result\n");
+          result = neighbour;
+          best_gap = h_gap;
+        } else {
+          // The new one is no better, so we probably already have the best.
+          return result;
+        }
+      } else if (debug) {
+        tprintf("Wrong way\n");
+      }
+    } else if (debug) {
+      tprintf("Insufficient overlap\n");
+    }
+  }
+  if (WithinTestRegion(3, left, box.top()))  // NOTE(review): debug used bottom_y.
+    tprintf("Giving up due to end of search\n");
+  return result;  // Search exhausted; best candidate found, or nullptr.
+}
+
+// Add a bi-directional partner relationship between the left
+// and the right. If one (or both) of the vectors is a separator,
+// extend a nearby extendable vector or create a new one of the
+// correct type, using the given left or right blob as a guide.
+void TabFind::AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob,
+                               TabVector* left, TabVector* right) {
+  const TBOX& left_box = left_blob->bounding_box();
+  const TBOX& right_box = right_blob->bounding_box();
+  if (left->IsSeparator()) {
+    // Try to find a nearby left edge to extend.
+    TabVector* v = LeftTabForBox(left_box, true, true);
+    if (v != nullptr && v != left && v->IsLeftTab() &&
+        v->XAtY(left_box.top()) > left->XAtY(left_box.top())) {
+      left = v;  // Found a good replacement.
+      left->ExtendToBox(left_blob);
+    } else {
+      // No usable left tab: make a ragged-left vector from the separator.
+      left = new TabVector(*left, TA_LEFT_RAGGED, vertical_skew_, left_blob);
+      vectors_.add_sorted(TabVector::SortVectorsByKey, left);
+      v_it_.move_to_first();  // vectors_ changed: reset the search iterator.
+    }
+  }
+  if (right->IsSeparator()) {
+    // Try to find a nearby right edge to extend.
+    if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
+      tprintf("Box edge (%d,%d-%d)",
+              right_box.right(), right_box.bottom(), right_box.top());
+      right->Print(" looking for improvement for");
+    }
+    TabVector* v = RightTabForBox(right_box, true, true);
+    if (v != nullptr && v != right && v->IsRightTab() &&
+        v->XAtY(right_box.top()) < right->XAtY(right_box.top())) {
+      right = v;  // Found a good replacement.
+      right->ExtendToBox(right_blob);
+      if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
+        right->Print("Extended vector");
+      }
+    } else {
+      // No usable right tab: make a ragged-right vector from the separator.
+      right = new TabVector(*right, TA_RIGHT_RAGGED, vertical_skew_,
+                            right_blob);
+      vectors_.add_sorted(TabVector::SortVectorsByKey, right);
+      v_it_.move_to_first();  // vectors_ changed: reset the search iterator.
+      if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
+        right->Print("Created new vector");
+      }
+    }
+  }
+  left->AddPartner(right);
+  right->AddPartner(left);
+}
+
+// Move separators and partnerless tabs from vectors_ to dead_vectors_, and
+// call FitAndEvaluateIfNeeded() on every vector that stays in vectors_.
+void TabFind::CleanupTabs() {
+  // TODO(rays) Before getting rid of separators and unused vectors, it
+  // would be useful to try moving ragged vectors outwards to see if this
+  // allows useful extension. Could be combined with checking ends of partners.
+  TabVector_IT it(&vectors_);
+  TabVector_IT dead_it(&dead_vectors_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    TabVector* v = it.data();
+    if (v->IsSeparator() || v->Partnerless()) {
+      dead_it.add_after_then_move(it.extract());  // Unlink and park it.
+      v_it_.set_to_list(&vectors_);  // vectors_ changed: re-seat v_it_.
+    } else {
+      v->FitAndEvaluateIfNeeded(vertical_skew_, this);
+    }
+  }
+}
+
+// Applies rotation to every blob in blobs by calling rotate_box() on each
+// element in list order.
+void TabFind::RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs) {
+  BLOBNBOX_IT blob_it(blobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward())
+    blob_it.data()->rotate_box(rotation);
+}
+
+// Rotates the blobs, hlines and dead vectors by the computed deskew, then
+// rebuilds the grid. Returns false if the detected skew angle is impossible.
+bool TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
+                     TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew) {
+  ComputeDeskewVectors(deskew, reskew);
+  const FCOORD& rotation = *deskew;
+  if (rotation.x() < kCosMaxSkewAngle) return false;
+
+  // Every blob list gets the same rotation.
+  BLOBNBOX_LIST* const blob_lists[] = {image_blobs, &block->blobs,
+                                       &block->small_blobs,
+                                       &block->noise_blobs};
+  for (BLOBNBOX_LIST* blob_list : blob_lists)
+    RotateBlobList(rotation, blob_list);
+
+  // Rotate the horizontal and dead vectors. The vertical vectors don't
+  // need rotating as they can just be refitted.
+  TabVector_LIST* const vector_lists[] = {hlines, &dead_vectors_};
+  for (TabVector_LIST* vector_list : vector_lists) {
+    TabVector_IT vec_it(vector_list);
+    for (vec_it.mark_cycle_pt(); !vec_it.cycled_list(); vec_it.forward())
+      vec_it.data()->Rotate(rotation);
+  }
+  SetVerticalSkewAndParallelize(0, 1);
+
+  // Re-initialise the grid over the rotated bounding box and repopulate it.
+  TBOX rotated_bounds(bleft_, tright_);
+  rotated_bounds.rotate_large(rotation);
+  Init(gridsize(), rotated_bounds.botleft(), rotated_bounds.topright());
+  InsertBlobsToGrid(false, false, image_blobs, this);
+  InsertBlobsToGrid(true, false, &block->blobs, this);
+  return true;
+}
+
+// Flip the vertical and horizontal lines and rotate the grid ready
+// for working on the rotated image. rerotate is currently unused here.
+// This also makes parameter adjustments for FindInitialTabVectors().
+void TabFind::ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate,
+                                   TabVector_LIST* horizontal_lines,
+                                   int* min_gutter_width) {
+  // Rotate the horizontal and vertical vectors and swap them over.
+  // Only the separators are kept and rotated; other tabs are used
+  // to estimate the gutter width then thrown away.
+  TabVector_LIST ex_verticals;
+  TabVector_IT ex_v_it(&ex_verticals);
+  TabVector_LIST vlines;
+  TabVector_IT v_it(&vlines);
+  while (!v_it_.empty()) {
+    TabVector* v = v_it_.extract();
+    if (v->IsSeparator()) {
+      v->Rotate(rotate);
+      ex_v_it.add_after_then_move(v);
+    } else {
+      v_it.add_after_then_move(v);  // Kept only for the gutter estimate.
+    }
+    v_it_.forward();
+  }
+
+  // Adjust the min gutter width for better tabbox selection
+  // in 2nd call to FindInitialTabVectors(). It is only ever increased.
+  int median_gutter = FindMedianGutterWidth(&vlines);
+  if (median_gutter > *min_gutter_width)
+    *min_gutter_width = median_gutter;
+
+  TabVector_IT h_it(horizontal_lines);
+  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
+    TabVector* h = h_it.data();
+    h->Rotate(rotate);
+  }
+  v_it_.add_list_after(horizontal_lines);  // Rotated horizontals move in.
+  v_it_.move_to_first();
+  h_it.set_to_list(horizontal_lines);
+  h_it.add_list_after(&ex_verticals);  // Rotated separators move the other way.
+
+  // Rebuild the grid to the new size.
+  TBOX grid_box(bleft(), tright());
+  grid_box.rotate_large(rotate);
+  Init(gridsize(), grid_box.botleft(), grid_box.topright());
+}
+
+// Drops all tab vectors except separators and clears the grid for a fresh run.
+void TabFind::Reset() {
+  v_it_.move_to_first();
+  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
+    const bool keep = v_it_.data()->IsSeparator();
+    if (keep) continue;
+    delete v_it_.extract();  // Unlink the vector from the list and free it.
+  }
+  Clear();
+}
+
+// Reflect the separator tab vectors and the grids in the y-axis.
+// Can only be called after Reset!
+void TabFind::ReflectInYAxis() {
+  TabVector_LIST temp_list;
+  TabVector_IT temp_it(&temp_list);
+  v_it_.move_to_first();
+  // The TabVector list only contains vertical lines, but they need to be
+  // reflected and the list needs to be reversed, so they are still in
+  // sort_key order.
+  while (!v_it_.empty()) {
+    TabVector* v = v_it_.extract();
+    v_it_.forward();
+    v->ReflectInYAxis();
+    temp_it.add_before_then_move(v);  // Adding before reverses the order.
+  }
+  v_it_.add_list_after(&temp_list);  // Put the reversed vectors back.
+  v_it_.move_to_first();
+  // Reset this grid with reflected bounding boxes.
+  TBOX grid_box(bleft(), tright());
+  int tmp = grid_box.left();  // Saved because set_left overwrites it.
+  grid_box.set_left(-grid_box.right());
+  grid_box.set_right(-tmp);
+  Init(gridsize(), grid_box.botleft(), grid_box.topright());
+}
+
+// Derives the deskew rotation from vertical_skew_, and its inverse in reskew.
+void TabFind::ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew) {
+  const double dot = vertical_skew_ % vertical_skew_;
+  const double norm = sqrt(dot);
+  deskew->set_x(static_cast<float>(vertical_skew_.y() / norm));
+  deskew->set_y(static_cast<float>(vertical_skew_.x() / norm));
+  reskew->set_x(deskew->x());   // Inverse rotation: same x...
+  reskew->set_y(-deskew->y());  // ...and the opposite sign of y.
+}
+
+// Sets up end-point constraints on all the TabVectors and then applies them,
+// so that where possible partners end at the same y coordinate.
+void TabFind::ApplyTabConstraints() {
+  TabVector_IT vec_it(&vectors_);
+  // Pass 1: per-vector constraint setup.
+  for (vec_it.mark_cycle_pt(); !vec_it.cycled_list(); vec_it.forward()) {
+    vec_it.data()->SetupConstraints();
+  }
+  // Pass 2: with the first and last partner, we want a common bottom and
+  // top, respectively, and for each change of partner, we want a common
+  // top of first with bottom of next.
+  for (vec_it.mark_cycle_pt(); !vec_it.cycled_list(); vec_it.forward()) {
+    vec_it.data()->SetupPartnerConstraints();
+  }
+  // TODO(rays) The back-to-back pairs should really be done like the
+  // front-to-front pairs, but there is no convenient way of producing the
+  // list of partners like there is with the front-to-front.
+  // Pass 3: pair each right tab with the overlapping left tabs after it.
+  for (vec_it.mark_cycle_pt(); !vec_it.cycled_list(); vec_it.forward()) {
+    TabVector* right_tab = vec_it.data();
+    if (!right_tab->IsRightTab()) continue;
+    // For each back-to-back pair of vectors, try for common top and bottom.
+    // The copy walks forward from right_tab until it wraps to the list head.
+    TabVector_IT later_it(vec_it);
+    for (later_it.forward(); !later_it.at_first(); later_it.forward()) {
+      TabVector* left_tab = later_it.data();
+      if (left_tab->IsLeftTab() && right_tab->VOverlap(*left_tab))
+        right_tab->SetupPartnerConstraints(left_tab);
+    }
+  }
+  // Pass 4: now actually apply the constraints to get common start/end
+  // points. Separators are not adjusted.
+  for (vec_it.mark_cycle_pt(); !vec_it.cycled_list(); vec_it.forward()) {
+    TabVector* vec = vec_it.data();
+    if (vec->IsSeparator()) continue;
+    vec->ApplyConstraints();
+  }
+  // TODO(rays) Where constraint application fails, it would be good to try
+  // checking the ends to see if they really should be moved.
+}
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/tabfind.h b/tesseract/src/textord/tabfind.h
new file mode 100644
index 00000000..d16a533c
--- /dev/null
+++ b/tesseract/src/textord/tabfind.h
@@ -0,0 +1,384 @@
+///////////////////////////////////////////////////////////////////////
+// File:        tabfind.h
+// Description: Subclass of BBGrid to find tabstops.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_TABFIND_H_
+#define TESSERACT_TEXTORD_TABFIND_H_
+
+#include <functional>           // for std::function
+#include "alignedblob.h"
+#include "tabvector.h"
+#include "linefind.h"
+
+class BLOBNBOX;
+class BLOBNBOX_LIST;
+class TO_BLOCK;
+class ScrollView;
+struct Pix;
+
+namespace tesseract {
+
+using WidthCallback = std::function<bool(int)>;
+
+struct AlignedBlobParams;
+class ColPartitionGrid;
+
+/** Pixel resolution of column width estimates (scales width units to pixels). */
+const int kColumnWidthFactor = 20;
+
+/**
+ * The TabFind class contains code to find tab-stops and maintain the
+ * vectors_ list of tab vectors.
+ * Also provides an interface to find neighbouring blobs
+ * in the grid of BLOBNBOXes that is used by multiple subclasses.
+ * Searching is a complex operation because of the need to enforce
+ * rule/separator lines, and tabstop boundaries, (when available), so
+ * as the holder of the list of TabVectors this class provides the functions.
+ */
+class TESS_API TabFind : public AlignedBlob {
+ public:
+  TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright,
+          TabVector_LIST* vlines, int vertical_x, int vertical_y,
+          int resolution);
+  ~TabFind() override;
+
+  /**
+   * Insert a list of blobs into the given grid (not necessarily this).
+   * See InsertBlob for the other arguments.
+   * It would seem to make more sense to swap this and grid, but this way
+   * around allows grid to not be derived from TabFind, eg a ColPartitionGrid,
+   * while the grid that provides the tab stops(this) has to be derived from
+   * TabFind.
+   */
+  void InsertBlobsToGrid(bool h_spread, bool v_spread,
+                         BLOBNBOX_LIST* blobs,
+                         BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);
+
+  /**
+   * Insert a single blob into the given grid (not necessarily this).
+   * If h_spread, then all cells covered horizontally by the box are
+   * used, otherwise, just the bottom-left. Similarly for v_spread.
+   * A side effect is that the left and right rule edges of the blob are
+   * set according to the tab vectors in this (not grid).
+   */
+  bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX* blob,
+                  BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);
+  // Calls SetBlobRuleEdges for all the blobs in the given block.
+  void SetBlockRuleEdges(TO_BLOCK* block);
+  // Sets the left and right rule and crossing_rules for the blobs in the given
+  // list by finding the next outermost tabvectors for each blob.
+  void SetBlobRuleEdges(BLOBNBOX_LIST* blobs);
+
+  // Returns the gutter width of the given TabVector between the given y limits.
+  // Also returns x-shift to be added to the vector to clear any intersecting
+  // blobs. The shift is deducted from the returned gutter.
+  // If ignore_unmergeables is true, then blobs of UnMergeableType are
+  // ignored as if they don't exist. (Used for text on image.)
+  // max_gutter_width is used as the maximum width worth searching for in case
+  // there is nothing near the TabVector.
+  int GutterWidth(int bottom_y, int top_y, const TabVector& v,
+                  bool ignore_unmergeables, int max_gutter_width,
+                  int* required_shift);
+  /**
+   * Find the gutter width and distance to inner neighbour for the given blob.
+   */
+  void GutterWidthAndNeighbourGap(int tab_x, int mean_height,
+                                  int max_gutter, bool left,
+                                  BLOBNBOX* bbox, int* gutter_width,
+                                  int* neighbour_gap);
+
+  /**
+   * Return the x-coord that corresponds to the right edge for the given
+   * box. If there is a rule line to the right that vertically overlaps it,
+   * then return the x-coord of the rule line, otherwise return the right
+   * edge of the page. For details see RightTabForBox below.
+   */
+  int RightEdgeForBox(const TBOX& box, bool crossing, bool extended);
+  /**
+   * As RightEdgeForBox, but finds the left Edge instead.
+   */
+  int LeftEdgeForBox(const TBOX& box, bool crossing, bool extended);
+
+  /**
+   * Return the TabVector that corresponds to the right edge for the given
+   * box. If there is a TabVector to the right that vertically overlaps it,
+   * then return it, otherwise return nullptr. Note that Right and Left refer
+   * to the position of the TabVector, not its type, ie RightTabForBox
+   * returns the nearest TabVector to the right of the box, regardless of
+   * its type.
+   * If a TabVector crosses right through the box (as opposed to grazing one
+   * edge or missing entirely), then crossing false will ignore such a line.
+   * Crossing true will return the line for BOTH left and right edges.
+   * If extended is true, then TabVectors are considered to extend to their
+   * extended_start/end_y, otherwise, just the startpt_ and endpt_.
+   * These functions make use of an internal iterator to the vectors_ list
+   * for speed when used repeatedly on neighbouring boxes. The caveat is
+   * that the iterator must be updated whenever the list is modified.
+   */
+  TabVector* RightTabForBox(const TBOX& box, bool crossing, bool extended);
+  /**
+   * As RightTabForBox, but finds the left TabVector instead.
+   */
+  TabVector* LeftTabForBox(const TBOX& box, bool crossing, bool extended);
+
+  /**
+   * Return true if the given width is close to one of the common
+   * widths in column_widths_.
+   */
+  bool CommonWidth(int width);
+  /**
+   * Return true if the sizes are more than a
+   * factor of 2 different.
+   */
+  static bool DifferentSizes(int size1, int size2);
+  /**
+   * Return true if the sizes are more than a
+   * factor of 5 different.
+   */
+  static bool VeryDifferentSizes(int size1, int size2);
+
+  /**
+   * Return a callback for testing CommonWidth.
+   */
+  WidthCallback WidthCB() {
+    return width_cb_;
+  }
+
+  /**
+   * Return the coords at which to draw the image backdrop.
+   */
+  const ICOORD& image_origin() const {
+    return image_origin_;
+  }
+
+ protected:
+  /**
+   * Accessors for the live and dead tab vector lists.
+   */
+  TabVector_LIST* vectors() {
+    return &vectors_;
+  }
+  TabVector_LIST* dead_vectors() {
+    return &dead_vectors_;
+  }
+
+  /**
+   * Top-level function to find TabVectors in an input page block.
+   * Returns false if the detected skew angle is impossible.
+   * Applies the detected skew angle to deskew the tabs, blobs and part_grid.
+   * tabfind_aligned_gap_fraction should be the value of parameter
+   * textord_tabfind_aligned_gap_fraction
+   */
+  bool FindTabVectors(TabVector_LIST* hlines,
+                      BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
+                      int min_gutter_width, double tabfind_aligned_gap_fraction,
+                      ColPartitionGrid* part_grid,
+                      FCOORD* deskew, FCOORD* reskew);
+
+  // Top-level function to not find TabVectors in an input page block,
+  // but setup for single column mode.
+  void DontFindTabVectors(BLOBNBOX_LIST* image_blobs,
+                          TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew);
+
+  // Cleans up the lists of blobs in the block ready for use by TabFind.
+  // Large blobs that look like text are moved to the main blobs list.
+  // Main blobs that are superseded by the image blobs are deleted.
+  void TidyBlobs(TO_BLOCK* block);
+
+  // Helper function to setup search limits for *TabForBox.
+  void SetupTabSearch(int x, int y, int* min_key, int* max_key);
+
+  /**
+   * Display the tab vectors found in this grid.
+   */
+  ScrollView* DisplayTabVectors(ScrollView* tab_win);
+
+  // First part of FindTabVectors, which may be used twice if the text
+  // is mostly of vertical alignment.  If find_vertical_text flag is
+  // true, this finds vertical textlines in possibly rotated blob space.
+  // In other words, when the page has mostly vertical lines and is rotated,
+  // setting this to true will find horizontal lines on the page.
+  // tabfind_aligned_gap_fraction should be the value of parameter
+  // textord_tabfind_aligned_gap_fraction
+  ScrollView* FindInitialTabVectors(BLOBNBOX_LIST* image_blobs,
+                                    int min_gutter_width,
+                                    double tabfind_aligned_gap_fraction,
+                                    TO_BLOCK* block);
+
+  // Apply the given rotation to the given list of blobs.
+  static void RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs);
+
+  // Flip the vertical and horizontal lines and rotate the grid ready
+  // for working on the rotated image.
+  // The min_gutter_width will be adjusted to the median gutter width between
+  // vertical tabs to set a better threshold for tabboxes in the 2nd pass.
+  void ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate,
+                            TabVector_LIST* horizontal_lines,
+                            int* min_gutter_width);
+
+  // Clear the grid and get rid of the tab vectors, but not separators,
+  // ready to start again.
+  void Reset();
+
+  // Reflect the separator tab vectors and the grids in the y-axis.
+  // Can only be called after Reset!
+  void ReflectInYAxis();
+
+ private:
+  // For each box in the grid, decide whether it is a candidate tab-stop,
+  // and if so add it to the left and right tab boxes.
+  // tabfind_aligned_gap_fraction should be the value of parameter
+  // textord_tabfind_aligned_gap_fraction
+  ScrollView* FindTabBoxes(int min_gutter_width,
+                           double tabfind_aligned_gap_fraction);
+
+  // Return true if this box looks like a candidate tab stop, and set
+  // the appropriate tab type(s) to TT_UNCONFIRMED.
+  // tabfind_aligned_gap_fraction should be the value of parameter
+  // textord_tabfind_aligned_gap_fraction
+  bool TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width,
+                      double tabfind_aligned_gap_fraction);
+
+  // Returns true if there is nothing in the rectangle of width min_gutter to
+  // the left of bbox.
+  bool ConfirmRaggedLeft(BLOBNBOX* bbox, int min_gutter);
+  // Returns true if there is nothing in the rectangle of width min_gutter to
+  // the right of bbox.
+  bool ConfirmRaggedRight(BLOBNBOX* bbox, int min_gutter);
+  // Returns true if there is nothing in the given search_box that vertically
+  // overlaps target_box other than target_box itself.
+  bool NothingYOverlapsInBox(const TBOX& search_box, const TBOX& target_box);
+
+  // Fills the list of TabVector with the tabstops found in the grid,
+  // and estimates the logical vertical direction.
+  void FindAllTabVectors(int min_gutter_width);
+  // Helper for FindAllTabVectors finds the vectors of a particular type.
+  int FindTabVectors(int search_size_multiple,
+                     TabAlignment alignment,
+                     int min_gutter_width,
+                     TabVector_LIST* vectors,
+                     int* vertical_x, int* vertical_y);
+  // Finds a vector corresponding to a tabstop running through the
+  // given box of the given alignment type.
+  // search_size_multiple is a multiple of height used to control
+  // the size of the search.
+  // vertical_x and y are updated with an estimate of the real
+  // vertical direction. (skew finding.)
+  // Returns nullptr if no decent tabstop can be found.
+  TabVector* FindTabVector(int search_size_multiple, int min_gutter_width,
+                           TabAlignment alignment,
+                           BLOBNBOX* bbox,
+                           int* vertical_x, int* vertical_y);
+
+  // Set the vertical_skew_ member from the given vector and refit
+  // all vectors parallel to the skew vector.
+  void SetVerticalSkewAndParallelize(int vertical_x, int vertical_y);
+
+  // Sort all the current vectors using the vertical_skew_ vector.
+  void SortVectors();
+
+  // Evaluate all the current tab vectors.
+  void EvaluateTabs();
+
+  // Trace textlines from one side to the other of each tab vector, saving
+  // the most frequent column widths found in a list so that a given width
+  // can be tested for being a common width with a simple callback function.
+  void ComputeColumnWidths(ScrollView* tab_win,
+                           ColPartitionGrid* part_grid);
+
+  // Finds column width and:
+  //   if col_widths is not null (pass1):
+  //     pair-up tab vectors with existing ColPartitions and accumulate widths.
+  //   else (pass2):
+  //     find the largest real partition width for each recorded column width,
+  //     to be used as the minimum acceptable width.
+  void ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid,
+                                     STATS* col_widths);
+
+  // Helper makes the list of common column widths in column_widths_ from the
+  // input col_widths. Destroys the content of col_widths by repeatedly
+  // finding the mode and erasing the peak.
+  void MakeColumnWidths(int col_widths_size, STATS* col_widths);
+
+  // Mark blobs as being in a vertical text line where that is the case.
+  void MarkVerticalText();
+
+  // Returns the median gutter width between pairs of matching tab vectors
+  // assuming they are sorted left-to-right.  If there are too few data
+  // points (< kMinLinesInColumn), then 0 is returned.
+  int FindMedianGutterWidth(TabVector_LIST* tab_vectors);
+
+  // Find the next adjacent (to left or right) blob on this text line,
+  // with the constraint that it must vertically significantly overlap
+  // the [top_y, bottom_y] range.
+  // If ignore_images is true, then blobs with region_type() < BRT_UNKNOWN
+  // are treated as if they do not exist.
+  BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox,
+                         bool look_left, bool ignore_images,
+                         double min_overlap_fraction,
+                         int gap_limit, int top_y, int bottom_y);
+
+  // Add a bi-directional partner relationship between the left
+  // and the right. If one (or both) of the vectors is a separator,
+  // extend a nearby extendable vector or create a new one of the
+  // correct type, using the given left or right blob as a guide.
+  void AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob,
+                        TabVector* left, TabVector* right);
+
+  /**
+   * Remove separators and unused tabs from the main vectors_ list
+   * to the dead_vectors_ list.
+   */
+  void CleanupTabs();
+
+  /**
+   * Deskew the tab vectors and blobs, computing the rotation and resetting
+   * the stored vertical_skew_. The deskew inverse is returned in reskew.
+   * Returns false if the detected skew angle is impossible.
+   */
+  bool Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
+              TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew);
+
+  // Compute the rotation required to deskew, and its inverse rotation.
+  void ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew);
+
+  /**
+   * Compute and apply constraints to the end positions of TabVectors so
+   * that where possible partners end at the same y coordinate.
+   */
+  void ApplyTabConstraints();
+
+ protected:
+  ICOORD vertical_skew_;          ///< Estimate of true vertical in this image.
+  int resolution_;                ///< Of source image in pixels per inch.
+ private:
+  ICOORD image_origin_;           ///< Top-left of image in deskewed coords
+  TabVector_LIST vectors_;        ///< List of rule line and tabstops.
+  TabVector_IT v_it_;             ///< Iterator for searching vectors_.
+  TabVector_LIST dead_vectors_;   ///< Separators and unpartnered tab vectors.
+  // Each element is a width range stored with x=min and y=max.
+  ICOORDELT_LIST column_widths_;  ///< List of commonly occurring width ranges.
+  /** Callback to test an int for being a common width. */
+  WidthCallback width_cb_;
+  // Sets of bounding boxes that are candidate tab stops.
+  GenericVector<BLOBNBOX*> left_tab_boxes_;
+  GenericVector<BLOBNBOX*> right_tab_boxes_;
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_TEXTORD_TABFIND_H_
diff --git a/tesseract/src/textord/tablefind.cpp b/tesseract/src/textord/tablefind.cpp
new file mode 100644
index 00000000..6326b858
--- /dev/null
+++ b/tesseract/src/textord/tablefind.cpp
@@ -0,0 +1,2088 @@
+///////////////////////////////////////////////////////////////////////
+// File:        tablefind.cpp
+// Description: Helper classes to find tables from ColPartitions.
+// Author:      Faisal Shafait (faisal.shafait@dfki.de)
+//
+// (C) Copyright 2009, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "tablefind.h"
+#include <algorithm>
+#include <cmath>
+
+#include "allheaders.h"
+
+#include "colpartitionset.h"
+#include "tablerecog.h"
+
+namespace tesseract {
+
+// These numbers are used to calculate the global median stats.
+// They only set an upper bound on the range of the stats objects.
+// Maximum vertical spacing between neighboring partitions.
+const int kMaxVerticalSpacing = 500;
+// Maximum width of a blob in a partition.
+const int kMaxBlobWidth = 500;
+
+// Minimum whitespace size to split a partition (measured as a multiple
+// of a partition's median width).
+const double kSplitPartitionSize = 2.0;
+// To insert text, the partition must satisfy these size constraints
+// in AllowTextPartition(). The idea is to filter out noise partitions,
+// judged by their size relative to the global medians.
+// TODO(nbeato): Need to find good numbers again.
+const double kAllowTextHeight = 0.5;
+const double kAllowTextWidth = 0.6;
+const double kAllowTextArea = 0.8;
+// The same thing applies to blobs in AllowBlob() (to filter noise).
+// TODO(nbeato): These numbers are a shot in the dark...
+// height and width are 0.5 * gridsize() in colfind.cpp
+// area is a rough guess for the size of a period.
+const double kAllowBlobHeight = 0.3;
+const double kAllowBlobWidth = 0.4;
+const double kAllowBlobArea = 0.05;
+
+// Minimum number of components in a text partition. A partition having fewer
+// components than that is more likely a data partition and is a candidate
+// table cell.
+const int kMinBoxesInTextPartition = 10;
+
+// Maximum number of components that a data partition can have.
+const int kMaxBoxesInDataPartition = 20;
+
+// Maximum allowed gap in a text partition as a multiple of its median size.
+const double kMaxGapInTextPartition = 4.0;
+
+// Minimum value that the maximum gap in a text partition should have as a
+// factor of its median size.
+const double kMinMaxGapInTextPartition = 0.5;
+
+// The amount of overlap that is "normal" for adjacent blobs in a text
+// partition. This is used to calculate the gap between overlapping blobs.
+const double kMaxBlobOverlapFactor = 4.0;
+
+// Maximum x-height a table partition can have as a multiple of the global
+// median x-height.
+const double kMaxTableCellXheight = 2.0;
+
+// Maximum line spacing between a table column header and column contents
+// for merging the two (as a multiple of the partition's median_height).
+const int kMaxColumnHeaderDistance = 4;
+
+// Minimum ratio of num_table_partitions to num_text_partitions in a column
+// block for it to be called a table column.
+const double kTableColumnThreshold = 3.0;
+
+// Search for horizontal ruling lines within the vertical margin as a
+// multiple of grid size
+// const int kRulingVerticalMargin = 3;
+
+// Minimum overlap that a colpartition must have with a table region
+// to become part of that table.
+const double kMinOverlapWithTable = 0.6;
+
+// Maximum side space (distance from column boundary) that a typical
+// text-line in flowing text should have as a multiple of its x-height
+// (median size).
+const int kSideSpaceMargin = 10;
+
+// Fraction of the peak of the x-projection of a table region used to set
+// the threshold for the x-projection histogram.
+const double kSmallTableProjectionThreshold = 0.35;
+const double kLargeTableProjectionThreshold = 0.45;
+// Minimum number of rows required to look for more rows in the projection.
+const int kLargeTableRowCount = 6;
+
+// Minimum number of rows in a table.
+const int kMinRowsInTable = 3;
+
+// The amount of padding (multiplied by global_median_xheight_ during use)
+// that is added vertically to the adjacent leader search during
+// ColPartition marking.
+const int kAdjacentLeaderSearchPadding = 2;
+
+// Used when filtering false positives. When finding the last line
+// of a paragraph (typically left-aligned), the previous line should have
+// its center to the right of the last line by this scaled amount.
+const double kParagraphEndingPreviousLineRatio = 1.3;
+
+// The maximum amount of whitespace allowed left of a paragraph ending.
+// Do not filter a ColPartition with more than this space left of it.
+const double kMaxParagraphEndingLeftSpaceMultiple = 3.0;
+
+// Used when filtering false positives. The last line of a paragraph
+// should be preceded by a line that is predominantly text. This is the
+// ratio of text to whitespace (to the right of the text) that is required
+// for the previous line to count as text.
+const double kMinParagraphEndingTextToWhitespaceRatio = 3.0;
+
+// When counting table columns, this is the required gap between two columns
+// (it is multiplied by global_median_xheight_).
+const double kMaxXProjectionGapFactor = 2.0;
+
+// Used for similarity in partitions using stroke width. Values copied
+// from ColFind.cpp in Ray's CL.
+const double kStrokeWidthFractionalTolerance = 0.25;
+const double kStrokeWidthConstantTolerance = 2.0;
+
+#ifndef GRAPHICS_DISABLED
+static BOOL_VAR(textord_show_tables, false, "Show table regions (ScrollView)");
+static BOOL_VAR(textord_tablefind_show_mark, false,
+                "Debug table marking steps in detail (ScrollView)");
+static BOOL_VAR(textord_tablefind_show_stats, false,
+                "Show page stats used in table finding (ScrollView)");
+#endif  // !GRAPHICS_DISABLED
+static BOOL_VAR(textord_tablefind_recognize_tables, false,
+                "Enables the table recognizer for table layout and filtering.");
+
+ELISTIZE(ColSegment)
+CLISTIZE(ColSegment)
+
+// Generic deleter that frees the single object it is handed.
+// Instantiations of it serve as the destructor callbacks given to
+// BBGrid::ClearGridData().
+template <typename T>
+void DeleteObject(T* object) { delete object; }
+
+// Starts with the resolution and all global medians at zero and assumes a
+// left-to-right language until set_left_to_right_language() says otherwise.
+TableFinder::TableFinder()
+    : resolution_{0}, global_median_xheight_{0},
+      global_median_blob_width_{0}, global_median_ledding_{0},
+      left_to_right_language_{true} {
+}
+
+TableFinder::~TableFinder() {
+  // The ColPartitions and ColSegments stored in these grids were created by
+  // this class, so each grid's contents must be deleted explicitly here.
+  clean_part_grid_.ClearGridData(&DeleteObject<ColPartition>);
+  leader_and_ruling_grid_.ClearGridData(&DeleteObject<ColPartition>);
+  fragmented_text_grid_.ClearGridData(&DeleteObject<ColPartition>);
+  col_seg_grid_.ClearGridData(&DeleteObject<ColSegment>);
+  table_grid_.ClearGridData(&DeleteObject<ColSegment>);
+}
+
+void TableFinder::set_left_to_right_language(bool order) {
+  left_to_right_language_ = order;  // true when text reads left to right
+}
+
+void TableFinder::Init(int grid_size, const ICOORD& bottom_left,
+                       const ICOORD& top_right) {
+  // Give all five internal grids the same cell size and the same corners.
+  clean_part_grid_.Init(grid_size, bottom_left, top_right);
+  leader_and_ruling_grid_.Init(grid_size, bottom_left, top_right);
+  fragmented_text_grid_.Init(grid_size, bottom_left, top_right);
+  col_seg_grid_.Init(grid_size, bottom_left, top_right);
+  table_grid_.Init(grid_size, bottom_left, top_right);
+}
+
+// Copy cleaned partitions from the given grid into clean_part_grid_ (and
+// their word fragments into fragmented_text_grid_), and insert leaders and
+// ruling lines into leader_and_ruling_grid_. `block` is not used here.
+void TableFinder::InsertCleanPartitions(ColPartitionGrid* grid,
+                                        TO_BLOCK* block) {
+  // Compute the global medians that AllowTextPartition()/AllowBlob() use.
+  SetGlobalSpacings(grid);
+
+  // Visit every ColPartition in the grid once.
+  ColPartitionGridSearch gsearch(grid);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartFullSearch();
+  for (ColPartition* part = gsearch.NextFullSearch(); part != nullptr;
+       part = gsearch.NextFullSearch()) {
+    // Reject partitions with nothing useful inside of them.
+    if (part->blob_type() == BRT_NOISE || part->bounding_box().area() <= 0)
+      continue;
+    ColPartition* clean_part = part->ShallowCopy();
+    if (part->IsLineType()) {
+      InsertRulingPartition(clean_part);
+      continue;
+    }
+    // Insert all non-text partitions to clean_parts
+    if (!part->IsTextType()) {
+      InsertImagePartition(clean_part);
+      continue;
+    }
+    // Text: add the acceptable blobs to the copy, diverting leader blobs
+    // into a separate partition that is created on first use.
+    ColPartition* leader_part = nullptr;
+    BLOBNBOX_C_IT blob_it(part->boxes());
+    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+      BLOBNBOX* blob = blob_it.data();
+      // Bad blobs... happens in UNLV set.
+      // news.3G1, page 17 (around x=6)
+      if (!AllowBlob(*blob))
+        continue;
+      if (blob->flow() != BTFT_LEADER) {
+        if (blob->region_type() != BRT_NOISE)
+          clean_part->AddBox(blob);
+        continue;
+      }
+      if (leader_part == nullptr) {
+        leader_part = part->ShallowCopy();
+        leader_part->set_flow(BTFT_LEADER);
+      }
+      leader_part->AddBox(blob);
+    }
+    clean_part->ComputeLimits();
+    ColPartition* fragmented = clean_part->CopyButDontOwnBlobs();
+    InsertTextPartition(clean_part);
+    SplitAndInsertFragmentedTextPartition(fragmented);
+    if (leader_part != nullptr) {
+      // TODO(nbeato): Note that ComputeLimits does not update the column
+      // information. So the leader may appear to span more columns than it
+      // really does later on when IsInSameColumnAs gets called to test
+      // for adjacent leaders.
+      leader_part->ComputeLimits();
+      InsertLeaderPartition(leader_part);
+    }
+  }
+
+  // Make the partition partners better for upper and lower neighbors.
+  clean_part_grid_.FindPartitionPartners();
+  clean_part_grid_.RefinePartitionPartners(false);
+}
+
+// High-level entry point that runs the whole table detection pipeline.
+void TableFinder::LocateTables(ColPartitionGrid* grid,
+                               ColPartitionSet** all_columns,
+                               WidthCallback width_cb,
+                               const FCOORD& reskew) {  // reskew is unused
+  // Initialize neighbors, partition spacings, and global spacings.
+  InitializePartitions(all_columns);
+
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_tables) {
+    ScrollView* table_win = MakeWindow(0, 300, "Column Partitions & Neighbors");
+    DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
+    DisplayColPartitions(table_win, &leader_and_ruling_grid_,
+                         ScrollView::AQUAMARINE);
+    DisplayColPartitionConnections(table_win, &clean_part_grid_,
+                                   ScrollView::ORANGE);
+
+    table_win = MakeWindow(100, 300, "Fragmented Text");
+    DisplayColPartitions(table_win, &fragmented_text_grid_, ScrollView::BLUE);
+  }
+#endif // !GRAPHICS_DISABLED
+
+  // Mark, filter, and smooth candidate table partitions.
+  MarkTablePartitions();
+
+  // Make single-column blocks from the all_columns partitions. The segments
+  // are moved to col_seg_grid_ below, which then takes ownership of them.
+  ColSegment_LIST column_blocks;
+  GetColumnBlocks(all_columns, &column_blocks);
+  // Set the ratio of candidate table partitions in each column
+  SetColumnsType(&column_blocks);
+
+  // Move column segments to col_seg_grid_
+  MoveColSegmentsToGrid(&column_blocks, &col_seg_grid_);
+
+  // Detect split in column layout that might have occurred due to the
+  // presence of a table. In such a case, merge the corresponding columns.
+  GridMergeColumnBlocks();
+
+  // Group horizontally overlapping table partitions into table columns.
+  // table_columns created here get deleted at the end of this method.
+  ColSegment_LIST table_columns;
+  GetTableColumns(&table_columns);
+
+  // Within each column, mark the range table regions occupy based on the
+  // table columns detected. table_regions are moved to table_grid_ below,
+  // which then takes ownership of them.
+  ColSegment_LIST table_regions;
+  GetTableRegions(&table_columns, &table_regions);
+
+#ifndef GRAPHICS_DISABLED
+  if (textord_tablefind_show_mark) {
+    ScrollView* table_win = MakeWindow(1200, 300, "Table Columns and Regions");
+    DisplayColSegments(table_win, &table_columns, ScrollView::DARK_TURQUOISE);
+    DisplayColSegments(table_win, &table_regions, ScrollView::YELLOW);
+  }
+#endif // !GRAPHICS_DISABLED
+
+  // Merge table regions across columns for tables spanning multiple
+  // columns
+  MoveColSegmentsToGrid(&table_regions, &table_grid_);
+  GridMergeTableRegions();
+
+  // Adjust table boundaries by including nearby horizontal lines and left
+  // out column headers, then merge again.
+  AdjustTableBoundaries();
+  GridMergeTableRegions();
+
+  if (textord_tablefind_recognize_tables) {
+    // Remove false alarms consisting of a single column
+    DeleteSingleColumnTables();
+
+#ifndef GRAPHICS_DISABLED
+    if (textord_show_tables) {
+      ScrollView* table_win = MakeWindow(1200, 300, "Detected Table Locations");
+      DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
+      DisplayColSegments(table_win, &table_columns, ScrollView::KHAKI);
+      table_grid_.DisplayBoxes(table_win);
+    }
+#endif // !GRAPHICS_DISABLED
+
+    // Find table grid structure and reject tables that are malformed.
+    RecognizeTables();
+    GridMergeTableRegions();
+    RecognizeTables();
+
+#ifndef GRAPHICS_DISABLED
+    if (textord_show_tables) {
+      ScrollView* table_win = MakeWindow(1400, 600, "Recognized Tables");
+      DisplayColPartitions(table_win, &clean_part_grid_,
+                           ScrollView::BLUE, ScrollView::BLUE);
+      table_grid_.DisplayBoxes(table_win);
+    }
+#endif // !GRAPHICS_DISABLED
+  } else {
+    // Remove false alarms consisting of a single column
+    // TODO(nbeato): verify this is a NOP after structured table rejection.
+    // Right now it isn't. If the recognize function is doing what it is
+    // supposed to do, this function is obsolete.
+    DeleteSingleColumnTables();
+
+#ifndef GRAPHICS_DISABLED
+    if (textord_show_tables) {
+      ScrollView* table_win = MakeWindow(1500, 300, "Detected Tables");
+      DisplayColPartitions(table_win, &clean_part_grid_,
+                           ScrollView::BLUE, ScrollView::BLUE);
+      table_grid_.DisplayBoxes(table_win);
+    }
+#endif // !GRAPHICS_DISABLED
+  }
+
+  // Merge all colpartitions in table regions to make them a single
+  // colpartition and revert types of isolated table cells not
+  // assigned to any table to their original types.
+  MakeTableBlocks(grid, all_columns, width_cb);
+}
+// Grid geometry accessors. Init() gives all five grids the same dimensions,
+// so just return the clean_part_grid_ values instead of keeping duplicates.
+// (Per the original note, these match the part_grid_ passed to
+// InsertCleanPartitions, i.e. the grid that is the base of ColumnFinder.)
+int TableFinder::gridsize() const {
+  return clean_part_grid_.gridsize();
+}
+int TableFinder::gridwidth() const {
+  return clean_part_grid_.gridwidth();
+}
+int TableFinder::gridheight() const {
+  return clean_part_grid_.gridheight();
+}
+const ICOORD& TableFinder::bleft() const {
+  return clean_part_grid_.bleft();
+}
+const ICOORD& TableFinder::tright() const {
+  return clean_part_grid_.tright();
+}
+
+// Inserts part into clean_part_grid_ if allowed, otherwise deletes it.
+void TableFinder::InsertTextPartition(ColPartition* part) {
+  ASSERT_HOST(part != nullptr);
+  if (!AllowTextPartition(*part))
+    delete part;
+  else
+    clean_part_grid_.InsertBBox(true, true, part);
+}
+// Inserts part into fragmented_text_grid_ if allowed, otherwise deletes it.
+void TableFinder::InsertFragmentedTextPartition(ColPartition* part) {
+  ASSERT_HOST(part != nullptr);
+  if (!AllowTextPartition(*part))
+    delete part;
+  else
+    fragmented_text_grid_.InsertBBox(true, true, part);
+}
+// Keeps a leader partition only if it is non-empty with a positive area.
+void TableFinder::InsertLeaderPartition(ColPartition* part) {
+  ASSERT_HOST(part != nullptr);
+  if (part->IsEmpty() || part->bounding_box().area() <= 0)
+    delete part;
+  else
+    leader_and_ruling_grid_.InsertBBox(true, true, part);
+}
+void TableFinder::InsertRulingPartition(ColPartition* part) {
+  leader_and_ruling_grid_.InsertBBox(true, true, part);  // always inserted
+}
+void TableFinder::InsertImagePartition(ColPartition* part) {
+  // Images share clean_part_grid_ with text. NOTE: if they ever move to a
+  // different grid, SetPartitionSpacings() needs to be updated. It should
+  // be the only thing that cares about image partitions.
+  clean_part_grid_.InsertBBox(true, true, part);
+}
+
+// Splits a partition into its "words" wherever the gap between blobs is
+// wider than kSplitPartitionSize times the partition's median width, and
+// inserts every piece through InsertFragmentedTextPartition(). This lets
+// the table recognizer "cut through" the text lines on the page, on the
+// assumption that a table has several lines with similar overlapping
+// whitespace whereas running text does not.
+// Note: The code assumes that blobs are sorted by the left side x!
+// This will not work (as well) if the blobs are sorted by center/right.
+void TableFinder::SplitAndInsertFragmentedTextPartition(ColPartition* part) {
+  ASSERT_HOST(part != nullptr);
+  // A partition without blobs has nothing to insert; just free it.
+  if (part->boxes()->empty()) {
+    delete part;
+    return;
+  }
+
+  // The AllowBlob function prevents this.
+  ASSERT_HOST(part->median_width() > 0);
+  const double min_split_gap = part->median_width() * kSplitPartitionSize;
+
+  ColPartition* remainder = part;
+  bool split_made;
+  do {
+    split_made = false;
+    BLOBNBOX_C_IT blob_it(remainder->boxes());
+    // Blobs are sorted left side first. If blobs overlap,
+    // the previous blob may have a "more right" right side.
+    // Account for this by always keeping the largest "right"
+    // so far.
+    int max_right = INT32_MIN;
+
+    // Look for the next split in the partition.
+    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+      const TBOX& box = blob_it.data()->bounding_box();
+      if (max_right != INT32_MIN && box.left() - max_right > min_split_gap) {
+        // Cut halfway across the gap: the left piece goes into the grid
+        // and the scan restarts on what remains to the right.
+        const int mid_x = (box.left() + max_right) / 2;
+        ColPartition* left_piece = remainder;
+        remainder = left_piece->SplitAt(mid_x);
+
+        InsertFragmentedTextPartition(left_piece);
+        split_made = true;
+        break;
+      }
+
+      // Track the rightmost edge of all blobs seen so far.
+      if (box.right() > max_right)
+        max_right = box.right();
+    }
+  } while (split_made);
+  // When a split is not found, the right part is minimized
+  // as much as possible, so process it.
+  InsertFragmentedTextPartition(remainder);
+}
+
+// Cheap size filter for text partitions: the partition's median height and
+// width, and its bounding-box area per blob, must each exceed a fixed
+// fraction (kAllowText*) of the corresponding global page median.
+// The area metric will almost always pass for multi-blob partitions.
+// It is useful when filtering out noise caused by an isolated blob.
+bool TableFinder::AllowTextPartition(const ColPartition& part) const {
+  const int median_area = global_median_xheight_ * global_median_blob_width_;
+  const double min_height = global_median_xheight_ * kAllowTextHeight;
+  const double min_width = global_median_blob_width_ * kAllowTextWidth;
+  const double min_area_per_blob = median_area * kAllowTextArea;
+  // Keep comparisons strictly greater to disallow 0!
+  if (part.median_height() <= min_height) return false;
+  if (part.median_width() <= min_width) return false;
+  return part.bounding_box().area() > min_area_per_blob * part.boxes_count();
+}
+
+// Size filter for a single blob: its height, width and area must each
+// exceed a kAllowBlob* fraction of the global medians. Keep in mind that
+// leaders, commas, and periods are important in tables.
+bool TableFinder::AllowBlob(const BLOBNBOX& blob) const {
+  const TBOX& box = blob.bounding_box();
+  const int median_area = global_median_xheight_ * global_median_blob_width_;
+  const double min_height = global_median_xheight_ * kAllowBlobHeight;
+  const double min_width = global_median_blob_width_ * kAllowBlobWidth;
+  const double min_area = median_area * kAllowBlobArea;
+  // Keep comparisons strictly greater to disallow 0!
+  if (box.height() <= min_height || box.width() <= min_width) return false;
+  return box.area() > min_area;
+}
+
+// Creates a debug window through clean_part_grid_ (temporary solution).
+// TODO(nbeato): The grid that makes the window doesn't seem to matter.
+// The only downside is that window messages will be caught by
+// clean_part_grid_ instead of a useful object.
+#ifndef GRAPHICS_DISABLED
+ScrollView* TableFinder::MakeWindow(int x, int y, const char* window_name) {
+  return clean_part_grid_.MakeWindow(x, y, window_name);
+}
+#endif  // !GRAPHICS_DISABLED
+
+// Make single-column blocks from the per-row column sets in all_columns.
+void TableFinder::GetColumnBlocks(ColPartitionSet** all_columns,
+                                  ColSegment_LIST* column_blocks) {
+  for (int y = 0; y < gridheight(); ++y) {
+    ColPartitionSet* columns = all_columns[y];
+    if (columns == nullptr)
+      continue;  // no column set for this grid row
+    // Get boxes from the current vertical position on the grid
+    ColSegment_LIST new_blocks;
+    columns->GetColumnBoxes(y * gridsize(), (y + 1) * gridsize(), &new_blocks);
+    // Merge the new_blocks boxes into column_blocks if they are well-aligned
+    GroupColumnBlocks(&new_blocks, column_blocks);
+  }
+}
+
+// Merge new column segments into column_blocks where they are well aligned.
+void TableFinder::GroupColumnBlocks(ColSegment_LIST* new_blocks,
+                                    ColSegment_LIST* column_blocks) {
+  ColSegment_IT src_it(new_blocks);
+  ColSegment_IT dest_it(column_blocks);
+  // Each source segment leaves new_blocks: it is either merged or moved.
+  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
+    ColSegment* src_seg = src_it.data();
+    const TBOX& src_box = src_seg->bounding_box();
+    bool match_found = false;
+    // Iterate through the destination list to find a matching column block.
+    for (dest_it.mark_cycle_pt(); !dest_it.cycled_list(); dest_it.forward()) {
+      ColSegment* dest_seg = dest_it.data();
+      TBOX dest_box = dest_seg->bounding_box();
+      if (ConsecutiveBoxes(src_box, dest_box)) {
+        // A matching block was found: grow it by the source box first
+        // (src_box refers into src_seg), then delete the source block.
+        dest_seg->InsertBox(src_box);
+        match_found = true;
+        delete src_it.extract();
+        break;
+      }
+    }
+    // If no match is found, move the source block over to column_blocks.
+    if (!match_found) {
+      dest_it.add_after_then_move(src_it.extract());
+    }
+  }
+}
+
+// Are the two boxes immediate neighbors along the vertical direction?
+bool TableFinder::ConsecutiveBoxes(const TBOX &b1, const TBOX &b2) {
+  const int x_margin = 20;  // max difference allowed per side edge
+  const int y_margin = 5;   // max distance between facing top/bottom edges
+  if (abs(b1.left() - b2.left()) >= x_margin) return false;
+  if (abs(b1.right() - b2.right()) >= x_margin) return false;
+  return abs(b1.top() - b2.bottom()) < y_margin ||
+         abs(b2.top() - b1.bottom()) < y_margin;
+}
+
+// Set up info for clean_part_grid_ partitions so that it is valid during
+// the detection code: neighbors, then spacings, then global medians.
+void TableFinder::InitializePartitions(ColPartitionSet** all_columns) {
+  FindNeighbors();
+  SetPartitionSpacings(&clean_part_grid_, all_columns);
+  SetGlobalSpacings(&clean_part_grid_);  // reads the spacings set above
+}
+
+// Set left, right, above and below spacings of each colpartition in grid.
+// Grid rows whose all_columns entry is null get no column-based spacing.
+void TableFinder::SetPartitionSpacings(ColPartitionGrid* grid,
+                                       ColPartitionSet** all_columns) {
+  // Iterate the ColPartitions in the grid.
+  ColPartitionGridSearch gsearch(grid);
+  gsearch.StartFullSearch();
+  ColPartition* part = nullptr;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    ColPartitionSet* columns = all_columns[gsearch.GridY()];
+    TBOX box = part->bounding_box();
+    int y = part->MidY();
+    ColPartition* left_column =
+        columns ? columns->ColumnContaining(box.left(), y) : nullptr;
+    ColPartition* right_column =
+        columns ? columns->ColumnContaining(box.right(), y) : nullptr;
+    // set distance from left column as space to the left
+    if (left_column) {
+      int left_space = std::max(0, box.left() - left_column->LeftAtY(y));
+      part->set_space_to_left(left_space);
+    }
+    // set distance from right column as space to the right
+    if (right_column) {
+      int right_space = std::max(0, right_column->RightAtY(y) - box.right());
+      part->set_space_to_right(right_space);
+    }
+
+    // Look for images that may be closer.
+    // NOTE: used to be part_grid_, might cause issues now
+    ColPartitionGridSearch hsearch(grid);
+    hsearch.StartSideSearch(box.left(), box.bottom(), box.top());
+    ColPartition* neighbor = nullptr;
+    while ((neighbor = hsearch.NextSideSearch(true)) != nullptr) {
+      if (neighbor->type() == PT_PULLOUT_IMAGE ||
+          neighbor->type() == PT_FLOWING_IMAGE ||
+          neighbor->type() == PT_HEADING_IMAGE) {
+        int right = neighbor->bounding_box().right();
+        if (right < box.left()) {  // image lies entirely to the left
+          int space = std::min(box.left() - right, part->space_to_left());
+          part->set_space_to_left(space);
+        }
+      }
+    }
+    hsearch.StartSideSearch(box.left(), box.bottom(), box.top());
+    while ((neighbor = hsearch.NextSideSearch(false)) != nullptr) {
+      if (neighbor->type() == PT_PULLOUT_IMAGE ||
+          neighbor->type() == PT_FLOWING_IMAGE ||
+          neighbor->type() == PT_HEADING_IMAGE) {
+        int left = neighbor->bounding_box().left();
+        if (left > box.right()) {  // image lies entirely to the right
+          int space = std::min(left - box.right(), part->space_to_right());
+          part->set_space_to_right(space);
+        }
+      }
+    }
+
+    ColPartition* upper_part = part->SingletonPartner(true);
+    if (upper_part) {
+      int space = std::max(0, static_cast<int>(upper_part->bounding_box().bottom() -
+                         part->bounding_box().bottom()));
+      part->set_space_above(space);
+    } else {
+      // TODO(nbeato): What constitutes a good value?
+      // 0 is the default value when not set, explicitly noting it needs to
+      // be something else.
+      part->set_space_above(INT32_MAX);
+    }
+
+    ColPartition* lower_part = part->SingletonPartner(false);
+    if (lower_part) {
+      int space = std::max(0, static_cast<int>(part->bounding_box().bottom() -
+                         lower_part->bounding_box().bottom()));
+      part->set_space_below(space);
+    } else {
+      // Same TODO as for space_above: must not stay at the default of 0.
+      part->set_space_below(INT32_MAX);
+    }
+  }
+}
+
+// Searches up to kMaxVerticalSpacing above and below `part` for partitions
+// with major x-overlap, picks the one on each side whose median bottom is
+// closest, and stores the gaps and neighbors on `part`.
+void TableFinder::SetVerticalSpacing(ColPartition* part) {
+  TBOX part_box = part->bounding_box();
+  // Stretch the search box vertically, but stay inside the grid limits.
+  TBOX search_box = part_box;
+  search_box.set_top(std::min(part_box.top() + kMaxVerticalSpacing,
+                              static_cast<int>(tright().y())));
+  search_box.set_bottom(std::max(part_box.bottom() - kMaxVerticalSpacing,
+                                 static_cast<int>(bleft().y())));
+  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+      rectsearch(&clean_part_grid_);
+  rectsearch.StartRectSearch(search_box);
+  int best_gap_above = kMaxVerticalSpacing;
+  int best_gap_below = kMaxVerticalSpacing;
+  ColPartition* nearest_above = nullptr;
+  ColPartition* nearest_below = nullptr;
+  for (ColPartition* neighbor = rectsearch.NextRectSearch();
+       neighbor != nullptr; neighbor = rectsearch.NextRectSearch()) {
+    if (neighbor == part)
+      continue;
+    TBOX neighbor_box = neighbor->bounding_box();
+    if (!neighbor_box.major_x_overlap(part_box))
+      continue;
+    const int gap = abs(part->median_bottom() - neighbor->median_bottom());
+    if (neighbor_box.top() < part_box.bottom() && gap < best_gap_below) {
+      // Closest partition found so far that lies entirely below.
+      best_gap_below = gap;
+      nearest_below = neighbor;
+    } else if (part_box.top() < neighbor_box.bottom() &&
+               gap < best_gap_above) {
+      // Closest partition found so far that lies entirely above.
+      best_gap_above = gap;
+      nearest_above = neighbor;
+    }
+  }
+  part->set_space_above(best_gap_above);
+  part->set_space_below(best_gap_below);
+  part->set_nearest_neighbor_above(nearest_above);
+  part->set_nearest_neighbor_below(nearest_below);
+}
+
+// Set global spacing and x-height estimates from the text partitions of
+// grid: medians of blob height, blob width, and space above/below.
+void TableFinder::SetGlobalSpacings(ColPartitionGrid* grid) {
+  STATS xheight_stats(0, kMaxVerticalSpacing + 1);
+  STATS width_stats(0, kMaxBlobWidth + 1);
+  STATS ledding_stats(0, kMaxVerticalSpacing + 1);
+  // Iterate the ColPartitions in the grid.
+  ColPartitionGridSearch gsearch(grid);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartFullSearch();
+  for (ColPartition* part = gsearch.NextFullSearch(); part != nullptr;
+       part = gsearch.NextFullSearch()) {
+    // TODO(nbeato): HACK HACK HACK! medians are equal to partition length.
+    // ComputeLimits needs to get called somewhere outside of TableFinder
+    // to make sure the partitions are properly initialized.
+    // When this is called, SmoothPartitionPartners dies in an assert after
+    // table find runs. Alternative solution.
+    // part->ComputeLimits();
+    if (!part->IsTextType())
+      continue;
+    // xheight_stats.add(part->median_height(), part->boxes_count());
+    // width_stats.add(part->median_width(), part->boxes_count());
+    // This loop can be removed when above issues are fixed.
+    // Replace it with the 2 lines commented out above.
+    BLOBNBOX_C_IT blob_it(part->boxes());
+    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+      const TBOX& blob_box = blob_it.data()->bounding_box();
+      xheight_stats.add(blob_box.height(), 1);
+      width_stats.add(blob_box.width(), 1);
+    }
+    ledding_stats.add(part->space_above(), 1);
+    ledding_stats.add(part->space_below(), 1);
+  }
+  // Store each median, rounded to the nearest integer.
+  set_global_median_xheight(static_cast<int>(xheight_stats.median() + 0.5));
+  set_global_median_blob_width(static_cast<int>(width_stats.median() + 0.5));
+  set_global_median_ledding(static_cast<int>(ledding_stats.median() + 0.5));
+#ifndef GRAPHICS_DISABLED
+  if (textord_tablefind_show_stats) {
+    const char* kWindowName = "X-height (R), X-width (G), and ledding (B)";
+    ScrollView* stats_win = MakeWindow(500, 10, kWindowName);
+    xheight_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::RED);
+    width_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::GREEN);
+    ledding_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::BLUE);
+  }
+#endif  // !GRAPHICS_DISABLED
+}
+
+void TableFinder::set_global_median_xheight(int xheight) {
+  global_median_xheight_ = xheight;  // median blob height in text partitions
+}
+void TableFinder::set_global_median_blob_width(int width) {
+  global_median_blob_width_ = width;  // median blob width in text partitions
+}
+void TableFinder::set_global_median_ledding(int ledding) {
+  global_median_ledding_ = ledding;  // median space above/below text parts
+}
+
+// Sets nearest_neighbor_above/below of every partition in clean_part_grid_
+// to its SingletonPartner() in that direction, when there is one.
+void TableFinder::FindNeighbors() {
+  ColPartitionGridSearch gsearch(&clean_part_grid_);
+  gsearch.StartFullSearch();
+  for (ColPartition* part = gsearch.NextFullSearch(); part != nullptr;
+       part = gsearch.NextFullSearch()) {
+    // TODO(nbeato): Rename this function, meaning is different now.
+    // IT is finding nearest neighbors its own way
+    //SetVerticalSpacing(part);
+
+    if (ColPartition* above = part->SingletonPartner(true))
+      part->set_nearest_neighbor_above(above);
+
+    if (ColPartition* below = part->SingletonPartner(false))
+      part->set_nearest_neighbor_below(below);
+  }
+}
+
+// High level interface. Input is an unmarked ColPartitionGrid
+// (namely, clean_part_grid_). Partitions are marked using local
+// information, then filtered, smoothed and filtered again. On exit the
+// grid should contain a good sampling of the table partitions.
+void TableFinder::MarkTablePartitions() {
+#ifndef GRAPHICS_DISABLED
+  // Debug aid: makes a window and draws both partition grids in it.
+  auto show_partitions = [this](int x, int y, const char* title) {
+    ScrollView* table_win = MakeWindow(x, y, title);
+    DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
+    DisplayColPartitions(table_win, &leader_and_ruling_grid_,
+                         ScrollView::AQUAMARINE);
+  };
+#endif
+  // Stage 1: initial marking from local information.
+  MarkPartitionsUsingLocalInformation();
+#ifndef GRAPHICS_DISABLED
+  if (textord_tablefind_show_mark)
+    show_partitions(300, 300, "Initial Table Partitions");
+#endif
+
+  // Stage 2: remove false alarms from the initial marking.
+  FilterFalseAlarms();
+#ifndef GRAPHICS_DISABLED
+  if (textord_tablefind_show_mark)
+    show_partitions(600, 300, "Filtered Table Partitions");
+#endif
+
+  // Stage 3: smooth runs of table partitions.
+  SmoothTablePartitionRuns();
+#ifndef GRAPHICS_DISABLED
+  if (textord_tablefind_show_mark)
+    show_partitions(900, 300, "Smoothed Table Partitions");
+#endif
+
+  // Stage 4: remove false alarms again after smoothing.
+  FilterFalseAlarms();
+#ifndef GRAPHICS_DISABLED
+  if (textord_tablefind_show_mark || textord_show_tables)
+    show_partitions(900, 300, "Final Table Partitions");
+#endif
+}
+
+// These types of partitions are marked as table partitions:
+//  1- Partitions that have at least one large gap between words
+//  2- Partitions that consist of only one word (no significant gap
+//     between components)
+//  3- Partitions that vertically overlap with other partitions within the
+//     same column (NOTE: no such test is made in this function).
+//  4- Partitions with leaders before/after them.
+void TableFinder::MarkPartitionsUsingLocalInformation() {
+  // Walk every ColPartition in the grid.
+  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+      full_search(&clean_part_grid_);
+  full_search.StartFullSearch();
+  for (ColPartition* candidate = full_search.NextFullSearch();
+       candidate != nullptr; candidate = full_search.NextFullSearch()) {
+    // Only text partitions in the dominant font size or smaller qualify.
+    const bool is_candidate_cell =
+        candidate->IsTextType() &&
+        !(candidate->median_height() >
+          kMaxTableCellXheight * global_median_xheight_);
+    if (!is_candidate_cell)
+      continue;
+    // A large gap, no significant gap, or an adjacent leader marks the
+    // partition as a table partition.
+    // Comments: It produces several false alarms at:
+    //  - last line of a paragraph (fixed)
+    //  - single word section headings
+    //  - page headers and footers
+    //  - numbered equations
+    //  - line drawing regions
+    // TODO(faisal): detect and fix above-mentioned cases
+    if (HasWideOrNoInterWordGap(candidate) || HasLeaderAdjacent(*candidate))
+      candidate->set_table_type();
+  }
+}
+
+// Returns true if the text partition either contains at least one wide gap
+// between consecutive blobs, or contains no significant gap at all.
+bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
+  // Should only get text partitions.
+  ASSERT_HOST(part->IsTextType());
+  // Blob access
+  BLOBNBOX_CLIST* blobs = part->boxes();
+  BLOBNBOX_C_IT blob_it(blobs);
+  // A relatively small partition (such as a single word) qualifies outright.
+  const bool is_narrow = part->bounding_box().width() <
+                         kMinBoxesInTextPartition * part->median_height();
+  if (is_narrow && blobs->length() < kMinBoxesInTextPartition)
+    return true;
+
+  // Text partition gap limits. If this is text (and not a table),
+  // there should be at least one gap larger than narrow_limit and no gap
+  // larger than wide_limit.
+  const double wide_limit = kMaxGapInTextPartition * part->median_height();
+  const double narrow_limit =
+      kMinMaxGapInTextPartition * part->median_height();
+  // Right edge of the blob run seen so far; -1 until the first blob is read.
+  int run_right = -1;
+  // Widest gap measured so far; -1 while no gap has been measured.
+  int widest_gap = -1;
+
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    const auto& blob_box = blob_it.data()->bounding_box();
+    const int blob_left = blob_box.left();
+    const int blob_right = blob_box.right();
+    if (run_right == -1) {
+      // Nothing to measure against yet.
+      run_right = blob_right;
+      continue;
+    }
+    // Note: Iterating over blobs in a partition, so we are looking for
+    // spacing between the words.
+    const int gap = blob_left - run_right;
+
+    // TODO(nbeato): Boxes may overlap? Huh?
+    // For example, mag.3B 8003_033.3B.tif in UNLV data. The titles/authors
+    // on the top right of the page are filtered out with this line.
+    //
+    // More likely case, the blobs slightly overlap. This can happen
+    // with diacritics (accents) or broken alphabet symbols (characters).
+    // Merge boxes together by taking max of right sides.
+    if (gap < 0 && -gap < part->median_height() * kMaxBlobOverlapFactor) {
+      run_right = std::max(run_right, blob_right);
+      continue;
+    }
+    // Extreme case (gap < 0 but not caught above): blobs overlap
+    // significantly in the same partition...
+    // This should not happen often (if at all), but it does.
+    // TODO(nbeato): investigate cases when this happens.
+    // The behavior before was to completely ignore this case, so such a
+    // gap simply falls through to the checks below.
+
+    // If a large enough gap is found, mark it as a table cell (return true)
+    if (gap > wide_limit)
+      return true;
+    widest_gap = std::max(widest_gap, gap);
+    run_right = blob_right;
+  }
+  // Since no large gap was found, return false if the partition is too
+  // long to be a data cell
+  const bool too_wide = part->bounding_box().width() >
+                        kMaxBoxesInDataPartition * part->median_height();
+  if (too_wide || blobs->length() > kMaxBoxesInDataPartition)
+    return false;
+
+  // A partition may be a single blob. In this case, it's an isolated symbol
+  // or non-text (such as a ruling or image).
+  // Detect these as table partitions? Shouldn't this be case by case?
+  // The behavior before was to ignore this, leaving the widest gap < 0
+  // and implicitly returning true. Just making it explicit.
+  if (widest_gap == -1)
+    return true;
+
+  // Otherwise report true only if the widest gap is below the minimum
+  // "max gap" expected of a text partition: with no significant space in
+  // the partition, it is likely a single word.
+  return widest_gap < narrow_limit;
+}
+
+// Returns true if part is itself a leader or has a leader next to it.
+// Tables may have leaders between data cells, so an aggressive way to find
+// such tables is to explicitly mark partitions that have adjacent leaders.
+// Overlapping leaders count; leaders in a different page column do not.
+// Possible false positives include lists, such as a table of contents.
+// As these arise, the aggressive nature of this search may need to be
+// trimmed down.
+bool TableFinder::HasLeaderAdjacent(const ColPartition& part) {
+  if (part.flow() == BTFT_LEADER)
+    return true;
+  // The search band covers the partition's vertical extent, padded above
+  // and below by a multiple of the global median xheight. The padding
+  // allows some tolerance in the event that the alignment of the leaders
+  // on the page is still a bit off.
+  const TBOX& part_box = part.bounding_box();
+  const int padding = kAdjacentLeaderSearchPadding * global_median_xheight_;
+  const int band_top = part_box.top() + padding;
+  const int band_bottom = part_box.bottom() - padding;
+  ColPartitionGridSearch side_search(&leader_and_ruling_grid_);
+  // Two passes: first with right_to_left set, then with it cleared.
+  const bool right_to_left_passes[] = {true, false};
+  for (const bool right_to_left : right_to_left_passes) {
+    const int start_x = right_to_left ? part_box.right() : part_box.left();
+    side_search.StartSideSearch(start_x, band_bottom, band_top);
+    for (ColPartition* leader = side_search.NextSideSearch(right_to_left);
+         leader != nullptr;
+         leader = side_search.NextSideSearch(right_to_left)) {
+      // The neighbor could be a horizontal ruling in the grid.
+      // Make sure it is actually a leader.
+      if (leader->flow() != BTFT_LEADER)
+        continue;
+      // This should not happen, they are in different grids.
+      ASSERT_HOST(&part != leader);
+      // Stop this pass once a leader lies in another page column,
+      // otherwise we are spreading across columns.
+      if (!part.IsInSameColumnAs(*leader))
+        break;
+      // A leader with significant vertical overlap passed all tests,
+      // so it is adjacent.
+      if (leader->VSignificantCoreOverlap(part))
+        return true;
+    }
+  }
+  // No leaders are adjacent to the given partition.
+  return false;
+}
+
+// Filter individual text partitions marked as table partitions
+// consisting of paragraph endings and headers and footers.
+// NOTE(review): small section headings are not filtered by either call below.
+void TableFinder::FilterFalseAlarms() {
+  FilterParagraphEndings();
+  FilterHeaderAndFooter();
+  // TODO(nbeato): Fully justified text as non-table?
+}
+
+// Clears the table mark from table partitions that look like the last line
+// of a paragraph of flowing text.
+void TableFinder::FilterParagraphEndings() {
+  ColPartitionGridSearch full_search(&clean_part_grid_);
+  full_search.StartFullSearch();
+  for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+       part = full_search.NextFullSearch()) {
+    if (part->type() != PT_TABLE)
+      continue;  // Consider only table partitions
+
+    // Paragraph ending should have flowing text above it.
+    ColPartition* above = part->nearest_neighbor_above();
+    if (above == nullptr || above->type() != PT_FLOWING_TEXT)
+      continue;
+    const TBOX& box = part->bounding_box();
+    const TBOX& above_box = above->bounding_box();
+    // The line above must be at least twice as wide as this one.
+    if (above_box.width() < 2 * box.width())
+      continue;
+
+    // Check if its the last line of a paragraph.
+    // In most cases, a paragraph ending should be left-aligned to text line
+    // above it. Sometimes, it could be a 2 line paragraph, in which case
+    // the line above it is indented.
+    // To account for that, compare how far the horizontal centers of the
+    // two lines are from a shared margin: the leftmost left edge for
+    // left-to-right languages, the rightmost right edge otherwise.
+    const int center = (box.left() + box.right()) / 2;
+    const int above_center = (above_box.left() + above_box.right()) / 2;
+    int center_offset = 0;        // this line's center to the margin
+    int above_center_offset = 0;  // center of the line above to the margin
+    if (left_to_right_language_) {
+      const int margin = std::min(box.left(), above_box.left());
+      center_offset = center - margin;
+      above_center_offset = above_center - margin;
+    } else {
+      const int margin = std::max(box.right(), above_box.right());
+      center_offset = margin - center;
+      above_center_offset = margin - above_center;
+    }
+    // Reject when this line's offset, scaled by the ratio, exceeds the
+    // offset of the line above.
+    if (center_offset * kParagraphEndingPreviousLineRatio >
+        above_center_offset)
+      continue;
+
+    // Paragraphs should have similar fonts.
+    const bool similar_font =
+        part->MatchingSizes(*above) &&
+        part->MatchingStrokeWidth(*above, kStrokeWidthFractionalTolerance,
+                                  kStrokeWidthConstantTolerance);
+    if (!similar_font)
+      continue;
+
+    // The last line of a paragraph should be left aligned.
+    // TODO(nbeato): This would be untrue if the text was right aligned.
+    // How often is that?
+    if (part->space_to_left() >
+        kMaxParagraphEndingLeftSpaceMultiple * part->median_height())
+      continue;
+    // The line above it should be right aligned (assuming justified format).
+    // Since we can't assume justified text, we compare whitespace to text.
+    // The above line should have majority spanning text (or the current
+    // line could have fit on the previous line).
+    if (above_box.width() <
+        kMinParagraphEndingTextToWhitespaceRatio * above->space_to_right())
+      continue;
+
+    // Ledding above the line should be less than ledding below, and at
+    // most twice the global median ledding.
+    const bool loose_above =
+        part->space_above() >= part->space_below() ||
+        part->space_above() > 2 * global_median_ledding_;
+
+    // If no check rejected the partition, it is probably text.
+    if (!loose_above)
+      part->clear_table_type();
+  }
+}
+
+// Clears the table mark from the text partition whose top is highest (taken
+// to be the header) and the one whose bottom is lowest (the footer).
+void TableFinder::FilterHeaderAndFooter() {
+  ColPartition* topmost = nullptr;     // header candidate
+  ColPartition* bottommost = nullptr;  // footer candidate
+  int highest_top = INT32_MIN;
+  int lowest_bottom = INT32_MAX;
+  ColPartitionGridSearch full_search(&clean_part_grid_);
+  full_search.StartFullSearch();
+  for (ColPartition* candidate = full_search.NextFullSearch();
+       candidate != nullptr; candidate = full_search.NextFullSearch()) {
+    if (!candidate->IsTextType())
+      continue;  // Consider only text partitions
+    const TBOX& box = candidate->bounding_box();
+    if (box.top() > highest_top) {
+      highest_top = box.top();
+      topmost = candidate;
+    }
+    if (box.bottom() < lowest_bottom) {
+      lowest_bottom = box.bottom();
+      bottommost = candidate;
+    }
+  }
+  if (topmost != nullptr)
+    topmost->clear_table_type();
+  if (bottommost != nullptr)
+    bottommost->clear_table_type();
+}
+
+// Mark all ColPartitions as table cells that have a table cell above
+// and below them, then unmark table cells whose neighbors above and below
+// both exist and are both non-table.
+// TODO(faisal): This is too aggressive at the moment. The method needs to
+// consider spacing and alignment as well. Detection of false alarm table cells
+// should also be done as part of it.
+void TableFinder::SmoothTablePartitionRuns() {
+  ColPartitionGridSearch full_search(&clean_part_grid_);
+  // Pass 1: promote partitions sandwiched between two table partitions.
+  full_search.StartFullSearch();
+  for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+       part = full_search.NextFullSearch()) {
+    // Skip PT_UNKNOWN and every type at or after PT_TABLE in enum order.
+    if (part->type() >= PT_TABLE || part->type() == PT_UNKNOWN)
+      continue;
+    ColPartition* above = part->nearest_neighbor_above();
+    ColPartition* below = part->nearest_neighbor_below();
+    const bool sandwiched = above != nullptr && below != nullptr &&
+                            above->type() == PT_TABLE &&
+                            below->type() == PT_TABLE;
+    if (sandwiched)
+      part->set_table_type();
+  }
+  // Pass 2, do the opposite. If both the upper and lower neighbors
+  // exist and are not tables, this probably shouldn't be a table.
+  full_search.StartFullSearch();
+  for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+       part = full_search.NextFullSearch()) {
+    if (part->type() != PT_TABLE)
+      continue;  // Consider only table partitions
+    ColPartition* above = part->nearest_neighbor_above();
+    ColPartition* below = part->nearest_neighbor_below();
+    // table can't be by itself
+    if (above != nullptr && above->type() != PT_TABLE &&
+        below != nullptr && below->type() != PT_TABLE)
+      part->clear_table_type();
+  }
+}
+
+// Set the type of a column segment based on the ratio of table to text cells
+void TableFinder::SetColumnsType(ColSegment_LIST* column_blocks) {
+  ColSegment_IT seg_it(column_blocks);
+  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
+    ColSegment* block = seg_it.data();
+    TBOX block_box = block->bounding_box();
+    // Count the table and flowing-text partitions returned by a rectangle
+    // search (with unique mode enabled) over the block's bounding box.
+    int table_cells = 0;
+    int text_cells = 0;
+    GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+        rect_search(&clean_part_grid_);
+    rect_search.SetUniqueMode(true);
+    rect_search.StartRectSearch(block_box);
+    for (ColPartition* part = rect_search.NextRectSearch(); part != nullptr;
+         part = rect_search.NextRectSearch()) {
+      if (part->type() == PT_TABLE)
+        ++table_cells;
+      else if (part->type() == PT_FLOWING_TEXT)
+        ++text_cells;
+    }
+    if (table_cells == 0 && text_cells == 0) {
+      // No text or table partition inside: not needed for table detection.
+      delete seg_it.extract();
+      continue;
+    }
+    block->set_num_table_cells(table_cells);
+    block->set_num_text_cells(text_cells);
+    // set column type based on the ratio of table to text cells
+    block->set_type();
+  }
+}
+
+// Transfers every column block out of the segments list and into
+// col_seg_grid (each one is extracted from the list, then inserted).
+void TableFinder::MoveColSegmentsToGrid(ColSegment_LIST *segments,
+                                         ColSegmentGrid *col_seg_grid) {
+  ColSegment_IT seg_it(segments);
+  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
+    col_seg_grid->InsertBBox(true, true, seg_it.extract());
+  }
+}
+
+// Merge column blocks if a split is detected due to the presence of a
+// table. A text block is considered split if it has multiple
+// neighboring blocks above/below it, and at least one of the
+// neighboring blocks is of table type (has a high density of table
+// partitions). In this case neighboring blocks in the direction
+// (above/below) of the table block are merged with the text block.
+//
+// Comment: This method does not handle split due to a full page table
+// since table columns in this case do not have a text column on which
+// split decision can be based.
+void TableFinder::GridMergeColumnBlocks() {
+  int margin = gridsize();  // vertical padding added to each search box
+
+  // Iterate the Column Blocks in the grid.
+  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
+    gsearch(&col_seg_grid_);
+  gsearch.StartFullSearch();
+  ColSegment* seg;
+  while ((seg = gsearch.NextFullSearch()) != nullptr) {
+    if (seg->type() != COL_TEXT)
+      continue;  // only consider text blocks for split detection
+    bool neighbor_found = false;  // A table neighbor was merged this pass
+    bool modified = false;  // Modified at least once
+    // keep expanding current box as long as neighboring table columns
+    // are found above or below it.
+    do {
+      TBOX box = seg->bounding_box();  // a copy, re-read on every pass
+      // expand the search region vertically, clamped to bleft()/tright()
+      int top_range = std::min(box.top() + margin, static_cast<int>(tright().y()));
+      int bottom_range = std::max(box.bottom() - margin, static_cast<int>(bleft().y()));
+      box.set_top(top_range);
+      box.set_bottom(bottom_range);
+      neighbor_found = false;
+      GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
+          rectsearch(&col_seg_grid_);
+      rectsearch.StartRectSearch(box);
+      ColSegment* neighbor = nullptr;
+      while ((neighbor = rectsearch.NextRectSearch()) != nullptr) {
+        if (neighbor == seg)
+          continue;
+        const TBOX& neighbor_box = neighbor->bounding_box();
+        // If the neighbor box significantly overlaps with the current
+        // box (due to the expansion of the current box in the
+        // previous iteration of this loop), remove the neighbor box
+        // and expand the current box to include it.
+        if (neighbor_box.overlap_fraction(box) >= 0.9) {
+          seg->InsertBox(neighbor_box);
+          modified = true;
+          rectsearch.RemoveBBox();
+          gsearch.RepositionIterator();
+          delete neighbor;  // neighbor_box must not be used past this point
+          continue;
+        }
+        // Only expand if the neighbor box is of table type
+        if (neighbor->type() != COL_TABLE)
+          continue;
+        // Insert the neighbor box into the current column block
+        if (neighbor_box.major_x_overlap(box) &&
+            !box.contains(neighbor_box)) {
+          seg->InsertBox(neighbor_box);
+          neighbor_found = true;  // forces another pass of the do-loop
+          modified = true;
+          rectsearch.RemoveBBox();
+          gsearch.RepositionIterator();
+          delete neighbor;
+        }
+      }
+    } while (neighbor_found);
+    if (modified) {
+      // Because the box has changed, it has to be removed first.
+      gsearch.RemoveBBox();
+      col_seg_grid_.InsertBBox(true, true, seg);
+      gsearch.RepositionIterator();
+    }
+  }
+}
+
+// Group horizontally overlapping table partitions into table columns.
+// TODO(faisal): This is too aggressive at the moment. The method should
+// consider more attributes to group table partitions together. Some common
+// errors are:
+//  1- page number is merged with a table column above it even
+//      if there is a large vertical gap between them.
+//  2- column headers go on to catch one of the columns arbitrarily
+//  3- an isolated noise blob near page top or bottom merges with the table
+//     column below/above it
+//  4- cells from two vertically adjacent tables merge together to make a
+//     single column resulting in merging of the two tables
+void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) {
+  ColSegment_IT column_it(table_columns);
+  // Iterate the ColPartitions in the grid.
+  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+      full_search(&clean_part_grid_);
+  full_search.StartFullSearch();
+  for (ColPartition* seed = full_search.NextFullSearch(); seed != nullptr;
+       seed = full_search.NextFullSearch()) {
+    // Only unassigned table partitions can seed a column.
+    if (seed->inside_table_column() || seed->type() != PT_TABLE)
+      continue;
+    const TBOX& seed_box = seed->bounding_box();
+    auto* column = new ColSegment();
+    column->InsertBox(seed_box);
+    seed->set_inside_table_column(true);
+    // Start a search below the current cell to find bottom neighbours
+    // Note: a full search will always process things above it first, so
+    // this should be starting at the highest cell and working its way down.
+    GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+        down_search(&clean_part_grid_);
+    down_search.StartVerticalSearch(seed_box.left(), seed_box.right(),
+                                    seed_box.bottom());
+    bool grew = false;
+    for (ColPartition* below = down_search.NextVerticalSearch(true);
+         below != nullptr; below = down_search.NextVerticalSearch(true)) {
+      // Skip neighbors already assigned to a column, and horizontal
+      // lines, which should not break the flow.
+      if (below->inside_table_column() || below->IsHorizontalLine())
+        continue;
+      // presence of a non-table neighbor marks the end of current
+      // table column
+      if (below->type() != PT_TABLE)
+        break;
+      // add the neighbor partition to the table column
+      column->InsertBox(below->bounding_box());
+      below->set_inside_table_column(true);
+      grew = true;
+    }
+    if (!grew) {
+      // No neighbor was added: undo the mark and discard the column.
+      seed->set_inside_table_column(false);
+      delete column;
+      continue;
+    }
+    column_it.add_after_then_move(column);
+  }
+}
+
+// Mark regions in a column that are x-bounded by the column boundaries and
+// y-bounded by the table columns' projection on the y-axis as table regions
+void TableFinder::GetTableRegions(ColSegment_LIST* table_columns,
+                                  ColSegment_LIST* table_regions) {
+  ColSegment_IT cit(table_columns);
+  ColSegment_IT rit(table_regions);  // new regions are appended through rit
+  // Iterate through column blocks
+  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
+      gsearch(&col_seg_grid_);
+  gsearch.StartFullSearch();
+  ColSegment* part;
+  int page_height = tright().y() - bleft().y();  // length of the projection
+  ASSERT_HOST(page_height > 0);
+  // create a bool array to hold projection on y-axis (freed at function end)
+  bool* table_region = new bool[page_height];
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    const TBOX& part_box = part->bounding_box();
+    // reset the projection array
+    for (int i = 0; i < page_height; i++) {
+      table_region[i] = false;
+    }
+    // iterate through all table columns to find regions in the current
+    // page column block
+    cit.move_to_first();
+    for (cit.mark_cycle_pt(); !cit.cycled_list(); cit.forward()) {
+      TBOX col_box = cit.data()->bounding_box();
+      // find intersection region of table column and page column
+      TBOX intersection_box = col_box.intersection(part_box);
+      // project table column on the y-axis (array index is y - bleft().y())
+      for (int i = intersection_box.bottom(); i < intersection_box.top(); i++) {
+        table_region[i - bleft().y()] = true;
+      }
+    }
+    // set x-limits of table regions to page column width
+    TBOX current_table_box;
+    current_table_box.set_left(part_box.left());
+    current_table_box.set_right(part_box.right());
+    // go through the y-axis projection to find runs of table
+    // regions. Each run makes one table region.
+    for (int i = 1; i < page_height; i++) {
+      // detect start of a table region (none is seen for a run at row 0)
+      if (!table_region[i - 1] && table_region[i]) {
+        current_table_box.set_bottom(i + bleft().y());
+      }
+      // TODO(nbeato): Is it guaranteed that the last row is not a table region?
+      // detect end of a table region (none is seen for a run at the last row)
+      if (table_region[i - 1] && !table_region[i]) {
+        current_table_box.set_top(i + bleft().y());
+        if (!current_table_box.null_box()) {
+          auto* seg = new ColSegment();
+          seg->InsertBox(current_table_box);
+          rit.add_after_then_move(seg);
+        }
+      }
+    }
+  }
+  delete[] table_region;
+}
+
+// Merge table regions corresponding to tables spanning multiple columns if
+// there is a colpartition (horizontal ruling line or normal text) that
+// touches both regions.
+// TODO(faisal): A rare error occurs if there are two horizontally adjacent
+// tables with aligned ruling lines. In this case, line finder returns a
+// single line and hence the tables get merged together
+void TableFinder::GridMergeTableRegions() {
+  // Iterate the table regions in the grid.
+  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
+      gsearch(&table_grid_);
+  gsearch.StartFullSearch();
+  ColSegment* seg = nullptr;
+  while ((seg = gsearch.NextFullSearch()) != nullptr) {
+    bool neighbor_found = false;  // A neighbor was merged via BelongToOneTable
+    bool modified = false;  // Modified at least once
+    do {
+      // Start a rectangle search x-bounded by the image and y by the table
+      const TBOX& box = seg->bounding_box();
+      TBOX search_region(box);  // copy of box, widened to bleft()/tright() in x
+      search_region.set_left(bleft().x());
+      search_region.set_right(tright().x());
+      neighbor_found = false;
+      GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
+          rectsearch(&table_grid_);
+      rectsearch.StartRectSearch(search_region);
+      ColSegment* neighbor = nullptr;
+      while ((neighbor = rectsearch.NextRectSearch()) != nullptr) {
+        if (neighbor == seg)
+          continue;
+        const TBOX& neighbor_box = neighbor->bounding_box();
+        // Check if a neighbor box has a large overlap with the table
+        // region.  This may happen as a result of merging two table
+        // regions in the previous iteration.
+        if (neighbor_box.overlap_fraction(box) >= 0.9) {
+          seg->InsertBox(neighbor_box);
+          rectsearch.RemoveBBox();
+          gsearch.RepositionIterator();
+          delete neighbor;  // neighbor_box must not be used past this point
+          modified = true;
+          continue;
+        }
+        // Check if two table regions belong together based on a common
+        // horizontal ruling line or another colpartition
+        if (BelongToOneTable(box, neighbor_box)) {
+          seg->InsertBox(neighbor_box);
+          neighbor_found = true;  // forces another pass of the do-loop
+          modified = true;
+          rectsearch.RemoveBBox();
+          gsearch.RepositionIterator();
+          delete neighbor;
+        }
+      }
+    } while (neighbor_found);
+    if (modified) {
+      // Because the box has changed, it has to be removed first.
+      gsearch.RemoveBBox();
+      table_grid_.InsertBBox(true, true, seg);
+      gsearch.RepositionIterator();
+    }
+  }
+}
+
+// Decides whether two table regions belong to one table: either they
+// overlap, or a non-image ColPartition overlaps both of them.
+bool TableFinder::BelongToOneTable(const TBOX &box1, const TBOX &box2) {
+  // Check the obvious case. Most likely not true because overlapping boxes
+  // should already be merged, but seems like a good thing to do in case things
+  // change.
+  if (box1.overlap(box2))
+    return true;
+  // Rectangle-search the union of both boxes for a spanning ColPartition.
+  TBOX span = box1.bounding_union(box2);
+  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+      span_search(&clean_part_grid_);
+  span_search.StartRectSearch(span);
+  for (ColPartition* part = span_search.NextRectSearch(); part != nullptr;
+       part = span_search.NextRectSearch()) {
+    const TBOX& part_box = part->bounding_box();
+    // return true if a colpartition spanning both table regions is found
+    const bool spans_both =
+        part_box.overlap(box1) && part_box.overlap(box2);
+    if (spans_both && !part->IsImageType())
+      return true;
+  }
+  return false;
+}
+
+// Adjust table boundaries by:
+//  - building a tight bounding box around all ColPartitions contained in it.
+//  - expanding table boundaries to include all colpartitions that overlap the
+//    table by more than half of their area
+//  - expanding table boundaries to include nearby horizontal rule lines
+//  - expanding table vertically to include left out column headers
+// TODO(faisal): Expansion of table boundaries is quite aggressive. It usually
+//               makes following errors:
+//  1- horizontal lines consisting of underlines are included in the table if
+//     they are close enough
+//  2- horizontal lines originating from noise tend to get merged with a table
+//     near the top of the page
+//  3- the criteria for including horizontal lines is very generous. Many times
+//     horizontal lines separating headers and footers get merged with a
+//     single-column table in a multi-column page thereby including text
+//     from the neighboring column inside the table
+//  4- the criteria for including left out column headers also tends to
+//     occasionally include text-lines above the tables, typically from
+//     table caption
+void TableFinder::AdjustTableBoundaries() {
+  // Iterate the table regions in the grid
+  ColSegment_CLIST adjusted_tables;  // holds the grown tables temporarily
+  ColSegment_C_IT it(&adjusted_tables);
+  ColSegmentGridSearch gsearch(&table_grid_);
+  gsearch.StartFullSearch();
+  ColSegment* table = nullptr;
+  while ((table = gsearch.NextFullSearch()) != nullptr) {
+    const TBOX& table_box = table->bounding_box();
+    TBOX grown_box = table_box;  // starts as a copy of the original box
+    GrowTableBox(table_box, &grown_box);
+    // To prevent a table from expanding again, do not insert the
+    // modified box back to the grid. Instead move it to a list
+    // and remove it from the grid. The list is moved later back to the grid.
+    if (!grown_box.null_box()) {
+      auto* col = new ColSegment();
+      col->InsertBox(grown_box);
+      it.add_after_then_move(col);
+    }
+    gsearch.RemoveBBox();
+    delete table;  // the original segment is discarded in every case
+  }
+  // clear table grid to move final tables in it
+  // TODO(nbeato): table_grid_ should already be empty. The above loop
+  // removed everything. Maybe just assert it is empty?
+  table_grid_.Clear();
+  it.move_to_first();
+  // move back final tables to table_grid_
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    ColSegment* seg = it.extract();
+    table_grid_.InsertBBox(true, true, seg);
+  }
+}
+
+// Grows *result_box by calling, in order, GrowTableToIncludePartials(),
+// GrowTableToIncludeLines() and IncludeLeftOutColumnHeaders().
+void TableFinder::GrowTableBox(const TBOX& table_box, TBOX* result_box) {
+  // TODO(nbeato): The growing code is a bit excessive right now.
+  // By removing these lines, the partitions considered need
+  // to have some overlap or be special cases. These lines could
+  // be added again once a check is put in place to make sure that
+  // growing tables don't stomp on a lot of non-table partitions.
+  // Disabled: search for horizontal ruling lines within the vertical margin
+  // int vertical_margin = kRulingVerticalMargin * gridsize();
+  // int top = MIN(search_range.top() + vertical_margin, tright().y());
+  // int bottom = MAX(search_range.bottom() - vertical_margin, bleft().y());
+  // search_range.set_top(top);
+  // search_range.set_bottom(bottom);
+  TBOX search_range(table_box);  // currently just a copy of the table box
+  GrowTableToIncludePartials(table_box, search_range, result_box);
+  GrowTableToIncludeLines(table_box, search_range, result_box);
+  IncludeLeftOutColumnHeaders(result_box);
+}
+
+// Grow a table by increasing the size of the box to include
+// partitions with significant overlap with the table: *result_box is
+// expanded over every non-image partition found in search_range whose
+// overlap fraction with table_box exceeds kMinOverlapWithTable.
+void TableFinder::GrowTableToIncludePartials(const TBOX& table_box,
+                                             const TBOX& search_range,
+                                             TBOX* result_box) {
+  // Rulings are in a different grid, so search 2 grids for rulings, text,
+  // and table partitions that are not entirely within the new box.
+  ColPartitionGrid* const grids[] = {&fragmented_text_grid_,
+                                     &leader_and_ruling_grid_};
+  for (ColPartitionGrid* grid : grids) {
+    ColPartitionGridSearch search(grid);
+    search.StartRectSearch(search_range);
+    for (ColPartition* candidate = search.NextRectSearch();
+         candidate != nullptr; candidate = search.NextRectSearch()) {
+      // Image partitions never extend the table.
+      if (candidate->IsImageType())
+        continue;
+      const TBOX& candidate_box = candidate->bounding_box();
+      const bool mostly_in_table =
+          candidate_box.overlap_fraction(table_box) > kMinOverlapWithTable;
+      if (mostly_in_table)
+        *result_box = result_box->bounding_union(candidate_box);
+    }
+  }
+}
+
+// Grow a table by expanding to the extents of significantly
+// overlapping lines: *result_box is expanded over each line partition in
+// search_range that HLineBelongsToTable() accepts for table_box.
+void TableFinder::GrowTableToIncludeLines(const TBOX& table_box,
+                                          const TBOX& search_range,
+                                          TBOX* result_box) {
+  ColPartitionGridSearch line_search(&leader_and_ruling_grid_);
+  line_search.SetUniqueMode(true);
+  line_search.StartRectSearch(search_range);
+  for (ColPartition* line = line_search.NextRectSearch(); line != nullptr;
+       line = line_search.NextRectSearch()) {
+    // TODO(nbeato) This should also do vertical, but column
+    // boundaries are breaking things. This function needs to be
+    // updated to allow vertical lines as well.
+    if (!line->IsLineType())
+      continue;
+    const TBOX& line_box = line->bounding_box();
+    // A line already inside the result cannot grow it, so the ownership
+    // test is skipped for it. Otherwise a partially overlapping
+    // horizontal line is included only if the extra ColPartitions pulled
+    // in by the expansion have large side spacing w.r.t. their columns.
+    const bool grows_table = !result_box->contains(line_box) &&
+                             HLineBelongsToTable(*line, table_box);
+    if (grows_table)
+      *result_box = result_box->bounding_union(line_box);
+    // TODO(nbeato): Vertical
+  }
+}
+
+// Checks whether the horizontal line belongs to the table by looking at the
+// side spacing of extra ColPartitions that will be included in the table
+// due to expansion.
+//   part:      the candidate ruling line.
+//   table_box: the current extents of the table.
+// Returns true when the line is judged to belong to the table.
+bool TableFinder::HLineBelongsToTable(const ColPartition& part,
+                                      const TBOX& table_box) {
+  const TBOX& line_box = part.bounding_box();
+  // Only horizontal lines with a major x-overlap with the table qualify.
+  if (!part.IsHorizontalLine() || !line_box.major_x_overlap(table_box))
+    return false;
+  // Do not consider top-most horizontal line since it usually
+  // originates from noise.
+  // TODO(nbeato): I had to comment this out because the ruling grid doesn't
+  // have neighbors solved.
+  // if (!part.nearest_neighbor_above())
+  //   return false;
+  const TBOX expanded_box = line_box.bounding_union(table_box);
+  // In the expanded box (the table extents grown by the line), keep track
+  // of how many newly covered partitions have significant padding to the
+  // left and to the right. The line belongs to the table when, on either
+  // side, more than half of the new partitions are widely spaced.
+  int new_part_count = 0;
+  int wide_right_count = 0;
+  int wide_left_count = 0;
+  // Side spacing above this value counts as significant.
+  // NOTE(review): the threshold is derived from the median height of the
+  // line itself rather than of each partition - confirm this is intended.
+  const int space_threshold = kSideSpaceMargin * part.median_height();
+  // Rulings are in a different grid, so search 2 grids for rulings, text,
+  // and table partitions that are introduced by the new box.
+  ColPartitionGrid* const grids[] = {&clean_part_grid_,
+                                     &leader_and_ruling_grid_};
+  for (ColPartitionGrid* grid : grids) {
+    ColPartitionGridSearch search(grid);
+    search.SetUniqueMode(true);
+    search.StartRectSearch(expanded_box);
+    for (ColPartition* added = search.NextRectSearch(); added != nullptr;
+         added = search.NextRectSearch()) {
+      // Skip partitions that already count as part of the table.
+      const TBOX& added_box = added->bounding_box();
+      if (added_box.overlap_fraction(table_box) > kMinOverlapWithTable)
+        continue;
+      // Non-text ColPartitions do not contribute
+      if (added->IsImageType())
+        continue;
+      // Consider this partition.
+      ++new_part_count;
+      // presence of a table cell is a strong hint, so just increment the
+      // scores without looking at the spacing.
+      const bool strong_hint =
+          added->type() == PT_TABLE || added->IsLineType();
+      if (strong_hint || added->space_to_right() > space_threshold)
+        ++wide_right_count;
+      if (strong_hint || added->space_to_left() > space_threshold)
+        ++wide_left_count;
+    }
+  }
+  // tprintf("%d %d %d\n",
+  // new_part_count, wide_right_count, wide_left_count);
+  const int half = new_part_count / 2;
+  return wide_right_count > half || wide_left_count > half;
+}
+
+// Look for isolated column headers above the given table box and
+// include them in the table by raising the top of *table_box.
+// The upward scan ends at the first partition whose bottom is further
+// above the table top than kMaxColumnHeaderDistance times its median
+// height, or at a text partition that lacks a major y-overlap with the
+// first text partition met since the last table or line partition.
+void TableFinder::IncludeLeftOutColumnHeaders(TBOX* table_box) {
+  // Start a search above the current table to look for column headers
+  ColPartitionGridSearch header_search(&clean_part_grid_);
+  header_search.StartVerticalSearch(table_box->left(), table_box->right(),
+                                    table_box->top());
+  // First plain text partition met since the last table/line partition.
+  ColPartition* pending_text = nullptr;
+  for (ColPartition* above = header_search.NextVerticalSearch(false);
+       above != nullptr; above = header_search.NextVerticalSearch(false)) {
+    const TBOX& above_box = above->bounding_box();
+    // Max distance to find a table heading.
+    const int reach = kMaxColumnHeaderDistance * above->median_height();
+    // Do not continue if the next box is way above
+    if (above_box.bottom() - table_box->top() > reach)
+      break;
+    // Unconditionally include partitions of type TABLE or LINE
+    // TODO(faisal): add some reasonable conditions here
+    if (above->type() == PT_TABLE || above->IsLineType()) {
+      table_box->set_top(above_box.top());
+      pending_text = nullptr;
+    } else if (pending_text == nullptr) {
+      pending_text = above;
+    } else if (!above_box.major_y_overlap(pending_text->bounding_box())) {
+      // If there are two text partitions, one above the other, without a
+      // table cell on their left or right side, consider them a barrier
+      // and quit
+      break;
+    }
+  }
+}
+
+// Remove false alarms consisting of a single column based on their
+// projection on the x-axis. Projection of a real table on the x-axis
+// should have at least one zero-valley larger than the global median
+// x-height of the page.
+void TableFinder::DeleteSingleColumnTables() {
+  const int page_left = bleft().x();
+  const int page_right = tright().x();
+  const int page_width = page_right - page_left;
+  ASSERT_HOST(page_width > 0);
+  // create an integer array to hold projection on x-axis
+  int* table_xprojection = new int[page_width];
+  // Iterate through all tables in the table grid
+  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
+      table_search(&table_grid_);
+  table_search.StartFullSearch();
+  ColSegment* table;
+  while ((table = table_search.NextFullSearch()) != nullptr) {
+    TBOX table_box = table->bounding_box();
+    // reset the projection array
+    std::fill_n(table_xprojection, page_width, 0);
+    // Start a rect search on table_box
+    GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+        rectsearch(&clean_part_grid_);
+    rectsearch.SetUniqueMode(true);
+    rectsearch.StartRectSearch(table_box);
+    ColPartition* part;
+    while ((part = rectsearch.NextRectSearch()) != nullptr) {
+      if (!part->IsTextType())
+        continue;  // Do not consider non-text partitions
+      if (part->flow() == BTFT_LEADER)
+        continue;  // Assume leaders are in tables
+      TBOX part_box = part->bounding_box();
+      // Do not consider partitions partially covered by the table
+      if (part_box.overlap_fraction(table_box) < kMinOverlapWithTable)
+        continue;
+      BLOBNBOX_CLIST* part_boxes = part->boxes();
+      BLOBNBOX_C_IT pit(part_boxes);
+      // Make sure overlapping blobs don't artificially inflate the number
+      // of rows in the table. This happens frequently with things such as
+      // decimals and split characters. Do this by assuming the column
+      // partition is sorted mostly left to right and just clip
+      // bounding boxes by the previous box's extent.
+      int next_position_to_write = 0;
+      for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) {
+        BLOBNBOX *pblob = pit.data();
+        // ignore blob height for the purpose of projection since we
+        // are only interested in finding valleys
+        const TBOX& blob_box = pblob->bounding_box();
+        // Clamp to the page extent so that a blob sticking out of it can
+        // never index outside of table_xprojection.
+        int xstart = std::max<int>(blob_box.left(), page_left);
+        const int xend = std::min<int>(blob_box.right(), page_right);
+        xstart = std::max(xstart, next_position_to_write);
+        for (int i = xstart; i < xend; i++)
+          table_xprojection[i - page_left]++;
+        next_position_to_write = xend;
+      }
+    }
+    // Find largest valley between two reasonable peaks in the table
+    if (!GapInXProjection(table_xprojection, page_width)) {
+      table_search.RemoveBBox();
+      delete table;
+    }
+  }
+  delete[] table_xprojection;
+}
+
+// Return true if at least one gap larger than the global x-height
+// exists in the horizontal projection.
+// Only runs of zeros enclosed by ones count, so empty margins on either
+// side of the projection are ignored.
+// Note that xprojection is overwritten with its thresholded 0/1 version.
+bool TableFinder::GapInXProjection(int* xprojection, int length) {
+  // Find peak value of the histogram
+  int tallest = 0;
+  for (int x = 0; x < length; ++x)
+    tallest = (xprojection[x] > tallest) ? xprojection[x] : tallest;
+  // Peak value represents the maximum number of horizontally
+  // overlapping colpartitions, so this can be considered as the
+  // number of rows in the table
+  if (tallest < kMinRowsInTable)
+    return false;
+  // Tables with at least kLargeTableRowCount rows use their own threshold.
+  const double cutoff = (tallest >= kLargeTableRowCount)
+                            ? kLargeTableProjectionThreshold * tallest
+                            : kSmallTableProjectionThreshold * tallest;
+  // Threshold the histogram
+  for (int x = 0; x < length; ++x)
+    xprojection[x] = (xprojection[x] >= cutoff) ? 1 : 0;
+  // Find the largest run of zeros between two ones
+  int widest_gap = 0;
+  int gap_begin = -1;  // -1 while no run of zeros is being tracked
+  for (int x = 1; x < length; ++x) {
+    const bool was_set = xprojection[x - 1] != 0;
+    const bool is_set = xprojection[x] != 0;
+    if (was_set && !is_set) {
+      // detect start of a run of zeros
+      gap_begin = x;
+    } else if (gap_begin != -1 && !was_set && is_set) {
+      // detect end of a run of zeros and update the value of largest gap
+      if (x - gap_begin > widest_gap)
+        widest_gap = x - gap_begin;
+      gap_begin = -1;
+    }
+  }
+  return widest_gap > kMaxXProjectionGapFactor * global_median_xheight_;
+}
+
+// Given the location of a table "guess", try to overlay a cellular
+// grid in the location, adjusting the boundaries.
+// TODO(nbeato): Falsely introduces:
+//   -headers/footers (not any worse, too much overlap destroys cells)
+//   -page numbers (not worse, included because maximize margins)
+//   -equations (nicely fit into a cellular grid, but more sparsely)
+//   -figures (random text box, also sparse)
+//   -small left-aligned text areas with overlapping positioned whitespace
+//       (rejected before)
+// Overall, this just needs some more work.
+void TableFinder::RecognizeTables() {
+  ScrollView* table_win = nullptr;
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_tables) {
+    table_win = MakeWindow(0, 0, "Table Structure");
+    DisplayColPartitions(table_win, &fragmented_text_grid_,
+                         ScrollView::BLUE, ScrollView::LIGHT_BLUE);
+    // table_grid_.DisplayBoxes(table_win);
+  }
+#endif
+
+  // Configure the recognizer with the line and text grids and with its
+  // size limits.
+  TableRecognizer recognizer;
+  recognizer.Init();
+  recognizer.set_line_grid(&leader_and_ruling_grid_);
+  recognizer.set_text_grid(&fragmented_text_grid_);
+  recognizer.set_max_text_height(global_median_xheight_ * 2.0);
+  recognizer.set_min_height(1.5 * gridheight());
+  // Loop over all of the tables and try to fit them.
+  // Store the good tables here.
+  ColSegment_CLIST kept_tables;
+  ColSegment_C_IT kept_it(&kept_tables);
+  ColSegmentGridSearch guess_search(&table_grid_);
+  guess_search.StartFullSearch();
+  for (ColSegment* guess = guess_search.NextFullSearch(); guess != nullptr;
+       guess = guess_search.NextFullSearch()) {
+    // Good tables are inserted into the grid again later on, since
+    // we can't change boxes in the grid while it is running a search.
+    guess_search.RemoveBBox();
+    // The goal is to make the tables persistent in a list.
+    // When that happens, this will move into the search loop.
+    StructuredTable* fitted =
+        recognizer.RecognizeTable(guess->bounding_box());
+    if (fitted == nullptr) {
+      delete guess;
+      continue;
+    }
+#ifndef GRAPHICS_DISABLED
+    if (textord_show_tables) {
+      fitted->Display(table_win, ScrollView::LIME_GREEN);
+    }
+#endif
+    // Adopt the extents of the fitted structure and keep the segment.
+    guess->set_bounding_box(fitted->bounding_box());
+    delete fitted;
+    kept_it.add_after_then_move(guess);
+  }
+  // TODO(nbeato): MERGE!! There is awesome info now available for merging.
+
+  // At this point, the grid is empty. We can safely insert the good tables
+  // back into grid.
+  for (kept_it.mark_cycle_pt(); !kept_it.cycled_list(); kept_it.forward())
+    table_grid_.InsertBBox(true, true, kept_it.extract());
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Displays the column segments in some window.
+// Each segment of the list is drawn as the outline of its bounding box,
+// using `color` for the pen and no brush; the window is updated once at
+// the end.
+void TableFinder::DisplayColSegments(ScrollView* win,
+                                     ColSegment_LIST *segments,
+                                     ScrollView::Color color) {
+  win->Pen(color);
+  win->Brush(ScrollView::NONE);
+  ColSegment_IT seg_it(segments);
+  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
+    const TBOX& outline = seg_it.data()->bounding_box();
+    // Corners are passed as (left, bottom) and (right, top).
+    win->Rectangle(outline.left(), outline.bottom(),
+                   outline.right(), outline.top());
+  }
+  win->UpdateWindow();
+}
+
+// Displays the colpartitions using a new coloring on an existing window.
+// Every ColPartition of the grid is drawn as the outline of its bounding
+// box: partitions of type PT_TABLE in table_color, all the others in
+// default_color.
+// Note: This method is only for debug purpose during development and
+// would not be part of checked in code
+void TableFinder::DisplayColPartitions(ScrollView* win,
+                                       ColPartitionGrid* grid,
+                                       ScrollView::Color default_color,
+                                       ScrollView::Color table_color) {
+  // Iterate the ColPartitions in the grid.
+  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+    full_search(grid);
+  full_search.StartFullSearch();
+  for (ColPartition* cell = full_search.NextFullSearch(); cell != nullptr;
+       cell = full_search.NextFullSearch()) {
+    // Pick the pen color from the partition type.
+    const ScrollView::Color pen_color =
+        (cell->type() == PT_TABLE) ? table_color : default_color;
+    const TBOX& outline = cell->bounding_box();
+    win->Brush(ScrollView::NONE);
+    win->Pen(pen_color);
+    // Corners are passed as (left, bottom) and (right, top).
+    win->Rectangle(outline.left(), outline.bottom(),
+                   outline.right(), outline.top());
+  }
+  // Refresh the window once all boxes have been drawn.
+  win->UpdateWindow();
+}
+
+// Overload that draws PT_TABLE partitions in ScrollView::YELLOW.
+void TableFinder::DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid,
+                                       ScrollView::Color default_color) {
+  DisplayColPartitions(win, grid, default_color, ScrollView::YELLOW);
+}
+
+// Draws the nearest-neighbor links of every ColPartition in the grid: a
+// line in the given color from the center of the partition's box to the
+// center of the box of its nearest neighbor above, and another one to
+// its nearest neighbor below, whenever such a neighbor is set.
+void TableFinder::DisplayColPartitionConnections(
+                     ScrollView* win,
+                     ColPartitionGrid* grid,
+                     ScrollView::Color color) {
+  // Draws one link from (from_x, from_y) to the center of target's box.
+  // A null target means there is no neighbor, so nothing is drawn.
+  auto connect = [win, color](int from_x, int from_y,
+                              ColPartition* target) {
+    if (target == nullptr)
+      return;
+    const TBOX& target_box = target->bounding_box();
+    const int to_x = (target_box.left() + target_box.right()) / 2;
+    const int to_y = (target_box.top() + target_box.bottom()) / 2;
+    win->Brush(ScrollView::NONE);
+    win->Pen(color);
+    win->Line(from_x, from_y, to_x, to_y);
+  };
+
+  // Iterate the ColPartitions in the grid.
+  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+    full_search(grid);
+  full_search.StartFullSearch();
+  for (ColPartition* node = full_search.NextFullSearch(); node != nullptr;
+       node = full_search.NextFullSearch()) {
+    const TBOX& node_box = node->bounding_box();
+    // Both links start from the center of this partition's box.
+    const int center_x = (node_box.left() + node_box.right()) / 2;
+    const int center_y = (node_box.top() + node_box.bottom()) / 2;
+    // Link upwards first, then downwards.
+    ColPartition* upper = node->nearest_neighbor_above();
+    connect(center_x, center_y, upper);
+    ColPartition* lower = node->nearest_neighbor_below();
+    connect(center_x, center_y, lower);
+  }
+  // Refresh the window once all links have been drawn.
+  win->UpdateWindow();
+}
+
+#endif
+
+// Merge all colpartitions in table regions to make them a single
+// colpartition and revert types of isolated table cells not
+// assigned to any table to their original types.
+// The merged partition of each table is re-inserted into `grid`.
+void TableFinder::MakeTableBlocks(ColPartitionGrid* grid,
+                                  ColPartitionSet** all_columns,
+                                  WidthCallback width_cb) {
+  // Since we have table blocks already, remove table tags from all
+  // colpartitions
+  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+    tag_search(grid);
+  tag_search.StartFullSearch();
+  for (ColPartition* tagged = tag_search.NextFullSearch();
+       tagged != nullptr; tagged = tag_search.NextFullSearch()) {
+    if (tagged->type() == PT_TABLE)
+      tagged->clear_table_type();
+  }
+  // Now make a single colpartition out of each table block and remove
+  // all colpartitions contained within a table
+  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
+      table_search(&table_grid_);
+  table_search.StartFullSearch();
+  for (ColSegment* table = table_search.NextFullSearch(); table != nullptr;
+       table = table_search.NextFullSearch()) {
+    const TBOX& table_box = table->bounding_box();
+    // Start a rect search on table_box
+    GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+        member_search(grid);
+    member_search.StartRectSearch(table_box);
+    // The first accepted partition; it absorbs all the later ones.
+    ColPartition* merged = nullptr;
+    for (ColPartition* member = member_search.NextRectSearch();
+         member != nullptr; member = member_search.NextRectSearch()) {
+      // Do not consider image partitions
+      if (!member->IsTextType())
+        continue;
+      // Include partition in the table if more than half of it
+      // is covered by the table
+      const TBOX member_box = member->bounding_box();
+      if (member_box.overlap_fraction(table_box) > kMinOverlapWithTable) {
+        member_search.RemoveBBox();
+        if (merged == nullptr)
+          merged = member;
+        else
+          merged->Absorb(member, width_cb);
+      }
+    }
+    // Nothing to re-insert when no partition qualified for this table.
+    if (merged == nullptr)
+      continue;
+    // To match the columns used when transforming to blocks, the new table
+    // partition must have its first and last column set at the grid y that
+    // corresponds to its bottom.
+    const TBOX& merged_box = merged->bounding_box();
+    int grid_x, grid_y;
+    grid->GridCoords(merged_box.left(), merged_box.bottom(), &grid_x, &grid_y);
+    merged->SetPartitionType(resolution_, all_columns[grid_y]);
+    merged->set_table_type();
+    merged->set_blob_type(BRT_TEXT);
+    merged->set_flow(BTFT_CHAIN);
+    merged->SetBlobTypes();
+    // Insert table colpartition back to part_grid_
+    grid->InsertBBox(true, true, merged);
+  }
+}
+
+//////// ColSegment code
+////////
+// Creates a segment with zero cell counts and type COL_UNKNOWN.
+// bounding_box_ is default-constructed.
+ColSegment::ColSegment()
+    : ELIST_LINK(), num_table_cells_(0), num_text_cells_(0),
+      type_(COL_UNKNOWN) {
+}
+
+// Provides a color for BBGrid to draw the rectangle, indexed by ColSegType.
+ScrollView::Color  ColSegment::BoxColor() const {
+  static const ScrollView::Color kBoxColors[COL_COUNT] = {
+    ScrollView::YELLOW,   // COL_UNKNOWN
+    ScrollView::BLUE,     // COL_TEXT
+    ScrollView::YELLOW,   // COL_TABLE
+    ScrollView::MAGENTA,  // COL_MIXED
+  };
+  return kBoxColors[type_];
+}
+
+// Inserts a box by replacing bounding_box_ with its bounding union with other.
+void ColSegment::InsertBox(const TBOX& other) {
+  bounding_box_ = bounding_box_.bounding_union(other);
+}
+
+// Set column segment type based on the ratio of text and table partitions
+// in it: COL_TABLE when the table cells outnumber kTableColumnThreshold
+// times the text cells, COL_TEXT when the text cells outnumber the table
+// cells, and COL_MIXED otherwise.
+void ColSegment::set_type() {
+  const bool table_heavy =
+      num_table_cells_ > kTableColumnThreshold * num_text_cells_;
+  const bool text_heavy = num_text_cells_ > num_table_cells_;
+  type_ = table_heavy ? COL_TABLE : (text_heavy ? COL_TEXT : COL_MIXED);
+}
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/tablefind.h b/tesseract/src/textord/tablefind.h
new file mode 100644
index 00000000..dc6ff932
--- /dev/null
+++ b/tesseract/src/textord/tablefind.h
@@ -0,0 +1,427 @@
+///////////////////////////////////////////////////////////////////////
+// File:        tablefind.h
+// Description: Helper classes to find tables from ColPartitions.
+// Author:      Faisal Shafait (faisal.shafait@dfki.de)
+//
+// (C) Copyright 2009, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_TABLEFIND_H_
+#define TESSERACT_TEXTORD_TABLEFIND_H_
+
+#include "colpartitiongrid.h"
+#include "elst.h"
+#include "rect.h"
+
+namespace tesseract {
+
+// Possible types for a column segment.
+enum ColSegType {
+  COL_UNKNOWN,  // type of a freshly constructed ColSegment
+  COL_TEXT,
+  COL_TABLE,
+  COL_MIXED,
+  COL_COUNT     // number of types listed above; not a type itself
+};
+
+class ColPartitionSet;
+
+// ColSegment holds rectangular blocks that represent segmentation of a page
+// into regions containing single column text/table.
+class ColSegment;
+ELISTIZEH(ColSegment)
+CLISTIZEH(ColSegment)
+
+class ColSegment : public ELIST_LINK {
+ public:
+  ColSegment();
+  ~ColSegment() = default;
+
+  // Accessors and mutators for the bounding box and its individual edges.
+  const TBOX& bounding_box() const {
+    return bounding_box_;
+  }
+
+  void set_top(int y) {
+    bounding_box_.set_top(y);
+  }
+
+  void set_bottom(int y) {
+    bounding_box_.set_bottom(y);
+  }
+
+  void set_left(int x) {
+    bounding_box_.set_left(x);
+  }
+
+  void set_right(int x) {
+    bounding_box_.set_right(x);
+  }
+
+  void set_bounding_box(const TBOX& other) {
+    bounding_box_ = other;
+  }
+
+  int get_num_table_cells() const {
+    return num_table_cells_;
+  }
+
+  // Sets the number of table colpartitions covered by the bounding_box_.
+  void set_num_table_cells(int n) {
+    num_table_cells_ = n;
+  }
+
+  int get_num_text_cells() const {
+    return num_text_cells_;
+  }
+
+  // Sets the number of text colpartitions covered by the bounding_box_.
+  void set_num_text_cells(int n) {
+    num_text_cells_ = n;
+  }
+
+  ColSegType type() const {
+    return type_;
+  }
+
+  // Sets the type of the block based on the ratio of table to text
+  // colpartitions covered by it (uses the two cell counts above).
+  void set_type();
+
+  // Provides a color for BBGrid to draw the rectangle.
+  ScrollView::Color  BoxColor() const;
+
+  // Grows bounding_box_ to the bounding union of itself and other.
+  void InsertBox(const TBOX& other);
+
+ private:
+  TBOX bounding_box_;                    // bounding box of the segment
+  int num_table_cells_;                  // table colpartitions covered
+  int num_text_cells_;                   // text colpartitions covered
+  ColSegType type_;                      // COL_UNKNOWN until set_type()
+};
+
+// BBGrid of ColSegments and the GridSearch type used to iterate over it.
+using ColSegmentGrid = BBGrid<ColSegment,
+               ColSegment_CLIST,
+               ColSegment_C_IT>;
+using ColSegmentGridSearch = GridSearch<ColSegment,
+                   ColSegment_CLIST,
+                   ColSegment_C_IT>;
+
+// TableFinder is a utility class to find a set of tables given a set of
+// ColPartitions and Columns. The TableFinder will mark candidate ColPartitions
+// based on research in "Table Detection in Heterogeneous Documents".
+// Usage flow is as follows:
+//   TableFinder finder;
+//   finder.InsertCleanPartitions(/* grid info */)
+//   finder.LocateTables(/* ColPartitions and Columns */);
+//   finder.Update TODO(nbeato)
+class TESS_API TableFinder {
+ public:
+  // Constructor is simple initializations
+  TableFinder();
+  ~TableFinder();
+
+  // Set the resolution of the connected components in ppi.
+  void set_resolution(int resolution) {
+    resolution_ = resolution;
+  }
+  // Change the reading order. Initially it is left to right.
+  void set_left_to_right_language(bool order);
+
+  // Initialize
+  void Init(int grid_size, const ICOORD& bottom_left, const ICOORD& top_right);
+
+  // Copy cleaned partitions from ColumnFinder's part_grid_ to this
+  // clean_part_grid_ and insert dot-like noise into period_grid_.
+  // It resizes the grids in this object to the dimensions of grid.
+  void InsertCleanPartitions(ColPartitionGrid* grid, TO_BLOCK* block);
+
+  // High level function to perform table detection
+  // Finds tables and updates the grid object with new partitions for the
+  // tables. The columns and width callbacks are used to merge tables.
+  // The reskew argument is only used to write the tables to the out.png
+  // if that feature is enabled.
+  void LocateTables(ColPartitionGrid* grid,
+                    ColPartitionSet** columns,
+                    WidthCallback width_cb,
+                    const FCOORD& reskew);
+
+ protected:
+  // Access for the grid dimensions.
+  // The results will not be correct until InsertCleanPartitions
+  // has been called. The values are taken from the grid passed as an argument
+  // to that function.
+  int gridsize() const;
+  int gridwidth() const;
+  int gridheight() const;
+  const ICOORD& bleft() const;
+  const ICOORD& tright() const;
+
+  // Makes a window for debugging, see BBGrid
+  ScrollView* MakeWindow(int x, int y, const char* window_name);
+
+  //////// Functions to insert objects from the grid into the table finder.
+  //////// In all cases, ownership is transferred to the table finder.
+  // Inserts text into the table finder.
+  void InsertTextPartition(ColPartition* part);
+  void InsertFragmentedTextPartition(ColPartition* part);
+  void InsertLeaderPartition(ColPartition* part);
+  void InsertRulingPartition(ColPartition* part);
+  void InsertImagePartition(ColPartition* part);
+  void SplitAndInsertFragmentedTextPartition(ColPartition* part);
+  bool AllowTextPartition(const ColPartition& part) const;
+  bool AllowBlob(const BLOBNBOX& blob) const;
+
+  //////// Functions that manipulate ColPartitions in the part_grid_ /////
+  //////// to find tables.
+  ////////
+
+  // Utility function to move segments to col_seg_grid
+  // Note: Move includes ownership,
+  // so segments will be owned by col_seg_grid
+  void MoveColSegmentsToGrid(ColSegment_LIST* segments,
+                             ColSegmentGrid* col_seg_grid);
+
+  //////// Set up code to run during table detection to correctly
+  //////// initialize variables on column partitions that are used later.
+  ////////
+
+  // Initialize the grid and partitions
+  void InitializePartitions(ColPartitionSet** all_columns);
+
+  // Set left, right and top, bottom spacings of each colpartition.
+  // Left/right spacings are w.r.t the column boundaries
+  // Top/bottom spacings are w.r.t. previous and next colpartitions
+  static void SetPartitionSpacings(ColPartitionGrid* grid,
+                                   ColPartitionSet** all_columns);
+
+  // Set spacing and closest neighbors above and below a given colpartition.
+  void SetVerticalSpacing(ColPartition* part);
+
+  // Set global spacing estimates. This function is dependent on the
+  // partition spacings. So make sure SetPartitionSpacings is called
+  // on the same grid before this.
+  void SetGlobalSpacings(ColPartitionGrid* grid);
+  // Access to the global median xheight. The xheight is the height
+  // of a lowercase 'x' character on the page. This can be viewed as the
+  // average height of a lowercase letter in a textline. As a result
+  // it is used to make assumptions about spacing between words and
+  // table cells.
+  void set_global_median_xheight(int xheight);
+  // Access to the global median blob width. The width is useful
+  // when deciding if a partition is noise.
+  void set_global_median_blob_width(int width);
+  // Access to the global median ledding. The ledding is the distance between
+  // two adjacent text lines. This value can be used to get a rough estimate
+  // for the amount of space between two lines of text. As a result, it
+  // is used to calculate appropriate spacing between adjacent rows of text.
+  void set_global_median_ledding(int ledding);
+
+  // Updates the nearest neighbors for each ColPartition in clean_part_grid_.
+  // The neighbors are most likely SingletonPartner calls after the neighbors
+  // are assigned. This is here until it is decided to remove the
+  // nearest_neighbor code in ColPartition
+  void FindNeighbors();
+
+  //////// Functions to mark candidate column partitions as tables.
+  //////// Tables are marked as described in
+  ////////   Table Detection in Heterogeneous Documents (2010, Shafait & Smith)
+  ////////
+
+  // High level function to mark partitions as table rows/cells.
+  // When this function is done, the column partitions in clean_part_grid_
+  // should mostly be marked as tables.
+  void MarkTablePartitions();
+  // Marks partitions given a local view of a single partition
+  void MarkPartitionsUsingLocalInformation();
+  /////// Heuristics for local marking
+  // Check if the partition has at least one large gap between words or no
+  // significant gap at all
+  // TODO(nbeato): Make const, prevented because blobnbox array access
+  bool HasWideOrNoInterWordGap(ColPartition* part) const;
+  // Checks if a partition is adjacent to leaders on the page
+  bool HasLeaderAdjacent(const ColPartition& part);
+  // Filter individual text partitions marked as table partitions
+  // consisting of paragraph endings, small section headings, and
+  // headers and footers.
+  void FilterFalseAlarms();
+  void FilterParagraphEndings();
+  void FilterHeaderAndFooter();
+  // Mark all ColPartitions as table cells that have a table cell above
+  // and below them
+  void SmoothTablePartitionRuns();
+
+  //////// Functions to create bounding boxes (ColSegment) objects for
+  //////// the columns on the page. The columns are not necessarily
+  //////// vertical lines, meaning if tab stops strongly suggest that
+  //////// a column changes horizontal position, as in the case below,
+  //////// the ColSegment objects will respect that after processing.
+  ////////
+  ////////     _____________
+  //////// Ex. |     |      |
+  ////////     |_____|______|  5 boxes: 2 on this line
+  ////////     |   |    |   |           3 on this line
+  ////////     |___|____|___|
+  ////////
+
+  // Get Column segments from best_columns_
+  void GetColumnBlocks(ColPartitionSet** columns,
+                       ColSegment_LIST *col_segments);
+
+  // Group Column segments into consecutive single column regions.
+  void GroupColumnBlocks(ColSegment_LIST *current_segments,
+                        ColSegment_LIST *col_segments);
+
+  // Check if two boxes are consecutive within the same column
+  bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2);
+
+  // Set the ratio of candidate table partitions in each column
+  void SetColumnsType(ColSegment_LIST* col_segments);
+
+  // Merge Column Blocks that were split due to the presence of a table
+  void GridMergeColumnBlocks();
+
+  //////// Functions to turn marked ColPartitions into candidate tables
+  //////// using a modified T-Recs++ algorithm described in
+  ////////   Applying The T-Recs Table Recognition System
+  ////////   To The Business Letter Domain (2001, Kieninger & Dengel)
+  ////////
+
+  // Merge partition cells into table columns
+  // Differs from paper by just looking at marked table partitions
+  // instead of similarity metric.
+  // Modified section 4.1 of paper.
+  void GetTableColumns(ColSegment_LIST *table_columns);
+
+  // Finds regions within a column that potentially contain a table.
+  // Ie, the table columns from GetTableColumns are turned into boxes
+  // that span the entire page column (using ColumnBlocks found in
+  // earlier functions) in the x direction and the min/max extent of
+  // overlapping table columns in the y direction.
+  // Section 4.2 of paper.
+  void GetTableRegions(ColSegment_LIST *table_columns,
+                       ColSegment_LIST *table_regions);
+
+
+  //////// Functions to "patch up" found tables
+  ////////
+
+  // Merge table regions corresponding to tables spanning multiple columns
+  void GridMergeTableRegions();
+  bool BelongToOneTable(const TBOX &box1, const TBOX &box2);
+
+  // Adjust table boundaries by building a tight bounding box around all
+  // ColPartitions contained in it.
+  void AdjustTableBoundaries();
+
+  // Grows a table to include partitions that are partially covered
+  // by the table. This includes lines and text. It does not include
+  // noise or images.
+  // On entry, result_box is the minimum size of the result. The results of the
+  // function will union the actual result with result_box.
+  void GrowTableBox(const TBOX& table_box, TBOX* result_box);
+  // Grow a table by increasing the size of the box to include
+  // partitions with significant overlap with the table.
+  void GrowTableToIncludePartials(const TBOX& table_box,
+                                  const TBOX& search_range,
+                                  TBOX* result_box);
+  // Grow a table by expanding to the extents of significantly
+  // overlapping lines.
+  void GrowTableToIncludeLines(const TBOX& table_box, const TBOX& search_range,
+                               TBOX* result_box);
+  // Checks whether the horizontal line belongs to the table by looking at the
+  // side spacing of extra ColPartitions that will be included in the table
+  // due to expansion.
+  bool HLineBelongsToTable(const ColPartition& part, const TBOX& table_box);
+
+  // Look for isolated column headers above the given table box and
+  // include them in the table
+  void IncludeLeftOutColumnHeaders(TBOX* table_box);
+
+  // Remove false alarms consisting of a single column
+  void DeleteSingleColumnTables();
+
+  // Return true if at least one gap larger than the global x-height
+  // exists in the horizontal projection
+  bool GapInXProjection(int* xprojection, int length);
+
+  //////// Recognize the tables.
+  ////////
+  // This function will run the table recognizer and try to find better
+  // bounding boxes. The structures of the tables never leave this function
+  // right now. It just tries to prune and merge tables based on info it
+  // has available.
+  void RecognizeTables();
+
+  //////// Debugging functions. Render different structures to GUI
+  //////// for visual debugging / intuition.
+  ////////
+
+  // Displays Colpartitions marked as table row. Overlays them on top of
+  // part_grid_.
+  void DisplayColSegments(ScrollView* win, ColSegment_LIST *cols,
+                          ScrollView::Color color);
+
+  // Displays the colpartitions using a new coloring on an existing window.
+  // Note: This method is only for debug purpose during development and
+  // would not be part of checked in code
+  void DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid,
+                            ScrollView::Color text_color,
+                            ScrollView::Color table_color);
+  void DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid,
+                            ScrollView::Color default_color);
+  void DisplayColPartitionConnections(ScrollView* win,
+                                      ColPartitionGrid* grid,
+                                      ScrollView::Color default_color);
+
+  // Merge all colpartitions in table regions to make them a single
+  // colpartition and revert types of isolated table cells not
+  // assigned to any table to their original types.
+  void MakeTableBlocks(ColPartitionGrid* grid,
+                       ColPartitionSet** columns,
+                       WidthCallback width_cb);
+
+  /////////////////////////////////////////////////
+  // Useful objects used during table find process.
+  /////////////////////////////////////////////////
+  // Resolution of the connected components in ppi.
+  int resolution_;
+  // Estimate of median x-height over the page
+  int global_median_xheight_;
+  // Estimate of the median blob width on the page
+  int global_median_blob_width_;
+  // Estimate of median leading on the page
+  int global_median_ledding_;
+  // Grid to hold cleaned colpartitions after removing all
+  // colpartitions that consist of only noise blobs, and removing
+  // noise blobs from remaining colpartitions.
+  ColPartitionGrid clean_part_grid_;
+  // Grid contains the leaders and ruling lines.
+  ColPartitionGrid leader_and_ruling_grid_;
+  // Grid contains the broken down column partitions. It can be thought
+  // of as a "word" grid. However, it usually doesn't break apart text lines.
+  // It does break apart table data (most of the time).
+  ColPartitionGrid fragmented_text_grid_;
+  // Grid of page column blocks
+  ColSegmentGrid col_seg_grid_;
+  // Grid of detected tables
+  ColSegmentGrid table_grid_;
+  // The reading order of text. Defaults to true, for languages such as English.
+  bool left_to_right_language_;
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_TEXTORD_TABLEFIND_H_
diff --git a/tesseract/src/textord/tablerecog.cpp b/tesseract/src/textord/tablerecog.cpp
new file mode 100644
index 00000000..af565891
--- /dev/null
+++ b/tesseract/src/textord/tablerecog.cpp
@@ -0,0 +1,1067 @@
+///////////////////////////////////////////////////////////////////////
+// File:        tablerecog.cpp
+// Description: Helper class to help structure table areas. Given a bounding
+//              box from TableFinder, the TableRecognizer should give a
+//              StructuredTable (maybe a list in the future) of "good" tables
+//              in that area.
+// Author:      Nicholas Beato
+// Created:     Friday, Aug. 20, 2010
+//
+// (C) Copyright 2009, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "tablerecog.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+// The amount of space required between the ColPartitions in 2 columns
+// of a non-lined table as a multiple of the median width.
+const double kHorizontalSpacing = 0.30;  // Half is added to each side.
+// The amount of space required between the ColPartitions in 2 rows
+// of a non-lined table as a multiple of the partition's box height.
+const double kVerticalSpacing = -0.2;  // Negative: text boxes are shrunk.
+// The number of cells that the grid lines may intersect.
+// See FindCellSplitLocations for explanation.
+const int kCellSplitRowThreshold = 0;
+const int kCellSplitColumnThreshold = 0;
+// For "lined tables", the number of required lines. Currently a guess.
+const int kLinedTableMinVerticalLines = 3;
+const int kLinedTableMinHorizontalLines = 3;
+// Number of columns required, as a fraction of the most columns found.
+// None of these are tweaked at all.
+const double kRequiredColumns = 0.7;
+// The tolerance for comparing margins of potential tables.
+const double kMarginFactor = 1.1;
+// The first and last row should be consistent cell height.
+// This factor is the first and last row cell height max.
+const double kMaxRowSize = 2.5;
+// Number of filled columns required to form a strong table row.
+// For small tables, this is an absolute number.
+const double kGoodRowNumberOfColumnsSmall[] = { 2, 2, 2, 2, 2, 3, 3 };
+const int kGoodRowNumberOfColumnsSmallSize =
+    sizeof(kGoodRowNumberOfColumnsSmall) / sizeof(double) - 1;  // 7 - 1 = 6
+// For large tables, it is a relative number.
+const double kGoodRowNumberOfColumnsLarge = 0.7;
+// The fraction of a cell's area that must be covered by ColPartitions for
+// a row to count as "filled" (compared against in VerifyRowFilled).
+const double kMinFilledArea = 0.35;
+
+////////
+//////// StructuredTable Class
+////////
+
+StructuredTable::StructuredTable()
+    : text_grid_(nullptr),  // Assigned through set_text_grid().
+      line_grid_(nullptr),  // Assigned through set_line_grid().
+      is_lined_(false),
+      space_above_(0),
+      space_below_(0),
+      space_left_(0),
+      space_right_(0),
+      median_cell_height_(0),
+      median_cell_width_(0),
+      max_text_height_(INT32_MAX) {  // I.e. no text height limit by default.
+}
+
+void StructuredTable::Init() {  // Currently a no-op.
+}
+
+void StructuredTable::set_text_grid(ColPartitionGrid* text_grid) {
+  text_grid_ = text_grid;  // Pointer is stored as-is; searched for text.
+}
+void StructuredTable::set_line_grid(ColPartitionGrid* line_grid) {
+  line_grid_ = line_grid;  // Pointer is stored as-is; searched for lines.
+}
+void StructuredTable::set_max_text_height(int height) {
+  max_text_height_ = height;  // Taller text is skipped in FindWhitespacedRows.
+}
+bool StructuredTable::is_lined() const {
+  return is_lined_;  // Set by FindLinedStructure; reset by ClearStructure.
+}
+int StructuredTable::row_count() const {
+  return cell_y_.size() > 0 ? cell_y_.size() - 1 : 0;  // N bounds: N-1 rows.
+}
+int StructuredTable::column_count() const {
+  return cell_x_.size() > 0 ? cell_x_.size() - 1 : 0;  // N bounds: N-1 cols.
+}
+int StructuredTable::cell_count() const {
+  return column_count() * row_count();
+}
+void StructuredTable::set_bounding_box(const TBOX& box) {
+  bounding_box_ = box;  // Search area used by the Find*Structure methods.
+}
+const TBOX& StructuredTable::bounding_box() const {
+  return bounding_box_;  // May have been moved by FindWhitespacedStructure.
+}
+int StructuredTable::median_cell_height() {
+  return median_cell_height_;  // Set by CalculateStats; 0 after a clear.
+}
+int StructuredTable::median_cell_width() {
+  return median_cell_width_;  // Set by CalculateStats; 0 after a clear.
+}
+int StructuredTable::row_height(int row) const {
+  ASSERT_HOST(0 <= row && row < row_count());
+  return cell_y_[row + 1] - cell_y_[row];  // Gap between row boundaries.
+}
+int StructuredTable::column_width(int column) const {
+  ASSERT_HOST(0 <= column && column < column_count());
+  return cell_x_[column + 1] - cell_x_[column];  // Gap between boundaries.
+}
+int StructuredTable::space_above() const {
+  return space_above_;  // Set by CalculateMargins; 0 after ClearStructure.
+}
+int StructuredTable::space_below() const {
+  return space_below_;  // Set by CalculateMargins; 0 after ClearStructure.
+}
+
+// Builds the cell grid from the ruling lines found inside bounding_box_ (the
+// lines are expected to be contained by the box, see FindLinesBoundingBox).
+// The assumption is that all lines span the table. If text crosses a grid
+// line, VerifyLinedTableCells rejects the lined table and the
+// TableRecognizer will fall back on FindWhitespacedStructure.
+// Returns true (and sets is_lined_) only if a usable lined grid was found;
+// returns false without asserting when too few distinct lines exist.
+bool StructuredTable::FindLinedStructure() {
+  ClearStructure();
+
+  // Collect the mid positions of all lines in the current box.
+  ColPartitionGridSearch box_search(line_grid_);
+  box_search.SetUniqueMode(true);
+  box_search.StartRectSearch(bounding_box_);
+  ColPartition* line = nullptr;
+  while ((line = box_search.NextRectSearch()) != nullptr) {
+    if (line->IsHorizontalLine())
+      cell_y_.push_back(line->MidY());
+    if (line->IsVerticalLine())
+      cell_x_.push_back(line->MidX());
+  }
+
+  // HasSignificantLines should guarantee cells. Because that code is a
+  // different class, just gracefully return false.
+  if (cell_x_.size() < 3 || cell_y_.size() < 3)
+    return false;
+
+  cell_x_.sort();
+  cell_y_.sort();
+
+  // Remove duplicates that may have occurred due to split lines.
+  cell_x_.compact_sorted();
+  cell_y_.compact_sorted();
+
+  // The border should be the extents of line boxes, not middle.
+  cell_x_[0] = bounding_box_.left();
+  cell_x_[cell_x_.size() - 1] = bounding_box_.right();
+  cell_y_[0] = bounding_box_.bottom();
+  cell_y_[cell_y_.size() - 1] = bounding_box_.top();
+
+  // Remove duplicates that may have occurred due to moving the borders.
+  cell_x_.compact_sorted();
+  cell_y_.compact_sorted();
+  // De-duplication may leave one boundary; the verify step asserts on >= 2.
+  if (cell_x_.size() < 2 || cell_y_.size() < 2)
+    return false;
+
+  CalculateMargins();
+  CalculateStats();
+  is_lined_ = VerifyLinedTableCells();
+  return is_lined_;
+}
+
+// Finds the cellular structure of the box from the whitespace between text
+// partitions. Returns false if the grid is too small to be a table.
+bool StructuredTable::FindWhitespacedStructure() {
+  ClearStructure();
+  FindWhitespacedColumns();
+  FindWhitespacedRows();
+  if (!VerifyWhitespacedTable())
+    return false;
+  bounding_box_.set_left(cell_x_[0]);
+  bounding_box_.set_right(cell_x_[cell_x_.size() - 1]);
+  bounding_box_.set_bottom(cell_y_[0]);
+  bounding_box_.set_top(cell_y_[cell_y_.size() - 1]);
+  // Stats depend only on cell_x_/cell_y_ and must be computed first:
+  // AbsorbNearbyLines limits its growth using median_cell_height_.
+  CalculateStats();
+  AbsorbNearbyLines();
+  CalculateMargins();
+  return true;
+}
+
+// Reports whether a partition fits inside the table structure, i.e. no
+// grid line passes strictly through the interior of its bounding box.
+// A grid line that merely touches an edge of the box is not counted as an
+// intersection, because the assumption throughout the code is that a
+// distance of "0" is a very very small space.
+bool StructuredTable::DoesPartitionFit(const ColPartition& part) const {
+  const TBOX& part_box = part.bounding_box();
+  for (int col = 0; col < cell_x_.size(); ++col)
+    if (cell_x_[col] > part_box.left() && cell_x_[col] < part_box.right())
+      return false;
+  for (int row = 0; row < cell_y_.size(); ++row)
+    if (cell_y_[row] > part_box.bottom() && cell_y_[row] < part_box.top())
+      return false;
+  return true;
+}
+
+// Counts the filled cells of the whole table, of one row, or of one column.
+int StructuredTable::CountFilledCells() {
+  return CountFilledCells(0, row_count() - 1, 0, column_count() - 1);
+}
+int StructuredTable::CountFilledCellsInRow(int row) {  // All columns of row.
+  return CountFilledCells(row, row, 0, column_count() - 1);
+}
+int StructuredTable::CountFilledCellsInColumn(int column) {  // All rows.
+  return CountFilledCells(0, row_count() - 1, column, column);
+}
+int StructuredTable::CountFilledCells(int row_start, int row_end,
+                            int column_start, int column_end) {
+  ASSERT_HOST(0 <= row_start && row_start <= row_end && row_end < row_count());
+  ASSERT_HOST(0 <= column_start && column_start <= column_end &&
+              column_end < column_count());
+  // Tally the cells of the inclusive ranges where CountPartitions is > 0.
+  int filled = 0;
+  TBOX probe;
+  for (int r = row_start; r <= row_end; ++r) {
+    probe.set_bottom(cell_y_[r]);
+    probe.set_top(cell_y_[r + 1]);
+    for (int c = column_start; c <= column_end; ++c) {
+      probe.set_left(cell_x_[c]);
+      probe.set_right(cell_x_[c + 1]);
+      filled += (CountPartitions(probe) > 0) ? 1 : 0;
+    }
+  }
+  return filled;
+}
+
+// Returns true if at least one cell in the row has a covered fraction of at
+// least kMinFilledArea. This can filter out large whitespace caused by
+// growing tables too far and page numbers.
+bool StructuredTable::VerifyRowFilled(int row) {
+  const int num_columns = column_count();
+  for (int col = 0; col < num_columns; ++col) {
+    if (CalculateCellFilledPercentage(row, col) >= kMinFilledArea)
+      return true;
+  }
+  return false;
+}
+
+// Returns the fraction (at most 1.0) of cell (row, column) covered by text.
+// row/column must be valid cell indices, since boundary index + 1 is read.
+double StructuredTable::CalculateCellFilledPercentage(int row, int column) {
+  ASSERT_HOST(0 <= row && row < row_count());
+  ASSERT_HOST(0 <= column && column < column_count());
+  const TBOX kCellBox(cell_x_[column], cell_y_[row],
+                      cell_x_[column + 1], cell_y_[row + 1]);
+  ASSERT_HOST(!kCellBox.null_box());
+  // Sum clipped text areas; overlaps are counted twice, hence the clamp below.
+  ColPartitionGridSearch gsearch(text_grid_);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartRectSearch(kCellBox);
+  double area_covered = 0;
+  ColPartition* text = nullptr;
+  while ((text = gsearch.NextRectSearch()) != nullptr) {
+    if (text->IsTextType())
+      area_covered += text->bounding_box().intersection(kCellBox).area();
+  }
+  const int32_t current_area = kCellBox.area();
+  if (current_area == 0) {  // Guard the division below.
+    return 1.0;
+  }
+  return std::min(1.0, area_covered / current_area);
+}
+
+#ifndef GRAPHICS_DISABLED
+
+void StructuredTable::Display(ScrollView* window, ScrollView::Color color) {
+  // Outline the table, then draw every column and row boundary across it.
+  const int x_min = bounding_box_.left(), x_max = bounding_box_.right();
+  const int y_min = bounding_box_.bottom(), y_max = bounding_box_.top();
+  window->Brush(ScrollView::NONE);
+  window->Pen(color);
+  window->Rectangle(x_min, y_min, x_max, y_max);
+  for (int col = 0; col < cell_x_.size(); ++col) {
+    window->Line(cell_x_[col], y_min, cell_x_[col], y_max);
+  }
+  for (int row = 0; row < cell_y_.size(); ++row) {
+    window->Line(x_min, cell_y_[row], x_max, cell_y_[row]);
+  }
+  window->UpdateWindow();
+}
+
+#endif
+
+// Resets all derived structure (cell boundaries, lined flag, margins and
+// median cell statistics) to the empty state. The grids, the bounding
+// box and max_text_height_ are left untouched.
+void StructuredTable::ClearStructure() {
+  // Drop the grid-line coordinates.
+  cell_x_.clear();
+  cell_y_.clear();
+  is_lined_ = false;
+  // Zero the margins and the median cell size.
+  space_above_ = space_below_ = space_left_ = space_right_ = 0;
+  median_cell_height_ = median_cell_width_ = 0;
+}
+
+// When a table has lines, the lines should not intersect any partitions.
+// Returns true only if no horizontal boundary in cell_y_ and no vertical
+// boundary in cell_x_ is reported as crossing a partition by
+// CountHorizontalIntersections / CountVerticalIntersections.
+bool StructuredTable::VerifyLinedTableCells() {
+  // Function only called when lines exist.
+  ASSERT_HOST(cell_y_.size() >= 2 && cell_x_.size() >= 2);
+  for (int row = 0; row < cell_y_.size(); ++row)
+    if (CountHorizontalIntersections(cell_y_[row]) > 0)
+      return false;
+  for (int col = 0; col < cell_x_.size(); ++col)
+    if (CountVerticalIntersections(cell_x_[col]) > 0)
+      return false;
+  return true;
+}
+
+// TODO(nbeato): Could be much better than this.
+// Examples:
+//   - Calculate the percentage of filled cells.
+//   - Calculate the average number of ColPartitions per cell.
+//   - Calculate the number of cells per row with partitions.
+//   - Check if ColPartitions in adjacent cells are similar.
+//   - Check that all columns are at least a certain width, etc.
+// Current criteria for a table: it must be at least 2x3 or 3x2.
+bool StructuredTable::VerifyWhitespacedTable() {
+  const int rows = row_count(), cols = column_count();
+  return rows >= 2 && cols >= 2 && rows * cols >= 6;
+}
+
+// Finds vertical splits in the ColPartitions of text_grid_ by considering
+// all possible "good" guesses. A good guess is just the left/right sides of
+// the partitions, since these locations uniquely define the extremal
+// values where the splits can occur. Each split is placed in the middle of
+// the two nearest partitions. The result is stored in cell_x_.
+void StructuredTable::FindWhitespacedColumns() {
+  // Padded left/right extents of the text partitions inside bounding_box_.
+  GenericVector<int> left_sides;
+  GenericVector<int> right_sides;
+
+  // Look at each text partition. We want to find the partitions
+  // that have extremal left/right sides. These will give us a basis
+  // for the table columns.
+  ColPartitionGridSearch gsearch(text_grid_);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartRectSearch(bounding_box_);
+  for (ColPartition* text = gsearch.NextRectSearch(); text != nullptr;
+       text = gsearch.NextRectSearch()) {
+    if (!text->IsTextType())
+      continue;
+    ASSERT_HOST(text->bounding_box().left() < text->bounding_box().right());
+    // Half of the required horizontal gap is added to each side.
+    const int padding = static_cast<int>(text->median_width() *
+                                         kHorizontalSpacing / 2.0 + 0.5);
+    left_sides.push_back(text->bounding_box().left() - padding);
+    right_sides.push_back(text->bounding_box().right() + padding);
+  }
+  // Nothing to split without text partitions; leave cell_x_ untouched.
+  if (left_sides.size() == 0 || right_sides.size() == 0)
+    return;
+
+  // Since data may be inserted in grid order, we sort the left/right sides.
+  left_sides.sort();
+  right_sides.sort();
+
+  // At this point, in the "merged list", we expect to have a left side,
+  // followed by either more left sides or a right side. The last number
+  // should be a right side. We find places where the splits occur by looking
+  // for "valleys". If we want to force gap sizes or allow overlap, change
+  // the spacing above. If you want to let lines "slice" partitions as long
+  // as it is infrequent, change the following function.
+  FindCellSplitLocations(left_sides, right_sides, kCellSplitColumnThreshold,
+                         &cell_x_);
+}
+
+// Finds horizontal splits in the ColPartitions of text_grid_ by considering
+// all possible "good" guesses. A good guess is just the bottom/top sides of
+// the partitions, since these locations uniquely define the extremal
+// values where the splits can occur. Each split is placed in the middle of
+// the two nearest partitions. The result is stored in cell_y_.
+void StructuredTable::FindWhitespacedRows() {
+  // Adjusted bottom/top extents of the text partitions in bounding_box_.
+  GenericVector<int> bottom_sides;
+  GenericVector<int> top_sides;
+  // We will be "shrinking" partitions, so keep the min/max around to
+  // make sure the bottom/top lines do not intersect text.
+  int min_bottom = INT32_MAX;
+  int max_top = INT32_MIN;
+
+  // Look at each text partition. We want to find the partitions
+  // that have extremal bottom/top sides. These will give us a basis
+  // for the table rows. Because the textlines can be skewed and close due
+  // to warping, the height of the partitions is toned down a little bit.
+  ColPartitionGridSearch gsearch(text_grid_);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartRectSearch(bounding_box_);
+  for (ColPartition* text = gsearch.NextRectSearch(); text != nullptr;
+       text = gsearch.NextRectSearch()) {
+    if (!text->IsTextType())
+      continue;
+
+    ASSERT_HOST(text->bounding_box().bottom() < text->bounding_box().top());
+    const TBOX& box = text->bounding_box();
+    min_bottom = std::min(min_bottom, static_cast<int>(box.bottom()));
+    max_top = std::max(max_top, static_cast<int>(box.top()));
+    // Ignore "tall" text partitions, as these are usually false positive
+    // vertical text or multiple lines pulled together.
+    if (box.height() > max_text_height_)
+      continue;
+
+    const int padding = static_cast<int>(box.height() *
+                                         kVerticalSpacing / 2.0 + 0.5);
+    const int bottom = box.bottom() - padding;
+    const int top = box.top() + padding;
+    // For horizontal text, the factor can be negative. This should
+    // probably cause a warning or failure. I haven't actually checked if
+    // it happens.
+    if (bottom >= top)
+      continue;
+
+    bottom_sides.push_back(bottom);
+    top_sides.push_back(top);
+  }
+  // Nothing to split without usable partitions; leave cell_y_ untouched.
+  if (bottom_sides.size() == 0 || top_sides.size() == 0)
+    return;
+
+  // Since data may be inserted in grid order, we sort the bottom/top sides.
+  bottom_sides.sort();
+  top_sides.sort();
+
+  // At this point, in the "merged list", we expect to have a bottom side,
+  // followed by either more bottom sides or a top side. The last number
+  // should be a top side. We find places where the splits occur by looking
+  // for "valleys". If we want to force gap sizes or allow overlap, change
+  // the spacing above. If you want to let lines "slice" partitions as long
+  // as it is infrequent, change the following function.
+  FindCellSplitLocations(bottom_sides, top_sides, kCellSplitRowThreshold,
+                         &cell_y_);
+
+  // Recover the min/max correctly since it was shifted.
+  cell_y_[0] = min_bottom;
+  cell_y_[cell_y_.size() - 1] = max_top;
+}
+
+// Recomputes the four margins around bounding_box_ as the smallest distance
+// found in either the text grid or the line grid.
+void StructuredTable::CalculateMargins() {
+  // Start from INT32_MAX so that std::min in UpdateMargins can only lower it.
+  space_above_ = space_below_ = space_right_ = space_left_ = INT32_MAX;
+  UpdateMargins(text_grid_);
+  UpdateMargins(line_grid_);
+}
+// Finds the nearest partition in grid to each side of the table
+// boundary and lowers the corresponding margin if it is closer.
+void StructuredTable::UpdateMargins(ColPartitionGrid* grid) {
+  space_below_ = std::min(
+      space_below_, FindVerticalMargin(grid, bounding_box_.bottom(), true));
+  space_above_ = std::min(
+      space_above_, FindVerticalMargin(grid, bounding_box_.top(), false));
+  space_left_ = std::min(
+      space_left_, FindHorizontalMargin(grid, bounding_box_.left(), true));
+  space_right_ = std::min(
+      space_right_, FindHorizontalMargin(grid, bounding_box_.right(), false));
+}
+// Distance from border to the first text/horizontal-line partition beyond it.
+int StructuredTable::FindVerticalMargin(ColPartitionGrid* grid, int border,
+                                        bool decrease) const {
+  ColPartitionGridSearch gsearch(grid);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(),
+                              border);
+  for (ColPartition* part = gsearch.NextVerticalSearch(decrease);
+       part != nullptr; part = gsearch.NextVerticalSearch(decrease)) {
+    if (part->IsTextType() || part->IsHorizontalLine()) {
+      const int gap = decrease ? border - part->bounding_box().top()
+                               : part->bounding_box().bottom() - border;
+      if (gap >= 0) return gap;
+    }
+  }
+  return INT32_MAX;
+}
+// Distance from border to the first text/vertical-line partition beyond it.
+int StructuredTable::FindHorizontalMargin(ColPartitionGrid* grid, int border,
+                                          bool decrease) const {
+  ColPartitionGridSearch gsearch(grid);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartSideSearch(border, bounding_box_.bottom(), bounding_box_.top());
+  for (ColPartition* part = gsearch.NextSideSearch(decrease);
+       part != nullptr; part = gsearch.NextSideSearch(decrease)) {
+    if (part->IsTextType() || part->IsVerticalLine()) {
+      const int gap = decrease ? border - part->bounding_box().right()
+                               : part->bounding_box().left() - border;
+      if (gap >= 0) return gap;
+    }
+  }
+  return INT32_MAX;
+}
+
+void StructuredTable::CalculateStats() {
+  const int kMaxCellHeight = 1000;
+  const int kMaxCellWidth = 1000;
+  STATS height_stats(0, kMaxCellHeight + 1);
+  STATS width_stats(0, kMaxCellWidth + 1);
+  // The 2nd add() argument is the opposite dimension (presumably a weight).
+  const int rows = row_count(), cols = column_count();
+  for (int r = 0; r < rows; ++r)
+    height_stats.add(row_height(r), cols);
+  for (int c = 0; c < cols; ++c)
+    width_stats.add(column_width(c), rows);
+  median_cell_height_ = static_cast<int>(height_stats.median() + 0.5);
+  median_cell_width_ = static_cast<int>(width_stats.median() + 0.5);
+}
+
+// Looks for grid lines near the current bounding box and
+// grows the bounding box to include them if no intersections
+// will occur as a result. This is necessary because the margins
+// are calculated relative to the closest line/text. If the
+// line isn't absorbed, the margin will be the distance to the line.
+void StructuredTable::AbsorbNearbyLines() {
+  ColPartitionGridSearch gsearch(line_grid_);
+  gsearch.SetUniqueMode(true);
+
+  // Is the closest line above good? Loop multiple times for tables with
+  // multi-line (sometimes 2) borders. Growth is limited to gaps of at most
+  // 2 * median_cell_height_ (NOTE(review): 0 right after ClearStructure).
+  ColPartition* line = nullptr;
+  gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(),
+                              bounding_box_.top());
+  while ((line = gsearch.NextVerticalSearch(false)) != nullptr) {
+    if (!line->IsHorizontalLine())  // Only horizontal lines are absorbed.
+      break;
+    TBOX text_search(bounding_box_.left(), bounding_box_.top() + 1,
+                     bounding_box_.right(), line->MidY());  // Strip in between.
+    if (text_search.height() > median_cell_height_ * 2)  // Line is too far.
+      break;
+    if (CountPartitions(text_search) > 0)  // The strip is not empty.
+      break;
+    bounding_box_.set_top(line->MidY());  // Grow up to the line's middle.
+  }
+  // As above, is the closest line below good? Same limits, mirrored.
+  line = nullptr;
+  gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(),
+                              bounding_box_.bottom());
+  while ((line = gsearch.NextVerticalSearch(true)) != nullptr) {
+    if (!line->IsHorizontalLine())  // Only horizontal lines are absorbed.
+      break;
+    TBOX text_search(bounding_box_.left(), line->MidY(),
+                     bounding_box_.right(), bounding_box_.bottom() - 1);
+    if (text_search.height() > median_cell_height_ * 2)  // Line is too far.
+      break;
+    if (CountPartitions(text_search) > 0)  // The strip is not empty.
+      break;
+    bounding_box_.set_bottom(line->MidY());  // Grow down to the line's middle.
+  }
+  // TODO(nbeato): vertical lines
+}
+
+
+// Finds the split positions ("valleys") between groups of partitions, given
+// the sorted mins and sorted maxes of the partitions (either left and right
+// or bottom and top). Since the lists are generated from pairs of increasing
+// integers, some assumptions are made about the ordering of the merged list;
+// they are shown in the asserts.
+// The algorithm works as follows:
+//   While there are mins to process, take the smallest remaining number.
+//     If it is from the min_list, increment the "hill" counter.
+//     Otherwise, decrement the "hill" counter.
+//     A drop to max_merged or fewer stacked partitions records a crossing;
+//     the next rise above max_merged emits a split midway between the two.
+// The first/last entries of *locations are min_list[0] and the last max.
+// *locations is cleared first and stays empty when the lists are empty.
+// NOTE: This function assumes the lists are sorted!
+void StructuredTable::FindCellSplitLocations(const GenericVector<int>& min_list,
+                                             const GenericVector<int>& max_list,
+                                             int max_merged,
+                                             GenericVector<int>* locations) {
+  locations->clear();
+  ASSERT_HOST(min_list.size() == max_list.size());
+  if (min_list.size() == 0)
+    return;
+  ASSERT_HOST(min_list.get(0) < max_list.get(0));
+  ASSERT_HOST(min_list.get(min_list.size() - 1) <
+              max_list.get(max_list.size() - 1));
+
+  locations->push_back(min_list.get(0));  // Outer boundary: smallest min.
+  int min_index = 0;
+  int max_index = 0;
+  int stacked_partitions = 0;  // Partitions currently "open" at the cursor.
+  int last_cross_position = INT32_MAX;  // INT32_MAX means no pending valley.
+  // max_index is expected to expire after min_index (only the first and last
+  // min/max pairs are asserted above), and the hill cannot "increase" once
+  // min_index has expired. So finish processing when min_index expires.
+  while (min_index < min_list.size()) {
+    // Increase the hill count.
+    if (min_list[min_index] < max_list[max_index]) {
+      ++stacked_partitions;
+      if (last_cross_position != INT32_MAX &&
+          stacked_partitions > max_merged) {
+        int mid = (last_cross_position + min_list[min_index]) / 2;
+        locations->push_back(mid);  // Split in the middle of the valley.
+        last_cross_position = INT32_MAX;
+      }
+      ++min_index;
+    } else {
+      // Decrease the hill count.
+      --stacked_partitions;
+      if (last_cross_position == INT32_MAX &&
+          stacked_partitions <= max_merged) {
+        last_cross_position = max_list[max_index];  // The valley starts here.
+      }
+      ++max_index;
+    }
+  }
+  locations->push_back(max_list.get(max_list.size() - 1));  // Outer boundary.
+}
+
+// Counts the text partitions, found by searching around the table's
+// bounding box, whose horizontal extent strictly contains the given x value.
+int StructuredTable::CountVerticalIntersections(int x) {
+  // Search only a strip of +/- gridsize around x to keep search time down.
+  const int margin = text_grid_->gridsize();
+  TBOX strip = bounding_box_;
+  strip.set_left(x - margin);
+  strip.set_right(x + margin);
+
+  ColPartitionGridSearch searcher(text_grid_);
+  searcher.SetUniqueMode(true);
+  searcher.StartRectSearch(strip);
+  int crossings = 0;
+  for (ColPartition* part = searcher.NextRectSearch(); part != nullptr;
+       part = searcher.NextRectSearch()) {
+    // Only text partitions whose box strictly straddles x are counted.
+    const bool straddles = part->IsTextType() &&
+                           part->bounding_box().left() < x &&
+                           x < part->bounding_box().right();
+    if (straddles) ++crossings;
+  }
+  return crossings;
+}
+
+// Counts the text partitions, found by searching around the table's
+// bounding box, whose vertical extent strictly contains the given y value.
+int StructuredTable::CountHorizontalIntersections(int y) {
+  // Search only a band of +/- gridsize around y, keeping the table's
+  // left and right edges, so that the search time stays down.
+  const int margin = text_grid_->gridsize();
+  TBOX band = bounding_box_;
+  band.set_bottom(y - margin);
+  band.set_top(y + margin);
+
+  ColPartitionGridSearch searcher(text_grid_);
+  searcher.SetUniqueMode(true);
+  searcher.StartRectSearch(band);
+  int crossings = 0;
+  for (ColPartition* part = searcher.NextRectSearch(); part != nullptr;
+       part = searcher.NextRectSearch()) {
+    // Only text partitions whose box strictly straddles y are counted.
+    const bool straddles = part->IsTextType() &&
+                           part->bounding_box().bottom() < y &&
+                           y < part->bounding_box().top();
+    if (straddles) ++crossings;
+  }
+  return crossings;
+}
+
+// Returns the number of text-type partitions that a search of the given
+// box reports. This is used to count partitions in cells, as that can show
+// how "strong" a potential table row/column (or even full table) actually is.
+int StructuredTable::CountPartitions(const TBOX& box) {
+  int text_partitions = 0;
+  ColPartitionGridSearch searcher(text_grid_);
+  searcher.SetUniqueMode(true);
+  searcher.StartRectSearch(box);
+  for (ColPartition* part = searcher.NextRectSearch(); part != nullptr;
+       part = searcher.NextRectSearch()) {
+    if (part->IsTextType())
+      ++text_partitions;
+  }
+  return text_partitions;
+}
+
+////////
+//////// TableRecognizer Class
+////////
+
+TableRecognizer::TableRecognizer()
+    : text_grid_(nullptr),           // No text grid until set_text_grid.
+      line_grid_(nullptr),           // No line grid until set_line_grid.
+      min_height_(0),                // No minimum table height by default.
+      min_width_(0),                 // No minimum table width by default.
+      max_text_height_(INT32_MAX) {  // No text is filtered out by default.
+}
+
+TableRecognizer::~TableRecognizer() {  // Empty: the grids are not deleted.
+}
+
+void TableRecognizer::Init() {  // Currently a no-op.
+}
+
+void TableRecognizer::set_text_grid(ColPartitionGrid* text_grid) {
+  text_grid_ = text_grid;  // Raw pointer only; handed on by RecognizeTable.
+}
+void TableRecognizer::set_line_grid(ColPartitionGrid* line_grid) {
+  line_grid_ = line_grid;  // Raw pointer only; searched for table lines.
+}
+void TableRecognizer::set_min_height(int height) {
+  min_height_ = height;  // Candidate boxes shorter than this are not tried.
+}
+void TableRecognizer::set_min_width(int width) {
+  min_width_ = width;  // NOTE(review): no TableRecognizer method reads this.
+}
+void TableRecognizer::set_max_text_height(int height) {
+  max_text_height_ = height;  // Taller text is skipped when finding splits.
+}
+
+StructuredTable* TableRecognizer::RecognizeTable(const TBOX& guess) {
+  // Allocate and configure a candidate table. It is handed to the caller
+  // on success and deleted here on failure.
+  auto* candidate = new StructuredTable();
+  candidate->Init();
+  candidate->set_text_grid(text_grid_);
+  candidate->set_line_grid(line_grid_);
+  candidate->set_max_text_height(max_text_height_);
+
+  // First try the simple case: a table with *both* vertical and
+  // horizontal lines. If that fails, fall back to whitespace analysis.
+  // TODO(nbeato): Break this apart to take advantage of horizontal
+  // lines or vertical lines when present.
+  const bool recognized = RecognizeLinedTable(guess, candidate) ||
+                          RecognizeWhitespacedTable(guess, candidate);
+  if (recognized)
+    return candidate;
+
+  // No table found...
+  delete candidate;
+  return nullptr;
+}
+
+bool TableRecognizer::RecognizeLinedTable(const TBOX& guess_box,
+                                          StructuredTable* table) {
+  // Requires enough lines in the guess and a box enclosing all of them.
+  TBOX lines_box = guess_box;
+  if (!HasSignificantLines(guess_box) || !FindLinesBoundingBox(&lines_box))
+    return false;
+  // Let the table derive its structure from the lines inside that box.
+  table->set_bounding_box(lines_box);
+  return table->FindLinedStructure();
+}
+
+// Quick implementation. Just counts the lines found in the guess box.
+// A better implementation would count intersections and look for connected
+// components. It could even go as far as finding lines of similar length.
+// To account for these possible issues, the VerifyLinedTableCells function
+// will reject lined tables that cause intersections with text on the page.
+// TODO(nbeato): look for "better" lines
+bool TableRecognizer::HasSignificantLines(const TBOX& guess) {
+  int num_horizontal = 0;
+  int num_vertical = 0;
+
+  ColPartitionGridSearch searcher(line_grid_);
+  searcher.SetUniqueMode(true);
+  searcher.StartRectSearch(guess);
+  for (ColPartition* part = searcher.NextRectSearch(); part != nullptr;
+       part = searcher.NextRectSearch()) {
+    // The two tests are independent; a partition may count towards both.
+    num_horizontal += part->IsHorizontalLine() ? 1 : 0;
+    num_vertical += part->IsVerticalLine() ? 1 : 0;
+  }
+
+  // Both directions must reach their minimum line count.
+  const bool enough_vertical = num_vertical >= kLinedTableMinVerticalLines;
+  return enough_vertical && num_horizontal >= kLinedTableMinHorizontalLines;
+}
+
+// Given a bounding box containing some horizontal / vertical lines,
+// iteratively finds the extents of all of those lines.
+// Returns false if the first pass finds no lines in the box. Otherwise
+// the first pass may shrink the box to the lines it found, and the
+// later passes only grow it until its area stops changing.
+// It is expected to halt within O(n * gridarea), n being the line count.
+// The assumption is that growing the box iteratively will add lines
+// several times, but eventually we'll find the extents.
+//
+// For tables, the approach is a bit aggressive: a single line (which could
+// be noise or a column ruling) can destroy the table inside.
+//
+// TODO(nbeato): This is a quick first implementation.
+// A better implementation would actually look for consistency
+// in extents of the lines and find the extents using lines
+// that clearly describe the table. This would allow the
+// lines to "vote" for height/width. An approach like
+// this would solve issues with page layout rulings.
+// I haven't looked for these issues yet, so I can't even
+// say they happen confidently.
+bool TableRecognizer::FindLinesBoundingBox(TBOX* bounding_box) {
+  // The first pass tells us whether any lines are present at all,
+  // and it may shrink the box down to the extents of those lines.
+  if (!FindLinesBoundingBoxIteration(bounding_box))
+    return false;
+
+  // Repeat until the area of the box stops changing. From here on
+  // the box is expected to only grow, which the asserts enforce.
+  int area_before = 0;
+  int area_after = 0;
+  do {
+    area_before = bounding_box->area();
+    const bool found_lines = FindLinesBoundingBoxIteration(bounding_box);
+    // Lines were found above, so they must be found again.
+    ASSERT_HOST(found_lines);
+    area_after = bounding_box->area();
+    ASSERT_HOST(area_after >= area_before);
+  } while (area_after > area_before);
+
+  return true;
+}
+
+bool TableRecognizer::FindLinesBoundingBoxIteration(TBOX* bounding_box) {
+  // Visit every partition in the current box, accumulating line extents.
+  ColPartitionGridSearch searcher(line_grid_);
+  searcher.SetUniqueMode(true);
+  searcher.StartRectSearch(*bounding_box);
+
+  bool found_line = false;
+  for (ColPartition* part = searcher.NextRectSearch(); part != nullptr;
+       part = searcher.NextRectSearch()) {
+    if (!part->IsLineType())
+      continue;
+    if (found_line) {
+      *bounding_box += part->bounding_box();
+    } else {
+      // The first line replaces the box, which can shrink it.
+      *bounding_box = part->bounding_box();
+      found_line = true;
+    }
+  }
+  return found_line;
+}
+
+// The goal of this function is to move the table boundaries around and find
+// a table that maximizes the whitespace around the table while maximizing
+// the cellular structure. As a result, it gets confused by headers, footers,
+// and merged columns (text that crosses columns). There is a tolerance
+// that allows a few partitions to count towards potential cell merges.
+// It's the max_merged parameter to FindCellSplitLocations.
+// It can work, but it needs some false positive removal on boundaries.
+// For now, the grid structure must not intersect any partitions.
+// Also, small tolerance is added to the horizontal lines for tightly packed
+// tables. The tolerance is added by adjusting the bounding boxes of the
+// partitions (in FindHorizontalPartitions). The current implementation
+// only adjusts the vertical extents of the table.
+//
+// Also note. This was hacked at a lot. It could probably use some
+// more hacking at to find a good set of border conditions and then a
+// nice clean up.
+bool TableRecognizer::RecognizeWhitespacedTable(const TBOX& guess_box,
+                                                StructuredTable* table) {
+  TBOX best_box = guess_box;  // Best borders known.
+  int best_below = 0;         // Margin size below best table.
+  int best_above = 0;         // Margin size above best table.
+  TBOX adjusted = guess_box;  // The search box.
+
+  // We assume that the guess box is somewhat accurate, so we don't allow
+  // the adjusted border to pass half of the guessed area. This prevents
+  // "negative" tables from forming.
+  const int kMidGuessY = (guess_box.bottom() + guess_box.top()) / 2;
+  // Keeps track of the most columns in an accepted table. The resulting table
+  // may be less than the max, but we don't want to stray too far.
+  int best_cols = 0;
+  // Make sure we find a good border.
+  bool found_good_border = false;
+
+  // Find the bottom of the table by trying a few different locations. For
+  // each location, the top, left, and right are fixed. We start the search
+  // in a smaller table to favor best_cols getting a good estimate sooner.
+  int last_bottom = INT32_MAX;
+  int bottom = NextHorizontalSplit(guess_box.left(), guess_box.right(),
+                                   kMidGuessY - min_height_ / 2, true);
+  int top = NextHorizontalSplit(guess_box.left(), guess_box.right(),
+                                kMidGuessY + min_height_ / 2, false);
+  adjusted.set_top(top);
+
+  // Headers/footers can be spaced far from everything.
+  // Make sure that the space below is greater than the space above
+  // the lowest row.
+  int previous_below = 0;
+  const int kMaxChances = 10;
+  int chances = kMaxChances;
+  while (bottom != last_bottom) {
+    adjusted.set_bottom(bottom);
+
+    if (adjusted.height() >= min_height_) {
+      // Try to fit the grid on the current box. We give it a chance
+      // if the number of columns didn't significantly drop.
+      table->set_bounding_box(adjusted);
+      if (table->FindWhitespacedStructure() &&
+          table->column_count() >= best_cols * kRequiredColumns) {
+        if (false && IsWeakTableRow(table, 0)) {
+          // Currently buggy, but was looking promising so disabled.
+          --chances;
+        } else {
+          // We favor 2 things,
+          //   1- Adding rows that have partitioned data.
+          //   2- Better margins (to find header/footer).
+          // For better tables, we just look for multiple cells in the
+          // bottom row with data in them.
+          // For margins, the space below the last row should
+          // be better than a table with the last row removed.
+          chances = kMaxChances;
+          double max_row_height = kMaxRowSize * table->median_cell_height();
+          if ((table->space_below() * kMarginFactor >= best_below &&
+               table->space_below() >= previous_below) ||
+              (table->CountFilledCellsInRow(0) > 1 &&
+               table->row_height(0) < max_row_height)) {
+            best_box.set_bottom(bottom);
+            best_below = table->space_below();
+            best_cols = std::max(table->column_count(), best_cols);
+            found_good_border = true;
+          }
+        }
+        previous_below = table->space_below();
+      } else {
+       --chances;
+      }
+    }
+    if (chances <= 0)
+      break;
+
+    last_bottom = bottom;
+    bottom = NextHorizontalSplit(guess_box.left(), guess_box.right(),
+                                 last_bottom, true);
+  }
+  if (!found_good_border)
+    return false;
+
+  // TODO(nbeato) comments: follow modified code above... put it in a function!
+  found_good_border = false;
+  int last_top = INT32_MIN;
+  top = NextHorizontalSplit(guess_box.left(), guess_box.right(),
+                            kMidGuessY + min_height_ / 2, false);
+  int previous_above = 0;
+  chances = kMaxChances;
+
+  adjusted.set_bottom(best_box.bottom());
+  while (last_top != top) {
+    adjusted.set_top(top);
+    if (adjusted.height() >= min_height_) {
+      table->set_bounding_box(adjusted);
+      if (table->FindWhitespacedStructure() &&
+          table->column_count() >= best_cols * kRequiredColumns) {
+        int last_row = table->row_count() - 1;
+        if (false && IsWeakTableRow(table, last_row)) {
+          // Currently buggy, but was looking promising so disabled.
+          --chances;
+        } else {
+          chances = kMaxChances;
+          double max_row_height = kMaxRowSize * table->median_cell_height();
+          if ((table->space_above() * kMarginFactor >= best_above &&
+               table->space_above() >= previous_above) ||
+              (table->CountFilledCellsInRow(last_row) > 1 &&
+               table->row_height(last_row) < max_row_height)) {
+            best_box.set_top(top);
+            best_above = table->space_above();
+            best_cols = std::max(table->column_count(), best_cols);
+            found_good_border = true;
+          }
+        }
+        previous_above = table->space_above();
+      } else {
+       --chances;
+      }
+    }
+    if (chances <= 0)
+      break;
+
+    last_top = top;
+    top = NextHorizontalSplit(guess_box.left(), guess_box.right(),
+                              last_top, false);
+  }
+
+  if (!found_good_border)
+    return false;
+
+  // If we get here, this shouldn't happen. It can be an assert, but
+  // I haven't tested it enough to make it crash things.
+  if (best_box.null_box())
+    return false;
+
+  // Given the best locations, fit the box to those locations.
+  table->set_bounding_box(best_box);
+  return table->FindWhitespacedStructure();
+}
+
+// Finds the closest value to y that can safely cause a horizontal
+// split in the partitions, searching in the direction selected by
+// top_to_bottom. Returns y itself if no partition passes the filters.
+// This function has been buggy and not as reliable as I would've
+// liked. I suggest finding all of the splits once (FindCellSplitLocations
+// looks like the intended helper) and caching the results somewhere.
+int TableRecognizer::NextHorizontalSplit(int left, int right, int y,
+                                         bool top_to_bottom) {
+  ColPartitionGridSearch gsearch(text_grid_);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartVerticalSearch(left, right, y);
+  ColPartition* text = nullptr;
+  int last_y = y;  // Best split so far; only moves away from y.
+  while ((text = gsearch.NextVerticalSearch(top_to_bottom)) != nullptr) {
+    if (!text->IsTextType() || !text->IsHorizontalType())
+      continue;  // Only horizontal text partitions are considered.
+    if (text->bounding_box().height() > max_text_height_)
+      continue;  // Text taller than the filter is ignored.
+
+    const TBOX& text_box = text->bounding_box();
+    if (top_to_bottom && (last_y >= y || last_y <= text_box.top())) {
+      last_y = std::min(last_y, static_cast<int>(text_box.bottom()));
+      continue;  // Not yet moved below y, or this box reaches last_y.
+    }
+    if (!top_to_bottom && (last_y <= y || last_y >= text_box.bottom())) {
+      last_y = std::max(last_y, static_cast<int>(text_box.top()));
+      continue;  // Not yet moved above y, or this box reaches last_y.
+    }
+
+    return last_y;  // This box does not reach last_y: a gap was found.
+  }
+  // If none is found, we at least want to preserve the min/max,
+  // which defines the overlap of y with the last partition in the grid.
+  return last_y;
+}
+
+// Reports whether a row has too few filled cells for the table's width.
+// The code is buggy right now and is disabled in the calling function:
+// sometimes the row passed in seems wrong (like a phantom row was introduced).
+// Something may be going on in cell_y_ before this is called... not certain.
+bool TableRecognizer::IsWeakTableRow(StructuredTable* table, int row) {
+  if (!table->VerifyRowFilled(row))
+    return false;
+
+  // Wide tables scale the threshold with the column count; narrow ones
+  // look it up in a table indexed by column count.
+  const int columns = table->column_count();
+  const double min_filled = columns > kGoodRowNumberOfColumnsSmallSize
+                                ? columns * kGoodRowNumberOfColumnsLarge
+                                : kGoodRowNumberOfColumnsSmall[columns];
+  return table->CountFilledCellsInRow(row) < min_filled;
+}
+
+}  // namespace tesseract
diff --git a/tesseract/src/textord/tablerecog.h b/tesseract/src/textord/tablerecog.h
new file mode 100644
index 00000000..eb8f0543
--- /dev/null
+++ b/tesseract/src/textord/tablerecog.h
@@ -0,0 +1,378 @@
+///////////////////////////////////////////////////////////////////////
+// File:        tablerecog.h
+// Description: Functions to detect structure of tables.
+// Author:    Nicholas Beato
+// Created:   Aug 17, 2010
+//
+// (C) Copyright 2010, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TABLERECOG_H_
+#define TABLERECOG_H_
+
+#include "colpartitiongrid.h"
+#include "genericvector.h"
+
+namespace tesseract {
+
+// There are 2 classes in this file. They have 2 different purposes.
+//  - StructuredTable contains the methods to find the structure given
+//    a specific bounding box and grow that structure.
+//  - TableRecognizer contains the methods to adjust the possible positions
+//    of a table without worrying about structure.
+//
+// To use these classes, the assumption is that the TableFinder will
+// have a guess of the location of a table (or possibly over/undersegmented
+// tables). The TableRecognizer is responsible for finding the table boundaries
+// at a high level. The StructuredTable class is responsible for determining
+// the structure of the table and trying to maximize its bounds while retaining
+// the structure.
+// (The latter part is not implemented yet, but that was the goal).
+//
+// While on the boundary discussion, keep in mind that this is a first pass.
+// There should eventually be some things like internal structure checks,
+// and, more importantly, surrounding text flow checks.
+//
+
+// Usage:
+// The StructuredTable class contains methods to query a potential table.
+// It has functions to find structure, count rows, find ColPartitions that
+// intersect gridlines, etc. It is not meant to blindly find a table. It
+// is meant to start with a known table location and enhance it.
+// Usage:
+//    ColPartitionGrid text_grid, line_grid;  // init
+//    TBOX table_box;  // known location of the table
+//
+//    StructuredTable table;
+//    table.Init();  // construction code
+//    table.set_text_grid(/* text */);  // These 2 grids can be the same!
+//    table.set_line_grid(/* lines */);
+//    table.set_max_text_height(10);    // Filter vertical and tall text.
+//    // IMPORTANT! The table needs to be told where it is!
+//    table.set_bounding_box(table_box);  // Set initial table location.
+//    if (table.FindWhitespacedStructure()) {
+//      // process table
+//      table.column_count();  // number of columns
+//      table.row_count();     // number of rows
+//      table.cell_count();    // number of cells
+//      table.bounding_box();  // updated bounding box
+//      // etc.
+//    }
+//
+class TESS_API StructuredTable {
+ public:
+  StructuredTable();
+  ~StructuredTable() = default;
+
+  // Initialization code. Must be called after the constructor.
+  void Init();
+
+  // Sets the grids used by the table. These can be changed between
+  // calls to Recognize. They are treated as read-only data.
+  void set_text_grid(ColPartitionGrid* text);
+  void set_line_grid(ColPartitionGrid* lines);
+  // Filters text partitions that are ridiculously tall to prevent
+  // merging rows.
+  void set_max_text_height(int height);
+
+  // Basic accessors. Some are treated as attributes despite having indirect
+  // representation.
+  bool is_lined() const;
+  int row_count() const;
+  int column_count() const;
+  int cell_count() const;
+  void set_bounding_box(const TBOX& box);
+  const TBOX& bounding_box() const;
+  int median_cell_height();
+  int median_cell_width();
+  int row_height(int row) const;
+  int column_width(int column) const;
+  int space_above() const;
+  int space_below() const;
+
+  // Given enough horizontal and vertical lines in a region, create this table
+  // based on the structure given by the lines. Return true if it worked out.
+  // Code assumes the lines exist. It is the caller's responsibility to check
+  // for lines and find an appropriate bounding box.
+  bool FindLinedStructure();
+
+  // The main subroutine for finding generic table structure. The function
+  // finds the grid structure in the given box. Returns true if a good grid
+  // exists, implying that "this" table is valid.
+  bool FindWhitespacedStructure();
+
+  ////////
+  //////// Functions to query table info.
+  ////////
+
+  // Returns true if inserting part into the table does not cause any
+  // cell merges.
+  bool DoesPartitionFit(const ColPartition& part) const;
+  // Checks if a sub-table has multiple data cells filled.
+  int CountFilledCells();
+  int CountFilledCellsInRow(int row);
+  int CountFilledCellsInColumn(int column);
+  int CountFilledCells(int row_start, int row_end,
+                       int column_start, int column_end);
+
+  // Makes sure that at least one cell in a row has substantial area filled.
+  // This can filter out large whitespace caused by growing tables too far
+  // and page numbers.
+  // (currently bugged for some reason).
+  bool VerifyRowFilled(int row);
+  // Finds the filled area in a cell.
+  double CalculateCellFilledPercentage(int row, int column);
+
+  // Debug display, draws the table in the given color. If the table is not
+  // valid, the table and "best" grid lines are still drawn in the given color.
+  void Display(ScrollView* window, ScrollView::Color color);
+
+ protected:
+  // Clear the structure information.
+  void ClearStructure();
+
+  ////////
+  //////// Lined tables
+  ////////
+
+  // Verifies the lines do not intersect partitions. This happens when
+  // the lines are in column boundaries and extend the full page. As a result,
+  // the grid lines go through column text. The condition is detectable.
+  bool VerifyLinedTableCells();
+
+  ////////
+  //////// Tables with whitespace
+  ////////
+
+  // This is the function to change if you want to filter resulting tables
+  // better. Right now it just checks for a minimum cell count and such.
+  // You could add things like maximum number of ColPartitions per cell or
+  // similar.
+  bool VerifyWhitespacedTable();
+  // Find the columns of a table using whitespace.
+  void FindWhitespacedColumns();
+  // Find the rows of a table using whitespace.
+  void FindWhitespacedRows();
+
+  ////////
+  //////// Functions to provide information about the table.
+  ////////
+
+  // Calculates the whitespace around the table using the table boundary and
+  // the supplied grids (set_text_grid and set_line_grid).
+  void CalculateMargins();
+  // Update the table margins with the supplied grid. This is
+  // only called by calculate margins to use multiple grid sources.
+  void UpdateMargins(ColPartitionGrid* grid);
+  int FindVerticalMargin(ColPartitionGrid* grid, int start_x,
+                         bool decrease) const;
+  int FindHorizontalMargin(ColPartitionGrid* grid, int start_y,
+                           bool decrease) const;
+  // Calculates stats on the table, namely the median cell height and width.
+  void CalculateStats();
+
+  ////////
+  //////// Functions to try to "fix" some table errors.
+  ////////
+
+  // Given a whitespaced table, this looks for bordering lines that might
+  // be page layout boxes around the table. It is necessary to get the margins
+  // correct on the table. If the lines are not joined, the margins will be
+  // the distance to the line, which is not right.
+  void AbsorbNearbyLines();
+
+  // Nice utility function for finding partition gaps. You feed it a sorted
+  // list of all of the mins/maxes of the partitions in the table, and it gives
+  // you the gaps (middle). This works for both vertical and horizontal
+  // gaps.
+  //
+  // If you want to allow slight overlap in the division and the partitions,
+  // just scale down the partitions before inserting them in the list.
+  // Likewise, you can force at least some space between partitions.
+  // This trick is how the horizontal partitions are done (since the page
+  // skew could make it hard to find splits in the text).
+  //
+  // As a result, "0 distance" between closest partitions causes a gap.
+  // This is not a programmatic assumption. It is intentional and simplifies
+  // things.
+  //
+  // "max_merged" indicates both the minimum number of stacked partitions
+  // to cause a cell (add 1 to it), and the maximum number of partitions that
+  // a grid line can intersect. For example, if max_merged is 0, then lines
+  // are inserted wherever space exists between partitions. If it is 2,
+  // lines may intersect 2 partitions at most, but you also need at least
+  // 2 partitions to generate a line.
+  static void FindCellSplitLocations(const GenericVector<int>& min_list,
+                                     const GenericVector<int>& max_list,
+                                     int max_merged,
+                                     GenericVector<int>* locations);
+
+  ////////
+  //////// Utility function for table queries
+  ////////
+
+  // Counts the number of text ColPartitions that a vertical (x) or horizontal
+  // (y) cell division crosses. Presumably used by VerifyLinedTableCells.
+  int CountVerticalIntersections(int x);
+  int CountHorizontalIntersections(int y);
+
+  // Counts how many text partitions are in this box.
+  int CountPartitions(const TBOX& box);
+
+  ////////
+  //////// Data members.
+  ////////
+
+  // Input data, used as read only data to make decisions.
+  ColPartitionGrid* text_grid_;    // Text ColPartitions
+  ColPartitionGrid* line_grid_;    // Line ColPartitions
+  // Table structure.
+  // bounding box is a convenient external representation.
+  // cell_x_ and cell_y_ indicate the grid lines.
+  TBOX bounding_box_;              // Bounding box
+  GenericVector<int> cell_x_;  // Locations of vertical divisions (sorted)
+  GenericVector<int> cell_y_;  // Locations of horizontal divisions (sorted)
+  bool is_lined_;                  // Is the table backed up by a line structure
+  // Table margins, set via CalculateMargins
+  int space_above_;
+  int space_below_;
+  int space_left_;
+  int space_right_;
+  int median_cell_height_;
+  int median_cell_width_;
+  // Filters, used to prevent awkward partitions from destroying structure.
+  int max_text_height_;
+};
+
+class TESS_API TableRecognizer {
+ public:
+  TableRecognizer();
+  ~TableRecognizer();
+
+  // Initialization code. Must be called after the constructor.
+  void Init();
+
+  ////////
+  //////// Pre-recognize methods to initialize table constraints.
+  ////////
+
+  // Sets the grids used by the table. These can be changed between
+  // calls to Recognize. They are treated as read-only data.
+  void set_text_grid(ColPartitionGrid* text);
+  void set_line_grid(ColPartitionGrid* lines);
+  // Sets some additional constraints on the table.
+  void set_min_height(int height);
+  void set_min_width(int width);
+  // Filters text partitions that are ridiculously tall to prevent
+  // merging rows. Note that "filters" refers to allowing horizontal
+  // cells to slice through them on the premise that they were
+  // merged text rows during previous layout.
+  void set_max_text_height(int height);
+
+  // Given a guess location, the RecognizeTable function will try to find a
+  // structured grid in the area. On success, it will return a new
+  // StructuredTable (and assumes you will delete it). Otherwise,
+  // nullptr is returned.
+  //
+  // Keep in mind, this may "overgrow" or "undergrow" the size of guess.
+  // Ideally, there is either a one-to-one correspondence between
+  // the guess and table or no table at all. This is not the best of
+  // assumptions right now, but was made to try to keep things simple in
+  // the first pass.
+  //
+  // If a line structure is available on the page in the given region,
+  // the table will use the linear structure as it is.
+  // Otherwise, it will try to maximize the whitespace around it while keeping
+  // a grid structure. This is somewhat working.
+  //
+  // Since the combination of adjustments can get high, effort was
+  // originally made to keep the number of adjustments linear in the number
+  // of partitions. The underlying structure finding code used to be
+  // much more complex. I don't know how necessary this constraint is anymore.
+  // The evaluation of a possible table is kept within O(nlogn) in the size of
+  // the table (where size is the number of partitions in the table).
+  // As a result, the algorithm is capable of O(n^2 log n). Depending
+  // on the grid search size, it may be higher.
+  //
+  // Last note: it is possible to just try all partition boundaries at a high
+  // level O(n^4) and do a verification scheme (at least O(nlogn)). If there
+  // are 200 partitions on a page, this could be too costly. Effort could go
+  // into pruning the search, but I opted for something quicker. I'm confident
+  // that the independent adjustments can get similar results and keep the
+  // complexity down. However, the other approach could work without using
+  // TableFinder at all if it is fast enough.  It comes down to properly
+  // deciding what is a table. The code currently relies on TableFinder's
+  // guess to the location of a table for that.
+  StructuredTable* RecognizeTable(const TBOX& guess_box);
+
+ protected:
+  ////////
+  //////// Lined tables
+  ////////
+
+  // Returns true if the given box has a lined table within it. The
+  // table argument will be updated with the table if the table exists.
+  bool RecognizeLinedTable(const TBOX& guess_box, StructuredTable* table);
+  // Returns true if the given box has a large number of horizontal and
+  // vertical lines present. If so, we assume the extent of these lines
+  // uniquely defines a table and find that table via RecognizeLinedTable.
+  bool HasSignificantLines(const TBOX& guess);
+
+  // Given enough horizontal and vertical lines in a region, find a bounding
+  // box that encloses all of them (as well as newly introduced lines).
+  // The bounding box is the smallest box that encloses the lines in guess
+  // without having any lines sticking out of it.
+  // bounding_box is an in/out parameter.
+  // On input, it is the extents of the box to search.
+  // On output, it is the resulting bounding box.
+  bool FindLinesBoundingBox(TBOX* bounding_box);
+  // Iteration in above search.
+  // bounding_box is an in/out parameter.
+  // On input, it is the extents of the box to search.
+  // On output, it is the resulting bounding box.
+  bool FindLinesBoundingBoxIteration(TBOX* bounding_box);
+
+  ////////
+  //////// Generic "whitespaced" tables
+  ////////
+
+  // Returns true if the given box has a whitespaced table within it. The
+  // table argument will be updated if the table exists. Also note
+  // that this method will fail if the guess_box center is not
+  // mostly within the table.
+  bool RecognizeWhitespacedTable(const TBOX& guess_box, StructuredTable* table);
+
+  // Finds the location of a horizontal split relative to y.
+  // This function is mostly unused now. If RecognizeWhitespacedTable
+  // changes much, it can be removed. Note, it isn't really as reliable
+  // as I thought. I went with alternatives for most of the other uses.
+  int NextHorizontalSplit(int left, int right, int y, bool top_to_bottom);
+
+  // Indicates that a table row is weak. This means that it has
+  // many missing data cells or very large cell heights compared
+  // to the rest of the table.
+  static bool IsWeakTableRow(StructuredTable* table, int row);
+
+  // Input data, used as read only data to make decisions.
+  ColPartitionGrid* text_grid_;    // Text ColPartitions
+  ColPartitionGrid* line_grid_;    // Line ColPartitions
+  // Table constraints, a "good" table must satisfy these.
+  int min_height_;
+  int min_width_;
+  // Filters, used to prevent awkward partitions from destroying structure.
+  int max_text_height_;  // Horizontal lines may intersect taller text.
+};
+
+}  // namespace tesseract
+
+#endif  /* TABLERECOG_H_ */
diff --git a/tesseract/src/textord/tabvector.cpp b/tesseract/src/textord/tabvector.cpp
new file mode 100644
index 00000000..95e75a38
--- /dev/null
+++ b/tesseract/src/textord/tabvector.cpp
@@ -0,0 +1,982 @@
+///////////////////////////////////////////////////////////////////////
+// File:        tabvector.cpp
+// Description: Class to hold a near-vertical vector representing a tab-stop.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "tabvector.h"
+#include "blobbox.h"
+#include "colfind.h"
+#include "colpartitionset.h"
+#include "detlinefit.h"
+#include "statistc.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+// Multiple of the mean box height used as the maximum gutter for the
+// evaluation search of aligned (non-ragged) tabs.
+const int kGutterMultiple = 4;
+// Multiple of neighbour gap that we expect the gutter gap to be at minimum.
+// Also used as the mean-height multiple for the maximum gutter of ragged tabs.
+const int kGutterToNeighbourRatio = 3;
+// Pixel distance for tab vectors to be considered the same.
+const int kSimilarVectorDist = 10;
+// Pixel distance for ragged tab vectors to be considered the same if there
+// is nothing in the overlap box.
+const int kSimilarRaggedDist = 50;
+// Max multiple of blob size (square root of the box area) to allow filling
+// in between blobs when evaluating.
+const int kMaxFillinMultiple = 11;
+// Min fraction of the typical (median) gutter size to allow a gutter on a
+// good tab blob.
+const double kMinGutterFraction = 0.5;
+// Multiple of 1/n lines as a minimum gutter in evaluation.
+const double kLineCountReciprocal = 4.0;
+// Constant add-on for minimum gutter for aligned tabs.
+const double kMinAlignedGutter = 0.25;
+// Constant add-on for minimum gutter for ragged tabs.
+const double kMinRaggedGutter = 1.5;
+
+// Tunable declared through the double_VAR macro (defined outside this file);
+// 0.5 is presumably the default value - confirm against the macro.
+double_VAR(textord_tabvector_vertical_gap_fraction, 0.5,
+  "max fraction of mean blob width allowed for vertical gaps in vertical text");
+
+// Tunable declared through the double_VAR macro (defined outside this file);
+// 0.5 is presumably the default value - confirm against the macro.
+double_VAR(textord_tabvector_vertical_box_ratio, 0.5,
+  "Fraction of box matches required to declare a line vertical");
+
+// NOTE(review): presumably expands to the out-of-line support code for
+// TabConstraint_LIST; the macro is defined outside this file - confirm.
+ELISTIZE(TabConstraint)
+
+// Builds a fresh single-element constraint list for one end of the given
+// TabVector (the top end if is_top, otherwise the bottom end) and attaches
+// the list to that end of the vector.
+void TabConstraint::CreateConstraint(TabVector* vector, bool is_top) {
+  auto* new_list = new TabConstraint_LIST;
+  TabConstraint_IT list_it(new_list);
+  list_it.add_to_end(new TabConstraint(vector, is_top));
+  if (!is_top) {
+    vector->set_bottom_constraints(new_list);
+    return;
+  }
+  vector->set_top_constraints(new_list);
+}
+
+// Returns true if the two lists are distinct and the intersection of all
+// the [y_min, y_max] ranges held in both lists is non-empty, i.e. the
+// lists are compatible enough to be merged.
+bool TabConstraint::CompatibleConstraints(TabConstraint_LIST* list1,
+                                          TabConstraint_LIST* list2) {
+  if (list1 == list2) {
+    return false;  // A list is never merged with itself.
+  }
+  if (textord_debug_tabfind > 3) {
+    tprintf("Testing constraint compatibility\n");
+  }
+  // Start from the widest possible range and narrow it with both lists.
+  int lowest = -INT32_MAX;
+  int highest = INT32_MAX;
+  GetConstraints(list1, &lowest, &highest);
+  GetConstraints(list2, &lowest, &highest);
+  if (textord_debug_tabfind > 3) {
+    tprintf("Resulting range = [%d,%d]\n", lowest, highest);
+  }
+  return !(highest < lowest);
+}
+
+// Moves every constraint from list2 into list1 and re-points the TabVector
+// end referenced by each moved constraint at list1. list2 itself is deleted.
+// Merging a list with itself is a no-op.
+void TabConstraint::MergeConstraints(TabConstraint_LIST* list1,
+                                     TabConstraint_LIST* list2) {
+  if (list1 == list2) {
+    return;
+  }
+  TabConstraint_IT it(list2);
+  if (textord_debug_tabfind > 3) {
+    tprintf("Merging constraints\n");
+  }
+  // Redirect each vector end referenced from list2 to the surviving list.
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    TabConstraint* item = it.data();
+    TabVector* owner = item->vector_;
+    if (textord_debug_tabfind> 3) {
+      owner->Print("Merge");
+    }
+    if (item->is_top_) {
+      owner->set_top_constraints(list1);
+    } else {
+      owner->set_bottom_constraints(list1);
+    }
+  }
+  // Splice the redirected constraints into list1, then drop the emptied list.
+  it = list1;
+  it.add_list_before(list2);
+  delete list2;
+}
+
+// Moves every constrained vector end in the list to the midpoint of the
+// range shared by all the constraints, clears the constraint pointer of
+// each affected vector end, and deletes the list.
+void TabConstraint::ApplyConstraints(TabConstraint_LIST* constraints) {
+  int lowest = -INT32_MAX;
+  int highest = INT32_MAX;
+  GetConstraints(constraints, &lowest, &highest);
+  const int shared_y = (lowest + highest) / 2;
+  TabConstraint_IT it(constraints);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    TabConstraint* item = it.data();
+    TabVector* target = item->vector_;
+    if (!item->is_top_) {
+      // Bottom constraint: move the start point.
+      target->SetYStart(shared_y);
+      target->set_bottom_constraints(nullptr);
+      continue;
+    }
+    // Top constraint: move the end point.
+    target->SetYEnd(shared_y);
+    target->set_top_constraints(nullptr);
+  }
+  delete constraints;
+}
+
+// Records the allowed y range for one end of the vector: for the top end it
+// runs from the current end point up to the extended maximum; for the bottom
+// end it runs from the extended minimum up to the current start point.
+TabConstraint::TabConstraint(TabVector* vector, bool is_top)
+  : vector_(vector), is_top_(is_top) {
+  if (!is_top) {
+    y_min_ = vector->extended_ymin();
+    y_max_ = vector->startpt().y();
+    return;
+  }
+  y_min_ = vector->endpt().y();
+  y_max_ = vector->extended_ymax();
+}
+
+// Narrows the in/out range [*y_min, *y_max] by every constraint in the list:
+// *y_min becomes the largest lower bound seen and *y_max the smallest upper
+// bound seen.
+void TabConstraint::GetConstraints(TabConstraint_LIST* constraints,
+                                   int* y_min, int* y_max) {
+  TabConstraint_IT it(constraints);
+  it.mark_cycle_pt();
+  while (!it.cycled_list()) {
+    TabConstraint* item = it.data();
+    if (textord_debug_tabfind > 3) {
+      tprintf("Constraint is [%d,%d]", item->y_min_, item->y_max_);
+      item->vector_->Print(" for");
+    }
+    if (*y_min < item->y_min_) {
+      *y_min = item->y_min_;
+    }
+    if (item->y_max_ < *y_max) {
+      *y_max = item->y_max_;
+    }
+    it.forward();
+  }
+}
+
+// NOTE(review): presumably these expand to the out-of-line support code for
+// the TabVector list types (TabVector_LIST / TabVector_CLIST); the macros are
+// defined outside this file - confirm.
+ELIST2IZE(TabVector)
+CLISTIZE(TabVector)
+
+// The constructor is private. See the bottom of the file...
+
+
+// Public factory that builds a TabVector of the given alignment from a list
+// of boxes. The input vertical vector is used for the fit. Returns nullptr
+// (after deleting the candidate) if the fit fails.
+// For a non-ragged result, the direction of the fitted line, weighted by
+// the number of boxes, is added to *vertical_x and *vertical_y.
+// extended_start_y and extended_end_y are the maximum possible extension of
+// the line segment that can be used to align with others.
+// The input CLIST of BLOBNBOX good_points is consumed and taken over.
+TabVector* TabVector::FitVector(TabAlignment alignment, ICOORD vertical,
+                                int  extended_start_y, int extended_end_y,
+                                BLOBNBOX_CLIST* good_points,
+                                int* vertical_x, int* vertical_y) {
+  TabVector* fitted = new TabVector(extended_start_y, extended_end_y,
+                                    alignment, good_points);
+  const bool fit_ok = fitted->Fit(vertical, false);
+  if (!fit_ok) {
+    delete fitted;
+    return nullptr;
+  }
+  if (fitted->IsRagged()) {
+    // Ragged vectors do not contribute to the vertical estimate.
+    return fitted;
+  }
+  vertical = fitted->endpt_ - fitted->startpt_;
+  const int box_weight = fitted->BoxCount();
+  *vertical_x += vertical.x() * box_weight;
+  *vertical_y += vertical.y() * box_weight;
+  return fitted;
+}
+
+// Builds a TabVector of the given alignment that holds only the given blob.
+// The extended y bounds are copied from src; the initial extent is the
+// height of the blob, along its left edge for a left tab and along its right
+// edge otherwise. The sort key is computed from the midpoint of that edge
+// using vertical_skew.
+TabVector::TabVector(const TabVector& src, TabAlignment alignment,
+                     const ICOORD& vertical_skew, BLOBNBOX* blob)
+  : extended_ymin_(src.extended_ymin_), extended_ymax_(src.extended_ymax_),
+    needs_refit_(true), needs_evaluation_(true),
+    alignment_(alignment) {
+  BLOBNBOX_C_IT box_it(&boxes_);
+  box_it.add_to_end(blob);
+  TBOX blob_box = blob->bounding_box();
+  if (!IsLeftTab()) {
+    startpt_ = blob_box.botright();
+    endpt_ = blob_box.topright();
+  } else {
+    startpt_ = blob_box.botleft();
+    endpt_ = blob_box.topleft();
+  }
+  const int mid_x = (startpt_.x() + endpt_.x()) / 2;
+  const int mid_y = (startpt_.y() + endpt_.y()) / 2;
+  sort_key_ = SortKey(vertical_skew, mid_x, mid_y);
+  if (textord_debug_tabfind > 3) {
+    Print("Constructed a new tab vector:");
+  }
+}
+
+// Returns a newly allocated TabVector carrying only the basic geometry of
+// this one: start/end points, alignment, extended y range and the
+// intersects_other_lines_ flag.
+// Partners, boxes and constraints are not copied.
+// This is useful if you only need vector information for processing, such
+// as in the table detection code.
+TabVector* TabVector::ShallowCopy() const {
+  TabVector* clone = new TabVector();
+  clone->alignment_ = alignment_;
+  clone->startpt_ = startpt_;
+  clone->endpt_ = endpt_;
+  clone->extended_ymin_ = extended_ymin_;
+  clone->extended_ymax_ = extended_ymax_;
+  clone->intersects_other_lines_ = intersects_other_lines_;
+  return clone;
+}
+
+// Extend this vector to include the supplied blob if it doesn't
+// already have it.
+// The box list is walked from its current position while the box tops do not
+// exceed the top of the new blob; the new blob is then inserted before the
+// element the walk stopped on if that element's top is at or above the new
+// top, otherwise after it. needs_refit_ is set whenever a blob is inserted.
+void TabVector::ExtendToBox(BLOBNBOX* new_blob) {
+  TBOX new_box = new_blob->bounding_box();
+  BLOBNBOX_C_IT it(&boxes_);
+  if (!it.empty()) {
+    BLOBNBOX* blob = it.data();
+    TBOX box = blob->bounding_box();
+    while (!it.at_last() && box.top() <= new_box.top()) {
+      if (blob == new_blob)
+        return;  // We have it already.
+      it.forward();
+      blob = it.data();
+      box = blob->bounding_box();
+    }
+    // The loop exits without testing the element it stopped on (typically
+    // the last one), so check it here too; otherwise a blob that is already
+    // the last element would be inserted a second time.
+    if (blob == new_blob)
+      return;  // We have it already.
+    if (box.top() >= new_box.top()) {
+      it.add_before_stay_put(new_blob);
+      needs_refit_ = true;
+      return;
+    }
+  }
+  needs_refit_ = true;
+  it.add_after_stay_put(new_blob);
+}
+
+// Moves the start point to the given y, sliding x along the current line
+// (XAtY is evaluated before the point is modified).
+void TabVector::SetYStart(int start_y) {
+  const int x_on_line = XAtY(start_y);
+  startpt_.set_x(x_on_line);
+  startpt_.set_y(start_y);
+}
+// Moves the end point to the given y, sliding x along the current line
+// (XAtY is evaluated before the point is modified).
+void TabVector::SetYEnd(int end_y) {
+  const int x_on_line = XAtY(end_y);
+  endpt_.set_x(x_on_line);
+  endpt_.set_y(end_y);
+}
+
+// Rotates both end points by the given vector. Afterwards, if the dominant
+// direction from start to end is negative (mostly downwards or mostly
+// leftwards), the start and end points are exchanged.
+void TabVector::Rotate(const FCOORD& rotation) {
+  startpt_.rotate(rotation);
+  endpt_.rotate(rotation);
+  const int delta_x = endpt_.x() - startpt_.x();
+  const int delta_y = endpt_.y() - startpt_.y();
+  const bool mostly_down = delta_y < 0 && abs(delta_y) > abs(delta_x);
+  const bool mostly_left = delta_x < 0 && abs(delta_x) > abs(delta_y);
+  if (mostly_down || mostly_left) {
+    // Need to flip start/end.
+    const ICOORD old_start = startpt_;
+    startpt_ = endpt_;
+    endpt_ = old_start;
+  }
+}
+
+// Creates the initial constraint lists for this vector: one for the bottom
+// end followed by one for the top end.
+void TabVector::SetupConstraints() {
+  const bool bottom_end = false;
+  const bool top_end = true;
+  TabConstraint::CreateConstraint(this, bottom_end);
+  TabConstraint::CreateConstraint(this, top_end);
+}
+
+// Links the end constraints of this vector with those of its partners.
+// The first usable partner shares a bottom with this vector, each later
+// partner shares its bottom with the top of the preceding usable partner,
+// and the last partner in the list shares a top with this vector. Lists are
+// merged only when they are compatible. Partners lacking either constraint
+// list are reported and skipped.
+void TabVector::SetupPartnerConstraints() {
+  TabVector_C_IT partner_it(&partners_);
+  TabVector* previous = nullptr;
+  for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
+       partner_it.forward()) {
+    TabVector* current = partner_it.data();
+    const bool missing_constraints = current->top_constraints_ == nullptr ||
+                                     current->bottom_constraints_ == nullptr;
+    if (missing_constraints) {
+      current->Print("Impossible: has no constraints");
+      Print("This vector has it as a partner");
+      continue;
+    }
+    // Pick what the bottom of this partner should be tied to: our own bottom
+    // for the first partner, else the top of the previous partner.
+    TabConstraint_LIST* lower_list = bottom_constraints_;
+    if (previous != nullptr) {
+      lower_list = previous->top_constraints_;
+    }
+    if (TabConstraint::CompatibleConstraints(lower_list,
+                                             current->bottom_constraints_)) {
+      TabConstraint::MergeConstraints(lower_list,
+                                      current->bottom_constraints_);
+    }
+    previous = current;
+    // The last partner also gets a common top with this vector.
+    if (partner_it.at_last() &&
+        TabConstraint::CompatibleConstraints(top_constraints_,
+                                             current->top_constraints_)) {
+      TabConstraint::MergeConstraints(top_constraints_,
+                                      current->top_constraints_);
+    }
+  }
+}
+
+// Ties this vector to a single partner: merges the two bottom constraint
+// lists if they are compatible, then does the same for the two top lists.
+void TabVector::SetupPartnerConstraints(TabVector* partner) {
+  const bool bottoms_compatible = TabConstraint::CompatibleConstraints(
+      bottom_constraints_, partner->bottom_constraints_);
+  if (bottoms_compatible) {
+    TabConstraint::MergeConstraints(bottom_constraints_,
+                                    partner->bottom_constraints_);
+  }
+  // The top lists are read only now, after any bottom merge has happened.
+  const bool tops_compatible = TabConstraint::CompatibleConstraints(
+      top_constraints_, partner->top_constraints_);
+  if (tops_compatible) {
+    TabConstraint::MergeConstraints(top_constraints_,
+                                    partner->top_constraints_);
+  }
+}
+
+// Applies the top constraint list (if any) and then the bottom constraint
+// list (if any) to adjust the ends of the vectors they cover.
+void TabVector::ApplyConstraints() {
+  if (top_constraints_ != nullptr) {
+    TabConstraint::ApplyConstraints(top_constraints_);
+  }
+  // Checked only after the top list has been applied.
+  if (bottom_constraints_ != nullptr) {
+    TabConstraint::ApplyConstraints(bottom_constraints_);
+  }
+}
+
+// Merges tab vectors in the list that SimilarTo() reports as mergeable.
+// For each vector, the rest of the list is scanned (with wrap-around) for a
+// similar one; on the first hit the current vector is extracted from the
+// list and absorbed by the one that was found.
+void TabVector::MergeSimilarTabVectors(const ICOORD& vertical,
+                                       TabVector_LIST* vectors,
+                                       BlobGrid* grid) {
+  TabVector_IT outer_it(vectors);
+  for (outer_it.mark_cycle_pt(); !outer_it.cycled_list();
+       outer_it.forward()) {
+    TabVector* absorbed = outer_it.data();
+    TabVector_IT inner_it(outer_it);
+    for (inner_it.forward(); !inner_it.at_first(); inner_it.forward()) {
+      TabVector* keeper = inner_it.data();
+      if (!keeper->SimilarTo(vertical, *absorbed, grid)) {
+        continue;
+      }
+      // Merge into the forward one, in case the combined vector now
+      // overlaps one in between.
+      if (textord_debug_tabfind) {
+        keeper->Print("Merging");
+        absorbed->Print("by deleting");
+      }
+      keeper->MergeWith(vertical, outer_it.extract());
+      if (textord_debug_tabfind) {
+        keeper->Print("Producing");
+      }
+      ICOORD merged_direction = keeper->endpt();
+      merged_direction -= keeper->startpt();
+      if (textord_debug_tabfind && abs(merged_direction.x()) > 100) {
+        keeper->Print("Garbage result of merge?");
+      }
+      break;
+    }
+  }
+}
+
+// Return true if this vector is the same side, overlaps, and close
+// enough to the other to be merged.
+//   vertical: only abs(vertical.y()) is used, as an approximate scale factor
+//             for differences in sort_key_.
+//   other:    the candidate vector to compare against.
+//   grid:     optional blob grid; if nullptr, ragged vectors that pass the
+//             looser distance test are accepted without a blob search.
+// Vectors that are not both right tabs or both left tabs are never similar.
+bool TabVector::SimilarTo(const ICOORD& vertical,
+                          const TabVector& other, BlobGrid* grid) const {
+  if ((IsRightTab() && other.IsRightTab()) ||
+      (IsLeftTab() && other.IsLeftTab())) {
+    // If they don't overlap, at least in extensions, then there is no chance.
+    if (ExtendedOverlap(other.extended_ymax_, other.extended_ymin_) < 0)
+      return false;
+    // A fast approximation to the scale factor of the sort_key_.
+    int v_scale = abs(vertical.y());
+    if (v_scale == 0)
+      v_scale = 1;
+    // If they are close enough, then OK.
+    if (sort_key_ + kSimilarVectorDist * v_scale >= other.sort_key_ &&
+        sort_key_ - kSimilarVectorDist * v_scale <= other.sort_key_)
+      return true;
+    // Ragged tabs get a bigger threshold.
+    // Anything that is not ragged on both sides, or is further apart than
+    // the ragged threshold, is rejected here.
+    if (!IsRagged() || !other.IsRagged() ||
+        sort_key_ + kSimilarRaggedDist * v_scale < other.sort_key_ ||
+        sort_key_ - kSimilarRaggedDist * v_scale > other.sort_key_)
+      return false;
+    if (grid == nullptr) {
+      // There is nothing else to test!
+      return true;
+    }
+    // If there is nothing in the rectangle between the vector that is going to
+    // move, and the place it is moving to, then they can be merged.
+    // Setup a vertical search for any blob.
+    // NOTE(review): for left tabs IsRightTab() is false, so this expression
+    // always selects &other as the mover - confirm that is intended.
+    const TabVector* mover = (IsRightTab() &&
+       sort_key_ < other.sort_key_) ? this : &other;
+    int top_y = mover->endpt_.y();
+    int bottom_y = mover->startpt_.y();
+    int left = std::min(mover->XAtY(top_y), mover->XAtY(bottom_y));
+    int right = std::max(mover->XAtY(top_y), mover->XAtY(bottom_y));
+    // Distance between the two vectors, scaled back from sort-key units.
+    int shift = abs(sort_key_ - other.sort_key_) / v_scale;
+    // Widen the search range on one side by the distance to be moved.
+    if (IsRightTab()) {
+      right += shift;
+    } else {
+      left -= shift;
+    }
+
+    GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(grid);
+    vsearch.StartVerticalSearch(left, right, top_y);
+    BLOBNBOX* blob;
+    while ((blob = vsearch.NextVerticalSearch(true)) != nullptr) {
+      const TBOX& box = blob->bounding_box();
+      // NOTE(review): top_y is the mover's end y and bottom_y its start y.
+      // If top_y > bottom_y, the two comparisons below look inverted relative
+      // to their comments (a box above bottom_y ends the search, a box below
+      // top_y is skipped) - confirm against the search direction.
+      if (box.top() > bottom_y)
+        return true;  // Nothing found.
+      if (box.bottom() < top_y)
+        continue;  // Doesn't overlap.
+      // Horizontal extent of the swept region at the bottom of this box,
+      // measured from this vector (not necessarily the mover).
+      int left_at_box = XAtY(box.bottom());
+      int right_at_box = left_at_box;
+      if (IsRightTab())
+        right_at_box += shift;
+      else
+        left_at_box -= shift;
+      // A blob with positive horizontal overlap with the swept region blocks
+      // the merge.
+      if (std::min(right_at_box, static_cast<int>(box.right())) > std::max(left_at_box, static_cast<int>(box.left())))
+        return false;
+    }
+    return true;  // Nothing found.
+  }
+  return false;
+}
+
+// Eat the other TabVector into this and delete it.
+//   vertical: passed to Fit() to refit this vector, forced parallel to it.
+//   other:    the vector being absorbed; its boxes are moved into this
+//             vector's list and other->Delete(this) is called at the end.
+// The extended y range becomes the union of both ranges. If other is ragged,
+// this vector takes over other's alignment.
+void TabVector::MergeWith(const ICOORD& vertical, TabVector* other) {
+  extended_ymin_ = std::min(extended_ymin_, other->extended_ymin_);
+  extended_ymax_ = std::max(extended_ymax_, other->extended_ymax_);
+  if (other->IsRagged()) {
+    alignment_ = other->alignment_;
+  }
+  // Merge sort the two lists of boxes.
+  BLOBNBOX_C_IT it1(&boxes_);
+  BLOBNBOX_C_IT it2(&other->boxes_);
+  while (!it2.empty()) {
+    // Take the next box out of the other list.
+    BLOBNBOX* bbox2 = it2.extract();
+    it2.forward();
+    TBOX box2 = bbox2->bounding_box();
+    // NOTE(review): it1.data() is called without an emptiness check, so this
+    // assumes boxes_ holds at least one box on entry - confirm with callers.
+    BLOBNBOX* bbox1 = it1.data();
+    TBOX box1 = bbox1->bounding_box();
+    // Advance it1 to the first box whose bottom is not below box2's bottom,
+    // stopping at the last element. it1 is not reset between boxes.
+    while (box1.bottom() < box2.bottom() && !it1.at_last()) {
+      it1.forward();
+      bbox1 = it1.data();
+      box1 = bbox1->bounding_box();
+    }
+    if (box1.bottom() < box2.bottom()) {
+      // Every box from here on is lower, so the new box goes at the end.
+      it1.add_to_end(bbox2);
+    } else if (bbox1 != bbox2) {
+      // Insert before the current box, unless it is the very same blob.
+      it1.add_before_stay_put(bbox2);
+    }
+  }
+  Fit(vertical, true);
+  other->Delete(this);
+}
+
+// Appends a partner to the end of the partner list.
+// Partners must be added in order of increasing y coordinate of the text line
+// that makes them partners.
+// Nothing is added if either vector is a separator, or if the partner is
+// already the last entry, so runs of identical partners collapse into one.
+void TabVector::AddPartner(TabVector* partner) {
+  if (IsSeparator() || partner->IsSeparator()) {
+    return;
+  }
+  TabVector_C_IT partner_it(&partners_);
+  const bool has_partners = !partner_it.empty();
+  if (has_partners) {
+    partner_it.move_to_last();
+    if (partner_it.data() == partner) {
+      return;  // Same as the most recent partner.
+    }
+  }
+  partner_it.add_after_then_move(partner);
+}
+
+// Returns true if other appears anywhere in this vector's partner list.
+bool TabVector::IsAPartner(const TabVector* other) {
+  TabVector_C_IT partner_it(&partners_);
+  partner_it.mark_cycle_pt();
+  while (!partner_it.cycled_list()) {
+    if (partner_it.data() == other) {
+      return true;
+    }
+    partner_it.forward();
+  }
+  return false;
+}
+
+// Human-readable names for alignments, indexed directly by alignment_ in
+// TabVector::Print().
+// These names must be synced with the TabAlignment enum in tabvector.h.
+static const char* const kAlignmentNames[] = {
+  "Left Aligned",
+  "Left Ragged",
+  "Center",
+  "Right Aligned",
+  "Right Ragged",
+  "Separator"
+};
+
+// Prints a one-line summary of this tab vector, preceded by the given
+// prefix: alignment name, end points, mean width, score, sort key and the
+// number of boxes and partners.
+void TabVector::Print(const char* prefix) {
+  const char* alignment_name = kAlignmentNames[alignment_];
+  tprintf(
+      "%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d,"
+      " partners=%d\n",
+      prefix, alignment_name,
+      startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y(),
+      mean_width_, percent_score_, sort_key_,
+      boxes_.length(), partners_.length());
+}
+
+// Prints the summary line for this tab vector followed by the bounding box
+// of every blob in its box list.
+void TabVector::Debug(const char* prefix) {
+  Print(prefix);
+  BLOBNBOX_C_IT box_it(&boxes_);
+  box_it.mark_cycle_pt();
+  while (!box_it.cycled_list()) {
+    BLOBNBOX* blob = box_it.data();
+    const TBOX& bounds = blob->bounding_box();
+    tprintf("Box at (%d,%d)->(%d,%d)\n",
+            bounds.left(), bounds.bottom(), bounds.right(), bounds.top());
+    box_it.forward();
+  }
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Draws this tab vector in the given window: the vector itself in a colour
+// chosen by alignment (always blue when textord_debug_printable is set), its
+// extensions to the extended y range in grey, and the percent score as text
+// at the start point.
+void TabVector::Display(ScrollView* tab_win) {
+  if (textord_debug_printable) {
+    tab_win->Pen(ScrollView::BLUE);
+  } else {
+    switch (alignment_) {
+      case TA_LEFT_ALIGNED:
+        tab_win->Pen(ScrollView::LIME_GREEN);
+        break;
+      case TA_LEFT_RAGGED:
+        tab_win->Pen(ScrollView::DARK_GREEN);
+        break;
+      case TA_RIGHT_ALIGNED:
+        tab_win->Pen(ScrollView::PINK);
+        break;
+      case TA_RIGHT_RAGGED:
+        tab_win->Pen(ScrollView::CORAL);
+        break;
+      default:
+        tab_win->Pen(ScrollView::WHITE);
+        break;
+    }
+  }
+  // The fitted segment.
+  tab_win->Line(startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y());
+  // The extensions below the start and above the end.
+  tab_win->Pen(ScrollView::GREY);
+  tab_win->Line(startpt_.x(), startpt_.y(), startpt_.x(), extended_ymin_);
+  tab_win->Line(endpt_.x(), extended_ymax_, endpt_.x(), endpt_.y());
+  char score_text[64];
+  snprintf(score_text, sizeof(score_text), "%d", percent_score_);
+  tab_win->TextAttributes("Times", 50, false, false, false);
+  tab_win->Text(startpt_.x(), startpt_.y(), score_text);
+}
+
+#endif
+
+// Brings the vector up to date: refits the line (forced parallel to
+// vertical) if needs_refit_ is set, then re-evaluates it if
+// needs_evaluation_ is set. The evaluation flag is tested after the refit.
+void TabVector::FitAndEvaluateIfNeeded(const ICOORD& vertical,
+                                       TabFind* finder) {
+  if (needs_refit_) {
+    Fit(vertical, true);
+  }
+  if (needs_evaluation_) {
+    Evaluate(vertical, finder);
+  }
+}
+
+// Evaluate the vector in terms of coverage of its length by good-looking
+// box edges. A good looking box is one where its nearest neighbour on the
+// inside is nearer than half the distance its nearest neighbour on the
+// outside of the putative column. Bad boxes are removed from the line.
+// A second pass then further filters boxes by requiring that the gutter
+// width be a minimum fraction of the median gutter along the line.
+// Finally the gutter is tested over the whole vector; if it is too small,
+// all boxes are dropped and the score is set to 0.
+//   vertical: passed on to the refit done when boxes have been removed.
+//   finder:   supplies the gutter width and neighbour gap measurements.
+// On return percent_score_ holds the percentage of the (trimmed) vector
+// length that is covered by good boxes, or 0 if the vector is rejected.
+void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
+  bool debug = false;
+  needs_evaluation_ = false;
+  const int initial_length = endpt_.y() - startpt_.y();
+  if (initial_length == 0 || boxes_.empty()) {
+    percent_score_ = 0;
+    Print("Zero length in evaluate");
+    return;
+  }
+  // Compute the mean box height.
+  BLOBNBOX_C_IT it(&boxes_);
+  int mean_height = 0;
+  int height_count = 0;
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* bbox = it.data();
+    const TBOX& box = bbox->bounding_box();
+    int height = box.height();
+    mean_height += height;
+    ++height_count;
+  }
+  if (height_count > 0) mean_height /= height_count;
+  // The same gutter limit is used by both passes below.
+  int max_gutter = kGutterMultiple * mean_height;
+  if (IsRagged()) {
+    // Ragged edges face a tougher test in that the gap must always be within
+    // the height of the blob.
+    max_gutter = kGutterToNeighbourRatio * mean_height;
+  }
+
+  STATS gutters(0, max_gutter + 1);
+  // Evaluate the boxes for their goodness, calculating the coverage as we go.
+  // Remove boxes that are not good and shorten the list to the first and
+  // last good boxes.
+  int num_deleted_boxes = 0;
+  bool text_on_image = false;
+  int good_length = 0;
+  const TBOX* prev_good_box = nullptr;
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* bbox = it.data();
+    const TBOX& box = bbox->bounding_box();
+    int mid_y = (box.top() + box.bottom()) / 2;
+    if (TabFind::WithinTestRegion(2, XAtY(box.bottom()), box.bottom())) {
+      if (!debug) {
+        tprintf("After already deleting %d boxes, ", num_deleted_boxes);
+        Print("Starting evaluation");
+      }
+      debug = true;
+    }
+    // A good box is one where the nearest neighbour on the inside is closer
+    // than half the distance to the nearest neighbour on the outside
+    // (of the putative column).
+    bool left = IsLeftTab();
+    int tab_x = XAtY(mid_y);
+    int gutter_width;
+    int neighbour_gap;
+    finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left,
+                                       bbox, &gutter_width, &neighbour_gap);
+    if (debug) {
+      tprintf("Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n",
+              box.left(), box.bottom(), box.right(), box.top(),
+              gutter_width, neighbour_gap);
+    }
+    // Now we can make the test.
+    if (neighbour_gap * kGutterToNeighbourRatio <= gutter_width) {
+      // A good box contributes its height to the good_length.
+      good_length += box.top() - box.bottom();
+      gutters.add(gutter_width, 1);
+      // Two good boxes together contribute the gap between them
+      // to the good_length as well, as long as the gap is not
+      // too big.
+      if (prev_good_box != nullptr) {
+        int vertical_gap = box.bottom() - prev_good_box->top();
+        double size1 = sqrt(static_cast<double>(prev_good_box->area()));
+        double size2 = sqrt(static_cast<double>(box.area()));
+        if (vertical_gap < kMaxFillinMultiple * std::min(size1, size2))
+          good_length += vertical_gap;
+        if (debug) {
+          tprintf("Box and prev good, gap=%d, target %g, goodlength=%d\n",
+                  vertical_gap, kMaxFillinMultiple * std::min(size1, size2),
+                  good_length);
+        }
+      } else {
+        // Adjust the start to the first good box.
+        SetYStart(box.bottom());
+      }
+      prev_good_box = &box;
+      if (bbox->flow() == BTFT_TEXT_ON_IMAGE)
+        text_on_image = true;
+    } else {
+      // Get rid of boxes that are not good.
+      if (debug) {
+        tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n",
+                box.left(), box.bottom(), box.right(), box.top(),
+                gutter_width, neighbour_gap);
+      }
+      it.extract();
+      ++num_deleted_boxes;
+    }
+  }
+  if (debug) {
+    Print("Evaluating:");
+  }
+  // If there are any good boxes, do it again, except this time get rid of
+  // boxes that have a gutter that is a small fraction of the median gutter.
+  // This filters out ends that run into a coincidental gap in the text.
+  int search_top = endpt_.y();
+  int search_bottom = startpt_.y();
+  int median_gutter = IntCastRounded(gutters.median());
+  if (gutters.get_total() > 0) {
+    prev_good_box = nullptr;
+    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+      BLOBNBOX* bbox = it.data();
+      const TBOX& box = bbox->bounding_box();
+      int mid_y = (box.top() + box.bottom()) / 2;
+      // A good box is one where the gutter width is at least some constant
+      // fraction of the median gutter width.
+      bool left = IsLeftTab();
+      int tab_x = XAtY(mid_y);
+      int gutter_width;
+      int neighbour_gap;
+      finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left,
+                                         bbox, &gutter_width, &neighbour_gap);
+      // Now we can make the test.
+      if (gutter_width >= median_gutter * kMinGutterFraction) {
+        if (prev_good_box == nullptr) {
+          // Adjust the start to the first good box.
+          SetYStart(box.bottom());
+          search_bottom = box.top();
+        }
+        prev_good_box = &box;
+        search_top = box.bottom();
+      } else {
+        // Get rid of boxes that are not good.
+        if (debug) {
+          tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n",
+                  box.left(), box.bottom(), box.right(), box.top(),
+                  gutter_width, median_gutter);
+        }
+        it.extract();
+        ++num_deleted_boxes;
+      }
+    }
+  }
+  // If there has been a good box, adjust the end.
+  if (prev_good_box != nullptr) {
+    SetYEnd(prev_good_box->top());
+    // Compute the percentage of the vector that is occupied by good boxes.
+    // The trimmed vector can have zero length (e.g. a single degenerate box),
+    // in which case the score is 0 rather than a division by zero.
+    const int trimmed_length = endpt_.y() - startpt_.y();
+    percent_score_ =
+        trimmed_length != 0 ? 100 * good_length / trimmed_length : 0;
+    if (num_deleted_boxes > 0) {
+      needs_refit_ = true;
+      FitAndEvaluateIfNeeded(vertical, finder);
+      if (boxes_.empty())
+        return;
+    }
+    // Test the gutter over the whole vector, instead of just at the boxes.
+    int required_shift;
+    if (search_bottom > search_top) {
+      search_bottom = startpt_.y();
+      search_top = endpt_.y();
+    }
+    double min_gutter_width = kLineCountReciprocal / boxes_.length();
+    min_gutter_width += IsRagged() ? kMinRaggedGutter : kMinAlignedGutter;
+    min_gutter_width *= mean_height;
+    int max_gutter_width = IntCastRounded(min_gutter_width) + 1;
+    if (median_gutter > max_gutter_width)
+      max_gutter_width = median_gutter;
+    int gutter_width = finder->GutterWidth(search_bottom, search_top, *this,
+                                           text_on_image, max_gutter_width,
+                                           &required_shift);
+    if (gutter_width < min_gutter_width) {
+      if (debug) {
+        tprintf("Rejecting bad tab Vector with %d gutter vs %g min\n",
+                gutter_width, min_gutter_width);
+      }
+      boxes_.shallow_clear();
+      percent_score_ = 0;
+    } else if (debug) {
+      tprintf("Final gutter %d, vs limit of %g, required shift = %d\n",
+              gutter_width, min_gutter_width, required_shift);
+    }
+  } else {
+    // There are no good boxes left, so score is 0.
+    percent_score_ = 0;
+  }
+
+  if (debug) {
+    Print("Evaluation complete:");
+  }
+}
+
+// (Re)Fit a line to the stored points. Returns false if the line
+// is degenerate. Although the TabVector code mostly doesn't care about the
+// direction of lines, XAtY would give silly results for a horizontal line.
+// The class is mostly aimed at use for vertical lines representing
+// horizontal tab stops.
+//   vertical:       direction to use for the line; replaced by the fitted
+//                   direction when a free fit is done and is not horizontal.
+//   force_parallel: if true, no free fit is done and the line is kept
+//                   parallel to vertical.
+// Clears needs_refit_. When there are boxes, also recomputes sort_key_,
+// mean_width_ and both end points, and sets needs_evaluation_.
+bool TabVector::Fit(ICOORD vertical, bool force_parallel) {
+  needs_refit_ = false;
+  if (boxes_.empty()) {
+    // Don't refit something with no boxes, as that only happens
+    // in Evaluate, and we don't want to end up with a zero vector.
+    if (!force_parallel)
+      return false;
+    // If we are forcing parallel, then we just need to set the sort_key_.
+    ICOORD midpt = startpt_;
+    midpt += endpt_;
+    midpt /= 2;
+    sort_key_ = SortKey(vertical, midpt.x(), midpt.y());
+    return startpt_.y() != endpt_.y();
+  }
+  if (!force_parallel && !IsRagged()) {
+    // Use a fitted line as the vertical.
+    DetLineFit linepoints;
+    BLOBNBOX_C_IT it(&boxes_);
+    // Fit a line to all the boxes in the list.
+    // Each box contributes the bottom of its tab-side edge; the last box
+    // also contributes the top of that edge.
+    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+      BLOBNBOX* bbox = it.data();
+      const TBOX& box = bbox->bounding_box();
+      int x1 = IsRightTab() ? box.right() : box.left();
+      ICOORD boxpt(x1, box.bottom());
+      linepoints.Add(boxpt);
+      if (it.at_last()) {
+        ICOORD top_pt(x1, box.top());
+        linepoints.Add(top_pt);
+      }
+    }
+    linepoints.Fit(&startpt_, &endpt_);
+    // Only adopt the fitted direction if it is not horizontal.
+    if (startpt_.y() != endpt_.y()) {
+      vertical = endpt_;
+      vertical -= startpt_;
+    }
+  }
+  int start_y = startpt_.y();
+  int end_y = endpt_.y();
+  // Start from the worst possible key so that the first box always wins.
+  sort_key_ = IsLeftTab() ? INT32_MAX : -INT32_MAX;
+  BLOBNBOX_C_IT it(&boxes_);
+  // Choose a line parallel to the vertical such that all boxes are on the
+  // correct side of it.
+  mean_width_ = 0;
+  int width_count = 0;
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* bbox = it.data();
+    const TBOX& box = bbox->bounding_box();
+    mean_width_ += box.width();
+    ++width_count;
+    int x1 = IsRightTab() ? box.right() : box.left();
+    // Test both the bottom and the top, as one will be more extreme, depending
+    // on the direction of skew.
+    int bottom_y = box.bottom();
+    int top_y = box.top();
+    // Left tabs keep the smallest key seen, all other tabs the largest.
+    int key = SortKey(vertical, x1, bottom_y);
+    if (IsLeftTab() == (key < sort_key_)) {
+      sort_key_ = key;
+      startpt_ = ICOORD(x1, bottom_y);
+    }
+    key = SortKey(vertical, x1, top_y);
+    if (IsLeftTab() == (key < sort_key_)) {
+      sort_key_ = key;
+      startpt_ = ICOORD(x1, top_y);
+    }
+    // The y extent runs from the bottom of the first box in the list to the
+    // top of the last one.
+    if (it.at_first())
+      start_y = bottom_y;
+    if (it.at_last())
+      end_y = top_y;
+  }
+  if (width_count > 0) {
+    // Mean width, rounded up.
+    mean_width_ = (mean_width_ + width_count - 1) / width_count;
+  }
+  endpt_ = startpt_ + vertical;
+  needs_evaluation_ = true;
+  if (start_y != end_y) {
+    // Set the ends of the vector to fully include the first and last blobs.
+    startpt_.set_x(XAtY(vertical, sort_key_, start_y));
+    startpt_.set_y(start_y);
+    endpt_.set_x(XAtY(vertical, sort_key_, end_y));
+    endpt_.set_y(end_y);
+    return true;
+  }
+  // Zero vertical extent: the line is degenerate.
+  return false;
+}
+
+// Returns the only partner when there is exactly one, or nullptr otherwise.
+TabVector* TabVector::GetSinglePartner() {
+  if (partners_.singleton()) {
+    TabVector_C_IT sole_it(&partners_);
+    return sole_it.data();
+  }
+  return nullptr;
+}
+
+// Return the single partner of this TabVector if enough boxes are shared with
+// it and the median vertical gap is small (vertical text), otherwise nullptr.
+TabVector* TabVector::VerticalTextlinePartner() {
+  if (!partners_.singleton())  // Only a vector with one partner qualifies.
+    return nullptr;
+  TabVector_C_IT partner_it(&partners_);
+  TabVector* partner = partner_it.data();
+  BLOBNBOX_C_IT box_it1(&boxes_);
+  BLOBNBOX_C_IT box_it2(&partner->boxes_);
+  // Walk both box lists in step, counting the boxes that appear in both.
+  // At the same time, gather the total box width and the vertical gaps.
+  if (textord_debug_tabfind > 1) {
+    Print("Testing for vertical text");
+    partner->Print("           partner");
+  }
+  int num_matched = 0;
+  int num_unmatched = 0;
+  int total_widths = 0;
+  int width = startpt().x() - partner->startpt().x();  // x distance of starts.
+  if (width < 0)
+    width = -width;
+  STATS gaps(0, width * 2);  // Gaps between successive boxes of this vector.
+  BLOBNBOX* prev_bbox = nullptr;
+  box_it2.mark_cycle_pt();
+  for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) {
+    BLOBNBOX* bbox = box_it1.data();
+    TBOX box = bbox->bounding_box();
+    if (prev_bbox != nullptr) {
+      gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1);
+    }
+    while (!box_it2.cycled_list() && box_it2.data() != bbox &&
+           box_it2.data()->bounding_box().bottom() < box.bottom()) {
+      box_it2.forward();  // Skip partner boxes that lie below this box.
+    }
+    if (!box_it2.cycled_list() && box_it2.data() == bbox &&
+        bbox->region_type() >= BRT_UNKNOWN &&
+        (prev_bbox == nullptr || prev_bbox->region_type() >= BRT_UNKNOWN))
+      ++num_matched;  // Same blob is in both lists, region types acceptable.
+    else
+      ++num_unmatched;
+    total_widths += box.width();
+    prev_bbox = bbox;
+  }
+  if (num_unmatched + num_matched == 0) return nullptr;  // Avoid divide by 0.
+  double avg_width = total_widths * 1.0 / (num_unmatched + num_matched);
+  double max_gap = textord_tabvector_vertical_gap_fraction * avg_width;
+  int min_box_match = static_cast<int>((num_matched + num_unmatched) *
+                                       textord_tabvector_vertical_box_ratio);
+  bool is_vertical = (gaps.get_total() > 0 &&
+                      num_matched >= min_box_match &&
+                      gaps.median() <= max_gap);
+  if (textord_debug_tabfind > 1) {
+    tprintf("gaps=%d, matched=%d, unmatched=%d, min_match=%d "
+            "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n",
+            gaps.get_total(), num_matched, num_unmatched, min_box_match,
+            gaps.median(), avg_width, max_gap, is_vertical?"Yes":"No");
+  }
+  return (is_vertical) ? partner : nullptr;
+}
+
+// The constructor is private. The entries of boxes are added to boxes_.
+TabVector::TabVector(int extended_ymin, int extended_ymax,
+                     TabAlignment alignment, BLOBNBOX_CLIST* boxes)
+  : extended_ymin_(extended_ymin), extended_ymax_(extended_ymax),
+    sort_key_(0), percent_score_(0), mean_width_(0),
+    needs_refit_(true), needs_evaluation_(true), alignment_(alignment),
+    top_constraints_(nullptr), bottom_constraints_(nullptr) {
+  BLOBNBOX_C_IT it(&boxes_);  // boxes_ starts out empty here.
+  it.add_list_after(boxes);  // NOTE(review): presumably empties boxes; confirm.
+}
+
+// Delete this, but first, repoint all the partners to point to
+// replacement. If replacement is nullptr, then partner relationships
+// are removed. Ends with "delete this", so the object is gone on return.
+void TabVector::Delete(TabVector* replacement) {
+  TabVector_C_IT it(&partners_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    TabVector* partner = it.data();
+    TabVector_C_IT p_it(&partner->partners_);
+    // If partner already has replacement in its list, then null the local
+    // partner_replacement, and just remove this TabVector when we find it.
+    TabVector* partner_replacement = replacement;
+    for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
+      TabVector* p_partner = p_it.data();
+      if (p_partner == partner_replacement) {
+        partner_replacement = nullptr;
+        break;
+      }
+    }
+    // Remove all references to this, and replace with replacement if not nullptr.
+    for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
+      TabVector* p_partner = p_it.data();
+      if (p_partner == this) {
+        p_it.extract();
+        if (partner_replacement != nullptr)
+          p_it.add_before_stay_put(partner_replacement);
+      }
+    }
+    if (partner_replacement != nullptr) {
+      partner_replacement->AddPartner(partner);  // Link the other direction.
+    }
+  }
+  delete this;  // The caller must not touch this vector afterwards.
+}
+
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/tabvector.h b/tesseract/src/textord/tabvector.h
new file mode 100644
index 00000000..ce7464b8
--- /dev/null
+++ b/tesseract/src/textord/tabvector.h
@@ -0,0 +1,429 @@
+///////////////////////////////////////////////////////////////////////
+// File:        tabvector.h
+// Description: Class to hold a near-vertical vector representing a tab-stop.
+// Author:      Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_TABVECTOR_H_
+#define TESSERACT_TEXTORD_TABVECTOR_H_
+
+#include "blobgrid.h"
+#include "clst.h"
+#include "elst.h"
+#include "elst2.h"
+#include "rect.h"
+#include "bbgrid.h"
+
+#include <algorithm>
+
+class BLOBNBOX;
+class ScrollView;
+
+namespace tesseract {
+
+
+extern double_VAR_H(textord_tabvector_vertical_gap_fraction, 0.5,
+  "Max fraction of mean blob width allowed for vertical gaps in vertical text");
+extern double_VAR_H(textord_tabvector_vertical_box_ratio, 0.5,
+  "Fraction of box matches required to declare a line vertical");
+
+// The alignment type that a tab vector represents.
+// Keep this enum synced with kAlignmentNames in tabvector.cpp.
+enum TabAlignment {
+  TA_LEFT_ALIGNED,      // Left tab stop (see TabVector::IsLeftTab).
+  TA_LEFT_RAGGED,       // Left tab stop that is also ragged (see IsRagged).
+  TA_CENTER_JUSTIFIED,  // Center tab stop (see TabVector::IsCenterTab).
+  TA_RIGHT_ALIGNED,     // Right tab stop (see TabVector::IsRightTab).
+  TA_RIGHT_RAGGED,      // Right tab stop that is also ragged (see IsRagged).
+  TA_SEPARATOR,         // Separator line (see TabVector::IsSeparator).
+  TA_COUNT              // Last entry; presumably the number of alignments.
+};
+
+// Forward declarations. The classes use their own list types, so the
+// list types must be declared before the class definitions below.
+class TabFind;
+class TabVector;
+class TabConstraint;
+
+ELIST2IZEH(TabVector)     // Presumably TabVector_LIST (ELIST2_LINK based).
+CLISTIZEH(TabVector)      // Presumably TabVector_CLIST and TabVector_C_IT.
+ELISTIZEH(TabConstraint)  // Presumably TabConstraint_LIST (ELIST_LINK based).
+
+// TabConstraint is a totally self-contained class to maintain
+// a list of [min,max] constraints, each referring to a TabVector.
+// The constraints are manipulated through static methods that act
+// on a list of constraints. The list itself is cooperatively owned
+// by the TabVectors of the constraints on the list and managed
+// by implicit reference counting via the elements of the list.
+class TabConstraint : public ELIST_LINK {
+ public:
+  // This empty constructor is here only so that the class can be ELISTIZED.
+  // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
+  // and eliminate CLASSNAME##_copier.
+  TabConstraint() = default;  // Members take their in-class defaults below.
+
+  // Create a constraint for the top or bottom of this TabVector.
+  static void CreateConstraint(TabVector* vector, bool is_top);
+
+  // Test to see if the constraints are compatible enough to merge.
+  static bool CompatibleConstraints(TabConstraint_LIST* list1,
+                                    TabConstraint_LIST* list2);
+
+  // Merge the lists of constraints and update the TabVector pointers.
+  // The second list is deleted.
+  static void MergeConstraints(TabConstraint_LIST* list1,
+                               TabConstraint_LIST* list2);
+
+  // Set all the tops and bottoms as appropriate to a mean of the
+  // constrained range. Delete all the constraints and list.
+  static void ApplyConstraints(TabConstraint_LIST* constraints);
+
+ private:
+  TabConstraint(TabVector* vector, bool is_top);
+
+  // Get the max of the mins and the min of the maxes.
+  static void GetConstraints(TabConstraint_LIST* constraints,
+                             int* y_min, int* y_max);
+
+  // The TabVector this constraint applies to.
+  TabVector* vector_ = nullptr;
+  // If true then we refer to the top of the vector_.
+  bool is_top_ = false;
+  // The allowed range of this vector_.
+  int y_min_ = 0;
+  int y_max_ = 0;
+};
+
+// Class to hold information about a single vector
+// that represents a tab stop or a rule line.
+class TabVector : public ELIST2_LINK {
+ public:
+  // TODO(rays) fix this in elst.h line 1076, where it should use the
+  // copy constructor instead of operator=.
+  TabVector() = default;
+  ~TabVector() = default;
+
+  // Public factory to build a TabVector from a list of boxes.
+  // The TabVector will be of the given alignment type.
+  // The input vertical vector is used in fitting, and the output
+  // vertical_x, vertical_y have the resulting line vector added to them
+  // if the alignment is not ragged.
+  // The extended_start_y and extended_end_y are the maximum possible
+  // extension to the line segment that can be used to align with others.
+  // The input CLIST of BLOBNBOX good_points is consumed and taken over.
+  static TabVector* FitVector(TabAlignment alignment, ICOORD vertical,
+                              int  extended_start_y, int extended_end_y,
+                              BLOBNBOX_CLIST* good_points,
+                              int* vertical_x, int* vertical_y);
+
+  // Build a ragged TabVector by copying another's direction, shifting it
+  // to match the given blob, and making its initial extent the height
+  // of the blob, but its extended bounds from the bounds of the original.
+  TabVector(const TabVector& src, TabAlignment alignment,
+            const ICOORD& vertical_skew, BLOBNBOX* blob);
+
+  // Copies basic attributes of a tab vector for simple operations.
+  // Copies things such as startpt, endpt, range, width.
+  // Does not copy things such as partners, boxes, or constraints.
+  // This is useful if you only need vector information for processing, such
+  // as in the table detection code.
+  TabVector* ShallowCopy() const;
+
+  // Simple accessors.
+  const ICOORD& startpt() const {
+    return startpt_;
+  }
+  const ICOORD& endpt() const {
+    return endpt_;
+  }
+  int extended_ymax() const {
+    return extended_ymax_;
+  }
+  int extended_ymin() const {
+    return extended_ymin_;
+  }
+  int sort_key() const {
+    return sort_key_;
+  }
+  int mean_width() const {
+    return mean_width_;
+  }
+  void set_top_constraints(TabConstraint_LIST* constraints) {
+    top_constraints_ = constraints;
+  }
+  void set_bottom_constraints(TabConstraint_LIST* constraints) {
+    bottom_constraints_ = constraints;
+  }
+  TabVector_CLIST* partners() {
+    return &partners_;
+  }
+  void set_startpt(const ICOORD& start) {
+    startpt_ = start;
+  }
+  void set_endpt(const ICOORD& end) {
+    endpt_ = end;
+  }
+  bool intersects_other_lines() const {
+    return intersects_other_lines_;
+  }
+  void set_intersects_other_lines(bool value) {
+    intersects_other_lines_ = value;
+  }
+
+  // Inline quasi-accessors that require some computation.
+
+  // Compute the x coordinate at the given y coordinate.
+  int XAtY(int y) const {
+    int height = endpt_.y() - startpt_.y();
+    if (height != 0)
+      return (y - startpt_.y()) * (endpt_.x() - startpt_.x()) / height +
+             startpt_.x();
+    else
+      return startpt_.x();
+  }
+
+  // Compute the vertical overlap with the other TabVector.
+  int VOverlap(const TabVector& other) const {
+    return std::min(other.endpt_.y(), endpt_.y()) -
+            std::max(other.startpt_.y(), startpt_.y());
+  }
+  // Compute the vertical overlap with the given y bounds.
+  int VOverlap(int top_y, int bottom_y) const {
+    return std::min(top_y, static_cast<int>(endpt_.y())) - std::max(bottom_y, static_cast<int>(startpt_.y()));
+  }
+  // Compute the extended vertical overlap with the given y bounds.
+  int ExtendedOverlap(int top_y, int bottom_y) const {
+    return std::min(top_y, extended_ymax_) - std::max(bottom_y, extended_ymin_);
+  }
+
+  // Return true if this is a left tab stop, either aligned, or ragged.
+  bool IsLeftTab() const {
+    return alignment_ == TA_LEFT_ALIGNED || alignment_ == TA_LEFT_RAGGED;
+  }
+  // Return true if this is a right tab stop, either aligned, or ragged.
+  bool IsRightTab() const {
+    return alignment_ == TA_RIGHT_ALIGNED || alignment_ == TA_RIGHT_RAGGED;
+  }
+  // Return true if this is a separator.
+  bool IsSeparator() const {
+    return alignment_ == TA_SEPARATOR;
+  }
+  // Return true if this is a center aligned tab stop.
+  bool IsCenterTab() const {
+    return alignment_ == TA_CENTER_JUSTIFIED;
+  }
+  // Return true if this is a ragged tab stop, either left or right.
+  bool IsRagged() const {
+    return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED;
+  }
+
+  // Return true if this vector is to the left of the other in terms
+  // of sort_key_.
+  bool IsLeftOf(const TabVector& other) const {
+    return sort_key_ < other.sort_key_;
+  }
+
+  // Return true if the vector has no partners.
+  bool Partnerless() {
+    return partners_.empty();
+  }
+
+  // Return the number of tab boxes in this vector.
+  int BoxCount() {
+    return boxes_.length();
+  }
+
+  // Lock the vector from refits by clearing the boxes_ list.
+  void Freeze() {
+    boxes_.shallow_clear();
+  }
+
+  // Flip x and y on the ends so a vector can be created from flipped input.
+  void XYFlip() {
+    int x = startpt_.y();
+    startpt_.set_y(startpt_.x());
+    startpt_.set_x(x);
+    x = endpt_.y();
+    endpt_.set_y(endpt_.x());
+    endpt_.set_x(x);
+  }
+
+  // Reflect the tab vector in the y-axis.
+  void ReflectInYAxis() {
+    startpt_.set_x(-startpt_.x());
+    endpt_.set_x(-endpt_.x());
+    sort_key_ = -sort_key_;
+    if (alignment_ == TA_LEFT_ALIGNED)
+      alignment_ = TA_RIGHT_ALIGNED;
+    else if (alignment_ == TA_RIGHT_ALIGNED)
+      alignment_ = TA_LEFT_ALIGNED;
+    if (alignment_ == TA_LEFT_RAGGED)
+      alignment_ = TA_RIGHT_RAGGED;
+    else if (alignment_ == TA_RIGHT_RAGGED)
+      alignment_ = TA_LEFT_RAGGED;
+  }
+
+  // Separate function to compute the sort key for a given coordinate pair.
+  static int SortKey(const ICOORD& vertical, int x, int y) {
+    ICOORD pt(x, y);
+    return pt * vertical;
+  }
+
+  // Return the x at the given y for the given sort key.
+  static int XAtY(const ICOORD& vertical, int sort_key, int y) {
+    if (vertical.y() != 0)
+      return (vertical.x() * y + sort_key) / vertical.y();
+    else
+      return sort_key;
+  }
+
+  // Sort function for E2LIST::sort to sort by sort_key_.
+  static int SortVectorsByKey(const void* v1, const void* v2) {
+    const int key1 = (*static_cast<const TabVector* const*>(v1))->sort_key_;
+    const int key2 = (*static_cast<const TabVector* const*>(v2))->sort_key_;
+    return (key1 > key2) - (key1 < key2);  // No subtraction: it can overflow.
+  }
+
+  // More complex members.
+
+  // Extend this vector to include the supplied blob if it doesn't
+  // already have it.
+  void ExtendToBox(BLOBNBOX* blob);
+
+  // Set the ycoord of the start and move the xcoord to match.
+  void SetYStart(int start_y);
+  // Set the ycoord of the end and move the xcoord to match.
+  void SetYEnd(int end_y);
+
+  // Rotate the ends by the given vector.
+  void Rotate(const FCOORD& rotation);
+
+  // Setup the initial constraints, being the limits of
+  // the vector and the extended ends.
+  void SetupConstraints();
+
+  // Setup the constraints between the partners of this TabVector.
+  void SetupPartnerConstraints();
+
+  // Setup the constraints between this and its partner.
+  void SetupPartnerConstraints(TabVector* partner);
+
+  // Use the constraints to modify the top and bottom.
+  void ApplyConstraints();
+
+  // Merge close tab vectors of the same side that overlap.
+  static void MergeSimilarTabVectors(const ICOORD& vertical,
+                                     TabVector_LIST* vectors, BlobGrid* grid);
+
+  // Return true if this vector is the same side, overlaps, and close
+  // enough to the other to be merged.
+  bool SimilarTo(const ICOORD& vertical,
+                 const TabVector& other, BlobGrid* grid) const;
+
+  // Eat the other TabVector into this and delete it.
+  void MergeWith(const ICOORD& vertical, TabVector* other);
+
+  // Add a new element to the list of partner TabVectors.
+  // Partners must be added in order of increasing y coordinate of the text line
+  // that makes them partners.
+  // Groups of identical partners are merged into one.
+  void AddPartner(TabVector* partner);
+
+  // Return true if other is a partner of this.
+  bool IsAPartner(const TabVector* other);
+
+  // Print basic information about this tab vector.
+  void Print(const char* prefix);
+
+  // Print basic information about this tab vector and every box in it.
+  void Debug(const char* prefix);
+
+  // Draw this tabvector in place in the given window.
+  void Display(ScrollView* tab_win);
+
+  // Refit the line and/or re-evaluate the vector if the dirty flags are set.
+  void FitAndEvaluateIfNeeded(const ICOORD& vertical, TabFind* finder);
+
+  // Evaluate the vector in terms of coverage of its length by good-looking
+  // box edges. A good looking box is one where its nearest neighbour on the
+  // inside is nearer than half the distance its nearest neighbour on the
+  // outside of the putative column. Bad boxes are removed from the line.
+  // A second pass then further filters boxes by requiring that the gutter
+  // width be a minimum fraction of the mean gutter along the line.
+  void Evaluate(const ICOORD& vertical, TabFind* finder);
+
+  // (Re)Fit a line to the stored points. Returns false if the line
+  // is degenerate. Although the TabVector code mostly doesn't care about the
+  // direction of lines, XAtY would give silly results for a horizontal line.
+  // The class is mostly aimed at use for vertical lines representing
+  // horizontal tab stops.
+  bool Fit(ICOORD vertical, bool force_parallel);
+
+  // Return the partner of this TabVector if the vector qualifies as
+  // being a vertical text line, otherwise nullptr.
+  TabVector* VerticalTextlinePartner();
+
+  // Return the matching tabvector if there is exactly one partner, or
+  // nullptr otherwise.  This can be used after matching is done, eg. by
+  // VerticalTextlinePartner(), without checking if the line is vertical.
+  TabVector* GetSinglePartner();
+
+ private:
+  // Constructor is private as the static factory is the external way
+  // to build a TabVector.
+  TabVector(int extended_ymin, int extended_ymax,
+            TabAlignment alignment, BLOBNBOX_CLIST* boxes);
+
+  // Delete this, but first, repoint all the partners to point to
+  // replacement. If replacement is nullptr, then partner relationships
+  // are removed.
+  void Delete(TabVector* replacement);
+
+ private:
+  // The bottom of the tab line.
+  ICOORD startpt_;
+  // The top of the tab line.
+  ICOORD endpt_;
+  // The lowest y that the vector might extend to.
+  int extended_ymin_ = 0;
+  // The highest y that the vector might extend to.
+  int extended_ymax_ = 0;
+  // Perpendicular distance of vector from a given vertical for sorting.
+  int sort_key_ = 0;
+  // Result of Evaluate 0-100. Coverage of line with good boxes.
+  int percent_score_ = 0;
+  // The mean width of the blobs. Meaningful only for separator lines.
+  int mean_width_ = 0;
+  // True if the boxes_ list has been modified, so a refit is needed.
+  bool needs_refit_ = false;
+  // True if a fit has been done, so re-evaluation is needed.
+  bool needs_evaluation_ = false;
+  // True if a separator line intersects at least 2 other lines.
+  bool intersects_other_lines_ = false;
+  // The type of this TabVector.
+  TabAlignment alignment_ = TA_LEFT_ALIGNED;
+  // The list of boxes whose edges are aligned at this TabVector.
+  BLOBNBOX_CLIST boxes_;
+  // List of TabVectors that have a connection with this via a text line.
+  TabVector_CLIST partners_;
+  // Constraints used to resolve the exact location of the top and bottom
+  // of the tab line.
+  TabConstraint_LIST* top_constraints_ = nullptr;
+  TabConstraint_LIST* bottom_constraints_ = nullptr;
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_TEXTORD_TABVECTOR_H_
diff --git a/tesseract/src/textord/textlineprojection.cpp b/tesseract/src/textord/textlineprojection.cpp
new file mode 100644
index 00000000..e52abaa0
--- /dev/null
+++ b/tesseract/src/textord/textlineprojection.cpp
@@ -0,0 +1,779 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: rays@google.com (Ray Smith)
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "textlineprojection.h"
+#include "allheaders.h"
+#include "bbgrid.h"         // Base class.
+#include "blobbox.h"        // BlobNeighourDir.
+#include "blobs.h"
+#include "colpartition.h"
+#include "normalis.h"
+
+#include <algorithm>
+
+// Padding factor to use on definitely oriented blobs.
+const int kOrientedPadFactor = 8;
+// Padding factor to use on not definitely oriented blobs.
+const int kDefaultPadFactor = 2;
+// Penalty factor for going away from the line center.
+const int kWrongWayPenalty = 4;
+// Ratio between parallel gap and perpendicular gap used to measure total
+// distance of a box from a target box in curved textline space.
+// parallel-gap is treated more favorably by this factor to allow catching
+// quotes and ellipsis at the end of textlines.
+const int kParaPerpDistRatio = 4;
+// Multiple of scale_factor_ that the inter-line gap must be before we start
+// padding the increment box perpendicular to the text line.
+const int kMinLineSpacingFactor = 4;
+// Maximum tab-stop overrun for horizontal padding, in projection pixels.
+const int kMaxTabStopOverrun = 6;
+
+namespace tesseract {
+
+TextlineProjection::TextlineProjection(int resolution)
+  : x_origin_(0), y_origin_(0), pix_(nullptr) {
+  // Aim for a projection map of roughly 100 ppi regardless of the input.
+  const int rounded_scale = IntCastRounded(resolution / 100.0);
+  scale_factor_ = rounded_scale < 1 ? 1 : rounded_scale;
+}
+TextlineProjection::~TextlineProjection() {
+  pixDestroy(&pix_);  // Release the projection image held in pix_.
+}
+
+// Builds the projection map from the blob lists in input_block, given a
+// rotation to convert to image coords and a full-resolution nontext_map
+// marking out areas to avoid.
+// Assumptions made during construction:
+// The rotation is a multiple of 90 degrees, ie no deskew yet.
+// The blobs have had their left and right rules set to also limit
+// the range of projection.
+void TextlineProjection::ConstructProjection(TO_BLOCK* input_block,
+                                             const FCOORD& rotation,
+                                             Pix* nontext_map) {
+  pixDestroy(&pix_);
+  const int full_width = pixGetWidth(nontext_map);
+  const int full_height = pixGetHeight(nontext_map);
+  TBOX image_box(0, 0, full_width, full_height);
+  x_origin_ = 0;
+  y_origin_ = image_box.height();
+  int map_width = (image_box.width() + scale_factor_ - 1) / scale_factor_;
+  int map_height = (image_box.height() + scale_factor_ - 1) / scale_factor_;
+  pix_ = pixCreate(map_width, map_height, 8);
+  ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map);
+  ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map);
+  Pix* smoothed = pixBlockconv(pix_, 1, 1);  // Was also tried with (2, 2).
+  pixDestroy(&pix_);
+  pix_ = smoothed;
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Draws each blob's bounding box in win: yellow if UniquelyVertical(),
+// else red when BoxOutOfHTextline() flags the box, and blue when not.
+void TextlineProjection::PlotGradedBlobs(BLOBNBOX_LIST* blobs,
+                                         ScrollView* win) {
+  BLOBNBOX_IT blob_it(blobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    const TBOX& box = blob->bounding_box();
+    const bool off_line = BoxOutOfHTextline(box, nullptr, false);
+    auto pen = off_line ? ScrollView::RED : ScrollView::BLUE;
+    if (blob->UniquelyVertical()) pen = ScrollView::YELLOW;
+    win->Pen(pen);
+    win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
+  }
+  win->Update();
+}
+
+#endif // !GRAPHICS_DISABLED
+
+// Transfers blobs that do not appear to sit well on a textline from the
+// blobs list to the end of the small_blobs list.
+// This gets them away from initial textline finding to stop diacritics
+// from forming incorrect textlines. (Introduced mainly to fix Thai.)
+void TextlineProjection::MoveNonTextlineBlobs(
+    BLOBNBOX_LIST* blobs, BLOBNBOX_LIST* small_blobs) const {
+  BLOBNBOX_IT src_it(blobs);
+  BLOBNBOX_IT dest_it(small_blobs);
+  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
+    BLOBNBOX* blob = src_it.data();
+    const TBOX& box = blob->bounding_box();
+    const bool debug =
+        AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
+    if (!BoxOutOfHTextline(box, nullptr, debug) || blob->UniquelyVertical())
+      continue;
+    blob->ClearNeighbours();
+    dest_it.add_to_end(src_it.extract());
+  }
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Opens a "Projection" window showing a false-color rendering of pix_.
+void TextlineProjection::DisplayProjection() const {
+  int width = pixGetWidth(pix_);
+  int height = pixGetHeight(pix_);
+  Pix* pixc = pixCreate(width, height, 32);
+  int src_wpl = pixGetWpl(pix_);
+  int col_wpl = pixGetWpl(pixc);
+  uint32_t* src_data = pixGetData(pix_);
+  uint32_t* col_data = pixGetData(pixc);
+  for (int row = 0; row < height; ++row) {
+    uint32_t* gray_line = src_data + row * src_wpl;
+    for (int col = 0; col < width; ++col) {
+      const int density = GET_DATA_BYTE(gray_line, col);
+      int red = 0, green = 0, blue = 255;
+      if (density <= 17) blue = density * 15;
+      else if (density <= 145) green = (density - 17) * 2;
+      else { red = (density - 145) * 2; green = 255; }
+      l_uint32 rgb;
+      composeRGBPixel(red, green, blue, &rgb);
+      col_data[row * col_wpl + col] = rgb;
+    }
+  }
+  auto* win = new ScrollView("Projection", 0, 0,
+                                   width, height, width, height);
+  win->Image(pixc, 0, 0);
+  win->Update();
+  pixDestroy(&pixc);
+}
+
+#endif // !GRAPHICS_DISABLED
+
+// Measures the distance of box from part in curved projection space.
+// Works like DistanceOfBoxFromBox, except that the direction is taken from
+// the ColPartition and the median bounds of the ColPartition are used as
+// the to_box.
+int TextlineProjection::DistanceOfBoxFromPartition(const TBOX& box,
+                                                   const ColPartition& part,
+                                                   const DENORM* denorm,
+                                                   bool debug) const {
+  const bool horizontal = part.IsHorizontalType();
+  // Start from the partition bounds, then tighten them to the medians.
+  TBOX median_box = part.bounding_box();
+  if (!horizontal) {
+    // Vertical partition: use the median left/right of its blobs.
+    median_box.set_left(part.median_left());
+    median_box.set_right(part.median_right());
+  } else {
+    median_box.set_top(part.median_top());
+    median_box.set_bottom(part.median_bottom());
+  }
+  // Delegate the actual measurement to DistanceOfBoxFromBox.
+  return DistanceOfBoxFromBox(box, median_box, horizontal, denorm, debug);
+}
+
+// Compute the distance from the from_box to the to_box using curved
+// projection space. Separation that involves a decrease in projection
+// density (moving from the from_box to the to_box) is weighted more heavily
+// than constant density, and an increase is weighted less.
+// If horizontal_textline is true, then curved space is used vertically,
+// as for a diacritic on the edge of a textline.
+// The projection uses original image coords, so denorm (when not nullptr)
+// is used to get back to the image coords from box/part space.
+// How the calculation works: Think of a diacritic near a textline.
+// Distance is measured from the far side of the from_box to the near side of
+// the to_box. Shown is the horizontal textline case.
+//          |------^-----|
+//          | from | box |
+//          |------|-----|
+//   perpendicular |
+//          <------v-------->|--------------------|
+//                  parallel |     to box         |
+//                           |--------------------|
+// Perpendicular distance uses "curved space" See VerticalDistance below.
+// Parallel distance is linear.
+// Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio.
+int TextlineProjection::DistanceOfBoxFromBox(const TBOX& from_box,
+                                             const TBOX& to_box,
+                                             bool horizontal_textline,
+                                             const DENORM* denorm,
+                                             bool debug) const {
+  // The parallel_gap is the horizontal gap between a horizontal textline and
+  // the box. Analogous for vertical.
+  int parallel_gap = 0;
+  // start_pt is the from_box end of the line to be modified for curved space.
+  TPOINT start_pt;
+  // end_pt is the to_box end of the line to be modified for curved space.
+  TPOINT end_pt;
+  if (horizontal_textline) {
+    parallel_gap = from_box.x_gap(to_box) + from_box.width();
+    start_pt.x = (from_box.left() + from_box.right()) / 2;
+    end_pt.x = start_pt.x;
+    if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) {
+      start_pt.y = from_box.top();  // from_box overshoots more at the top.
+      end_pt.y = std::min(to_box.top(), start_pt.y);
+    } else {
+      start_pt.y = from_box.bottom();  // Overshoots more at the bottom.
+      end_pt.y = std::max(to_box.bottom(), start_pt.y);
+    }
+  } else {
+    parallel_gap = from_box.y_gap(to_box) + from_box.height();
+    if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) {
+      start_pt.x = from_box.right();  // from_box overshoots more on the right.
+      end_pt.x = std::min(to_box.right(), start_pt.x);
+    } else {
+      start_pt.x = from_box.left();  // Overshoots more on the left.
+      end_pt.x = std::max(to_box.left(), start_pt.x);
+    }
+    start_pt.y = (from_box.bottom() + from_box.top()) / 2;
+    end_pt.y = start_pt.y;
+  }
+  // The perpendicular gap is the max vertical distance gap out of:
+  // top of from_box to to_box top and bottom of from_box to to_box bottom.
+  // This value is then modified for curved projection space.
+  // Analogous for vertical.
+  int perpendicular_gap = 0;
+  // If start_pt == end_pt, then the from_box lies entirely within the to_box
+  // (in the perpendicular direction), so we don't need to calculate the
+  // perpendicular_gap.
+  if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) {
+    if (denorm != nullptr) {
+      // Denormalize the start and end.
+      denorm->DenormTransform(nullptr, start_pt, &start_pt);
+      denorm->DenormTransform(nullptr, end_pt, &end_pt);
+    }
+    if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) {
+      perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y,
+                                           end_pt.y);  // Mostly vertical.
+    } else {
+      perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x,
+                                             start_pt.y);  // Mostly horizontal.
+    }
+  }
+  // The parallel_gap weighs less than the perpendicular_gap.
+  return perpendicular_gap + parallel_gap / kParaPerpDistRatio;
+}
+
+// Compute the distance between (x, y1) and (x, y2) using the rule that
+// a decrease in textline density is weighted more heavily than an increase.
+// The coordinates are in source image space, ie processed by any denorm
+// already, but not yet scaled by scale_factor_.
+// Going from the outside of a textline to the inside should measure much
+// less distance than going from the inside of a textline to the outside.
+// How it works:
+// An increase is cheap (getting closer to a textline).
+// Constant costs unity.
+// A decrease is expensive (getting further from a textline).
+// Pixels in projection map Counted distance
+//              2
+//              3              1/x
+//              3               1
+//              2               x
+//              5              1/x
+//              7              1/x
+// Total: 1 + x + 3/x where x = kWrongWayPenalty.
+int TextlineProjection::VerticalDistance(bool debug, int x,
+                                         int y1, int y2) const {
+  x = ImageXToProjectionX(x);
+  y1 = ImageYToProjectionY(y1);
+  y2 = ImageYToProjectionY(y2);
+  if (y1 == y2) return 0;
+  const int wpl = pixGetWpl(pix_);
+  uint32_t* const rows = pixGetData(pix_);
+  const int dir = y1 < y2 ? 1 : -1;
+  int last_level = GET_DATA_BYTE(rows + y1 * wpl, x);
+  int costly_steps = 0;  // Flat steps add 1, downhill steps kWrongWayPenalty.
+  int uphill_steps = 0;  // Divided by kWrongWayPenalty when totalled.
+  // Visit every row after y1 up to and including y2.
+  for (int y = y1 + dir; y != y2 + dir; y += dir) {
+    const int level = GET_DATA_BYTE(rows + y * wpl, x);
+    if (debug) {
+      tprintf("At (%d,%d), pix = %d, prev=%d\n", x, y, level, last_level);
+    }
+    if (level > last_level) {
+      ++uphill_steps;
+    } else if (level < last_level) {
+      costly_steps += kWrongWayPenalty;
+    } else {
+      ++costly_steps;
+    }
+    last_level = level;
+  }
+  return costly_steps * scale_factor_ +
+      uphill_steps * scale_factor_ / kWrongWayPenalty;
+}
+
+// Compute the distance between (x1, y) and (x2, y) using the rule that
+// a decrease in textline density is weighted more heavily than an increase.
+int TextlineProjection::HorizontalDistance(bool debug, int x1, int x2,
+                                           int y) const {
+  x1 = ImageXToProjectionX(x1);
+  x2 = ImageXToProjectionX(x2);
+  y = ImageYToProjectionY(y);
+  if (x1 == x2) return 0;
+  uint32_t* const row = pixGetData(pix_) + y * pixGetWpl(pix_);
+  const int dir = x1 < x2 ? 1 : -1;
+  int last_level = GET_DATA_BYTE(row, x1);
+  int costly_steps = 0;  // Flat steps add 1, downhill steps kWrongWayPenalty.
+  int uphill_steps = 0;  // Divided by kWrongWayPenalty when totalled.
+  for (int x = x1 + dir; x != x2 + dir; x += dir) {
+    const int level = GET_DATA_BYTE(row, x);
+    if (debug) {
+      tprintf("At (%d,%d), pix = %d, prev=%d\n", x, y, level, last_level);
+    }
+    if (level > last_level) {
+      ++uphill_steps;
+    } else if (level < last_level) {
+      costly_steps += kWrongWayPenalty;
+    } else {
+      ++costly_steps;
+    }
+    last_level = level;
+  }
+  return costly_steps * scale_factor_ +
+      uphill_steps * scale_factor_ / kWrongWayPenalty;
+}
+
+// Returns true if the blob appears to be outside of a textline.
+// Such blobs are potentially diacritics (even if large in Thai) and should
+// be kept away from initial textline finding.
+bool TextlineProjection::BoxOutOfHTextline(const TBOX& box,
+                                          const DENORM* denorm,
+                                          bool debug) const {
+  // Unclipped gradients across the top and bottom edges of the box.
+  int top_grad = 0;
+  int bottom_grad = 0;
+  EvaluateBoxInternal(box, denorm, debug, &top_grad, &bottom_grad,
+                      nullptr, nullptr);
+  // A large enough combined gradient means strongly in a textline.
+  const bool strongly_in_textline = top_grad + bottom_grad >= 6;
+  // Medium strength: a negative gradient on either edge suggests the box
+  // lies outside the body of the textline.
+  const bool has_negative_edge = top_grad < 0 || bottom_grad < 0;
+  return !strongly_in_textline && has_negative_edge;
+}
+
+// Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
+// but uses the median top/bottom for horizontal and median left/right for
+// vertical instead of the bounding box edges.
+// Evaluates for both horizontal and vertical and returns the best result,
+// with a positive value for horizontal and a negative value for vertical.
+int TextlineProjection::EvaluateColPartition(const ColPartition& part,
+                                             const DENORM* denorm,
+                                             bool debug) const {
+  if (part.IsSingleton()) {
+    // A singleton partition is scored directly on its bounding box.
+    return EvaluateBox(part.bounding_box(), denorm, debug);
+  }
+  // Vertical hypothesis: swap in the median left/right edges.
+  TBOX vertical_box = part.bounding_box();
+  vertical_box.set_left(part.median_left());
+  vertical_box.set_right(part.median_right());
+  const int vresult = EvaluateBox(vertical_box, denorm, debug);
+  // Horizontal hypothesis: swap in the median top/bottom edges.
+  TBOX horizontal_box = part.bounding_box();
+  horizontal_box.set_top(part.median_top());
+  horizontal_box.set_bottom(part.median_bottom());
+  const int hresult = EvaluateBox(horizontal_box, denorm, debug);
+  if (debug) {
+    tprintf("Partition hresult=%d, vresult=%d from:", hresult, vresult);
+    part.bounding_box().print();
+    part.Print();
+  }
+  // vresult is returned only when hresult < -vresult.
+  return hresult >= -vresult ? hresult : vresult;
+}
+
+// Computes the mean projection gradients over the horizontal and vertical
+// edges of the box:
+//   -h-h-h-h-h-h
+//  |------------| mean=htop   -v|+v--------+v|-v
+//  |+h+h+h+h+h+h|             -v|+v        +v|-v
+//  |            |             -v|+v        +v|-v
+//  |    box     |             -v|+v  box   +v|-v
+//  |            |             -v|+v        +v|-v
+//  |+h+h+h+h+h+h|             -v|+v        +v|-v
+//  |------------| mean=hbot   -v|+v--------+v|-v
+//   -h-h-h-h-h-h
+//                           mean=vleft  mean=vright
+//
+// Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
+// for a horizontal textline, a negative number for a vertical textline,
+// and near zero for undecided. Undecided is most likely non-text.
+// All the gradients are truncated to remain non-negative, since negative
+// horizontal gradients don't give any indication of being vertical and
+// vice versa.
+// Additional complexity: The coordinates have to be transformed to original
+// image coordinates with denorm (if not null) and scaled to match the
+// projection pix. Only THEN does the code step out from the edge to compute
+// the gradient, trying 3 positions that each measure it over a 4-pixel
+// spread: (+3/-1), (+2/-2), (+1/-3).  This complexity is handled by
+// several layers of helpers below.
+int TextlineProjection::EvaluateBox(const TBOX& box, const DENORM* denorm,
+                                    bool debug) const {  // Score only: no per-edge gradient outputs.
+  return EvaluateBoxInternal(box, denorm, debug, nullptr, nullptr, nullptr, nullptr);
+}
+
+// Internal version of EvaluateBox returns the unclipped gradients as well
+// as the result of EvaluateBox.
+// hgrad1 and hgrad2 are the gradients for the horizontal textline.
+int TextlineProjection::EvaluateBoxInternal(const TBOX& box,
+                                            const DENORM* denorm, bool debug,
+                                            int* hgrad1, int* hgrad2,
+                                            int* vgrad1, int* vgrad2) const {
+  // Signed gradient across each edge. The bottom and right measurements
+  // are negated, so a good edge comes out positive on all four sides.
+  const int top_grad = BestMeanGradientInRow(
+      denorm, box.left(), box.right(), box.top(), true);
+  const int bottom_grad = -BestMeanGradientInRow(
+      denorm, box.left(), box.right(), box.bottom(), false);
+  const int left_grad = BestMeanGradientInColumn(
+      denorm, box.left(), box.bottom(), box.top(), true);
+  const int right_grad = -BestMeanGradientInColumn(
+      denorm, box.right(), box.bottom(), box.top(), false);
+  if (debug) {
+    tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:",
+            top_grad, bottom_grad, left_grad, right_grad);
+    box.print();
+  }
+  // Report the unclipped gradients, one pair at a time, and only when both
+  // pointers of the pair were supplied.
+  if (hgrad1 != nullptr && hgrad2 != nullptr) {
+    *hgrad1 = top_grad;
+    *hgrad2 = bottom_grad;
+  }
+  if (vgrad1 != nullptr && vgrad2 != nullptr) {
+    *vgrad1 = left_grad;
+    *vgrad2 = right_grad;
+  }
+  // Clip negatives to zero, then compare horizontal against vertical.
+  const int horizontal_strength = std::max(std::max(top_grad, bottom_grad), 0);
+  const int vertical_strength = std::max(std::max(left_grad, right_grad), 0);
+  return horizontal_strength - vertical_strength;
+}
+
+// Helper returns the mean gradient value for the horizontal row at the given
+// y, (in the external coordinates) by subtracting the mean of the transformed
+// row 2 pixels above from the mean of the transformed row 2 pixels below.
+// This gives a positive value for a good top edge and negative for bottom.
+// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
+int TextlineProjection::BestMeanGradientInRow(const DENORM* denorm,
+                                              int16_t min_x, int16_t max_x, int16_t y,
+                                              bool best_is_max) const {
+  const TPOINT row_start(min_x, y);
+  const TPOINT row_end(max_x, y);
+  // Offset of the first sampled row in each trial. The second sampled row
+  // is always 4 pixels further on, giving -2/+2, -1/+3 and -3/+1.
+  static const int kFirstOffsets[] = {-2, -1, -3};
+  int best_gradient = 0;
+  for (int trial = 0; trial < 3; ++trial) {
+    const int offset = kFirstOffsets[trial];
+    const int upper = MeanPixelsInLineSegment(denorm, offset, row_start, row_end);
+    const int lower = MeanPixelsInLineSegment(denorm, offset + 4, row_start, row_end);
+    const int gradient = lower - upper;
+    // The first trial seeds the result; later ones follow best_is_max.
+    if (trial == 0 || (gradient > best_gradient) == best_is_max)
+      best_gradient = gradient;
+  }
+  return best_gradient;
+}
+
+// Helper returns the mean gradient value for the vertical column at the
+// given x, (in the external coordinates) by subtracting the mean of the
+// transformed column 2 pixels left from the mean of the transformed column
+// 2 pixels to the right.
+// This gives a positive value for a good left edge and negative for right.
+// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
+int TextlineProjection::BestMeanGradientInColumn(const DENORM* denorm, int16_t x,
+                                                 int16_t min_y, int16_t max_y,
+                                                 bool best_is_max) const {
+  const TPOINT col_start(x, min_y);
+  const TPOINT col_end(x, max_y);
+  // Offset of the first sampled column in each trial. The second sampled
+  // column is always 4 pixels further on, giving -2/+2, -1/+3 and -3/+1.
+  static const int kFirstOffsets[] = {-2, -1, -3};
+  int best_gradient = 0;
+  for (int trial = 0; trial < 3; ++trial) {
+    const int offset = kFirstOffsets[trial];
+    const int left = MeanPixelsInLineSegment(denorm, offset, col_start, col_end);
+    const int right = MeanPixelsInLineSegment(denorm, offset + 4, col_start, col_end);
+    const int gradient = right - left;
+    // The first trial seeds the result; later ones follow best_is_max.
+    if (trial == 0 || (gradient > best_gradient) == best_is_max)
+      best_gradient = gradient;
+  }
+  return best_gradient;
+}
+
+// Helper returns the mean pixel value over the line between the start_pt and
+// end_pt (inclusive), but shifted perpendicular to the line in the projection
+// image by offset pixels. For simplicity, it is assumed that the vector is
+// either nearly horizontal or nearly vertical. It works on skewed textlines!
+// The end points are in external coordinates, and will be denormalized with
+// the denorm if not nullptr before further conversion to pix coordinates.
+// After all the conversions, the offset is added to the direction
+// perpendicular to the line direction. The offset is thus in projection image
+// coordinates, which allows the caller to get a guaranteed displacement
+// between pixels used to calculate gradients.
+int TextlineProjection::MeanPixelsInLineSegment(const DENORM* denorm,
+                                                int offset,
+                                                TPOINT start_pt,
+                                                TPOINT end_pt) const {
+  TransformToPixCoords(denorm, &start_pt);  // Both ends to pix_ coordinates.
+  TransformToPixCoords(denorm, &end_pt);
+  TruncateToImageBounds(&start_pt);
+  TruncateToImageBounds(&end_pt);
+  int wpl = pixGetWpl(pix_);
+  uint32_t* data = pixGetData(pix_);
+  int total = 0;  // Sum of the sampled pixel values.
+  int count = 0;  // Divisor for the mean; set in the branch taken below.
+  int x_delta = end_pt.x - start_pt.x;
+  int y_delta = end_pt.y - start_pt.y;
+  if (abs(x_delta) >= abs(y_delta)) {
+    if (x_delta == 0)
+      return 0;  // Both deltas are zero: the ends coincide in the pix.
+    // Horizontal line. Add the offset vertically.
+    int x_step = x_delta > 0 ? 1 : -1;
+    // Correct offset for rotation, keeping it anti-clockwise of the delta.
+    offset *= x_step;
+    start_pt.y += offset;
+    end_pt.y += offset;
+    TruncateToImageBounds(&start_pt);  // The shifted line may leave the pix.
+    TruncateToImageBounds(&end_pt);
+    x_delta = end_pt.x - start_pt.x;
+    y_delta = end_pt.y - start_pt.y;
+    count = x_delta * x_step + 1;  // |x_delta| + 1, i.e. end_pt is counted.
+    for (int x = start_pt.x; x != end_pt.x; x += x_step) {  // NOTE(review): end_pt.x is never sampled.
+      int y = start_pt.y + DivRounded(y_delta * (x - start_pt.x), x_delta);  // Row on the line at x.
+      total += GET_DATA_BYTE(data + wpl * y, x);
+    }
+  } else {
+    // Vertical line. Add the offset horizontally.
+    int y_step = y_delta > 0 ? 1 : -1;
+    // Correct offset for rotation, keeping it anti-clockwise of the delta.
+    // Pix holds the image with y=0 at the top, so the offset is negated.
+    offset *= -y_step;
+    start_pt.x += offset;
+    end_pt.x += offset;
+    TruncateToImageBounds(&start_pt);  // The shifted line may leave the pix.
+    TruncateToImageBounds(&end_pt);
+    x_delta = end_pt.x - start_pt.x;
+    y_delta = end_pt.y - start_pt.y;
+    count = y_delta * y_step + 1;  // |y_delta| + 1, i.e. end_pt is counted.
+    for (int y = start_pt.y; y != end_pt.y; y += y_step) {  // NOTE(review): end_pt.y is never sampled.
+      int x = start_pt.x + DivRounded(x_delta * (y - start_pt.y), y_delta);  // Column on the line at y.
+      total += GET_DATA_BYTE(data + wpl * y, x);
+    }
+  }
+  return DivRounded(total, count);  // total / count via DivRounded.
+}
+
+// Given an input pix, and a box, the sides of the box are shrunk inwards until
+// they bound any black pixels found within the original box.
+// The function converts between tesseract coords and the pix coords assuming
+// that this pix is full resolution equal in size to the original image.
+// Returns an empty box if there are no black pixels in the source box.
+static TBOX BoundsWithinBox(Pix* pix, const TBOX& box) {
+  const int im_height = pixGetHeight(pix);
+  // Tesseract coords -> pix coords (y measured from the top), and back below.
+  Box* search_box = boxCreate(box.left(), im_height - box.top(),
+                              box.width(), box.height());
+  Box* fg_box = nullptr;
+  pixClipBoxToForeground(pix, search_box, nullptr, &fg_box);
+  boxDestroy(&search_box);
+  TBOX result_box;
+  if (fg_box == nullptr) return result_box;  // No output box was produced.
+  l_int32 x, y, width, height;
+  boxGetGeometry(fg_box, &x, &y, &width, &height);
+  boxDestroy(&fg_box);
+  result_box.set_left(x);
+  result_box.set_right(x + width);
+  result_box.set_top(im_height - y);
+  result_box.set_bottom(result_box.top() - height);
+  return result_box;
+}
+
+// Splits the given box in half at x_middle or y_middle according to split_on_x
+// and checks for nontext_map pixels in each half. Reduces the bbox so that it
+// still includes the middle point, but does not touch any fg pixels in
+// nontext_map. An empty box may be returned if there is no such box.
+static void TruncateBoxToMissNonText(int x_middle, int y_middle,
+                                     bool split_on_x, Pix* nontext_map,
+                                     TBOX* bbox) {
+  // Each half keeps one edge on the middle; the other moves in past any fg.
+  TBOX first_half(*bbox);
+  TBOX second_half(*bbox);
+  if (split_on_x) {
+    first_half.set_right(x_middle);  // Left half.
+    TBOX left_fg = BoundsWithinBox(nontext_map, first_half);
+    if (!left_fg.null_box()) first_half.set_left(left_fg.right());
+    second_half.set_left(x_middle);  // Right half.
+    TBOX right_fg = BoundsWithinBox(nontext_map, second_half);
+    if (!right_fg.null_box()) second_half.set_right(right_fg.left());
+  } else {
+    first_half.set_bottom(y_middle);  // Upper half.
+    TBOX upper_fg = BoundsWithinBox(nontext_map, first_half);
+    if (!upper_fg.null_box()) first_half.set_top(upper_fg.bottom());
+    second_half.set_top(y_middle);  // Lower half.
+    TBOX lower_fg = BoundsWithinBox(nontext_map, second_half);
+    if (!lower_fg.null_box()) second_half.set_bottom(lower_fg.top());
+  }
+  first_half += second_half;
+  *bbox = first_half;
+}
+
+
+// Helper function to add 1 to a rectangle in source image coords to the
+// internal projection pix_.
+void TextlineProjection::IncrementRectangle8Bit(const TBOX& box) {
+  const int first_col = ImageXToProjectionX(box.left());
+  const int first_row = ImageYToProjectionY(box.top());
+  const int last_col = ImageXToProjectionX(box.right());
+  const int last_row = ImageYToProjectionY(box.bottom());
+  const int wpl = pixGetWpl(pix_);
+  uint32_t* const pix_data = pixGetData(pix_);
+  for (int row = first_row; row <= last_row; ++row) {
+    uint32_t* const line = pix_data + row * wpl;
+    for (int col = first_col; col <= last_col; ++col) {
+      const int value = GET_DATA_BYTE(line, col);
+      // Saturate at 255 instead of overflowing the byte.
+      if (value < 255) SET_DATA_BYTE(line, col, value + 1);
+    }
+  }
+}
+
+// Inserts a list of blobs into the projection.
+// Rotation is a multiple of 90 degrees to get from blob coords to
+// nontext_map coords, nontext_map_box is the bounds of the nontext_map.
+// Blobs are spread horizontally or vertically according to their internal
+// flags, but the spreading is truncated by set pixels in the nontext_map
+// and also by the horizontal rule line limits on the blobs.
+void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs,
+                                      const FCOORD& rotation,
+                                      const TBOX& nontext_map_box,
+                                      Pix* nontext_map) {
+  // With a zero x component the rotation swaps the axes, so a horizontal
+  // spread in blob space becomes a vertical one in nontext_map space.
+  const bool axes_swapped = rotation.x() == 0.0f;
+  BLOBNBOX_IT blob_it(blobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    TBOX spread_box = blob->bounding_box();
+    // Centre of the unpadded box; the truncation below is split around it.
+    ICOORD centre((spread_box.left() + spread_box.right()) / 2,
+                  (spread_box.bottom() + spread_box.top()) / 2);
+    const bool padded_on_x = PadBlobBox(blob, &spread_box);
+    // Move box and centre into nontext_map coordinates.
+    spread_box.rotate(rotation);
+    centre.rotate(rotation);
+    // Intersect in place with the map bounds before incrementing.
+    spread_box &= nontext_map_box;
+    // Stop the spread short of any non-text pixels.
+    TruncateBoxToMissNonText(centre.x(), centre.y(), padded_on_x != axes_swapped,
+                             nontext_map, &spread_box);
+    if (spread_box.area() > 0) IncrementRectangle8Bit(spread_box);
+  }
+}
+
+// Pads the bounding box of the given blob according to whether it is on
+// a horizontal or vertical text line, taking into account tab-stops near
+// the blob. Returns true if padding was in the horizontal direction.
+bool TextlineProjection::PadBlobBox(BLOBNBOX* blob, TBOX* bbox) {
+  // Padding across the textline direction helps diacritics to be absorbed,
+  // but on tightly spaced text it would fill in the blank space between
+  // textlines in the projection map, so it is only applied when the
+  // neighbours on both sides are further away than this limit.
+  const int spacing_limit = scale_factor_ * kMinLineSpacingFactor;
+  auto* const nbr_above = blob->neighbour(BND_ABOVE);
+  auto* const nbr_below = blob->neighbour(BND_BELOW);
+  auto* const nbr_left = blob->neighbour(BND_LEFT);
+  auto* const nbr_right = blob->neighbour(BND_RIGHT);
+  int xpad = 0;
+  int ypad = 0;
+  bool padding_horizontally = false;
+  if (blob->UniquelyHorizontal()) {
+    padding_horizontally = true;
+    xpad = bbox->height() * kOrientedPadFactor;
+    // Well-spaced lines: also pad vertically by one projection pixel.
+    const bool clear_above = nbr_above == nullptr ||
+        bbox->y_gap(nbr_above->bounding_box()) > spacing_limit;
+    const bool clear_below = nbr_below == nullptr ||
+        bbox->y_gap(nbr_below->bounding_box()) > spacing_limit;
+    if (clear_above && clear_below) ypad = scale_factor_;
+  } else if (blob->UniquelyVertical()) {
+    ypad = bbox->width() * kOrientedPadFactor;
+    // Well-spaced lines: also pad horizontally by one projection pixel.
+    const bool clear_left = nbr_left == nullptr ||
+        bbox->x_gap(nbr_left->bounding_box()) > spacing_limit;
+    const bool clear_right = nbr_right == nullptr ||
+        bbox->x_gap(nbr_right->bounding_box()) > spacing_limit;
+    if (clear_left && clear_right) xpad = scale_factor_;
+  } else {
+    // Undecided orientation: pad along any axis on which a neighbour names
+    // this blob back as its own neighbour in the opposite direction.
+    const bool mutual_above =
+        nbr_above != nullptr && nbr_above->neighbour(BND_BELOW) == blob;
+    const bool mutual_below =
+        nbr_below != nullptr && nbr_below->neighbour(BND_ABOVE) == blob;
+    if (mutual_above || mutual_below) {
+      ypad = bbox->width() * kDefaultPadFactor;
+    }
+    const bool mutual_right =
+        nbr_right != nullptr && nbr_right->neighbour(BND_LEFT) == blob;
+    const bool mutual_left =
+        nbr_left != nullptr && nbr_left->neighbour(BND_RIGHT) == blob;
+    if (mutual_right || mutual_left) {
+      xpad = bbox->height() * kDefaultPadFactor;
+      padding_horizontally = true;
+    }
+  }
+  bbox->pad(xpad, ypad);
+  // Pull the sides back so the box oversteps a rule by at most this much.
+  const int overrun_limit = scale_factor_ * kMaxTabStopOverrun;
+  const int min_left = blob->left_rule() - overrun_limit;
+  const int max_right = blob->right_rule() + overrun_limit;
+  if (bbox->left() < min_left) bbox->set_left(min_left);
+  if (bbox->right() > max_right) bbox->set_right(max_right);
+  return padding_horizontally;
+}
+
+// Helper denormalizes the TPOINT with the denorm if not nullptr, then
+// converts to pix_ coordinates.
+void TextlineProjection::TransformToPixCoords(const DENORM* denorm,
+                                              TPOINT* pt) const {
+  // Denormalize in place first (when a denorm is supplied), then map.
+  if (denorm != nullptr) denorm->DenormTransform(nullptr, *pt, pt);
+  const int pix_x = ImageXToProjectionX(pt->x);
+  const int pix_y = ImageYToProjectionY(pt->y);
+  pt->x = pix_x;
+  pt->y = pix_y;
+}
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#pragma optimize("g", off)
+#endif  // _MSC_VER && !__clang__. NOTE(review): reason for this opt-out is not recorded here.
+// Helper clips the TPOINT into pix_: x to [0, width-1], y to [0, height-1].
+void TextlineProjection::TruncateToImageBounds(TPOINT* pt) const {
+  pt->x = ClipToRange<int>(pt->x, 0, pixGetWidth(pix_) - 1);
+  pt->y = ClipToRange<int>(pt->y, 0, pixGetHeight(pix_) - 1);
+}
+#if defined(_MSC_VER) && !defined(__clang__)
+#pragma optimize("", on)
+#endif  // _MSC_VER && !__clang__: optimization turned back on after the function.
+
+// Transform tesseract image coordinates to coordinates used in the projection.
+int TextlineProjection::ImageXToProjectionX(int x) const {
+  const int last_column = pixGetWidth(pix_) - 1;  // Upper clip bound.
+  return ClipToRange((x - x_origin_) / scale_factor_, 0, last_column);
+}
+int TextlineProjection::ImageYToProjectionY(int y) const {
+  const int last_row = pixGetHeight(pix_) - 1;  // Upper clip bound.
+  return ClipToRange((y_origin_ - y) / scale_factor_, 0, last_row);
+}
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/textlineprojection.h b/tesseract/src/textord/textlineprojection.h
new file mode 100644
index 00000000..c35ae350
--- /dev/null
+++ b/tesseract/src/textord/textlineprojection.h
@@ -0,0 +1,206 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: rays@google.com (Ray Smith)
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
+#define TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
+
+#include "blobgrid.h"      // For BlobGrid
+
+struct Pix;
+
+namespace tesseract {
+
+class DENORM;
+struct TPOINT;
+class ColPartition;
+
+// Simple class to encapsulate the computation of an image representing
+// local textline density, and function(s) to make use of it.
+// The underlying principle is that if you smear connected components
+// horizontally (vertically for components on a vertically written textline)
+// and count the number of smeared components in an image, then the resulting
+// image shows the density of the textlines at each image position.
+class TESS_API TextlineProjection {
+ public:
+  // The down-scaling factor is computed to obtain a projection resolution
+  // of about 100 dpi, whatever the input.
+  explicit TextlineProjection(int resolution);
+  ~TextlineProjection();
+
+  // Build the projection profile given the input_block containing lists of
+  // blobs, a rotation to convert to image coords,
+  // and a full-resolution nontext_map, marking out areas to avoid.
+  // During construction, we have the following assumptions:
+  // The rotation is a multiple of 90 degrees, ie no deskew yet.
+  // The blobs have had their left and right rules set to also limit
+  // the range of projection.
+  void ConstructProjection(TO_BLOCK* input_block,
+                           const FCOORD& rotation, Pix* nontext_map);
+
+  // Display the blobs in the window colored according to textline quality.
+  void PlotGradedBlobs(BLOBNBOX_LIST* blobs, ScrollView* win);
+
+  // Moves blobs that look like they don't sit well on a textline from the
+  // input blobs list to the output small_blobs list.
+  // This gets them away from initial textline finding to stop diacritics
+  // from forming incorrect textlines. (Introduced mainly to fix Thai.)
+  void MoveNonTextlineBlobs(BLOBNBOX_LIST* blobs,
+                            BLOBNBOX_LIST* small_blobs) const;
+
+  // Create a window and display the projection in it.
+  void DisplayProjection() const;
+
+  // Compute the distance of the box from the partition using curved projection
+  // space. As DistanceOfBoxFromBox, except that the direction is taken from
+  // the ColPartition and the median bounds of the ColPartition are used as
+  // the to_box.
+  int DistanceOfBoxFromPartition(const TBOX& box, const ColPartition& part,
+                                 const DENORM* denorm, bool debug) const;
+
+  // Compute the distance from the from_box to the to_box using curved
+  // projection space. Separation that involves a decrease in projection
+  // density (moving from the from_box to the to_box) is weighted more heavily
+  // than constant density, and an increase is weighted less.
+  // If horizontal_textline is true, then curved space is used vertically,
+  // as for a diacritic on the edge of a textline.
+  // The projection uses original image coords, so denorm is used to get
+  // back to the image coords from box/part space.
+  int DistanceOfBoxFromBox(const TBOX& from_box, const TBOX& to_box,
+                           bool horizontal_textline,
+                           const DENORM* denorm, bool debug) const;
+
+  // Compute the distance between (x, y1) and (x, y2) using the rule that
+  // a decrease in textline density is weighted more heavily than an increase.
+  // The coordinates are in source image space, ie processed by any denorm
+  // already, but not yet scaled by scale_factor_.
+  // Going from the outside of a textline to the inside should measure much
+  // less distance than going from the inside of a textline to the outside.
+  int VerticalDistance(bool debug, int x, int y1, int y2) const;
+
+  // As VerticalDistance, but between (x1, y) and (x2, y): a decrease in
+  // textline density is weighted more heavily than an increase.
+  int HorizontalDistance(bool debug, int x1, int x2, int y) const;
+
+  // Returns true if the blob appears to be outside of a horizontal textline.
+  // Such blobs are potentially diacritics (even if large in Thai) and should
+  // be kept away from initial textline finding.
+  bool BoxOutOfHTextline(const TBOX& box, const DENORM* denorm,
+                        bool debug) const;
+
+  // Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
+  // but uses the median top/bottom for horizontal and median left/right for
+  // vertical instead of the bounding box edges.
+  // Evaluates for both horizontal and vertical and returns the best result,
+  // with a positive value for horizontal and a negative value for vertical.
+  int EvaluateColPartition(const ColPartition& part, const DENORM* denorm,
+                           bool debug) const;
+
+  // Computes the mean projection gradients over the horizontal and vertical
+  // edges of the box:
+  //   -h-h-h-h-h-h
+  //  |------------| mean=htop   -v|+v--------+v|-v
+  //  |+h+h+h+h+h+h|             -v|+v        +v|-v
+  //  |            |             -v|+v        +v|-v
+  //  |    box     |             -v|+v  box   +v|-v
+  //  |            |             -v|+v        +v|-v
+  //  |+h+h+h+h+h+h|             -v|+v        +v|-v
+  //  |------------| mean=hbot   -v|+v--------+v|-v
+  //   -h-h-h-h-h-h
+  //                           mean=vleft  mean=vright
+  //
+  // Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
+  // for a horizontal textline, a negative number for a vertical textline,
+  // and near zero for undecided. Undecided is most likely non-text.
+  int EvaluateBox(const TBOX& box, const DENORM* denorm, bool debug) const;
+
+ private:
+  // Internal version of EvaluateBox returns the unclipped gradients as well
+  // as the result of EvaluateBox. hgrad1/hgrad2 receive the top/bottom
+  // gradients, vgrad1/vgrad2 the left/right; a pair is written only if both set.
+  int EvaluateBoxInternal(const TBOX& box, const DENORM* denorm, bool debug,
+                          int* hgrad1, int* hgrad2,
+                          int* vgrad1, int* vgrad2) const;
+
+  // Helper returns the mean gradient value for the horizontal row at the given
+  // y, (in the external coordinates) by subtracting the mean of the transformed
+  // row 2 pixels above from the mean of the transformed row 2 pixels below.
+  // This gives a positive value for a good top edge and negative for bottom.
+  // Returns the best (max if best_is_max, else min) of +2/-2, +3/-1, +1/-3.
+  int BestMeanGradientInRow(const DENORM* denorm, int16_t min_x, int16_t max_x,
+                            int16_t y, bool best_is_max) const;
+
+  // Helper returns the mean gradient value for the vertical column at the
+  // given x, (in the external coordinates) by subtracting the mean of the
+  // transformed column 2 pixels left from the mean of the transformed column
+  // 2 pixels to the right.
+  // This gives a positive value for a good left edge and negative for right.
+  // Returns the best (max if best_is_max, else min) of +2/-2, +3/-1, +1/-3.
+  int BestMeanGradientInColumn(const DENORM* denorm, int16_t x, int16_t min_y,
+                               int16_t max_y, bool best_is_max) const;
+
+  // Helper returns the mean pixel value over the line between the start_pt and
+  // end_pt (inclusive), but shifted perpendicular to the line in the projection
+  // image by offset pixels. For simplicity, it is assumed that the vector is
+  // either nearly horizontal or nearly vertical. It works on skewed textlines!
+  // The end points are in external coordinates, and will be denormalized with
+  // the denorm if not nullptr before further conversion to pix coordinates.
+  // After all the conversions, the offset is added to the direction
+  // perpendicular to the line direction. The offset is thus in projection image
+  // coordinates, which allows the caller to get a guaranteed displacement
+  // between pixels used to calculate gradients. Returns 0 if the ends coincide.
+  int MeanPixelsInLineSegment(const DENORM* denorm, int offset,
+                              TPOINT start_pt, TPOINT end_pt) const;
+
+  // Helper function to add 1 to a rectangle in source image coords to the
+  // internal projection pix_.
+  void IncrementRectangle8Bit(const TBOX& box);
+  // Inserts a list of blobs into the projection.
+  // Rotation is a multiple of 90 degrees to get from blob coords to
+  // nontext_map coords, image_box is the bounds of the nontext_map.
+  // Blobs are spread horizontally or vertically according to their internal
+  // flags, but the spreading is truncated by set pixels in the nontext_map
+  // and also by the horizontal rule line limits on the blobs.
+  void ProjectBlobs(BLOBNBOX_LIST* blobs, const FCOORD& rotation,
+                    const TBOX& image_box, Pix* nontext_map);
+  // Pads the bounding box of the given blob according to whether it is on
+  // a horizontal or vertical text line, taking into account tab-stops near
+  // the blob. Returns true if padding was in the horizontal direction.
+  bool PadBlobBox(BLOBNBOX* blob, TBOX* bbox);
+
+  // Helper denormalizes the TPOINT with the denorm if not nullptr, then
+  // converts to pix_ coordinates.
+  void TransformToPixCoords(const DENORM* denorm, TPOINT* pt) const;
+
+  // Helper clips the TPOINT (already in pix_ coordinates) to lie within pix_.
+  void TruncateToImageBounds(TPOINT* pt) const;
+
+  // Transform tesseract image coordinates to pix_ coordinates, clipped to pix_.
+  int ImageXToProjectionX(int x) const;
+  int ImageYToProjectionY(int y) const;
+
+  // The down-sampling scale factor used in building the image.
+  int scale_factor_;
+  // The blob coordinates of the top-left (origin of the pix_) in tesseract
+  // coordinates. Used to transform the bottom-up tesseract coordinates to
+  // the top-down coordinates of the pix.
+  int x_origin_;
+  int y_origin_;
+  // The image of horizontally smeared blob boxes summed to provide a
+  // textline density map (one byte per pixel, saturating at 255). As with a
+  // horizontal projection, the map has dips in the gaps between textlines.
+  Pix* pix_;
+};
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
diff --git a/tesseract/src/textord/textord.cpp b/tesseract/src/textord/textord.cpp
new file mode 100644
index 00000000..756ca78a
--- /dev/null
+++ b/tesseract/src/textord/textord.cpp
@@ -0,0 +1,349 @@
+///////////////////////////////////////////////////////////////////////
+// File:        textord.cpp
+// Description: The top-level text line and word finding functionality.
+// Author:      Ray Smith
+// Created:     Fri Mar 13 14:43:01 PDT 2009
+//
+// (C) Copyright 2009, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "baselinedetect.h"
+#include "drawtord.h"
+#include "textord.h"
+#include "makerow.h"
+#include "pageres.h"
+#include "tordmain.h"
+#include "wordseg.h"
+
+namespace tesseract {
+
+Textord::Textord(CCStruct* ccstruct)
+    : ccstruct_(ccstruct),  // Not owned; supplies params() for the members.
+      use_cjk_fp_model_(false),
+      // Parameters grouped under makerow.cpp ////////////////////
+      BOOL_MEMBER(textord_single_height_mode, false,
+                  "Script has no xheight, so use a single mode",
+                  ccstruct_->params()),
+      // Parameters grouped under tospace.cpp ////////////////////
+      BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?",
+                  ccstruct_->params()),
+      BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false,
+                  "Constrain relative values of inter and intra-word gaps for "
+                  "old_to_method.",
+                  ccstruct_->params()),
+      BOOL_MEMBER(tosp_only_use_prop_rows, true,
+                  "Block stats to use fixed pitch rows?", ccstruct_->params()),
+      BOOL_MEMBER(tosp_force_wordbreak_on_punct, false,
+                  "Force word breaks on punct to break long lines in non-space "
+                  "delimited langs",
+                  ccstruct_->params()),
+      BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?",
+                  ccstruct_->params()),
+      BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code",
+                  ccstruct_->params()),
+      BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces",
+                  ccstruct_->params()),
+      BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces",
+                  ccstruct_->params()),
+      BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces",
+                  ccstruct_->params()),
+      BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces",
+                  ccstruct_->params()),
+      BOOL_MEMBER(tosp_recovery_isolated_row_stats, true,
+                  "Use row alone when inadequate cert spaces",
+                  ccstruct_->params()),
+      BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess",
+                  ccstruct_->params()),
+      BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?",
+                  ccstruct_->params()),
+      BOOL_MEMBER(tosp_fuzzy_limit_all, true,
+                  "Don't restrict kn->sp fuzzy limit to tables",
+                  ccstruct_->params()),
+      BOOL_MEMBER(tosp_stats_use_xht_gaps, true,
+                  "Use within xht gap for wd breaks", ccstruct_->params()),
+      BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks",
+                  ccstruct_->params()),
+      BOOL_MEMBER(tosp_only_use_xht_gaps, false,
+                  "Only use within xht gap for wd breaks", ccstruct_->params()),
+      BOOL_MEMBER(tosp_rule_9_test_punct, false,
+                  "Don't chng kn to space next to punct", ccstruct_->params()),
+      BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip",
+                  ccstruct_->params()),
+      BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip",
+                  ccstruct_->params()),
+      BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic",
+                  ccstruct_->params()),
+      INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params()),
+      INT_MEMBER(tosp_enough_space_samples_for_median, 3,
+                 "or should we use mean", ccstruct_->params()),
+      INT_MEMBER(tosp_redo_kern_limit, 10,
+                 "No.samples reqd to reestimate for row", ccstruct_->params()),
+      INT_MEMBER(tosp_few_samples, 40,
+                 "No.gaps reqd with 1 large gap to treat as a table",
+                 ccstruct_->params()),
+      INT_MEMBER(tosp_short_row, 20,
+                 "No.gaps reqd with few cert spaces to use certs",
+                 ccstruct_->params()),
+      INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly",
+                 ccstruct_->params()),
+      double_MEMBER(tosp_old_sp_kn_th_factor, 2.0,
+                    "Factor for defining space threshold in terms of space and "
+                    "kern sizes",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_narrow_aspect_ratio, 0.48,
+                    "narrow if w/h less than this", ccstruct_->params()),
+      double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_fuzzy_space_factor, 0.6,
+                    "Fract of xheight for fuzz sp", ccstruct_->params()),
+      double_MEMBER(tosp_fuzzy_space_factor1, 0.5,
+                    "Fract of xheight for fuzz sp", ccstruct_->params()),
+      double_MEMBER(tosp_fuzzy_space_factor2, 0.72,
+                    "Fract of xheight for fuzz sp", ccstruct_->params()),
+      double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_enough_small_gaps, 0.65,
+                    "Fract of kerns reqd for isolated row stats",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_table_kn_sp_ratio, 2.25,
+                    "Min difference of kn & sp in table", ccstruct_->params()),
+      double_MEMBER(tosp_table_xht_sp_ratio, 0.33,
+                    "Expect spaces bigger than this", ccstruct_->params()),
+      double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0,
+                    "Fuzzy if less than this", ccstruct_->params()),
+      double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_min_sane_kn_sp, 1.5,
+                    "Don't trust spaces less than this time kn",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_init_guess_kn_mult, 2.2,
+                    "Thresh guess - mult kn by this", ccstruct_->params()),
+      double_MEMBER(tosp_init_guess_xht_mult, 0.28,
+                    "Thresh guess - mult xht by this", ccstruct_->params()),
+      double_MEMBER(tosp_max_sane_kn_thresh, 5.0,
+                    "Multiplier on kn to limit thresh", ccstruct_->params()),
+      double_MEMBER(tosp_flip_caution, 0.0,
+                    "Don't autoflip kn to sp when large separation",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_large_kerning, 0.19,
+                    "Limit use of xht gap with large kns", ccstruct_->params()),
+      double_MEMBER(tosp_dont_fool_with_small_kerns, -1,
+                    "Limit use of xht gap with odd small kns",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_near_lh_edge, 0,
+                    "Don't reduce box if the top left is non blank",
+                    ccstruct_->params()),
+      double_MEMBER(tosp_silly_kn_sp_gap, 0.2,
+                    "Don't let sp minus kn get too small", ccstruct_->params()),
+      double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75,
+                    "How wide fuzzies need context", ccstruct_->params()),
+      // Parameters grouped under tordmain.cpp ///////////////////
+      BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs",
+                  ccstruct_->params()),
+      BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs",
+                  ccstruct_->params()),
+      BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs",
+                  ccstruct_->params()),
+      INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise",
+                 ccstruct_->params()),
+      INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level",
+                 ccstruct_->params()),
+      double_MEMBER(textord_noise_area_ratio, 0.7,
+                    "Fraction of bounding box for noise", ccstruct_->params()),
+      double_MEMBER(textord_initialx_ile, 0.75,
+                    "Ile of sizes for xheight guess", ccstruct_->params()),
+      double_MEMBER(textord_initialasc_ile, 0.90,
+                    "Ile of sizes for xheight guess", ccstruct_->params()),
+      INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima",
+                 ccstruct_->params()),
+      double_MEMBER(textord_noise_sizelimit, 0.5,
+                    "Fraction of x for big t count", ccstruct_->params()),
+      INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob",
+                 ccstruct_->params()),
+      double_MEMBER(textord_noise_normratio, 2.0,
+                    "Dot to norm ratio for deletion", ccstruct_->params()),
+      BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words",
+                  ccstruct_->params()),
+      BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows",
+                  ccstruct_->params()),
+      double_MEMBER(textord_noise_syfract, 0.2,
+                    "xh fract height error for norm blobs",
+                    ccstruct_->params()),
+      double_MEMBER(textord_noise_sxfract, 0.4,
+                    "xh fract width error for norm blobs", ccstruct_->params()),
+      double_MEMBER(textord_noise_hfract, 1.0 / 64,
+                    "Height fraction to discard outlines as speckle noise",
+                    ccstruct_->params()),
+      INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row",
+                 ccstruct_->params()),
+      double_MEMBER(textord_noise_rowratio, 6.0,
+                    "Dot to norm ratio for deletion", ccstruct_->params()),
+      BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector",
+                  ccstruct_->params()),
+      double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift",
+                    ccstruct_->params()),
+      double_MEMBER(textord_blshift_xfraction, 9.99,
+                    "Min size of baseline shift", ccstruct_->params()) {}
+
+// Makes the textlines and words inside each block.
+// When to_blocks is empty the connected components are found here from
+// binary_pix; otherwise, unless pageseg_mode is sparse, the existing blobs
+// are only filtered. blocks and to_blocks are updated in place.
+void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew,
+                          int width, int height, Pix* binary_pix,
+                          Pix* thresholds_pix, Pix* grey_pix,
+                          bool use_box_bottoms, BLOBNBOX_LIST* diacritic_blobs,
+                          BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
+  page_tr_.set_x(width);
+  page_tr_.set_y(height);
+  if (to_blocks->empty()) {
+    // AutoPageSeg was not used, so we need to find_components first.
+    find_components(binary_pix, blocks, to_blocks);
+    TO_BLOCK_IT it(to_blocks);
+    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+      TO_BLOCK* to_block = it.data();
+      // Compute the edge offsets whether or not there is a grey_pix.
+      // We have by-passed auto page seg, so we have to run it here.
+      // By page segmentation mode there is no non-text to avoid running on.
+      to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
+    }
+  } else if (!PSM_SPARSE(pageseg_mode)) {
+    // AutoPageSeg does not need to find_components as it did that already.
+    // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
+    filter_blobs(page_tr_, to_blocks, true);
+  }
+
+  ASSERT_HOST(!to_blocks->empty());
+  if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
+    const FCOORD anticlockwise90(0.0f, 1.0f);
+    const FCOORD clockwise90(0.0f, -1.0f);
+    TO_BLOCK_IT it(to_blocks);
+    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+      TO_BLOCK* to_block = it.data();
+      BLOCK* block = to_block->block;
+      // Create a fake poly_block in block from its bounding box.
+      block->pdblk.set_poly_block(new POLY_BLOCK(block->pdblk.bounding_box(),
+                                           PT_VERTICAL_TEXT));
+      // Rotate the to_block along with its contained block and blobnbox lists.
+      to_block->rotate(anticlockwise90);
+      // Set the block's rotation values to obey the convention followed in
+      // layout analysis for vertical text.
+      block->set_re_rotation(clockwise90);
+      block->set_classify_rotation(clockwise90);
+    }
+  }
+
+  TO_BLOCK_IT to_block_it(to_blocks);
+  TO_BLOCK* to_block = to_block_it.data();  // For single-row/word paths.
+  // Make the rows in the block.
+  float gradient = 0.0f;  // Stays 0 in sparse modes, which make no rows here.
+  // Do it the old fashioned way.
+  if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
+    gradient = make_rows(page_tr_, to_blocks);
+  } else if (!PSM_SPARSE(pageseg_mode)) {
+    // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
+    gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE,
+                               to_block, to_blocks);
+  }
+  BaselineDetect baseline_detector(textord_baseline_debug,
+                                   reskew, to_blocks);
+  baseline_detector.ComputeStraightBaselines(use_box_bottoms);
+  baseline_detector.ComputeBaselineSplinesAndXheights(
+      page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr,
+      textord_show_final_rows, this);
+  // Now make the words in the lines.
+  if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
+    // SINGLE_LINE uses the old word maker on the single line.
+    make_words(this, page_tr_, gradient, blocks, to_blocks);
+  } else {
+    // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
+    // single word, and in SINGLE_CHAR mode, all the outlines
+    // go in a single blob.
+    make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
+                     to_block->get_rows(), to_block->block->row_list());
+  }
+  // Remove empties.
+  cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);
+  TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
+  // Compute the margins for each row in the block, to be used later for
+  // paragraph detection.
+  BLOCK_IT b_it(blocks);
+  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
+    b_it.data()->compute_row_margins();
+  }
+#ifndef GRAPHICS_DISABLED
+  close_to_win();
+#endif
+}
+
+// If only a single textline was wanted but several rows were produced,
+// keeps the row with the greatest mean word certainty and deletes the rest.
+void Textord::CleanupSingleRowResult(PageSegMode pageseg_mode,
+                                     PAGE_RES* page_res) {
+  if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode)) {
+    return;  // Line-finding and sparse modes need no cleanup.
+  }
+  PAGE_RES_IT res_it(page_res);
+  // Pass 1: pick the row whose words have the highest mean certainty.
+  ROW_RES* winning_row = nullptr;
+  float winning_mean = 0.0f;
+  float conf_sum = 0.0f;
+  int words_in_row = 0;
+  for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
+    conf_sum += res_it.word()->best_choice->certainty();
+    ++words_in_row;
+    if (res_it.next_row() == res_it.row()) continue;  // Row not finished yet.
+    const float row_mean = conf_sum / words_in_row;
+    if (winning_row == nullptr || row_mean > winning_mean) {
+      winning_row = res_it.row();
+      winning_mean = row_mean;
+    }
+    conf_sum = 0.0f;
+    words_in_row = 0;
+  }
+  // Pass 2: delete every word that lies outside the winning row.
+  for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
+    if (res_it.row() != winning_row) {
+      res_it.DeleteCurrentWord();
+    }
+  }
+}
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/textord.h b/tesseract/src/textord/textord.h
new file mode 100644
index 00000000..b2ca7079
--- /dev/null
+++ b/tesseract/src/textord/textord.h
@@ -0,0 +1,403 @@
+///////////////////////////////////////////////////////////////////////
+// File:        textord.h
+// Description: The Textord class definition gathers text line and word
+//              finding functionality.
+// Author:      Ray Smith
+// Created:     Fri Mar 13 14:29:01 PDT 2009
+//
+// (C) Copyright 2009, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_TEXTORD_H_
+#define TESSERACT_TEXTORD_TEXTORD_H_
+
+#include "ccstruct.h"
+#include "bbgrid.h"
+#include "blobbox.h"
+#include "gap_map.h"
+
+#include <tesseract/publictypes.h>  // For PageSegMode.
+
+namespace tesseract {
+
+class FCOORD;
+class BLOCK_LIST;
+class PAGE_RES;
+class TO_BLOCK;
+class TO_BLOCK_LIST;
+class ScrollView;
+
+// Pairs a borrowed WERD with a cached, enlarged copy of its bounding box so
+// that BBGrid can be used to find the words that diacritics should go in.
+class WordWithBox {
+ public:
+  WordWithBox() : word_(nullptr) {}
+  explicit WordWithBox(WERD *word)
+      : word_(word), bounding_box_(word->bounding_box()) {
+    const int margin = bounding_box_.height();
+    bounding_box_.pad(margin, margin);
+  }
+
+  // Returns the bounding box of only the good blobs.
+  TBOX true_bounding_box() const { return word_->true_bounding_box(); }
+  const TBOX &bounding_box() const { return bounding_box_; }
+  const WERD *word() const { return word_; }
+  C_BLOB_LIST *RejBlobs() const { return word_->rej_cblob_list(); }
+
+ private:
+  // Not owned: a real word held elsewhere that must outlive this object.
+  WERD *word_;
+  // Cached bounding box of the word, padded all round by the box height.
+  TBOX bounding_box_;
+};
+
+// Make it usable by BBGrid (list types presumably come from CLISTIZEH).
+CLISTIZEH(WordWithBox)
+using WordGrid = BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>;
+using WordSearch = GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>;
+
+class Textord {
+ public:
+  explicit Textord(CCStruct* ccstruct);
+  ~Textord() = default;
+
+  // Make the textlines and words inside each block.
+  // binary_pix is mandatory and is the binarized input after line removal.
+  // grey_pix is optional, but if present must match the binary_pix in size,
+  // and must be a *real* grey image instead of binary_pix * 255.
+  // thresholds_pix is expected to be present iff grey_pix is present and
+  // can be an integer factor reduction of the grey_pix. It represents the
+  // thresholds that were used to create the binary_pix from the grey_pix.
+  // diacritic_blobs contain small confusing components that should be added
+  // to the appropriate word(s) in case they are really diacritics.
+  void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width,
+                   int height, Pix *binary_pix, Pix *thresholds_pix,
+                   Pix *grey_pix, bool use_box_bottoms,
+                   BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks,
+                   TO_BLOCK_LIST *to_blocks);
+
+  // If we were supposed to return only a single textline, and there is more
+  // than one, clean up and leave only the best.
+  void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES* page_res);
+
+  bool use_cjk_fp_model() const {
+    return use_cjk_fp_model_;
+  }
+  void set_use_cjk_fp_model(bool flag) {
+    use_cjk_fp_model_ = flag;
+  }
+
+  // tospace.cpp ///////////////////////////////////////////
+  void to_spacing(
+      ICOORD page_tr,        //topright of page
+      TO_BLOCK_LIST *blocks  //blocks on page
+                                         );
+  ROW *make_prop_words(TO_ROW *row,     // row to make
+                       FCOORD rotation  // for drawing
+                       );
+  ROW *make_blob_words(TO_ROW *row,     // row to make
+                       FCOORD rotation  // for drawing
+                       );
+  // tordmain.cpp ///////////////////////////////////////////
+  void find_components(Pix* pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
+  void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST* blocks, bool testing_on);
+
+ private:
+  // For underlying memory management and other utilities.
+  CCStruct* ccstruct_;
+
+  // The size of the input image.
+  ICOORD page_tr_;
+
+  bool use_cjk_fp_model_;
+
+  // makerow.cpp ///////////////////////////////////////////
+  // Make the textlines inside each block.
+  void MakeRows(PageSegMode pageseg_mode, const FCOORD& skew,
+                int width, int height, TO_BLOCK_LIST* to_blocks);
+  // Make the textlines inside a single block.
+  void MakeBlockRows(int min_spacing, int max_spacing,
+                     const FCOORD& skew, TO_BLOCK* block,
+                     ScrollView* win);
+
+ public:
+  void compute_block_xheight(TO_BLOCK *block, float gradient);
+  void compute_row_xheight(TO_ROW *row,          // row to do
+                           const FCOORD& rotation,
+                           float gradient,       // global skew
+                           int block_line_size);
+  void make_spline_rows(TO_BLOCK* block,   // block to do
+                        float gradient,    // gradient to fit
+                        bool testing_on);
+ private:
+  //// oldbasel.cpp ////////////////////////////////////////
+  void make_old_baselines(TO_BLOCK* block,   // block to do
+                          bool testing_on,  // correct orientation
+                          float gradient);
+  void correlate_lines(TO_BLOCK *block, float gradient);
+  void correlate_neighbours(TO_BLOCK *block,  // block rows are in.
+                            TO_ROW **rows,    // rows of block.
+                            int rowcount);    // no of rows to do.
+  int correlate_with_stats(TO_ROW **rows,  // rows of block.
+                           int rowcount,   // no of rows to do.
+                           TO_BLOCK* block);
+  void find_textlines(TO_BLOCK *block,  // block row is in
+                      TO_ROW *row,      // row to do
+                      int degree,       // required approximation
+                      QSPLINE *spline);  // starting spline
+  // tospace.cpp ///////////////////////////////////////////
+  //DEBUG USE ONLY
+  void block_spacing_stats(TO_BLOCK* block,
+                           GAPMAP* gapmap,
+                           bool& old_text_ord_proportional,
+          //resulting estimate
+                           int16_t& block_space_gap_width,
+          //resulting estimate
+                           int16_t& block_non_space_gap_width
+  );
+  void row_spacing_stats(TO_ROW *row,
+                         GAPMAP *gapmap,
+                         int16_t block_idx,
+                         int16_t row_idx,
+                         //estimate for block
+                         int16_t block_space_gap_width,
+                         //estimate for block
+                         int16_t block_non_space_gap_width
+                         );
+  void old_to_method(TO_ROW *row,
+                     STATS *all_gap_stats,
+                     STATS *space_gap_stats,
+                     STATS *small_gap_stats,
+                     int16_t block_space_gap_width,
+                     //estimate for block
+                     int16_t block_non_space_gap_width
+                     );
+  bool isolated_row_stats(TO_ROW* row,
+                          GAPMAP* gapmap,
+                          STATS* all_gap_stats,
+                          bool suspected_table,
+                          int16_t block_idx,
+                          int16_t row_idx);
+  int16_t stats_count_under(STATS *stats, int16_t threshold);
+  void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats);
+  bool make_a_word_break(TO_ROW* row,   // row being made
+                         TBOX blob_box, // for next_blob // how many blanks?
+                         int16_t prev_gap,
+                         TBOX prev_blob_box,
+                         int16_t real_current_gap,
+                         int16_t within_xht_current_gap,
+                         TBOX next_blob_box,
+                         int16_t next_gap,
+                         uint8_t& blanks,
+                         bool& fuzzy_sp,
+                         bool& fuzzy_non,
+                         bool& prev_gap_was_a_space,
+                         bool& break_at_next_gap);
+  bool narrow_blob(TO_ROW* row, TBOX blob_box);
+  bool wide_blob(TO_ROW* row, TBOX blob_box);
+  bool suspected_punct_blob(TO_ROW* row, TBOX box);
+  void peek_at_next_gap(TO_ROW *row,
+                        BLOBNBOX_IT box_it,
+                        TBOX &next_blob_box,
+                        int16_t &next_gap,
+                        int16_t &next_within_xht_gap);
+  void mark_gap(TBOX blob,    //blob following gap
+                int16_t rule,  // heuristic id
+                int16_t prev_gap,
+                int16_t prev_blob_width,
+                int16_t current_gap,
+                int16_t next_blob_width,
+                int16_t next_gap);
+  float find_mean_blob_spacing(WERD *word);
+  bool ignore_big_gap(TO_ROW* row,
+                      int32_t row_length,
+                      GAPMAP* gapmap,
+                      int16_t left,
+                      int16_t right);
+  //get bounding box
+  TBOX reduced_box_next(TO_ROW *row,     //current row
+                        BLOBNBOX_IT *it  //iterator to blobs
+                        );
+  TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, int16_t *left_above_xht);
+  // tordmain.cpp ///////////////////////////////////////////
+  float filter_noise_blobs(BLOBNBOX_LIST *src_list,
+                           BLOBNBOX_LIST *noise_list,
+                           BLOBNBOX_LIST *small_list,
+                           BLOBNBOX_LIST *large_list);
+  // Fixes the block so it obeys all the rules:
+  // Must have at least one ROW.
+  // Must have at least one WERD.
+  // WERDs contain a fake blob.
+  void cleanup_nontext_block(BLOCK* block);
+  void cleanup_blocks(bool clean_noise, BLOCK_LIST *blocks);
+  bool clean_noise_from_row(ROW* row);
+  void clean_noise_from_words(ROW *row);
+  // Remove outlines that are a tiny fraction in either width or height
+  // of the word height.
+  void clean_small_noise_from_words(ROW *row);
+  // Groups blocks by rotation, then, for each group, makes a WordGrid and calls
+  // TransferDiacriticsToWords to copy the diacritic blobs to the most
+  // appropriate words in the group of blocks. Source blobs are not touched.
+  void TransferDiacriticsToBlockGroups(BLOBNBOX_LIST* diacritic_blobs,
+                                       BLOCK_LIST* blocks);
+  // Places a copy of blobs that are near a word (after applying rotation to the
+  // blob) in the most appropriate word, unless there is doubt, in which case a
+  // blob can end up in two words. Source blobs are not touched.
+  void TransferDiacriticsToWords(BLOBNBOX_LIST *diacritic_blobs,
+                                 const FCOORD &rotation, WordGrid *word_grid);
+
+ public:
+  // makerow.cpp ///////////////////////////////////////////
+  BOOL_VAR_H(textord_single_height_mode, false,
+             "Script has no xheight, so use a single mode for horizontal text");
+  // tospace.cpp ///////////////////////////////////////////
+  BOOL_VAR_H(tosp_old_to_method, false, "Space stats use prechopping?");
+  BOOL_VAR_H(tosp_old_to_constrain_sp_kn, false,
+             "Constrain relative values of inter and intra-word gaps for "
+             "old_to_method.");
+  BOOL_VAR_H(tosp_only_use_prop_rows, true,
+             "Block stats to use fixed pitch rows?");
+  BOOL_VAR_H(tosp_force_wordbreak_on_punct, false,
+             "Force word breaks on punct to break long lines in non-space "
+             "delimited langs");
+  BOOL_VAR_H(tosp_use_pre_chopping, false,
+             "Space stats use prechopping?");
+  BOOL_VAR_H(tosp_old_to_bug_fix, false,
+             "Fix suspected bug in old code");
+  BOOL_VAR_H(tosp_block_use_cert_spaces, true,
+             "Only stat OBVIOUS spaces");
+  BOOL_VAR_H(tosp_row_use_cert_spaces, true,
+             "Only stat OBVIOUS spaces");
+  BOOL_VAR_H(tosp_narrow_blobs_not_cert, true,
+             "Only stat OBVIOUS spaces");
+  BOOL_VAR_H(tosp_row_use_cert_spaces1, true,
+             "Only stat OBVIOUS spaces");
+  BOOL_VAR_H(tosp_recovery_isolated_row_stats, true,
+             "Use row alone when inadequate cert spaces");
+  BOOL_VAR_H(tosp_only_small_gaps_for_kern, false, "Better guess");
+  BOOL_VAR_H(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?");
+  BOOL_VAR_H(tosp_fuzzy_limit_all, true,
+             "Don't restrict kn->sp fuzzy limit to tables");
+  BOOL_VAR_H(tosp_stats_use_xht_gaps, true,
+             "Use within xht gap for wd breaks");
+  BOOL_VAR_H(tosp_use_xht_gaps, true,
+             "Use within xht gap for wd breaks");
+  BOOL_VAR_H(tosp_only_use_xht_gaps, false,
+             "Only use within xht gap for wd breaks");
+  BOOL_VAR_H(tosp_rule_9_test_punct, false,
+             "Don't chng kn to space next to punct");
+  BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp, true, "Default flip");
+  BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn, true, "Default flip");
+  BOOL_VAR_H(tosp_improve_thresh, false,
+             "Enable improvement heuristic");
+  INT_VAR_H(tosp_debug_level, 0, "Debug data");
+  INT_VAR_H(tosp_enough_space_samples_for_median, 3,
+            "or should we use mean");
+  INT_VAR_H(tosp_redo_kern_limit, 10,
+            "No.samples reqd to reestimate for row");
+  INT_VAR_H(tosp_few_samples, 40,
+            "No.gaps reqd with 1 large gap to treat as a table");
+  INT_VAR_H(tosp_short_row, 20,
+            "No.gaps reqd with few cert spaces to use certs");
+  INT_VAR_H(tosp_sanity_method, 1, "How to avoid being silly");
+  double_VAR_H(tosp_old_sp_kn_th_factor, 2.0,
+               "Factor for defining space threshold in terms of space and "
+               "kern sizes");
+  double_VAR_H(tosp_threshold_bias1, 0,
+               "how far between kern and space?");
+  double_VAR_H(tosp_threshold_bias2, 0,
+               "how far between kern and space?");
+  double_VAR_H(tosp_narrow_fraction, 0.3,
+               "Fract of xheight for narrow");
+  double_VAR_H(tosp_narrow_aspect_ratio, 0.48,
+               "narrow if w/h less than this");
+  double_VAR_H(tosp_wide_fraction, 0.52, "Fract of xheight for wide");
+  double_VAR_H(tosp_wide_aspect_ratio, 0.0,
+               "wide if w/h less than this");
+  double_VAR_H(tosp_fuzzy_space_factor, 0.6,
+               "Fract of xheight for fuzz sp");
+  double_VAR_H(tosp_fuzzy_space_factor1, 0.5,
+               "Fract of xheight for fuzz sp");
+  double_VAR_H(tosp_fuzzy_space_factor2, 0.72,
+               "Fract of xheight for fuzz sp");
+  double_VAR_H(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern");
+  double_VAR_H(tosp_kern_gap_factor1, 2.0,
+               "gap ratio to flip kern->sp");
+  double_VAR_H(tosp_kern_gap_factor2, 1.3,
+               "gap ratio to flip kern->sp");
+  double_VAR_H(tosp_kern_gap_factor3, 2.5,
+               "gap ratio to flip kern->sp");
+  double_VAR_H(tosp_ignore_big_gaps, -1, "xht multiplier");
+  double_VAR_H(tosp_ignore_very_big_gaps, 3.5, "xht multiplier");
+  double_VAR_H(tosp_rep_space, 1.6, "rep gap multiplier for space");
+  double_VAR_H(tosp_enough_small_gaps, 0.65,
+               "Fract of kerns reqd for isolated row stats");
+  double_VAR_H(tosp_table_kn_sp_ratio, 2.25,
+               "Min difference of kn & sp in table");
+  double_VAR_H(tosp_table_xht_sp_ratio, 0.33,
+               "Expect spaces bigger than this");
+  double_VAR_H(tosp_table_fuzzy_kn_sp_ratio, 3.0,
+               "Fuzzy if less than this");
+  double_VAR_H(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg");
+  double_VAR_H(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg");
+  double_VAR_H(tosp_min_sane_kn_sp, 1.5,
+               "Don't trust spaces less than this time kn");
+  double_VAR_H(tosp_init_guess_kn_mult, 2.2,
+               "Thresh guess - mult kn by this");
+  double_VAR_H(tosp_init_guess_xht_mult, 0.28,
+               "Thresh guess - mult xht by this");
+  double_VAR_H(tosp_max_sane_kn_thresh, 5.0,
+               "Multiplier on kn to limit thresh");
+  double_VAR_H(tosp_flip_caution, 0.0,
+               "Don't autoflip kn to sp when large separation");
+  double_VAR_H(tosp_large_kerning, 0.19,
+               "Limit use of xht gap with large kns");
+  double_VAR_H(tosp_dont_fool_with_small_kerns, -1,
+               "Limit use of xht gap with odd small kns");
+  double_VAR_H(tosp_near_lh_edge, 0,
+               "Don't reduce box if the top left is non blank");
+  double_VAR_H(tosp_silly_kn_sp_gap, 0.2,
+               "Don't let sp minus kn get too small");
+  double_VAR_H(tosp_pass_wide_fuzz_sp_to_context, 0.75,
+               "How wide fuzzies need context");
+  // tordmain.cpp ///////////////////////////////////////////
+  BOOL_VAR_H(textord_no_rejects, false, "Don't remove noise blobs");
+  BOOL_VAR_H(textord_show_blobs, false, "Display unsorted blobs");
+  BOOL_VAR_H(textord_show_boxes, false, "Display boxes");
+  INT_VAR_H(textord_max_noise_size, 7, "Pixel size of noise");
+  INT_VAR_H(textord_baseline_debug, 0, "Baseline debug level");
+  double_VAR_H(textord_noise_area_ratio, 0.7,
+               "Fraction of bounding box for noise");
+  double_VAR_H(textord_initialx_ile, 0.75, "Ile of sizes for xheight guess");
+  double_VAR_H(textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess");
+  INT_VAR_H(textord_noise_sizefraction, 10, "Fraction of size for maxima");
+  double_VAR_H(textord_noise_sizelimit, 0.5, "Fraction of x for big t count");
+  INT_VAR_H(textord_noise_translimit, 16, "Transitions for normal blob");
+  double_VAR_H(textord_noise_normratio, 2.0, "Dot to norm ratio for deletion");
+  BOOL_VAR_H(textord_noise_rejwords, true, "Reject noise-like words");
+  BOOL_VAR_H(textord_noise_rejrows, true, "Reject noise-like rows");
+  double_VAR_H(textord_noise_syfract, 0.2, "xh fract error for norm blobs");
+  double_VAR_H(textord_noise_sxfract, 0.4,
+               "xh fract width error for norm blobs");
+  double_VAR_H(textord_noise_hfract, 1.0/64,
+               "Height fraction to discard outlines as speckle noise");
+  INT_VAR_H(textord_noise_sncount, 1, "super norm blobs to save row");
+  double_VAR_H(textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion");
+  BOOL_VAR_H(textord_noise_debug, false, "Debug row garbage detector");
+  double_VAR_H(textord_blshift_maxshift, 0.00, "Max baseline shift");
+  double_VAR_H(textord_blshift_xfraction, 9.99, "Min size of baseline shift");
+};
+
+} // namespace tesseract
+
+#endif  // TESSERACT_TEXTORD_TEXTORD_H_
diff --git a/tesseract/src/textord/topitch.cpp b/tesseract/src/textord/topitch.cpp
new file mode 100644
index 00000000..655f75bd
--- /dev/null
+++ b/tesseract/src/textord/topitch.cpp
@@ -0,0 +1,1847 @@
+/**********************************************************************
+ * File:        topitch.cpp  (Formerly to_pitch.c)
+ * Description: Code to determine fixed pitchness and the pitch if fixed.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+ // Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "topitch.h"
+
+#include "blobbox.h"
+#include "statistc.h"
+#include "drawtord.h"
+#include "makerow.h"
+#include "pitsync1.h"
+#include "pithsync.h"
+#include "tovars.h"
+#include "wordseg.h"
+
+#include "helpers.h"
+
+#include <memory>
+
+namespace tesseract {
+
+// Tunable parameters for the pitch detection code in this file.
+static BOOL_VAR (textord_all_prop, false, "All doc is proportional text");
+BOOL_VAR (textord_debug_pitch_test, false, "Debug on fixed pitch test");
+static BOOL_VAR (textord_disable_pitch_test, false,
+"Turn off dp fixed pitch algorithm");
+BOOL_VAR (textord_fast_pitch_test, false,
+"Do even faster pitch algorithm");
+BOOL_VAR (textord_debug_pitch_metric, false,
+"Write full metric stuff");
+BOOL_VAR (textord_show_row_cuts, false, "Draw row-level cuts");
+BOOL_VAR (textord_show_page_cuts, false, "Draw page-level cuts");
+BOOL_VAR (textord_pitch_cheat, false,
+"Use correct answer for fixed/prop");
+BOOL_VAR (textord_blockndoc_fixed, false,
+"Attempt whole doc/block fixed pitch");
+double_VAR (textord_projection_scale, 0.200, "Ding rate for mid-cuts");
+double_VAR (textord_balance_factor, 1.0,
+"Ding rate for unbalanced char cells");
+
+#define BLOCK_STATS_CLUSTERS  10 //max no of gap clusters per row.
+#define MAX_ALLOWED_PITCH 100    //max pixel pitch (top of pitches histogram).
+
+// qsort-style comparator for two floats (sorts ascending).
+// Returns 1 if *arg1 - *arg2 is positive, -1 if it is negative and 0
+// otherwise, so a NaN difference is reported as "equal".
+static int sort_floats(const void *arg1, const void *arg2) {
+  const float lhs = *reinterpret_cast<const float*>(arg1);
+  const float rhs = *reinterpret_cast<const float*>(arg2);
+  const float delta = lhs - rhs;
+  if (delta > 0) {
+    return 1;
+  }
+  return (delta < 0) ? -1 : 0;
+}
+
+/**********************************************************************
+ * compute_fixed_pitch
+ *
+ * Decide whether each row is fixed pitch individually.
+ * Correlate definite and uncertain results to obtain an individual
+ * result for each row in the TO_ROW class.
+ **********************************************************************/
+
+void compute_fixed_pitch(ICOORD page_tr,              // top right
+                         TO_BLOCK_LIST* port_blocks,  // input list
+                         float gradient,              // page skew
+                         FCOORD rotation,             // for drawing
+                         bool testing_on) {          // correct orientation
+#ifndef GRAPHICS_DISABLED
+  // Create the debug window up front if words are going to be displayed.
+  if (textord_show_initial_words && testing_on && to_win == nullptr) {
+    create_to_win(page_tr);
+  }
+#endif
+  TO_BLOCK_IT block_it;
+  block_it.set_to_list (port_blocks);
+  // Pass 1: compute the initial pitch estimates of every block.
+  int block_index = 1;
+  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
+       block_it.forward ()) {
+    compute_block_pitch(block_it.data (), rotation, block_index, testing_on);
+    block_index++;
+  }
+
+  // Pass 2: unless the whole document is called fixed pitch, try each
+  // block as a whole and, failing that, its rows one at a time.
+  if (!try_doc_fixed (page_tr, port_blocks, gradient)) {
+    block_index = 1;
+    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
+         block_it.forward ()) {
+      TO_BLOCK *block = block_it.data ();
+      if (!try_block_fixed (block, block_index)) {
+        try_rows_fixed(block, block_index, testing_on);
+      }
+      block_index++;
+    }
+  }
+
+  // Pass 3: settle a final decision for every row of every text block.
+  // Skipped non-text blocks do not advance block_index, which matches the
+  // block numbering used inside fix_row_pitch.
+  block_index = 1;
+  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
+       block_it.forward ()) {
+    TO_BLOCK *block = block_it.data ();
+    POLY_BLOCK* pb = block->block->pdblk.poly_block();
+    if (pb != nullptr && !pb->IsText()) {
+      continue;  // Non-text doesn't exist!
+    }
+    TO_ROW_IT row_it(block->get_rows());
+    int row_index = 1;
+    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+      fix_row_pitch(row_it.data (), block, port_blocks, row_index, block_index);
+      row_index++;
+    }
+    block_index++;
+  }
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_initial_words && testing_on) {
+    ScrollView::Update();
+  }
+#endif
+}
+
+
+/**********************************************************************
+ * fix_row_pitch
+ * Get a pitch_decision for bad_row, unless it is already definite, by voting
+ * among similar-height rows in its block, then in other blocks. A row ending
+ * up fixed pitch gets its spacing fields set from fixed_pitch.
+ **********************************************************************/
+
+void fix_row_pitch(TO_ROW *bad_row,        // row to fix
+                   TO_BLOCK *bad_block,    // block of bad_row (not used in body)
+                   TO_BLOCK_LIST *blocks,  // blocks to scan
+                   int32_t row_target,       // number of row (for messages)
+                   int32_t block_target) {   // number of block holding bad_row
+  int16_t mid_cuts;              //passed to tune_row_pitch; not read here
+  int block_votes;               //votes of similar rows in target block
+  int like_votes;                //votes of similar rows in other blocks
+  int other_votes;               //votes of rows of unlike height
+  int block_index;               //number of block
+  int row_index;                 //number of row (counted, never read)
+  int maxwidth;                  //max pitch
+  TO_BLOCK_IT block_it = blocks; //block iterator
+  TO_BLOCK *block;               //current block
+  TO_ROW *row;                   //current row
+  float sp_sd;                   //space deviation; passed to tune_row_pitch
+  STATS block_stats;             //pitches in block
+  STATS like_stats;              //pitches in page
+
+  block_votes = like_votes = other_votes = 0;
+  maxwidth = static_cast<int32_t>(ceil (bad_row->xheight * textord_words_maxspace));
+  if (bad_row->pitch_decision != PITCH_DEF_FIXED
+  && bad_row->pitch_decision != PITCH_DEF_PROP) {
+    block_stats.set_range (0, maxwidth);
+    like_stats.set_range (0, maxwidth);
+    block_index = 1;
+    for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+         block_it.forward()) {
+      block = block_it.data();
+      POLY_BLOCK* pb = block->block->pdblk.poly_block();
+      if (pb != nullptr && !pb->IsText()) continue;  // Non text doesn't exist!
+      row_index = 1;
+      TO_ROW_IT row_it(block->get_rows());
+      for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
+      row_it.forward ()) {
+        row = row_it.data ();
+        if ((bad_row->all_caps                    //all caps: compare xht+asc
+          && row->xheight + row->ascrise
+          <
+          (bad_row->xheight + bad_row->ascrise) * (1 +
+          textord_pitch_rowsimilarity)
+          && row->xheight + row->ascrise >
+          (bad_row->xheight + bad_row->ascrise) * (1 -
+          textord_pitch_rowsimilarity))
+          || (!bad_row->all_caps                  //otherwise compare xht only
+          && row->xheight <
+          bad_row->xheight * (1 + textord_pitch_rowsimilarity)
+          && row->xheight >
+        bad_row->xheight * (1 - textord_pitch_rowsimilarity))) {
+          if (block_index == block_target) {      //similar row, target block
+            if (row->pitch_decision == PITCH_DEF_FIXED) {
+              block_votes += textord_words_veto_power;
+              block_stats.add (static_cast<int32_t>(row->fixed_pitch),
+                textord_words_veto_power);
+            }
+            else if (row->pitch_decision == PITCH_MAYBE_FIXED
+            || row->pitch_decision == PITCH_CORR_FIXED) {
+              block_votes++;
+              block_stats.add (static_cast<int32_t>(row->fixed_pitch), 1);
+            }
+            else if (row->pitch_decision == PITCH_DEF_PROP)
+              block_votes -= textord_words_veto_power;
+            else if (row->pitch_decision == PITCH_MAYBE_PROP
+              || row->pitch_decision == PITCH_CORR_PROP)
+              block_votes--;
+          }
+          else {                                  //similar row, another block
+            if (row->pitch_decision == PITCH_DEF_FIXED) {
+              like_votes += textord_words_veto_power;
+              like_stats.add (static_cast<int32_t>(row->fixed_pitch),
+                textord_words_veto_power);
+            }
+            else if (row->pitch_decision == PITCH_MAYBE_FIXED
+            || row->pitch_decision == PITCH_CORR_FIXED) {
+              like_votes++;
+              like_stats.add (static_cast<int32_t>(row->fixed_pitch), 1);
+            }
+            else if (row->pitch_decision == PITCH_DEF_PROP)
+              like_votes -= textord_words_veto_power;
+            else if (row->pitch_decision == PITCH_MAYBE_PROP
+              || row->pitch_decision == PITCH_CORR_PROP)
+              like_votes--;
+          }
+        }
+        else {                                    //row of dissimilar height
+          if (row->pitch_decision == PITCH_DEF_FIXED)
+            other_votes += textord_words_veto_power;
+          else if (row->pitch_decision == PITCH_MAYBE_FIXED
+            || row->pitch_decision == PITCH_CORR_FIXED)
+            other_votes++;
+          else if (row->pitch_decision == PITCH_DEF_PROP)
+            other_votes -= textord_words_veto_power;
+          else if (row->pitch_decision == PITCH_MAYBE_PROP
+            || row->pitch_decision == PITCH_CORR_PROP)
+            other_votes--;
+        }
+        row_index++;
+      }
+      block_index++;                //not reached for skipped non-text blocks
+    }
+    if (block_votes > textord_words_veto_power) {  //block clearly votes fixed
+      bad_row->fixed_pitch = block_stats.ile (0.5);  //median block pitch
+      bad_row->pitch_decision = PITCH_CORR_FIXED;
+    }
+    else if (block_votes <= textord_words_veto_power && like_votes > 0) {
+      bad_row->fixed_pitch = like_stats.ile (0.5);   //median of other blocks
+      bad_row->pitch_decision = PITCH_CORR_FIXED;
+    }
+    else {
+      bad_row->pitch_decision = PITCH_CORR_PROP;
+      if (block_votes == 0 && like_votes == 0 && other_votes > 0
+        && (textord_debug_pitch_test || textord_debug_pitch_metric))
+        tprintf
+          ("Warning:row %d of block %d set prop with no like rows against trend\n",
+          row_target, block_target);
+    }
+  }
+  if (textord_debug_pitch_metric) {
+    tprintf(":b_votes=%d:l_votes=%d:o_votes=%d",
+            block_votes, like_votes, other_votes);
+    tprintf("x=%g:asc=%g\n", bad_row->xheight, bad_row->ascrise);
+  }
+  if (bad_row->pitch_decision == PITCH_CORR_FIXED) {
+    if (bad_row->fixed_pitch < textord_min_xheight) {  //pitch too small
+      if (block_votes > 0)
+        bad_row->fixed_pitch = block_stats.ile (0.5);
+      else if (block_votes == 0 && like_votes > 0)
+        bad_row->fixed_pitch = like_stats.ile (0.5);
+      else {
+        tprintf
+          ("Warning:guessing pitch as xheight on row %d, block %d\n",
+          row_target, block_target);
+        bad_row->fixed_pitch = bad_row->xheight;
+      }
+    }
+    if (bad_row->fixed_pitch < textord_min_xheight)    //still too small: clamp
+      bad_row->fixed_pitch = (float) textord_min_xheight;
+    bad_row->kern_size = bad_row->fixed_pitch / 4;     //spacing from pitch
+    bad_row->min_space = static_cast<int32_t>(bad_row->fixed_pitch * 0.6);
+    bad_row->max_nonspace = static_cast<int32_t>(bad_row->fixed_pitch * 0.4);
+    bad_row->space_threshold =
+      (bad_row->min_space + bad_row->max_nonspace) / 2;
+    bad_row->space_size = bad_row->fixed_pitch;
+    if (bad_row->char_cells.empty() && !bad_row->blob_list()->empty()) {
+      tune_row_pitch (bad_row, &bad_row->projection,   //no cells yet: make some
+        bad_row->projection_left, bad_row->projection_right,
+        (bad_row->fixed_pitch +
+        bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch,
+        sp_sd, mid_cuts, &bad_row->char_cells, false);
+    }
+  }
+  else if (bad_row->pitch_decision == PITCH_CORR_PROP
+  || bad_row->pitch_decision == PITCH_DEF_PROP) {
+    bad_row->fixed_pitch = 0.0f;    //proportional: no pitch and no cells
+    bad_row->char_cells.clear ();
+  }
+}
+
+
+/**********************************************************************
+ * compute_block_pitch
+ *
+ * Decide whether each block is fixed pitch individually.
+ **********************************************************************/
+
+void compute_block_pitch(TO_BLOCK* block,     // block to initialise
+                         FCOORD rotation,     // for drawing (unused here)
+                         int32_t block_index,   // block number
+                         bool testing_on) {  // correct orientation
+  TBOX block_box = block->block->pdblk.bounding_box ();
+  if (testing_on && textord_debug_pitch_test) {
+    tprintf ("Block %d at (%d,%d)->(%d,%d)\n", block_index,
+             block_box.left (), block_box.bottom (),
+             block_box.right (), block_box.top ());
+  }
+  // Seed the block with default spacing values scaled from its x-height,
+  // and with no fixed pitch.
+  block->min_space = static_cast<int32_t>(
+      floor (block->xheight * textord_words_default_minspace));
+  block->max_nonspace = static_cast<int32_t>(
+      ceil (block->xheight * textord_words_default_nonspace));
+  block->fixed_pitch = 0.0f;
+  block->space_size = static_cast<float>(block->min_space);
+  block->kern_size = static_cast<float>(block->max_nonspace);
+  block->pr_nonsp = block->xheight * words_default_prop_nonspace;
+  block->pr_space = block->pr_nonsp * textord_spacesize_ratioprop;
+
+  if (block->get_rows ()->empty ()) {
+    return;  // no rows to analyse
+  }
+  ASSERT_HOST (block->xheight > 0);
+  find_repeated_chars(block, textord_show_initial_words && testing_on);
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_initial_words && testing_on) {
+    ScrollView::Update();
+  }
+#endif
+  compute_rows_pitch(block, block_index,
+                     textord_debug_pitch_test && testing_on);
+}
+
+
+/**********************************************************************
+ * compute_rows_pitch
+ *
+ * Decide whether each row is fixed pitch individually.
+ **********************************************************************/
+
+bool compute_rows_pitch(                    //find line stats
+        TO_BLOCK* block,    //block to do
+        int32_t block_index,  //block number
+        bool testing_on    //correct orientation
+) {
+  // Estimate the pitch of each row in turn. The return value is always
+  // false; results are stored in the rows themselves.
+  TO_ROW_IT row_it = block->get_rows ();
+  int32_t row_index = 1;
+  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+    TO_ROW *row = row_it.data ();
+    ASSERT_HOST (row->xheight > 0);
+    row->compute_vertical_projection ();
+    // Widest gap considered, as a multiple of the row x-height.
+    const int32_t maxwidth =
+        static_cast<int32_t>(ceil (row->xheight * textord_words_maxspace));
+    // find_row_pitch only runs when row_pitch_stats succeeded.
+    const bool have_pitch =
+        row_pitch_stats (row, maxwidth, testing_on) &&
+        find_row_pitch (row, maxwidth, textord_dotmatrix_gap + 1, block,
+                        block_index, row_index, testing_on);
+    if (!have_pitch) {
+      row->fixed_pitch = 0.0f;   //insufficient data
+      row->pitch_decision = PITCH_DUNNO;
+    } else if (row->fixed_pitch == 0) {
+      // No fixed pitch: take the proportional estimates as the sizes.
+      const float prop_kern = row->pr_nonsp;
+      const float prop_space = row->pr_space;
+      row->space_size = prop_space;
+      row->kern_size = prop_kern;
+    }
+    row_index++;
+  }
+
+  return false;
+}
+
+
+/**********************************************************************
+ * try_doc_fixed
+ * Attempt to call the entire document fixed pitch. Only runs when
+ * textord_blockndoc_fixed is set, and currently always returns false.
+ **********************************************************************/
+
+bool try_doc_fixed(                             //determine pitch
+        ICOORD page_tr,              //top right
+        TO_BLOCK_LIST* port_blocks,  //input list
+        float gradient               //page skew
+) {
+  int16_t master_x;                //uniform shifts
+  int16_t pitch;                   //median pitch.
+  int x;                         //profile coord
+  int prop_blocks;               //correct counts (never updated, printed as 0)
+  int fixed_blocks;
+  int total_row_count;           //total in page
+                                 //iterator
+  TO_BLOCK_IT block_it = port_blocks;
+  TO_BLOCK *block;               //current block;
+  TO_ROW *row;                   //current row, later the master (first) row
+  int16_t projection_left;         //edges
+  int16_t projection_right;
+  int16_t row_left;                //edges of row
+  int16_t row_right;
+  float master_y;                //uniform shifts
+  float shift_factor;            //page skew correction
+  float final_pitch;             //output pitch
+  float row_y;                   //baseline
+  STATS projection;              //entire page
+  STATS pitches (0, MAX_ALLOWED_PITCH);
+  //for median
+  float sp_sd;                   //space sd
+  int16_t mid_cuts;                //no of cheap cuts
+  float pitch_sd;                //sync rating
+
+  // Give up if disabled, or if there is no first row to anchor the page on.
+  if (block_it.empty () || !textord_blockndoc_fixed
+    || block_it.data ()->get_rows ()->empty ())
+    return false;
+  shift_factor = gradient / (gradient * gradient + 1);
+  // row iterator
+  TO_ROW_IT row_it(block_it.data ()->get_rows());
+  master_x = row_it.data ()->projection_left;
+  master_y = row_it.data ()->baseline.y (master_x);
+  projection_left = INT16_MAX;
+  projection_right = -INT16_MAX;
+  prop_blocks = 0;
+  fixed_blocks = 0;
+  total_row_count = 0;
+
+  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
+  block_it.forward ()) {
+    block = block_it.data ();
+    row_it.set_to_list (block->get_rows ());
+    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+      row = row_it.data ();
+      total_row_count++;
+      if (row->fixed_pitch > 0)
+        pitches.add (static_cast<int32_t>(row->fixed_pitch), 1);
+      //find median
+      row_y = row->baseline.y (master_x);
+      row_left =
+        static_cast<int16_t>(row->projection_left -
+        shift_factor * (master_y - row_y));
+      row_right =
+        static_cast<int16_t>(row->projection_right -
+        shift_factor * (master_y - row_y));
+      if (row_left < projection_left)
+        projection_left = row_left;
+      if (row_right > projection_right)
+        projection_right = row_right;
+    }
+  }
+  if (pitches.get_total () == 0)
+    return false;                //no row has a positive fixed pitch
+  projection.set_range (projection_left, projection_right);
+
+  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
+  block_it.forward ()) {
+    block = block_it.data ();
+    row_it.set_to_list (block->get_rows ());
+    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+      row = row_it.data ();
+      row_y = row->baseline.y (master_x);
+      row_left =
+        static_cast<int16_t>(row->projection_left -
+        shift_factor * (master_y - row_y));
+      for (x = row->projection_left; x < row->projection_right;
+      x++, row_left++) {
+        projection.add (row_left, row->projection.pile_count (x));
+      }
+    }
+  }
+
+  row_it.set_to_list (block_it.data ()->get_rows ());
+  row = row_it.data ();          //master row: receives the trial cells
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_page_cuts && to_win != nullptr)
+    projection.plot (to_win, projection_left,
+      row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
+#endif
+  final_pitch = pitches.ile (0.5);
+  pitch = static_cast<int16_t>(final_pitch);
+  pitch_sd =
+    tune_row_pitch (row, &projection, projection_left, projection_right,
+    pitch * 0.75, final_pitch, sp_sd, mid_cuts,
+    &row->char_cells, false);
+
+  if (textord_debug_pitch_metric)
+    tprintf
+      ("try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
+      prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd,
+      pitch_sd / total_row_count, pitch_sd / pitch,
+      pitch_sd / total_row_count / pitch);
+
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_page_cuts && to_win != nullptr) {
+    float row_shift;               //shift for row
+    ICOORDELT_LIST *master_cells;  //cells for page
+    master_cells = &row->char_cells;
+    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
+    block_it.forward ()) {
+      block = block_it.data ();
+      row_it.set_to_list (block->get_rows ());
+      for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
+           row_it.forward ()) {
+        TO_ROW *plot_row = row_it.data ();  //local, so `row` stays the master
+        row_y = plot_row->baseline.y (master_x);
+        row_shift = shift_factor * (master_y - row_y);
+        plot_row_cells(to_win, ScrollView::GOLDENROD, plot_row, row_shift, master_cells);
+      }
+    }
+  }
+#endif
+  row->char_cells.clear ();      //discard the master row's trial cells
+  return false;
+}
+
+
+/**********************************************************************
+ * try_block_fixed
+ *
+ * Try to call the entire block fixed. Not implemented: always returns false.
+ **********************************************************************/
+
+bool try_block_fixed(                   //placeholder
+        TO_BLOCK* block,   //block to do (unused)
+        int32_t block_index  //block number (unused)
+) {
+  return false;  //caller (compute_fixed_pitch) then tries the rows singly
+}
+
+
+/**********************************************************************
+ * try_rows_fixed
+ *
+ * Decide whether each row is fixed pitch individually.
+ **********************************************************************/
+
+bool try_rows_fixed(                    //find line stats
+        TO_BLOCK* block,    //block to do
+        int32_t block_index,  //block number
+        bool testing_on    //correct orientation
+) {
+  // Step 1: re-test each row that currently has a positive fixed pitch.
+  // If the test accepts the row but leaves it with a zero pitch, use the
+  // row's proportional estimates for its space and kern sizes.
+  TO_ROW_IT row_it = block->get_rows ();
+  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+    TO_ROW *row = row_it.data ();
+    ASSERT_HOST (row->xheight > 0);
+    if (row->fixed_pitch > 0 &&
+        fixed_pitch_row(row, block->block, block_index) &&
+        row->fixed_pitch == 0) {
+      const float prop_kern = row->pr_nonsp;
+      const float prop_space = row->pr_space;
+      row->space_size = prop_space;
+      row->kern_size = prop_kern;
+    }
+  }
+
+  // Step 2: tally the rows by pitch decision.
+  int32_t def_fixed = 0;
+  int32_t def_prop = 0;
+  int32_t maybe_fixed = 0;
+  int32_t maybe_prop = 0;
+  int32_t corr_fixed = 0;
+  int32_t corr_prop = 0;
+  int32_t dunno = 0;
+  count_block_votes(block, def_fixed, def_prop, maybe_fixed, maybe_prop,
+                    corr_fixed, corr_prop, dunno);
+  const bool report_counts =
+      testing_on && (textord_debug_pitch_test ||
+                     textord_blocksall_prop || textord_blocksall_fixed);
+  if (report_counts) {
+    tprintf ("Initially:");
+    print_block_counts(block, block_index);
+  }
+
+  // Step 3: derive the block decision. One side must outnumber the other
+  // by more than a factor of textord_words_veto_power to win. Definite
+  // votes that fail to produce a winner leave the block undecided, and
+  // the maybe_* counts are only consulted when there are no definite votes.
+  // The corr_* and dunno counts play no part. Always returns false.
+  if (def_fixed > def_prop * textord_words_veto_power) {
+    block->pitch_decision = PITCH_DEF_FIXED;
+  } else if (def_prop > def_fixed * textord_words_veto_power) {
+    block->pitch_decision = PITCH_DEF_PROP;
+  } else if (def_fixed > 0 || def_prop > 0) {
+    block->pitch_decision = PITCH_DUNNO;
+  } else if (maybe_fixed > maybe_prop * textord_words_veto_power) {
+    block->pitch_decision = PITCH_MAYBE_FIXED;
+  } else if (maybe_prop > maybe_fixed * textord_words_veto_power) {
+    block->pitch_decision = PITCH_MAYBE_PROP;
+  } else {
+    block->pitch_decision = PITCH_DUNNO;
+  }
+  return false;
+}
+
+
+/**********************************************************************
+ * print_block_counts
+ *
+ * Count up how many rows have what decision and print the results.
+ **********************************************************************/
+
+void print_block_counts(                   //find line stats
+                        TO_BLOCK *block,   //block to do
+                        int32_t block_index  //block number
+                       ) {
+  // Tally the rows of the block by pitch decision.
+  int32_t def_fixed = 0;
+  int32_t maybe_fixed = 0;
+  int32_t corr_fixed = 0;
+  int32_t def_prop = 0;
+  int32_t maybe_prop = 0;
+  int32_t corr_prop = 0;
+  int32_t dunno = 0;
+  count_block_votes(block, def_fixed, def_prop, maybe_fixed, maybe_prop,
+                    corr_fixed, corr_prop, dunno);
+  // "(Wrongly)" flags counts that contradict the textord_blocksall_prop /
+  // textord_blocksall_fixed settings.
+  const bool any_fixed = def_fixed || maybe_fixed || corr_fixed;
+  const bool any_prop = def_prop || maybe_prop || corr_prop;
+  tprintf ("Block %d has (%d,%d,%d)",
+           block_index, def_fixed, maybe_fixed, corr_fixed);
+  if (textord_blocksall_prop && any_fixed) {
+    tprintf (" (Wrongly)");
+  }
+  tprintf (" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop);
+  if (textord_blocksall_fixed && any_prop) {
+    tprintf (" (Wrongly)");
+  }
+  tprintf (" prop, %d dunno\n", dunno);
+}
+
+
+/**********************************************************************
+ * count_block_votes
+ *
+ * Count the number of rows in the block with each kind of pitch_decision.
+ **********************************************************************/
+
+void count_block_votes(                   //find line stats
+                       TO_BLOCK *block,   //block to do
+                       int32_t &def_fixed,  //add to counts
+                       int32_t &def_prop,
+                       int32_t &maybe_fixed,
+                       int32_t &maybe_prop,
+                       int32_t &corr_fixed,
+                       int32_t &corr_prop,
+                       int32_t &dunno) {
+  // Each row adds one to the counter matching its pitch_decision. The
+  // counters are incremented, not reset, so the caller must initialise them.
+  TO_ROW_IT row_it = block->get_rows ();
+  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+    int32_t *counter = nullptr;  // stays null for an unlisted decision
+    switch (row_it.data ()->pitch_decision) {
+      // definite decisions
+      case PITCH_DEF_FIXED:
+        counter = &def_fixed; break;
+      case PITCH_DEF_PROP:
+        counter = &def_prop; break;
+      // tentative decisions
+      case PITCH_MAYBE_FIXED:
+        counter = &maybe_fixed; break;
+      case PITCH_MAYBE_PROP:
+        counter = &maybe_prop; break;
+      // corrected decisions
+      case PITCH_CORR_FIXED:
+        counter = &corr_fixed; break;
+      case PITCH_CORR_PROP:
+        counter = &corr_prop; break;
+      // undecided
+      case PITCH_DUNNO:
+        counter = &dunno; break;
+    }
+    if (counter != nullptr) {
+      ++*counter;
+    }
+  }
+}
+
+
+/**********************************************************************
+ * row_pitch_stats
+ * Collect the gaps between blobs of the row, cluster them and take the
+ * cluster medians. Returns false if there are no gaps or no clusters.
+ **********************************************************************/
+
+bool row_pitch_stats(                  //estimate row gap sizes
+        TO_ROW* row,      //row to measure; pr_*/fp_* fields are written
+        int32_t maxwidth,   //gaps of this size or more are ignored
+        bool testing_on  //print debug output
+) {
+  // Builds a histogram of the gaps between blobs on the row, clusters it,
+  // and uses the sorted cluster medians to fill in initial estimates of the
+  // proportional (pr_nonsp/pr_space) and fixed pitch (fp_nonsp/fp_space)
+  // gap sizes. Returns false if there were no gaps or no clusters.
+  float gap_medians[BLOCK_STATS_CLUSTERS];  //median gap of each cluster
+  BLOBNBOX_IT blob_it = row->blob_list ();
+  STATS gap_stats (0, maxwidth);
+  STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
+
+  const int32_t smooth_factor =
+    static_cast<int32_t>(row->xheight * textord_wordstats_smooth_factor + 1.5);
+
+  // Collect gaps from the right edge of one blob to the left edge of the
+  // next, skipping blobs flagged as joined to their predecessor.
+  if (!blob_it.empty ()) {
+    int32_t last_right = blob_it.data ()->bounding_box ().right ();
+    for (blob_it.forward (); !blob_it.at_first (); blob_it.forward ()) {
+      BLOBNBOX *blob = blob_it.data ();
+      if (blob->joined_to_prev ())
+        continue;
+      TBOX box = blob->bounding_box ();
+      if (box.left () - last_right < maxwidth)
+        gap_stats.add (box.left () - last_right, 1);
+      last_right = box.right ();
+    }
+  }
+  if (gap_stats.get_total () == 0) {
+    return false;
+  }
+
+  // Re-run the clustering until it stops yielding more clusters or the
+  // cluster limit is reached.
+  float lower = row->xheight * words_initial_lower;
+  float upper = row->xheight * words_initial_upper;
+  gap_stats.smooth (smooth_factor);
+  int32_t cluster_count = 0;
+  for (;;) {
+    const int32_t previous_count = cluster_count;
+    cluster_count = gap_stats.cluster (lower, upper,
+      textord_spacesize_ratioprop,
+      BLOCK_STATS_CLUSTERS, cluster_stats);
+    if (cluster_count <= previous_count
+      || cluster_count >= BLOCK_STATS_CLUSTERS)
+      break;
+  }
+  if (cluster_count < 1) {
+    return false;
+  }
+
+  // cluster_stats is indexed from 1, gap_medians from 0.
+  int idx;
+  for (idx = 0; idx < cluster_count; idx++)
+    gap_medians[idx] = cluster_stats[idx + 1].ile (0.5);
+  if (testing_on) {
+    tprintf ("cluster_count=%d:", cluster_count);
+    for (idx = 0; idx < cluster_count; idx++)
+      tprintf (" %g(%d)", gap_medians[idx],
+        cluster_stats[idx + 1].get_total ());
+    tprintf ("\n");
+  }
+  qsort (gap_medians, cluster_count, sizeof (float), sort_floats);
+
+  // Proportional model: the non-space is the largest median below the
+  // non-space threshold, the space is the first median at or above the
+  // minimum space threshold.
+  lower = row->xheight * words_default_prop_nonspace;
+  upper = row->xheight * textord_words_min_minspace;
+  idx = 0;
+  while (idx < cluster_count && gap_medians[idx] < lower)
+    idx++;
+  if (idx > 0) {
+    row->pr_nonsp = gap_medians[idx - 1];
+    while (idx < cluster_count && gap_medians[idx] < upper)
+      idx++;
+    if (idx < cluster_count) {
+      row->pr_space = gap_medians[idx];
+    }
+    else {
+      if (testing_on)
+        tprintf ("No clusters above nonspace threshold!!\n");
+      row->pr_space = lower * textord_spacesize_ratioprop;
+    }
+  }
+  else {
+    if (testing_on)
+      tprintf ("No clusters below nonspace threshold!!\n");
+    if (cluster_count > 1) {
+      row->pr_nonsp = gap_medians[0];
+      row->pr_space = gap_medians[1];
+    }
+    else {
+      row->pr_nonsp = lower;
+      row->pr_space = gap_medians[0];
+    }
+  }
+
+  // Fixed pitch model: split the medians at the default fixed space size.
+  upper = row->xheight * words_default_fixed_space;
+  idx = 0;
+  while (idx < cluster_count && gap_medians[idx] < upper)
+    idx++;
+  if (idx > 0) {
+    row->fp_nonsp = gap_medians[idx - 1];
+    if (idx < cluster_count) {
+      row->fp_space = gap_medians[idx];
+    }
+    else {
+      if (testing_on)
+        tprintf ("No clusters above space threshold!!\n");
+      row->fp_space = row->xheight;
+    }
+  }
+  else {
+    if (testing_on)
+      tprintf ("No clusters below space threshold!!\n");
+    row->fp_nonsp = upper;
+    row->fp_space = gap_medians[0];
+  }
+
+  if (testing_on) {
+    tprintf
+      ("Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, fp_space=%g\n",
+      row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space);
+  }
+  return true;                   //computed some stats
+}
+
+
+/**********************************************************************
+ * find_row_pitch
+ *
+ * Check to see if this row could be fixed pitch using the given spacings.
+ * Blobs with gaps smaller than the lower threshold are assumed to be one.
+ * The larger threshold is the word gap threshold.
+ **********************************************************************/
+
+bool find_row_pitch(                    //estimate row pitch
+        TO_ROW* row,        //row to do
+        int32_t maxwidth,     //max permitted space
+        int32_t dm_gap,       //ignorable gaps
+        TO_BLOCK* block,    //block of row
+        int32_t block_index,  //block_number
+        int32_t row_index,    //number of row
+        bool testing_on    //print debug output
+) {
+  // Estimates the pitch of the row twice with count_pitch_stats: once
+  // merging blobs whose gap is below dm_gap (the "dm" model) and once with
+  // no merging. The model whose pitch IQR is smaller relative to its gap
+  // IQR is kept, and the row is marked PITCH_MAYBE_FIXED or
+  // PITCH_MAYBE_PROP accordingly. Returns false, with PITCH_DUNNO, when both
+  // pitch IQRs exceed maxwidth.
+  bool used_dm_model;           //looks like dot matrix
+  float min_space;               //estimate threshold
+  float non_space;               //gap size
+  float gap_iqr;                 //interquartile range
+  float pitch_iqr;
+  float dm_gap_iqr;              //interquartile range
+  float dm_pitch_iqr;
+  float dm_pitch;                //pitch with dm on
+  float pitch;                   //revised estimate
+  float initial_pitch;           //guess at pitch
+  STATS gap_stats (0, maxwidth);
+                                 //centre-centre
+  STATS pitch_stats (0, maxwidth);
+
+  // Start from the fixed pitch estimates already stored on the row, keeping
+  // the pitch no bigger than the limit and the non-space no bigger than the
+  // pitch.
+  row->fixed_pitch = 0.0f;
+  initial_pitch = row->fp_space;
+  if (initial_pitch > row->xheight * (1 + words_default_fixed_limit))
+    initial_pitch = row->xheight;//keep pitch decent
+  non_space = row->fp_nonsp;
+  if (non_space > initial_pitch)
+    non_space = initial_pitch;
+  min_space = (initial_pitch + non_space) / 2;
+
+  // First pass: dm model (gaps below dm_gap are merged away). On failure
+  // use a tiny gap IQR and a pitch IQR above maxwidth as placeholders.
+  if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
+  initial_pitch, min_space, true, false, dm_gap)) {
+    dm_gap_iqr = 0.0001f;
+    dm_pitch_iqr = maxwidth * 2.0f;
+    dm_pitch = initial_pitch;
+  }
+  else {
+    dm_gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
+    dm_pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
+    dm_pitch = pitch_stats.ile (0.5);
+  }
+  // Second pass: same measurement with no blob merging (dm_gap of 0).
+  gap_stats.clear ();
+  pitch_stats.clear ();
+  if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
+  initial_pitch, min_space, true, false, 0)) {
+    gap_iqr = 0.0001f;
+    pitch_iqr = maxwidth * 3.0f;
+  }
+  else {
+    gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
+    pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
+    if (testing_on)
+      tprintf
+        ("First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
+        initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
+    initial_pitch = pitch_stats.ile (0.5);
+    // If the measured pitch came out below the space threshold, measure
+    // again using the pitch itself as the threshold. Note that the stats
+    // are overwritten by this call whether or not it succeeds.
+    if (min_space > initial_pitch
+      && count_pitch_stats (row, &gap_stats, &pitch_stats,
+    initial_pitch, initial_pitch, true, false, 0)) {
+      min_space = initial_pitch;
+      gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
+      pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
+      if (testing_on)
+        tprintf
+          ("Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
+          initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
+      initial_pitch = pitch_stats.ile (0.5);
+    }
+  }
+  // Debug line: 'D' = dunno, 'S' = non dm model chosen, 'M' = dm model.
+  if (textord_debug_pitch_metric)
+    tprintf("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:",
+            block_index, row_index, 'X',
+            pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
+            pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ? 'D' :
+              (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ? 'S' : 'M'));
+  if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
+    row->pitch_decision = PITCH_DUNNO;
+    if (textord_debug_pitch_metric)
+      tprintf ("\n");
+    return false;                //insufficient data
+  }
+  // Cross-multiplied comparison of pitch_iqr/gap_iqr against
+  // dm_pitch_iqr/dm_gap_iqr, which avoids dividing by either gap IQR.
+  if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
+    if (testing_on)
+      tprintf
+        ("Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
+        pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
+    // Re-read the values from the stats as left by the last
+    // count_pitch_stats call.
+    gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
+    pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
+    pitch = pitch_stats.ile (0.5);
+    used_dm_model = false;
+  }
+  else {
+    if (testing_on)
+      tprintf
+        ("Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
+        pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
+    gap_iqr = dm_gap_iqr;
+    pitch_iqr = dm_pitch_iqr;
+    pitch = dm_pitch;
+    used_dm_model = true;
+  }
+  // Debug line: 'F' / 'P' mirrors the fixed / proportional test below.
+  if (textord_debug_pitch_metric) {
+    tprintf ("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:",
+      pitch_iqr, gap_iqr, pitch);
+    tprintf ("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:",
+      pitch_iqr / gap_iqr, pitch_iqr / block->xheight,
+      pitch_iqr < gap_iqr * textord_fpiqr_ratio
+      && pitch_iqr < block->xheight * textord_max_pitch_iqr
+      && pitch < block->xheight * textord_words_default_maxspace
+      ? 'F' : 'P');
+  }
+  // Maybe fixed only if the pitch varies little relative to both the gap
+  // variation and the block xheight, and the pitch itself is not too big.
+  if (pitch_iqr < gap_iqr * textord_fpiqr_ratio
+    && pitch_iqr < block->xheight * textord_max_pitch_iqr
+    && pitch < block->xheight * textord_words_default_maxspace)
+    row->pitch_decision = PITCH_MAYBE_FIXED;
+  else
+    row->pitch_decision = PITCH_MAYBE_PROP;
+  row->fixed_pitch = pitch;
+  // NOTE(review): gap_stats holds the most recent dm_gap=0 measurement at
+  // this point, even when the dm model was chosen above - confirm intended.
+  row->kern_size = gap_stats.ile (0.5);
+  // The cast applies to the sum, so the division by 2 is an integer one.
+  row->min_space = static_cast<int32_t>(row->fixed_pitch + non_space) / 2;
+  if (row->min_space > row->fixed_pitch)
+    row->min_space = static_cast<int32_t>(row->fixed_pitch);
+  row->max_nonspace = row->min_space;
+  row->space_size = row->fixed_pitch;
+  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
+  row->used_dm_model = used_dm_model;
+  return true;
+}
+
+
+/**********************************************************************
+ * fixed_pitch_row
+ *
+ * Make the pitch decision for a row whose fixed_pitch has been estimated.
+ * Rows in non-text blocks (or all rows if textord_all_prop is set) are
+ * marked definitely proportional. Otherwise the pitch is refined with
+ * tune_row_pitch and the resulting sd is tested against the thresholds.
+ **********************************************************************/
+
+bool fixed_pitch_row(TO_ROW* row,       // row to do
+                     BLOCK* block,      // block of row, may be nullptr
+                     int32_t block_index  // block_number
+) {
+  // Sets row->pitch_decision. Rows in a block whose poly block is not text,
+  // or all rows when textord_all_prop is set, become PITCH_DEF_PROP without
+  // further testing. Otherwise tune_row_pitch refines row->fixed_pitch and
+  // fills row->char_cells, and the returned sd is compared with the
+  // thresholds below. Always returns true.
+  const char *res_string;        // pitch result
+  // Initialised because tune_row_pitch can return early without writing its
+  // mid cuts out-parameter, and the value is tested below.
+  int16_t mid_cuts = 0;            // no of cheap cuts
+  float non_space;               // gap size
+  float pitch_sd;                // error on pitch
+  float sp_sd = 0.0f;            // space sd
+
+  non_space = row->fp_nonsp;
+  if (non_space > row->fixed_pitch)
+    non_space = row->fixed_pitch;
+  POLY_BLOCK* pb = block != nullptr ? block->pdblk.poly_block() : nullptr;
+  if (textord_all_prop || (pb != nullptr && !pb->IsText())) {
+    // Set the decision to definitely proportional.
+    pitch_sd = textord_words_def_prop * row->fixed_pitch;
+    row->pitch_decision = PITCH_DEF_PROP;
+  } else {
+    // The space size passed in is weighted 3:1 towards the non-space gap.
+    pitch_sd = tune_row_pitch (row, &row->projection, row->projection_left,
+                               row->projection_right,
+                               (row->fixed_pitch + non_space * 3) / 4,
+                               row->fixed_pitch, sp_sd, mid_cuts,
+                               &row->char_cells,
+                               block_index == textord_debug_block);
+    // Fixed candidates: small sd, and for pitsync version 3 additionally
+    // either the dm model or a large enough sp_sd.
+    if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch
+      && ((pitsync_linear_version & 3) < 3
+      || ((pitsync_linear_version & 3) >= 3 && (row->used_dm_model
+      || sp_sd > 20
+    || (pitch_sd == 0 && sp_sd > 10))))) {
+      if (pitch_sd < textord_words_def_fixed * row->fixed_pitch
+        && !row->all_caps
+        && ((pitsync_linear_version & 3) < 3 || sp_sd > 20))
+        row->pitch_decision = PITCH_DEF_FIXED;
+      else
+        row->pitch_decision = PITCH_MAYBE_FIXED;
+    }
+    else if ((pitsync_linear_version & 3) < 3
+      || sp_sd > 20
+      || mid_cuts > 0
+      || pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) {
+      if (pitch_sd < textord_words_def_prop * row->fixed_pitch)
+        row->pitch_decision = PITCH_MAYBE_PROP;
+      else
+        row->pitch_decision = PITCH_DEF_PROP;
+    }
+    else
+      row->pitch_decision = PITCH_DUNNO;
+  }
+
+  // Finish the debug line with a two letter code for the decision.
+  if (textord_debug_pitch_metric) {
+    res_string = "??";
+    switch (row->pitch_decision) {
+      case PITCH_DEF_PROP:
+        res_string = "DP";
+        break;
+      case PITCH_MAYBE_PROP:
+        res_string = "MP";
+        break;
+      case PITCH_DEF_FIXED:
+        res_string = "DF";
+        break;
+      case PITCH_MAYBE_FIXED:
+        res_string = "MF";
+        break;
+      default:
+        res_string = "??";
+    }
+    tprintf (":sd/p=%g:occ=%g:init_res=%s\n",
+      pitch_sd / row->fixed_pitch, sp_sd, res_string);
+  }
+  return true;
+}
+
+
+/**********************************************************************
+ * count_pitch_stats
+ *
+ * Count up the gap and pitch stats on the row to see if it is fixed pitch.
+ * Blobs with gaps smaller than the lower threshold are assumed to be one.
+ * The larger threshold is the word gap threshold.
+ * The return value indicates whether there were any decent values to use.
+ **********************************************************************/
+
+bool count_pitch_stats(                       //gather gap/pitch stats
+        TO_ROW* row,           //row to measure
+        STATS* gap_stats,      //out: gaps between units
+        STATS* pitch_stats,    //out: centre-centre distances
+        float initial_pitch,   //guess at pitch
+        float min_space,       //gaps this big break the sequence
+        bool ignore_outsize,  //discard big objects
+        bool split_outsize,   //split big objects
+        int32_t dm_gap           //gaps below this are merged away
+) {
+  // Walks the blobs of the row, merging neighbours separated by less than
+  // dm_gap into one unit, and records for each pair of consecutive usable
+  // units the gap between them and the distance between their centres.
+  // Both stats are cleared first. Returns true if at least 3 gaps were
+  // recorded.
+  BLOBNBOX_IT blob_it = row->blob_list ();
+
+  gap_stats->clear ();
+  pitch_stats->clear ();
+  if (blob_it.empty ())
+    return false;
+
+  bool chain_ok = false;         //previous unit usable as a reference
+  int32_t last_centre = 0;       //centre of the previous unit
+  int32_t last_right = 0;        //right edge of the previous unit
+  TBOX merged_box = blob_it.data ()->bounding_box ();
+  do {
+    blob_it.forward ();
+    BLOBNBOX *blob = blob_it.data ();
+    if (blob->joined_to_prev ())
+      continue;                  //goes straight to the loop test
+
+    TBOX blob_box = blob->bounding_box ();
+    // Wrapping round to the first blob always ends the current unit,
+    // unless that blob has no cblob.
+    const bool close_enough =
+      blob_box.left () - merged_box.right () < dm_gap
+      && !blob_it.at_first ();
+    if (close_enough || blob->cblob() == nullptr) {
+      merged_box += blob_box;    //merge blobs
+      continue;
+    }
+
+    // The unit is complete. extra_cells is the number of pitch cells it
+    // spans beyond the first, or -1 if the unit is to be ignored.
+    const int32_t unit_width = merged_box.width ();
+    int32_t extra_cells;
+    if (split_outsize) {
+      extra_cells =
+        static_cast<int32_t>(floor (static_cast<float>(unit_width) / initial_pitch + 0.5));
+      extra_cells = extra_cells < 1 ? 0 : extra_cells - 1;
+    }
+    else if (ignore_outsize) {
+      const float rel_width = static_cast<float>(unit_width) / initial_pitch;
+      extra_cells = rel_width < 1 + words_default_fixed_limit
+        && rel_width > 1 - words_default_fixed_limit ? 0 : -1;
+    }
+    else {
+      extra_cells = 0;           //everything in
+    }
+    const bool usable = extra_cells >= 0;
+    // Centre of the first cell in the unit.
+    const int32_t x_centre = static_cast<int32_t>(merged_box.left ()
+      + (unit_width -
+      extra_cells * initial_pitch) / 2);
+    if (chain_ok && usable) {
+      gap_stats->add (merged_box.left () - last_right, 1);
+      pitch_stats->add (x_centre - last_centre, 1);
+    }
+    // Centre of the last cell in the unit, for the next measurement.
+    last_centre = static_cast<int32_t>(x_centre + extra_cells * initial_pitch);
+    last_right = merged_box.right ();
+    chain_ok = blob_box.left () - merged_box.right () < min_space
+      && usable;
+    merged_box = blob_box;
+  }
+  while (!blob_it.at_first ());
+  return gap_stats->get_total () >= 3;
+}
+
+
+/**********************************************************************
+ * tune_row_pitch
+ *
+ * Use a dp algorithm to fit the character cells and return the sd of
+ * the cell size over the row.
+ **********************************************************************/
+
+float tune_row_pitch(                             //find fp cells
+        TO_ROW* row,                 //row to do
+        STATS* projection,           //vertical projection
+        int16_t projection_left,       //edge of projection
+        int16_t projection_right,      //edge of projection
+        float space_size,            //size of blank
+        float& initial_pitch,        //in: guess at pitch, out: best pitch
+        float& best_sp_sd,           //out: space sd
+        int16_t& best_mid_cuts,        //out: no of cheap cuts
+        ICOORDELT_LIST* best_cells,  //out: row cells
+        bool testing_on             //print debug output
+) {
+  // Evaluates compute_pitch_sd at the initial pitch and at whole-pixel
+  // offsets of up to textord_pitch_range either side of it, keeping the
+  // pitch with the smallest sd. Each direction is abandoned as soon as the
+  // sd rises above the sd of the initial pitch. Returns the best sd.
+  // Delegates to tune_row_pitch2 when textord_fast_pitch_test is set.
+  int pitch_delta;               //offset pitch
+  int16_t mid_cuts;                //cheap cuts
+  float pitch_sd;                //current sd
+  float best_sd;                 //best result
+  float best_pitch;              //pitch for best result
+  float initial_sd;              //starting error
+  float sp_sd;                   //space sd
+  ICOORDELT_LIST test_cells;     //row cells
+  ICOORDELT_IT best_it;          //start of best list
+
+  if (textord_fast_pitch_test)
+    return tune_row_pitch2 (row, projection, projection_left,
+      projection_right, space_size, initial_pitch,
+      best_sp_sd,
+    //space sd
+      best_mid_cuts, best_cells, testing_on);
+  if (textord_disable_pitch_test) {
+    // No fit is done, so give every out-parameter a defined value. The
+    // caller tests best_mid_cuts after this call.
+    best_sp_sd = initial_pitch;
+    best_mid_cuts = 0;
+    return initial_pitch;
+  }
+  initial_sd =
+    compute_pitch_sd(row,
+                     projection,
+                     projection_left,
+                     projection_right,
+                     space_size,
+                     initial_pitch,
+                     best_sp_sd,
+                     best_mid_cuts,
+                     best_cells,
+                     testing_on);
+  best_sd = initial_sd;
+  best_pitch = initial_pitch;
+  if (testing_on)
+    tprintf ("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
+  // Try larger pitches.
+  for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
+    pitch_sd =
+      compute_pitch_sd (row, projection, projection_left, projection_right,
+      space_size, initial_pitch + pitch_delta, sp_sd,
+      mid_cuts, &test_cells, testing_on);
+    if (testing_on)
+      tprintf ("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta,
+        pitch_sd);
+    if (pitch_sd < best_sd) {
+      best_sd = pitch_sd;
+      best_mid_cuts = mid_cuts;
+      best_sp_sd = sp_sd;
+      best_pitch = initial_pitch + pitch_delta;
+      // Replace the best cells with the cells from this trial.
+      best_cells->clear ();
+      best_it.set_to_list (best_cells);
+      best_it.add_list_after (&test_cells);
+    }
+    else
+      test_cells.clear ();
+    if (pitch_sd > initial_sd)
+      break;                     //getting worse
+  }
+  // Try smaller pitches.
+  for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) {
+    pitch_sd =
+      compute_pitch_sd (row, projection, projection_left, projection_right,
+      space_size, initial_pitch - pitch_delta, sp_sd,
+      mid_cuts, &test_cells, testing_on);
+    if (testing_on)
+      tprintf ("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta,
+        pitch_sd);
+    if (pitch_sd < best_sd) {
+      best_sd = pitch_sd;
+      best_mid_cuts = mid_cuts;
+      best_sp_sd = sp_sd;
+      best_pitch = initial_pitch - pitch_delta;
+      best_cells->clear ();
+      best_it.set_to_list (best_cells);
+      best_it.add_list_after (&test_cells);
+    }
+    else
+      test_cells.clear ();
+    if (pitch_sd > initial_sd)
+      break;                     //getting worse
+  }
+  initial_pitch = best_pitch;
+
+  if (textord_debug_pitch_metric)
+    print_pitch_sd(row,
+                   projection,
+                   projection_left,
+                   projection_right,
+                   space_size,
+                   best_pitch);
+
+  return best_sd;
+}
+
+
+/**********************************************************************
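+ * tune_row_pitch2
+ *
+ * Fast alternative to tune_row_pitch. Picks the pitch near the initial
+ * guess whose folded projection has the lowest minimum, then fits the
+ * character cells once with compute_pitch_sd and returns the sd of
+ * the cell size over the row.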
+ **********************************************************************/
+
+float tune_row_pitch2(                             //find fp cells
+        TO_ROW* row,                 //row to do
+        STATS* projection,           //vertical projection
+        int16_t projection_left,       //edge of projection
+        int16_t projection_right,      //edge of projection
+        float space_size,            //size of blank
+        float& initial_pitch,        //in: guess at pitch, out: best pitch
+        float& best_sp_sd,           //out: space sd
+        int16_t& best_mid_cuts,        //out: no of cheap cuts
+        ICOORDELT_LIST* best_cells,  //out: row cells
+        bool testing_on             //print debug output
+) {
+  // For every integer pitch within textord_pitch_range of the initial
+  // pitch, folds the projection modulo that pitch. The pitch and offset
+  // with the smallest folded count are taken as the best pitch and the
+  // best cut position, and compute_pitch_sd is then run once at that pitch
+  // with the [start, end] range of low counts around the best offset.
+  int pitch_delta;               //offset pitch
+  int16_t pixel;                   //pixel coord
+  int16_t best_pixel;              //pixel coord
+  int16_t best_delta;              //best pitch
+  int16_t best_pitch;              //best pitch
+  int16_t start;                   //of good range
+  int16_t end;                     //of good range
+  int32_t best_count;              //lowest sum
+  float best_sd;                 //best result
+
+  // Give the out-parameters defined values in case of the early return
+  // below. On the full path compute_pitch_sd overwrites both.
+  best_sp_sd = initial_pitch;
+  best_mid_cuts = 0;
+
+  best_pitch = static_cast<int>(initial_pitch);
+  // The pitch must exceed the range so that every candidate pitch
+  // (best_pitch + pitch_delta) used as a modulus below is at least 1.
+  if (textord_disable_pitch_test || best_pitch <= textord_pitch_range) {
+    return initial_pitch;
+  }
+  std::unique_ptr<STATS[]> sum_proj(new STATS[textord_pitch_range * 2 + 1]); //summed projection
+
+  // sum_proj[textord_pitch_range + d] is the projection folded at pitch
+  // best_pitch + d.
+  for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
+    pitch_delta++)
+  sum_proj[textord_pitch_range + pitch_delta].set_range (0,
+      best_pitch +
+      pitch_delta + 1);
+  for (pixel = projection_left; pixel <= projection_right; pixel++) {
+    for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
+         pitch_delta++) {
+      sum_proj[textord_pitch_range + pitch_delta].add(
+          (pixel - projection_left) % (best_pitch + pitch_delta),
+          projection->pile_count(pixel));
+    }
+  }
+  // Find the pitch and offset with the lowest folded count. Ties keep the
+  // earliest candidate, starting from offset 0 at the initial pitch.
+  best_count = sum_proj[textord_pitch_range].pile_count (0);
+  best_delta = 0;
+  best_pixel = 0;
+  for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
+  pitch_delta++) {
+    for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
+      if (sum_proj[textord_pitch_range + pitch_delta].pile_count (pixel)
+      < best_count) {
+        best_count =
+          sum_proj[textord_pitch_range +
+          pitch_delta].pile_count (pixel);
+        best_delta = pitch_delta;
+        best_pixel = pixel;
+      }
+    }
+  }
+  if (testing_on)
+    tprintf ("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n",
+      initial_pitch, best_delta, best_count);
+  best_pitch += best_delta;
+  initial_pitch = best_pitch;
+  // Threshold for the good range is 2 * (best_count + 1).
+  best_count++;
+  best_count += best_count;
+  // Extend the range either side of the best offset while the folded
+  // count stays at or below the threshold.
+  // NOTE(review): start can go negative, in which case start % best_pitch
+  // is negative in C++ rather than wrapping round; the result then depends
+  // on how STATS::pile_count treats out of range values - confirm.
+  for (start = best_pixel - 2; start > best_pixel - best_pitch
+    && sum_proj[textord_pitch_range +
+    best_delta].pile_count (start % best_pitch) <= best_count;
+    start--);
+  for (end = best_pixel + 2;
+    end < best_pixel + best_pitch
+    && sum_proj[textord_pitch_range +
+    best_delta].pile_count (end % best_pitch) <= best_count;
+    end++);
+
+  best_sd =
+    compute_pitch_sd(row,
+                     projection,
+                     projection_left,
+                     projection_right,
+                     space_size,
+                     initial_pitch,
+                     best_sp_sd,
+                     best_mid_cuts,
+                     best_cells,
+                     testing_on,
+                     start,
+                     end);
+  if (testing_on)
+    tprintf ("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch,
+      best_sd);
+
+  if (textord_debug_pitch_metric)
+    print_pitch_sd(row,
+                   projection,
+                   projection_left,
+                   projection_right,
+                   space_size,
+                   initial_pitch);
+
+  return best_sd;
+}
+
+
+/**********************************************************************
+ * compute_pitch_sd
+ *
+ * Use a dp algorithm to fit the character cells and return the sd of
+ * the cell size over the row.
+ **********************************************************************/
+
+float compute_pitch_sd(                            //find fp cells
+        TO_ROW* row,                //row to do
+        STATS* projection,          //vertical projection
+        int16_t projection_left,      //edge
+        int16_t projection_right,     //edge
+        float space_size,           //size of blank
+        float initial_pitch,        //guess at pitch
+        float& sp_sd,               //out: space sd
+        int16_t& mid_cuts,            //out: no of free cuts
+        ICOORDELT_LIST* row_cells,  //in/out: list of chop pts
+        bool testing_on,           //print debug output
+        int16_t start,                //start of good range
+        int16_t end                   //end of good range
+) {
+  // Splits the row into words at gaps of space_size or more, fits cells of
+  // the given pitch to each word with check_pitch_sync or
+  // check_pitch_sync2, and merges the resulting cut positions into
+  // row_cells. Returns the square root of the summed word scores divided by
+  // the summed cell counts, or space_size * 10 if the row is empty or
+  // nothing was counted. sp_sd receives the root mean square deviation of
+  // the gaps between words from a whole number of pitches.
+  // When (pitsync_linear_version & 3) > 1 the whole row is instead handled
+  // by compute_pitch_sd2 and sp_sd receives its occupation count.
+  int16_t occupation;              //no of cells in word.
+                                 //blobs
+  BLOBNBOX_IT blob_it = row->blob_list ();
+  BLOBNBOX_IT start_it;          //start of word
+  BLOBNBOX_IT plot_it;           //for plotting
+  int16_t blob_count;              //no of blobs
+  TBOX blob_box;                  //bounding box
+  TBOX prev_box;                  //of super blob
+  int32_t prev_right;              //of word sync
+  int scale_factor;              //on scores for big words
+  int32_t sp_count;                //spaces
+  FPSEGPT_LIST seg_list;         //char cells
+  FPSEGPT_IT seg_it;             //iterator
+  int16_t segpos;                  //position of segment
+  int16_t cellpos;                 //previous cell boundary
+                                 //iterator
+  ICOORDELT_IT cell_it = row_cells;
+  ICOORDELT *cell;               //new cell
+  double sqsum;                  //sum of squares
+  double spsum;                  //of spaces
+  double sp_var;                 //space error
+  double word_sync;              //result for word
+  int32_t total_count;             //total blobs
+
+  if ((pitsync_linear_version & 3) > 1) {
+    word_sync = compute_pitch_sd2 (row, projection, projection_left,
+      projection_right, initial_pitch,
+      occupation, mid_cuts, row_cells,
+      testing_on, start, end);
+    sp_sd = occupation;
+    return word_sync;
+  }
+  mid_cuts = 0;
+  cellpos = 0;
+  total_count = 0;
+  sqsum = 0;
+  sp_count = 0;
+  spsum = 0;
+  prev_right = -1;               //no previous word yet
+  if (blob_it.empty ())
+    return space_size * 10;
+#ifndef GRAPHICS_DISABLED
+  if (testing_on && to_win != nullptr) {
+    blob_box = blob_it.data ()->bounding_box ();
+    projection->plot (to_win, projection_left,
+      row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
+  }
+#endif
+  start_it = blob_it;
+  blob_count = 0;
+  // NOTE(review): box_next presumably returns the box of the current blob
+  // and advances the iterator to the next one - confirm against its
+  // definition.
+  blob_box = box_next (&blob_it);//first blob
+  blob_it.mark_cycle_pt ();
+  do {
+    // Move start_it past the blobs of the previous word. This also resets
+    // blob_count to 0 for the new word.
+    for (; blob_count > 0; blob_count--)
+      box_next(&start_it);
+    // Extend the word until the gap to the next box reaches space_size or
+    // the blob list has cycled.
+    do {
+      prev_box = blob_box;
+      blob_count++;
+      blob_box = box_next (&blob_it);
+    }
+    while (!blob_it.cycled_list ()
+      && blob_box.left () - prev_box.right () < space_size);
+    plot_it = start_it;
+    // Fit cells to the word; seg_list receives the cut positions.
+    if (pitsync_linear_version & 3)
+      word_sync =
+        check_pitch_sync2 (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
+        projection, projection_left, projection_right,
+        row->xheight * textord_projection_scale,
+        occupation, &seg_list, start, end);
+    else
+      word_sync =
+        check_pitch_sync (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
+        projection, &seg_list);
+    if (testing_on) {
+      tprintf ("Word ending at (%d,%d), len=%d, sync rating=%g, ",
+        prev_box.right (), prev_box.top (),
+        seg_list.length () - 1, word_sync);
+      seg_it.set_to_list (&seg_list);
+      for (seg_it.mark_cycle_pt (); !seg_it.cycled_list ();
+      seg_it.forward ()) {
+        if (seg_it.data ()->faked)
+          tprintf ("(F)");
+        tprintf ("%d, ", seg_it.data ()->position ());
+        //                              tprintf("C=%g, s=%g, sq=%g\n",
+        //                                      seg_it.data()->cost_function(),
+        //                                      seg_it.data()->sum(),
+        //                                      seg_it.data()->squares());
+      }
+      tprintf ("\n");
+    }
+#ifndef GRAPHICS_DISABLED
+    if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr)
+      plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
+#endif
+    seg_it.set_to_list (&seg_list);
+    // Space error: distance from the last cut of the previous word to the
+    // first cut of this one, less the nearest whole number of pitches,
+    // squared.
+    if (prev_right >= 0) {
+      sp_var = seg_it.data ()->position () - prev_right;
+      sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
+      sp_var *= sp_var;
+      spsum += sp_var;
+      sp_count++;
+    }
+    // Merge the cuts of this word into row_cells.
+    for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
+      segpos = seg_it.data ()->position ();
+      if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) {
+                                 //big gap: fill with cells one pitch apart
+        while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) {
+          cell = new ICOORDELT (cellpos + static_cast<int16_t>(initial_pitch), 0);
+          cell_it.add_after_then_move (cell);
+          cellpos += static_cast<int16_t>(initial_pitch);
+        }
+                                 //make new one
+        cell = new ICOORDELT (segpos, 0);
+        cell_it.add_after_then_move (cell);
+        cellpos = segpos;
+      }
+      else if (segpos > cellpos - initial_pitch / 2) {
+        // Within half a pitch of the current cell: treat as the same cut.
+        cell = cell_it.data ();
+                                 //average positions
+        cell->set_x ((cellpos + segpos) / 2);
+        cellpos = cell->x ();
+      }
+      // Cuts more than half a pitch before the current cell are dropped.
+    }
+    seg_it.move_to_last ();
+    prev_right = seg_it.data ()->position ();
+    // Optionally weight long words more heavily. The factor is applied to
+    // both the score and the count.
+    if (textord_pitch_scalebigwords) {
+      scale_factor = (seg_list.length () - 2) / 2;
+      if (scale_factor < 1)
+        scale_factor = 1;
+    }
+    else
+      scale_factor = 1;
+    sqsum += word_sync * scale_factor;
+    total_count += (seg_list.length () - 1) * scale_factor;
+    seg_list.clear ();
+  }
+  while (!blob_it.cycled_list ());
+  sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
+  return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
+}
+
+
+/**********************************************************************
+ * compute_pitch_sd2
+ *
+ * Use a dp algorithm to fit the character cells and return the sd of
+ * the cell size over the row.
+ **********************************************************************/
+
+float compute_pitch_sd2(                            //fit fp cells to whole row
+        TO_ROW* row,                //row to do
+        STATS* projection,          //vertical projection
+        int16_t projection_left,      //left edge of projection
+        int16_t projection_right,     //right edge of projection
+        float initial_pitch,        //guess at pitch
+        int16_t& occupation,          //out: no of occupied cells
+        int16_t& mid_cuts,            //out: no of free cuts
+        ICOORDELT_LIST* row_cells,  //out: list of chop pts
+        bool testing_on,           //debug output for the row
+        int16_t start,                //start of good range
+        int16_t end                   //end of good range
+) {
+                                 //blobs of the row
+  BLOBNBOX_IT blob_it = row->blob_list ();
+  BLOBNBOX_IT plot_it;           //assigned below but never read
+  int16_t blob_count;              //no of blobs
+  TBOX blob_box;                  //bounding box
+  FPSEGPT_LIST seg_list;         //char cells
+  FPSEGPT_IT seg_it;             //iterator
+  int16_t segpos;                  //position of segment
+                                 //iterator on the output cells
+  ICOORDELT_IT cell_it = row_cells;
+  ICOORDELT *cell;               //new cell
+  double word_sync;              //sync result for the row
+
+  mid_cuts = 0;                  //stays 0 if no cells are made
+  if (blob_it.empty ()) {        //no blobs: nothing to fit
+    occupation = 0;
+    return initial_pitch * 10;   //same fallback as the final return
+  }
+#ifndef GRAPHICS_DISABLED
+  if (testing_on && to_win != nullptr) {  //draw the projection
+    projection->plot (to_win, projection_left,
+      row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
+  }
+#endif
+  blob_count = 0;
+  blob_it.mark_cycle_pt ();
+  do {
+                                 //count boxes; blob_box ends as the last one
+    blob_box = box_next (&blob_it);
+    blob_count++;
+  }
+  while (!blob_it.cycled_list ());
+  plot_it = blob_it;             //dead store: plot_it is not used again
+  word_sync = check_pitch_sync2 (&blob_it, blob_count, static_cast<int16_t>(initial_pitch),
+    2, projection, projection_left,
+    projection_right,
+    row->xheight * textord_projection_scale,
+    occupation, &seg_list, start, end);
+  if (testing_on) {              //print the rating and every cut position
+    tprintf ("Row ending at (%d,%d), len=%d, sync rating=%g, ",
+      blob_box.right (), blob_box.top (),
+      seg_list.length () - 1, word_sync);
+    seg_it.set_to_list (&seg_list);
+    for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
+      if (seg_it.data ()->faked) //flag faked segment points
+        tprintf ("(F)");
+      tprintf ("%d, ", seg_it.data ()->position ());
+      //                              tprintf("C=%g, s=%g, sq=%g\n",
+      //                                      seg_it.data()->cost_function(),
+      //                                      seg_it.data()->sum(),
+      //                                      seg_it.data()->squares());
+    }
+    tprintf ("\n");
+  }
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr)
+    plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
+#endif
+  seg_it.set_to_list (&seg_list);  //copy each cut position into row_cells
+  for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
+    segpos = seg_it.data ()->position ();
+                                 //new chop pt at segpos with y = 0
+    cell = new ICOORDELT (segpos, 0);
+    cell_it.add_after_then_move (cell);
+    if (seg_it.at_last ())       //the last segment supplies mid_cuts
+      mid_cuts = seg_it.data ()->cheap_cuts ();
+  }
+  seg_list.clear ();
+  return occupation > 0 ? sqrt (word_sync / occupation) : initial_pitch * 10;
+}
+
+
+/**********************************************************************
+ * print_pitch_sd
+ *
+ * Use a dp algorithm to fit the character cells word by word and then over
+ * the whole row, and print the resulting sds (nothing is returned).
+ **********************************************************************/
+
+void print_pitch_sd(                        //print pitch sds of a row
+                    TO_ROW *row,            //row to do
+                    STATS *projection,      //vertical projection
+                    int16_t projection_left,  //left edge of projection
+                    int16_t projection_right, //right edge of projection
+                    float space_size,       //size of blank
+                    float initial_pitch     //guess at pitch
+                   ) {
+  const char *res2;              //pitch result code for the row
+  int16_t occupation;              //used cells
+  float sp_sd;                   //space sd
+                                 //blobs of the row
+  BLOBNBOX_IT blob_it = row->blob_list ();
+  BLOBNBOX_IT start_it;          //start of word
+  BLOBNBOX_IT row_start;         //start of row
+  int16_t blob_count;              //no of blobs in current word
+  int16_t total_blob_count;        //total blobs in line
+  TBOX blob_box;                  //bounding box
+  TBOX prev_box;                  //of super blob
+  int32_t prev_right;              //last cut of previous word, -1 if none
+  int scale_factor;              //on scores for big words
+  int32_t sp_count;                //spaces
+  FPSEGPT_LIST seg_list;         //char cells
+  FPSEGPT_IT seg_it;             //iterator
+  double sqsum;                  //sum of scaled word sync results
+  double spsum;                  //sum of squared space errors
+  double sp_var;                 //space error
+  double word_sync;              //result for word, later for the row
+  double total_count;            //total cuts
+
+  if (blob_it.empty ())          //nothing to print for an empty row
+    return;
+  row_start = blob_it;
+  total_blob_count = 0;
+
+  total_count = 0;
+  sqsum = 0;
+  sp_count = 0;
+  spsum = 0;
+  prev_right = -1;               //no previous word yet
+  blob_it = row_start;
+  start_it = blob_it;
+  blob_count = 0;
+  blob_box = box_next (&blob_it);//first blob
+  blob_it.mark_cycle_pt ();
+  do {
+    for (; blob_count > 0; blob_count--)  //step start_it over the last word
+      box_next(&start_it);
+    do {                         //gather the blobs of one word
+      prev_box = blob_box;
+      blob_count++;
+      blob_box = box_next (&blob_it);
+    }
+    while (!blob_it.cycled_list ()
+      && blob_box.left () - prev_box.right () < space_size);  //gap ends word
+    word_sync =
+      check_pitch_sync2 (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
+      projection, projection_left, projection_right,
+      row->xheight * textord_projection_scale,
+      occupation, &seg_list, 0, 0);
+    total_blob_count += blob_count;
+    seg_it.set_to_list (&seg_list);
+    if (prev_right >= 0) {       //not the first word: score the space
+      sp_var = seg_it.data ()->position () - prev_right;
+      sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;  //offset from nearest multiple of pitch
+      sp_var *= sp_var;
+      spsum += sp_var;
+      sp_count++;
+    }
+    seg_it.move_to_last ();
+    prev_right = seg_it.data ()->position ();
+    if (textord_pitch_scalebigwords) {
+      scale_factor = (seg_list.length () - 2) / 2;
+      if (scale_factor < 1)      //never scale below 1
+        scale_factor = 1;
+    }
+    else
+      scale_factor = 1;
+    sqsum += word_sync * scale_factor;
+    total_count += (seg_list.length () - 1) * scale_factor;
+    seg_list.clear ();
+  }
+  while (!blob_it.cycled_list ());
+  sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
+  word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;  //word-by-word sd
+  tprintf ("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:",
+    word_sync, word_sync / initial_pitch, sp_sd,
+    word_sync < textord_words_pitchsd_threshold * initial_pitch
+    ? 'F' : 'P');
+
+  start_it = row_start;          //not read again after this point
+  blob_it = row_start;           //second pass: the whole row as one unit
+  word_sync =
+    check_pitch_sync2 (&blob_it, total_blob_count, static_cast<int16_t>(initial_pitch), 2,
+    projection, projection_left, projection_right,
+    row->xheight * textord_projection_scale, occupation,
+    &seg_list, 0, 0);
+  if (occupation > 1)            //average over the occupied cells
+    word_sync /= occupation;
+  word_sync = sqrt (word_sync);
+
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_row_cuts && to_win != nullptr)
+    plot_fp_cells2(to_win, ScrollView::CORAL, row, &seg_list);
+#endif
+  seg_list.clear ();
+  if (word_sync < textord_words_pitchsd_threshold * initial_pitch) {
+    if (word_sync < textord_words_def_fixed * initial_pitch
+      && !row->all_caps)
+      res2 = "DF";               //below the def_fixed limit, not all caps
+    else
+      res2 = "MF";               //below the pitchsd threshold only
+  }
+  else
+    res2 = word_sync < textord_words_def_prop * initial_pitch ? "MP" : "DP";  //def_prop limit splits MP from DP
+  tprintf
+    ("row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n",
+    word_sync, word_sync / initial_pitch,
+    word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P',
+    occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps);
+}
+
+/**********************************************************************
+ * find_repeated_chars
+ *
+ * Extract marked leader blobs and put them
+ * into words in advance of fixed pitch checking and word generation.
+ **********************************************************************/
+void find_repeated_chars(TO_BLOCK* block,       // Block to search.
+                         bool testing_on) {    // Debug mode (not read here).
+  POLY_BLOCK* pb = block->block->pdblk.poly_block();
+  if (pb != nullptr && !pb->IsText())
+    return;  // Don't find repeated chars in non-text blocks.
+
+  TO_ROW *row;
+  BLOBNBOX_IT box_it;
+  BLOBNBOX_IT search_it;         // forward search
+  WERD *word;                    // new word
+  TBOX word_box;                 // declared but never used in this function
+  int blobcount, repeated_set;   // size and id of the run being extracted
+
+  TO_ROW_IT row_it = block->get_rows();
+  if (row_it.empty()) return;  // empty block
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    row = row_it.data();
+    box_it.set_to_list(row->blob_list());
+    if (box_it.empty())  continue; // no blobs in this row
+    if (!row->rep_chars_marked()) {  // mark the row's repeated chars if not yet done
+      mark_repeated_chars(row);
+    }
+    if (row->num_repeated_sets() == 0) continue;  // nothing to do for this row
+    // New words are added to row->rep_words.
+    WERD_IT word_it(&row->rep_words);
+    do {
+      if (box_it.data()->repeated_set() != 0 &&
+          !box_it.data()->joined_to_prev()) {  // start of a marked run
+        blobcount = 1;
+        repeated_set = box_it.data()->repeated_set();
+        search_it = box_it;
+        search_it.forward();
+        while (!search_it.at_first() &&  // count the run; stop on wrap-around
+               search_it.data()->repeated_set() == repeated_set) {
+          blobcount++;
+          search_it.forward();
+        }
+        // After the call to make_real_word() all the blobs from this
+        // repeated set will be removed from the blob list. box_it will be
+        // set to point to the blob after the end of the extracted sequence.
+        word = make_real_word(&box_it, blobcount, box_it.at_first(), 1);
+        if (!box_it.empty() && box_it.data()->joined_to_prev()) {
+          tprintf("Bad box joined to prev at");
+          box_it.data()->bounding_box().print();
+          tprintf("After repeated word:");
+          word->bounding_box().print();
+        }
+        ASSERT_HOST(box_it.empty() || !box_it.data()->joined_to_prev());  // same condition as reported above
+        word->set_flag(W_REP_CHAR, true);
+        word->set_flag(W_DONT_CHOP, true);
+        word_it.add_after_then_move(word);
+      } else {
+        box_it.forward();  // not the start of a run: try the next blob
+      }
+    } while (!box_it.at_first());  // until box_it reports at_first() again
+  }
+}
+
+
+/**********************************************************************
+ * plot_fp_word
+ *
+ * Plot a block of words as if fixed pitch.
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+// Plots every row of the block as if it were fixed pitch. All three space
+// thresholds of a row are first set to the midpoint of pitch and nonspace.
+void plot_fp_word(TO_BLOCK *block,    // block to draw
+                  float pitch,        // pitch to draw with
+                  float nonspace) {   // for space threshold
+  TO_ROW_IT rows = block->get_rows ();
+
+  rows.mark_cycle_pt ();
+  while (!rows.cycled_list ()) {
+    TO_ROW *current = rows.data ();
+    current->min_space = static_cast<int32_t>((pitch + nonspace) / 2);
+    current->space_threshold = current->min_space;
+    current->max_nonspace = current->min_space;
+    plot_word_decisions (to_win, static_cast<int16_t>(pitch), current);
+    rows.forward ();
+  } }
+#endif
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/topitch.h b/tesseract/src/textord/topitch.h
new file mode 100644
index 00000000..39b239f2
--- /dev/null
+++ b/tesseract/src/textord/topitch.h
@@ -0,0 +1,191 @@
+/**********************************************************************
+ * File:        topitch.h  (Formerly to_pitch.h)
+ * Description: Code to determine fixed pitchness and the pitch if fixed.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           TOPITCH_H
+#define           TOPITCH_H
+
+#include          "blobbox.h"
+
+namespace tesseract {
+
+class Tesseract;
+
+extern BOOL_VAR_H (textord_debug_pitch_test, false,
+"Debug on fixed pitch test");
+extern BOOL_VAR_H (textord_debug_pitch_metric, false,
+"Write full metric stuff");
+extern BOOL_VAR_H (textord_show_row_cuts, false, "Draw row-level cuts");
+extern BOOL_VAR_H (textord_show_page_cuts, false, "Draw page-level cuts");
+extern BOOL_VAR_H (textord_pitch_cheat, false,
+"Use correct answer for fixed/prop");
+extern BOOL_VAR_H (textord_blockndoc_fixed, true,
+"Attempt whole doc/block fixed pitch");
+extern BOOL_VAR_H (textord_fast_pitch_test, false,
+"Do even faster pitch algorithm");
+extern double_VAR_H (textord_projection_scale, 0.125,
+"Ding rate for mid-cuts");
+extern double_VAR_H (textord_balance_factor, 2.0,
+"Ding rate for unbalanced char cells");
+
+void compute_fixed_pitch(ICOORD page_tr,              // top right
+                         TO_BLOCK_LIST* port_blocks,  // input list
+                         float gradient,              // page skew
+                         FCOORD rotation,             // for drawing
+                         bool testing_on);           // correct orientation
+void fix_row_pitch(                        //get some value
+                   TO_ROW *bad_row,        //row to fix
+                   TO_BLOCK *bad_block,    //block of bad_row
+                   TO_BLOCK_LIST *blocks,  //blocks to scan
+                   int32_t row_target,       //number of row
+                   int32_t block_target      //number of block
+                  );
+void compute_block_pitch(TO_BLOCK* block,     // input list
+                         FCOORD rotation,      // for drawing
+                         int32_t block_index,    // block number
+                         bool testing_on);    // correct orientation
+bool compute_rows_pitch(                    //find line stats
+        TO_BLOCK* block,    //block to do
+        int32_t block_index,  //block number
+        bool testing_on    //correct orientation
+);
+bool try_doc_fixed(                             //determine pitch
+        ICOORD page_tr,              //top right
+        TO_BLOCK_LIST* port_blocks,  //input list
+        float gradient               //page skew
+);
+bool try_block_fixed(                   //find line stats
+        TO_BLOCK* block,   //block to do
+        int32_t block_index  //block number
+);
+bool try_rows_fixed(                    //find line stats
+        TO_BLOCK* block,    //block to do
+        int32_t block_index,  //block number
+        bool testing_on    //correct orientation
+);
+void print_block_counts(                   //find line stats
+                        TO_BLOCK *block,   //block to do
+                        int32_t block_index  //block number
+                       );
+void count_block_votes(                   //find line stats
+                       TO_BLOCK *block,   //block to do
+                       int32_t &def_fixed,  //add to counts
+                       int32_t &def_prop,
+                       int32_t &maybe_fixed,
+                       int32_t &maybe_prop,
+                       int32_t &corr_fixed,
+                       int32_t &corr_prop,
+                       int32_t &dunno);
+bool row_pitch_stats(                  //find line stats
+        TO_ROW* row,      //current row
+        int32_t maxwidth,   //of spaces
+        bool testing_on  //correct orientation
+);
+bool find_row_pitch(                    //find lines
+        TO_ROW* row,        //row to do
+        int32_t maxwidth,     //max permitted space
+        int32_t dm_gap,       //ignorable gaps
+        TO_BLOCK* block,    //block of row
+        int32_t block_index,  //block_number
+        int32_t row_index,    //number of row
+        bool testing_on    //correct orientation
+);
+bool fixed_pitch_row(                   //find lines
+        TO_ROW* row,       //row to do
+        BLOCK* block,
+        int32_t block_index  //block_number
+);
+bool count_pitch_stats(                       //find lines
+        TO_ROW* row,           //row to do
+        STATS* gap_stats,      //blob gaps
+        STATS* pitch_stats,    //centre-centre stats
+        float initial_pitch,   //guess at pitch
+        float min_space,       //estimate space size
+        bool ignore_outsize,  //discard big objects
+        bool split_outsize,   //split big objects
+        int32_t dm_gap           //ignorable gaps
+);
+float tune_row_pitch(                             //find fp cells
+        TO_ROW* row,                 //row to do
+        STATS* projection,           //vertical projection
+        int16_t projection_left,       //left edge of projection
+        int16_t projection_right,      //right edge of projection
+        float space_size,            //size of blank
+        float& initial_pitch,        //guess at pitch (passed by reference)
+        float& best_sp_sd,           //space sd
+        int16_t& best_mid_cuts,        //no of cheap cuts
+        ICOORDELT_LIST* best_cells,  //row cells
+        bool testing_on             //individual words
+);
+float tune_row_pitch2(                             //find fp cells
+        TO_ROW* row,                 //row to do
+        STATS* projection,           //vertical projection
+        int16_t projection_left,       //left edge of projection
+        int16_t projection_right,      //right edge of projection
+        float space_size,            //size of blank
+        float& initial_pitch,        //guess at pitch (passed by reference)
+        float& best_sp_sd,           //space sd
+        int16_t& best_mid_cuts,        //no of cheap cuts
+        ICOORDELT_LIST* best_cells,  //row cells
+        bool testing_on             //individual words
+);
+float compute_pitch_sd(         //find fp cells
+        TO_ROW* row,                    //row to do
+        STATS* projection,              //vertical projection
+        int16_t projection_left,           //left edge of projection
+        int16_t projection_right,          //right edge of projection
+        float space_size,                //size of blank
+        float initial_pitch,             //guess at pitch
+        float& sp_sd,                    //out: space sd
+        int16_t& mid_cuts,                //no of free cuts
+        ICOORDELT_LIST* row_cells,      //list of chop pts
+        bool testing_on,                //individual words
+        int16_t start = 0,                 //start of good range
+        int16_t end = 0                    //end of good range
+);
+float compute_pitch_sd2(        //fit fp cells to whole row
+        TO_ROW* row,                    //row to do
+        STATS* projection,              //vertical projection
+        int16_t projection_left,           //left edge of projection
+        int16_t projection_right,          //right edge of projection
+        float initial_pitch,             //guess at pitch
+        int16_t& occupation,              //out: no of occupied cells
+        int16_t& mid_cuts,                //out: no of free cuts
+        ICOORDELT_LIST* row_cells,      //out: list of chop pts
+        bool testing_on,                //debug output for the row
+        int16_t start = 0,                 //start of good range
+        int16_t end = 0                    //end of good range
+);
+void print_pitch_sd(                        //print pitch sds of a row
+                    TO_ROW *row,            //row to do
+                    STATS *projection,      //vertical projection
+                    int16_t projection_left,  //left edge of projection
+                    int16_t projection_right, //right edge of projection
+                    float space_size,       //size of blank
+                    float initial_pitch     //guess at pitch
+                   );
+void find_repeated_chars(TO_BLOCK* block,    // Block to search.
+                         bool testing_on);  // Debug mode.
+void plot_fp_word(                  //draw block of words
+                  TO_BLOCK *block,  //block to draw
+                  float pitch,      //pitch to draw with
+                  float nonspace    //for space threshold
+                 );
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/tordmain.cpp b/tesseract/src/textord/tordmain.cpp
new file mode 100644
index 00000000..7f91b1ff
--- /dev/null
+++ b/tesseract/src/textord/tordmain.cpp
@@ -0,0 +1,994 @@
+/**********************************************************************
+ * File:        tordmain.cpp  (Formerly textordp.c)
+ * Description: C++ top level textord code.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#define _USE_MATH_DEFINES       // for M_PI
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "tordmain.h"
+
+#include "arrayaccess.h"        // for GET_DATA_BYTE
+#include "blobbox.h"            // for BLOBNBOX_IT, BLOBNBOX, TO_BLOCK, TO_B...
+#include "ccstruct.h"           // for CCStruct, CCStruct::kXHeightFraction
+#include "clst.h"               // for CLISTIZE
+#include "coutln.h"             // for C_OUTLINE_IT, C_OUTLINE_LIST, C_OUTLINE
+#include "drawtord.h"           // for plot_box_list, to_win, create_to_win
+#include "edgblob.h"            // for extract_edges
+#include "errcode.h"            // for ASSERT_HOST, ...
+#include "makerow.h"            // for textord_test_x, textord_test_y, texto...
+#include "ocrblock.h"           // for BLOCK_IT, BLOCK, BLOCK_LIST (ptr only)
+#include "ocrrow.h"             // for ROW, ROW_IT, ROW_LIST, tweak_row_base...
+#include "params.h"             // for DoubleParam, BoolParam, IntParam
+#include "pdblock.h"            // for PDBLK
+#include "points.h"             // for FCOORD, ICOORD
+#include "polyblk.h"            // for POLY_BLOCK
+#include "quadratc.h"           // for QUAD_COEFFS
+#include "quspline.h"           // for QSPLINE, tweak_row_baseline
+#include "rect.h"               // for TBOX
+#include "scrollview.h"         // for ScrollView, ScrollView::WHITE
+#include "statistc.h"           // for STATS
+#include "stepblob.h"           // for C_BLOB_IT, C_BLOB, C_BLOB_LIST
+#include "textord.h"            // for Textord, WordWithBox, WordGrid, WordS...
+#include "tprintf.h"            // for tprintf
+#include "werd.h"               // for WERD_IT, WERD, WERD_LIST, W_DONT_CHOP
+
+#include "genericvector.h"      // for PointerVector, GenericVector
+
+#include "allheaders.h"         // for pixDestroy, pixGetHeight, boxCreate
+
+#include <cfloat>               // for FLT_MAX
+#include <cmath>                // for ceil, floor, M_PI
+#include <cstdint>              // for INT16_MAX, uint32_t, int32_t, int16_t
+
+namespace tesseract {
+
+#define MAX_NEAREST_DIST  600    //for block skew stats
+
+CLISTIZE(WordWithBox)
+
+/**********************************************************************
+ * SetBlobStrokeWidth
+ *
+ * Set the horizontal and vertical stroke widths in the blob.
+ **********************************************************************/
+void SetBlobStrokeWidth(Pix* pix, BLOBNBOX* blob) {  // blob is updated in place
+  // Cut the blob rectangle into a Pix (the box y is pix_height - box.top()).
+  int pix_height = pixGetHeight(pix);
+  const TBOX& box = blob->bounding_box();
+  int width = box.width();
+  int height = box.height();
+  Box* blob_pix_box = boxCreate(box.left(), pix_height - box.top(),
+                                width, height);
+  Pix* pix_blob = pixClipRectangle(pix, blob_pix_box, nullptr);  // NOTE(review): result not null-checked
+  boxDestroy(&blob_pix_box);
+  Pix* dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG);  // NOTE(review): result not null-checked
+  pixDestroy(&pix_blob);
+  // Compute the stroke widths from the distance values, read one byte each.
+  uint32_t* data = pixGetData(dist_pix);
+  int wpl = pixGetWpl(dist_pix);  // row y starts at data + y * wpl
+  // Horizontal width of stroke: scan every row from left to right.
+  STATS h_stats(0, width + 1);
+  for (int y = 0; y < height; ++y) {
+    uint32_t* pixels = data + y*wpl;
+    int prev_pixel = 0;  // value left of "pixel"; 0 before the first column
+    int pixel = GET_DATA_BYTE(pixels, 0);  // candidate; sits at column x - 1 below
+    for (int x = 1; x < width; ++x) {
+      int next_pixel = GET_DATA_BYTE(pixels, x);
+      // We are looking for a pixel (at column x - 1) that is equal to its
+      // vertical neighbours, yet greater than its left neighbour.
+      if (prev_pixel < pixel &&
+          (y == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
+          (y == height - 1 || pixel == GET_DATA_BYTE(pixels + wpl, x - 1))) {
+        if (pixel > next_pixel) {
+          // Single local max, so an odd width of 2 * pixel - 1.
+          h_stats.add(pixel * 2 - 1, 1);
+        } else if (pixel == next_pixel && x + 1 < width &&
+                 pixel > GET_DATA_BYTE(pixels, x + 1)) {
+          // Double local max, so an even width of 2 * pixel.
+          h_stats.add(pixel * 2, 1);
+        }
+      }
+      prev_pixel = pixel;  // slide the three-value window one column right
+      pixel = next_pixel;
+    }
+  }
+  // Vertical width of stroke: scan every column from the first row onwards.
+  STATS v_stats(0, height + 1);
+  for (int x = 0; x < width; ++x) {
+    int prev_pixel = 0;  // value of the row before "pixel"; 0 before row 0
+    int pixel = GET_DATA_BYTE(data, x);  // candidate; sits at row y - 1 below
+    for (int y = 1; y < height; ++y) {
+      uint32_t* pixels = data + y*wpl;  // row y, so pixels - wpl is the candidate's row
+      int next_pixel = GET_DATA_BYTE(pixels, x);
+      // We are looking for a pixel (at row y - 1) that is equal to its
+      // horizontal neighbours, yet greater than the value in the row before.
+      if (prev_pixel < pixel &&
+          (x == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
+          (x == width - 1 || pixel == GET_DATA_BYTE(pixels - wpl, x + 1))) {
+        if (pixel > next_pixel) {
+          // Single local max, so an odd width of 2 * pixel - 1.
+          v_stats.add(pixel * 2 - 1, 1);
+        } else if (pixel == next_pixel && y + 1 < height &&
+                 pixel > GET_DATA_BYTE(pixels + wpl, x)) {
+          // Double local max, so an even width of 2 * pixel.
+          v_stats.add(pixel * 2, 1);
+        }
+      }
+      prev_pixel = pixel;  // slide the three-value window one row on
+      pixel = next_pixel;
+    }
+  }
+  pixDestroy(&dist_pix);
+  // Store the horizontal and vertical width in the blob, keeping both
+  // widths if there is enough information, otherwise only the one with
+  // the most samples. "Enough" is at least (width + height) / 4 samples.
+  // If there are insufficient samples, store zero, rather than using
+  // 2*area/perimeter, as the numbers that gives do not match the numbers
+  // from the distance method.
+  if (h_stats.get_total() >= (width + height) / 4) {
+    blob->set_horz_stroke_width(h_stats.ile(0.5f));  // 0.5 ile of the samples
+    if (v_stats.get_total() >= (width + height) / 4)
+      blob->set_vert_stroke_width(v_stats.ile(0.5f));
+    else
+      blob->set_vert_stroke_width(0.0f);
+  } else {
+    if (v_stats.get_total() >= (width + height) / 4 ||
+        v_stats.get_total() > h_stats.get_total()) {  // vertical has more samples
+      blob->set_horz_stroke_width(0.0f);
+      blob->set_vert_stroke_width(v_stats.ile(0.5f));
+    } else {
+      blob->set_horz_stroke_width(h_stats.get_total() > 2 ? h_stats.ile(0.5f)
+                                                          : 0.0f);  // needs more than 2 samples
+      blob->set_vert_stroke_width(0.0f);
+    }
+  }
+}
+
+/**********************************************************************
+ * assign_blobs_to_blocks2
+ *
+ * Make a list of TO_BLOCKs for portrait and landscape orientation.
+ **********************************************************************/
+
+void assign_blobs_to_blocks2(Pix* pix,
+                             BLOCK_LIST *blocks,          // blocks to process
+                             TO_BLOCK_LIST *port_blocks) {  // output list
+  // For each input BLOCK a TO_BLOCK is allocated and added to port_blocks.
+  // The C_BLOBs of the block are extracted from their lists, wrapped in
+  // BLOBNBOXes and given stroke widths measured on pix.
+  BLOCK_IT src_block_it(blocks);
+  TO_BLOCK_IT dest_block_it(port_blocks);
+
+  src_block_it.mark_cycle_pt();
+  while (!src_block_it.cycled_list()) {
+    BLOCK *src_block = src_block_it.data();
+    TO_BLOCK *dest_block = new TO_BLOCK(src_block);
+
+    // Good outlines: src_block->blob_list() goes to dest_block->blobs.
+    C_BLOB_IT good_it(src_block->blob_list());
+    BLOBNBOX_IT good_dest_it(&dest_block->blobs);
+    good_it.mark_cycle_pt();
+    while (!good_it.cycled_list()) {
+      BLOBNBOX *wrapped = new BLOBNBOX(good_it.extract());
+      SetBlobStrokeWidth(pix, wrapped);
+      good_dest_it.add_after_then_move(wrapped);
+      good_it.forward();
+    }
+
+    // Rejected outlines: src_block->reject_blobs() goes to
+    // dest_block->noise_blobs, so that they can be reconsidered later and
+    // outlines rejected by mistake can be recovered.
+    C_BLOB_IT reject_it(src_block->reject_blobs());
+    BLOBNBOX_IT noise_dest_it(&dest_block->noise_blobs);
+    reject_it.mark_cycle_pt();
+    while (!reject_it.cycled_list()) {
+      BLOBNBOX *wrapped = new BLOBNBOX(reject_it.extract());
+      SetBlobStrokeWidth(pix, wrapped);
+      noise_dest_it.add_after_then_move(wrapped);
+      reject_it.forward();
+    }
+
+    dest_block_it.add_after_then_move(dest_block);
+    src_block_it.forward();
+  }
+}
+
+/**********************************************************************
+ * find_components
+ *
+ * Find the C_OUTLINEs of the connected components in each block, put them
+ * in C_BLOBs, and filter them by size, putting the different size
+ * grades on different lists in the matching TO_BLOCK in to_blocks.
+ **********************************************************************/
+
+void Textord::find_components(Pix* pix, BLOCK_LIST *blocks,
+                              TO_BLOCK_LIST *to_blocks) {
+  // Both image dimensions must fit in 16 bits; larger pages are rejected.
+  const int page_width = pixGetWidth(pix);
+  const int page_height = pixGetHeight(pix);
+  const bool fits = page_width <= INT16_MAX && page_height <= INT16_MAX;
+  if (!fits) {
+    tprintf("Input image too large! (%d, %d)\n", page_width, page_height);
+    return;
+  }
+  // Extract edges for every block that is text or has no polygon at all.
+  BLOCK_IT it(blocks);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOCK* current = it.data();
+    POLY_BLOCK* poly = current->pdblk.poly_block();
+    const bool skip = poly != nullptr && !poly->IsText();
+    if (!skip) extract_edges(pix, current);
+  }
+  // Hand the blobs over to the TO_BLOCKs, then filter them.
+  assign_blobs_to_blocks2(pix, blocks, to_blocks);
+  filter_blobs(ICOORD(page_width, page_height), to_blocks, !textord_test_landscape);
+}
+
+/**********************************************************************
+ * filter_blobs
+ *
+ * Sort the blobs into sizes in all the blocks for later work.
+ **********************************************************************/
+
+void Textord::filter_blobs(ICOORD page_tr,         // top right
+                           TO_BLOCK_LIST* blocks,  // output list
+                           bool testing_on) {     // for plotting
+  // Filters the blobs of each block and sets the block's line metrics.
+  TO_BLOCK_IT it(blocks);
+
+  #ifndef GRAPHICS_DISABLED
+  if (to_win != nullptr)
+    to_win->Clear();
+  #endif // !GRAPHICS_DISABLED
+
+  it.mark_cycle_pt();
+  while (!it.cycled_list()) {
+    TO_BLOCK *current = it.data();
+    current->line_size = filter_noise_blobs(&current->blobs,
+                                            &current->noise_blobs,
+                                            &current->small_blobs,
+                                            &current->large_blobs);
+    // A zero size is replaced by 1 before anything is scaled from it.
+    if (current->line_size == 0) current->line_size = 1;
+    current->line_spacing = current->line_size *
+        (tesseract::CCStruct::kDescenderFraction +
+         tesseract::CCStruct::kXHeightFraction +
+         2 * tesseract::CCStruct::kAscenderFraction) /
+         tesseract::CCStruct::kXHeightFraction;
+    current->line_size *= textord_min_linesize;
+    current->max_blob_size = current->line_size * textord_excess_blobsize;
+
+    #ifndef GRAPHICS_DISABLED
+    if (testing_on && textord_show_blobs) {
+      if (to_win == nullptr) create_to_win(page_tr);
+      current->plot_graded_blobs(to_win);
+    }
+    if (testing_on && textord_show_boxes) {
+      if (to_win == nullptr) create_to_win(page_tr);
+      plot_box_list(to_win, &current->noise_blobs, ScrollView::WHITE);
+      plot_box_list(to_win, &current->small_blobs, ScrollView::WHITE);
+      plot_box_list(to_win, &current->large_blobs, ScrollView::WHITE);
+      plot_box_list(to_win, &current->blobs, ScrollView::WHITE);
+    }
+    #endif // !GRAPHICS_DISABLED
+    it.forward();
+  }
+}
+
+/**********************************************************************
+ * filter_noise_blobs
+ *
+ * Move small blobs to a separate list.
+ **********************************************************************/
+
+float Textord::filter_noise_blobs(
+    BLOBNBOX_LIST *src_list,      // original list
+    BLOBNBOX_LIST *noise_list,    // noise list
+    BLOBNBOX_LIST *small_list,    // small blobs
+    BLOBNBOX_LIST *large_list) {  // large blobs
+  BLOBNBOX_IT src_it(src_list);
+  BLOBNBOX_IT noise_it(noise_list);
+  BLOBNBOX_IT small_it(small_list);
+  BLOBNBOX_IT large_it(large_list);
+  STATS height_stats(0, MAX_NEAREST_DIST);  // histogram of blob heights
+
+  // Pass 1: blobs shorter than textord_max_noise_size go to the noise list;
+  // blobs whose enclosed area reaches textord_noise_area_ratio of their
+  // bounding box area are parked on the small list for now.
+  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
+    BLOBNBOX* candidate = src_it.data();
+    const TBOX& box = candidate->bounding_box();
+    if (box.height() < textord_max_noise_size) {
+      noise_it.add_after_then_move(src_it.extract());
+    } else if (candidate->enclosed_area() >=
+               box.height() * box.width() * textord_noise_area_ratio) {
+      small_it.add_after_then_move(src_it.extract());
+    }
+  }
+
+  // Pass 2: first x-height guess from the heights of what is left.
+  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
+    height_stats.add(src_it.data()->bounding_box().height(), 1);
+  }
+  float initial_x = height_stats.ile(textord_initialx_ile);
+
+  // Size limits derived from the first guess.
+  const float max_y = ceil(initial_x *
+                           (tesseract::CCStruct::kDescenderFraction +
+                            tesseract::CCStruct::kXHeightFraction +
+                            2 * tesseract::CCStruct::kAscenderFraction) /
+                           tesseract::CCStruct::kXHeightFraction);
+  const float min_y = floor(initial_x / 2);
+  const float max_x = ceil(initial_x * textord_width_limit);
+
+  // Pass 3: revisit the parked blobs. Too tall ones become large, ones that
+  // reach min_y return to the source list, the rest stay small.
+  small_it.move_to_first();
+  for (small_it.mark_cycle_pt(); !small_it.cycled_list();
+       small_it.forward()) {
+    const int16_t parked_height = small_it.data()->bounding_box().height();
+    if (parked_height > max_y) {
+      large_it.add_after_then_move(small_it.extract());
+    } else if (parked_height >= min_y) {
+      src_it.add_after_then_move(small_it.extract());
+    }
+  }
+
+  // Pass 4: final split of the source list by the size limits, collecting
+  // the heights of the blobs that remain.
+  height_stats.clear();
+  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
+    const TBOX& box = src_it.data()->bounding_box();
+    const int16_t blob_height = box.height();
+    const int16_t blob_width = box.width();
+    if (blob_height < min_y) {
+      small_it.add_after_then_move(src_it.extract());
+    } else if (blob_height > max_y || blob_width > max_x) {
+      large_it.add_after_then_move(src_it.extract());
+    } else {
+      height_stats.add(blob_height, 1);
+    }
+  }
+
+  // The result is the larger of the first guess and the capped height
+  // percentile of the remaining blobs.
+  float max_height = height_stats.ile(textord_initialasc_ile);
+  max_height *= tesseract::CCStruct::kXHeightCapRatio;
+  return max_height > initial_x ? max_height : initial_x;
+}
+
+// Fixes the block so it obeys all the rules:
+// Must have at least one ROW.
+// Must have at least one WERD.
+// WERDs contain a fake blob.
+void Textord::cleanup_nontext_block(BLOCK* block) {
+  ROW_IT rows(block->row_list());
+
+  // Rule 1: there must be a row. If there is none, make one that spans the
+  // block, with a single baseline segment whose coefficients are
+  // {0, 0, bottom of block}.
+  if (rows.empty()) {
+    const TBOX& block_box = block->pdblk.bounding_box();
+    const float box_height = block_box.height();
+    int32_t x_limits[2] = {block_box.left(), block_box.right()};
+    double flat_baseline[3] = {0.0, 0.0,
+                               static_cast<double>(block_box.bottom())};
+    rows.add_after_then_move(new ROW(1, x_limits, flat_baseline,
+                                     box_height / 2.0f, box_height / 4.0f,
+                                     box_height / 4.0f, 0, 1));
+  }
+
+  for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
+    ROW* current_row = rows.data();
+    WERD_IT words(current_row->word_list());
+
+    // Rule 2: every row must hold a word. An empty row gets one word built
+    // around a fake blob; the blob covers the whole block when this is the
+    // only row, otherwise just the row.
+    if (words.empty()) {
+      TBOX fake_box = block->row_list()->singleton()
+                          ? block->pdblk.bounding_box()
+                          : current_row->bounding_box();
+      C_BLOB_LIST fake_blobs;
+      C_BLOB_IT fake_it(&fake_blobs);
+      fake_it.add_after_then_move(C_BLOB::FakeBlob(fake_box));
+      words.add_after_then_move(new WERD(&fake_blobs, 0, nullptr));
+    }
+
+    // Rule 3: every word must hold a blob. This is only asserted, since a
+    // failure here would be worth investigating rather than papering over.
+    for (words.mark_cycle_pt(); !words.cycled_list(); words.forward()) {
+      ASSERT_HOST(!words.data()->cblob_list()->empty());
+    }
+    current_row->recalc_bounding_box();
+  }
+}
+
+/**********************************************************************
+ * cleanup_blocks
+ *
+ * Delete empty blocks, rows from the page.
+ **********************************************************************/
+
+void Textord::cleanup_blocks(bool clean_noise, BLOCK_LIST* blocks) {
+  BLOCK_IT blk_it(blocks);
+  ROW_IT rows;
+
+  // Counters used only for the debug output.
+  int rows_kept = 0;
+  int rows_seen = 0;
+  int blocks_kept = 0;
+  int blocks_seen = 0;
+  for (blk_it.mark_cycle_pt(); !blk_it.cycled_list(); blk_it.forward()) {
+    BLOCK* block = blk_it.data();
+
+    // Non-text blocks are repaired rather than cleaned, and are not counted.
+    auto* poly = block->pdblk.poly_block();
+    if (poly != nullptr && !poly->IsText()) {
+      cleanup_nontext_block(block);
+      continue;
+    }
+
+    rows_kept = 0;
+    rows_seen = 0;
+    if (clean_noise) {
+      rows.set_to_list(block->row_list());
+      for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
+        ROW* row = rows.data();
+        ++rows_seen;
+        clean_small_noise_from_words(row);
+
+        // A row goes if it has no words left, or if row rejection is on and
+        // the row is judged to be noise.
+        bool discard_row = row->word_list()->empty();
+        if (!discard_row && textord_noise_rejrows) {
+          discard_row = clean_noise_from_row(row);
+        }
+        if (discard_row) {
+          delete rows.extract();
+          continue;
+        }
+
+        if (textord_noise_rejwords) {
+          clean_noise_from_words(row);
+        }
+        if (textord_blshift_maxshift >= 0) {
+          tweak_row_baseline(row, textord_blshift_maxshift,
+                             textord_blshift_xfraction);
+        }
+        ++rows_kept;
+      }
+    }
+
+    // Text blocks left without rows are removed from the page.
+    if (block->row_list()->empty()) {
+      delete blk_it.extract();
+    } else {
+      ++blocks_kept;
+    }
+    ++blocks_seen;
+    if (textord_noise_debug) {
+      tprintf("cleanup_blocks: # rows = %d / %d\n", rows_kept, rows_seen);
+    }
+  }
+  if (textord_noise_debug) {
+    tprintf("cleanup_blocks: # blocks = %d / %d\n", blocks_kept, blocks_seen);
+  }
+}
+
+
+/**********************************************************************
+ * clean_noise_from_row
+ *
+ * Test whether a row looks like garbage rather than text by counting
+ * dot-like, normal and character-like objects in its words.
+ * Returns true if the row should be rejected. This function only counts;
+ * it does not move or delete anything itself.
+ **********************************************************************/
+
+bool Textord::clean_noise_from_row(          //test row for noise
+        ROW* row  //row to test
+) {
+  TBOX blob_box;                 //bounding box (last one feeds the debug output)
+  C_BLOB *blob;                  //current blob
+  C_OUTLINE *outline;            //current outline
+  WERD *word;                    //current word
+  int32_t blob_size;             //biggest size
+  int32_t trans_count = 0;       //no of transitions
+  int32_t trans_threshold;       //noise tolerance
+  int32_t dot_count = 0;         //small objects
+  int32_t norm_count = 0;        //normal objects
+  int32_t super_norm_count = 0;  //real char-like
+                                 //words of row
+  WERD_IT word_it = row->word_list ();
+  C_BLOB_IT blob_it;             //blob iterator
+  C_OUTLINE_IT out_it;           //outline iterator
+
+  // Per-blob tracing is only on when blob display is enabled and the test
+  // point lies between the baseline and the x-height line of this row.
+  const bool testing_on = textord_test_y > row->base_line (textord_test_x)
+               && textord_show_blobs
+               && textord_test_y < row->base_line (textord_test_x) + row->x_height ();
+
+  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
+    word = word_it.data ();      //current word
+                                 //blobs in word
+    blob_it.set_to_list (word->cblob_list ());
+    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+    blob_it.forward ()) {
+      blob = blob_it.data ();
+      if (!word->flag (W_DONT_CHOP)) {
+                                 //get outlines
+        out_it.set_to_list (blob->out_list ());
+        for (out_it.mark_cycle_pt (); !out_it.cycled_list ();
+        out_it.forward ()) {
+          outline = out_it.data ();
+          blob_box = outline->bounding_box ();
+          blob_size =
+            blob_box.width () >
+            blob_box.height ()? blob_box.width () : blob_box.
+            height();
+          if (blob_size < textord_noise_sizelimit * row->x_height ())
+            dot_count++;         //count small outlines
+          // An outline with children whose height and width are both
+          // within the configured fractions of the x-height counts as a
+          // real character.
+          if (!outline->child ()->empty ()
+            && blob_box.height () <
+            (1 + textord_noise_syfract) * row->x_height ()
+            && blob_box.height () >
+            (1 - textord_noise_syfract) * row->x_height ()
+            && blob_box.width () <
+            (1 + textord_noise_sxfract) * row->x_height ()
+            && blob_box.width () >
+            (1 - textord_noise_sxfract) * row->x_height ())
+            super_norm_count++;  //count char-like outlines
+        }
+      }
+      else
+        super_norm_count++;      //W_DONT_CHOP blobs always count as char-like
+      blob_box = blob->bounding_box ();
+      blob_size =
+        blob_box.width () >
+        blob_box.height ()? blob_box.width () : blob_box.height ();
+      if (blob_size >= textord_noise_sizelimit * row->x_height ()
+          && blob_size < row->x_height () * 2) {
+        // Mid-sized blob: normal if it has few enough transitions.
+        trans_threshold = blob_size / textord_noise_sizefraction;
+        trans_count = blob->count_transitions (trans_threshold);
+        if (trans_count < textord_noise_translimit)
+          norm_count++;
+      }
+      // A blob taller than twice the x-height counts as two dots, unless
+      // it is the first blob of the first word.
+      else if (blob_box.height () > row->x_height () * 2
+        && (!word_it.at_first () || !blob_it.at_first ()))
+        dot_count += 2;
+      if (testing_on) {
+        tprintf
+          ("Blob at (%d,%d) -> (%d,%d), ols=%d, tc=%d, bldiff=%g\n",
+          blob_box.left (), blob_box.bottom (), blob_box.right (),
+          blob_box.top (), blob->out_list ()->length (), trans_count,
+          blob_box.bottom () - row->base_line (blob_box.left ()));
+      }
+    }
+  }
+
+  // Decide once, so that the debug output reports the verdict that is
+  // actually returned.
+  const bool reject_row = super_norm_count < textord_noise_sncount
+    && dot_count > norm_count * textord_noise_rowratio && dot_count > 2;
+  if (textord_noise_debug) {
+    tprintf ("Row ending at (%d,%g):",
+      blob_box.right (), row->base_line (blob_box.right ()));
+    tprintf (" R=%g, dc=%d, nc=%d, %s\n",
+      norm_count > 0 ? static_cast<float>(dot_count) / norm_count : 9999,
+      dot_count, norm_count,
+      reject_row ? "REJECTED" : "ACCEPTED");
+  }
+  return reject_row;
+}
+
+/**********************************************************************
+ * clean_noise_from_words
+ *
+ * Move blobs of words from rows of garbage into the reject blobs list.
+ **********************************************************************/
+
+void Textord::clean_noise_from_words(          //remove empties
+                                     ROW *row  //row to clean
+                                    ) {
+  WERD_IT words(row->word_list());
+  const int32_t word_total = words.length();
+  if (word_total == 0 || textord_no_rejects) {
+    return;
+  }
+
+  // One verdict per word: 0 = keep, 1 = suspect, 2 = dud.
+  std::vector<int8_t> verdicts(word_total);
+  int32_t dud_total = 0;   // words with verdict 2
+  int32_t ok_total = 0;    // all other words
+  int32_t index = 0;       // position of the current word
+  const auto x_height = row->x_height();
+  C_BLOB_IT blobs;
+  C_OUTLINE_IT outlines;
+
+  // Pass 1: count dot-like and normal objects in each word and grade it.
+  for (words.mark_cycle_pt(); !words.cycled_list(); words.forward()) {
+    WERD* word = words.data();
+    int32_t dots = 0;
+    int32_t normals = 0;
+    blobs.set_to_list(word->cblob_list());
+    for (blobs.mark_cycle_pt(); !blobs.cycled_list(); blobs.forward()) {
+      C_BLOB* blob = blobs.data();
+      if (word->flag(W_DONT_CHOP)) {
+        ++normals;
+      } else {
+        outlines.set_to_list(blob->out_list());
+        for (outlines.mark_cycle_pt(); !outlines.cycled_list();
+             outlines.forward()) {
+          C_OUTLINE* outline = outlines.data();
+          const TBOX& outline_box = outline->bounding_box();
+          const int32_t outline_size =
+              outline_box.width() > outline_box.height()
+                  ? outline_box.width()
+                  : outline_box.height();
+          if (outline_size < textord_noise_sizelimit * x_height) {
+            ++dots;  // small outline
+          }
+          // An outline with children whose height and width are both
+          // within the configured fractions of the x-height is normal.
+          const bool height_fits =
+              outline_box.height() < (1 + textord_noise_syfract) * x_height &&
+              outline_box.height() > (1 - textord_noise_syfract) * x_height;
+          const bool width_fits =
+              outline_box.width() < (1 + textord_noise_sxfract) * x_height &&
+              outline_box.width() > (1 - textord_noise_sxfract) * x_height;
+          if (!outline->child()->empty() && height_fits && width_fits) {
+            ++normals;
+          }
+        }
+      }
+
+      const TBOX blob_box = blob->bounding_box();
+      const int32_t blob_size = blob_box.width() > blob_box.height()
+                                    ? blob_box.width()
+                                    : blob_box.height();
+      if (blob_size >= textord_noise_sizelimit * x_height &&
+          blob_size < x_height * 2) {
+        // Mid-sized blob: normal if it has few enough transitions.
+        const int32_t threshold = blob_size / textord_noise_sizefraction;
+        const int32_t transitions = blob->count_transitions(threshold);
+        if (transitions < textord_noise_translimit) {
+          ++normals;
+        }
+      } else if (blob_box.height() > x_height * 2 &&
+                 (!words.at_first() || !blobs.at_first())) {
+        // Very tall blob that is not the first blob of the first word.
+        dots += 2;
+      }
+    }
+
+    int8_t verdict = 0;
+    if (dots > 2 && !word->flag(W_REP_CHAR)) {
+      if (dots > normals * textord_noise_normratio * 2) {
+        verdict = 2;
+      } else if (dots > normals * textord_noise_normratio) {
+        verdict = 1;
+      }
+    }
+    verdicts[index] = verdict;
+    if (verdict == 2) {
+      ++dud_total;
+    } else {
+      ++ok_total;
+    }
+    ++index;
+  }
+
+  // Pass 2: duds always get cleaned; suspects only when duds outnumber the
+  // other words. Cleaning no longer discards the word: its small blobs are
+  // pushed to the reject list so the classifier can decide if they matter.
+  index = 0;
+  for (words.mark_cycle_pt(); !words.cycled_list(); words.forward()) {
+    const int8_t verdict = verdicts[index];
+    ++index;
+    if (verdict == 2 || (verdict == 1 && dud_total > ok_total)) {
+      words.data()->CleanNoise(textord_noise_sizelimit * x_height);
+    }
+  }
+}
+
+// Remove outlines that are a tiny fraction in either width or height
+// of the word height.
+void Textord::clean_small_noise_from_words(ROW *row) {
+  WERD_IT words(row->word_list());
+  for (words.mark_cycle_pt(); !words.cycled_list(); words.forward()) {
+    WERD* current = words.data();
+
+    // Threshold: textord_noise_hfract of the word height, rounded to nearest.
+    const auto scaled_height =
+        textord_noise_hfract * current->bounding_box().height();
+    const int min_size = static_cast<int>(scaled_height + 0.5);
+
+    // Strip small outlines from every blob, dropping blobs left empty.
+    C_BLOB_IT blobs(current->cblob_list());
+    for (blobs.mark_cycle_pt(); !blobs.cycled_list(); blobs.forward()) {
+      C_BLOB* blob = blobs.data();
+      C_OUTLINE_IT outlines(blob->out_list());
+      for (outlines.mark_cycle_pt(); !outlines.cycled_list();
+           outlines.forward()) {
+        outlines.data()->RemoveSmallRecursive(min_size, &outlines);
+      }
+      if (blob->out_list()->empty()) {
+        delete blobs.extract();
+      }
+    }
+
+    // Words that still have blobs are kept as they are.
+    if (!current->cblob_list()->empty()) {
+      continue;
+    }
+
+    // The word is about to go, so a following word can no longer be a fuzzy
+    // non-space relative to it.
+    if (!words.at_last()) {
+      WERD* successor = words.data_relative(1);
+      if (successor->flag(W_FUZZY_NON)) {
+        successor->set_flag(W_FUZZY_NON, false);
+      }
+    }
+    delete words.extract();
+  }
+}
+
+// Local struct to hold a group of blocks.
+struct BlockGroup {
+  // An empty group: identity rotation, zero angle, x-height of 1.
+  BlockGroup() = default;
+
+  // A group seeded with a single block, taking its box, rotation and
+  // x-height from that block.
+  explicit BlockGroup(BLOCK* block)
+      : bounding_box(block->pdblk.bounding_box()),
+        rotation(block->re_rotation()),
+        angle(rotation.angle()),
+        min_xheight(block->x_height()) {
+    blocks.push_back(block);
+  }
+
+  TBOX bounding_box;              // Union of block bounding boxes.
+  FCOORD rotation{1.0f, 0.0f};    // Common rotation of the blocks.
+  float angle = 0.0f;             // Angle of rotation.
+  float min_xheight = 1.0f;       // Min xheight of the blocks.
+  GenericVector<BLOCK*> blocks;   // Borrowed pointers to the member blocks.
+};
+
+// Groups blocks by rotation, then, for each group, makes a WordGrid and calls
+// TransferDiacriticsToWords to copy the diacritic blobs to the most
+// appropriate words in the group of blocks. Source blobs are not touched.
+void Textord::TransferDiacriticsToBlockGroups(BLOBNBOX_LIST* diacritic_blobs,
+                                              BLOCK_LIST* blocks) {
+  // Rotations whose angles differ by no more than this are treated as equal
+  // (about 0.6 degrees). They should only be multiples of M_PI/2 anyway.
+  const double kMaxAngleDiff = 0.01;
+
+  // Stage 1: sort the text blocks into groups that share a rotation.
+  PointerVector<BlockGroup> groups;
+  BLOCK_IT blk_it(blocks);
+  for (blk_it.mark_cycle_pt(); !blk_it.cycled_list(); blk_it.forward()) {
+    BLOCK* block = blk_it.data();
+    auto* poly = block->pdblk.poly_block();
+    if (poly != nullptr && !poly->IsText()) {
+      continue;  // Non-text blocks take no diacritics.
+    }
+
+    // Linear scan for the group whose angle is nearest, with wrap-around.
+    const float block_angle = block->re_rotation().angle();
+    int nearest = 0;
+    float nearest_diff = FLT_MAX;
+    for (int g = 0; g < groups.size(); ++g) {
+      double diff = fabs(block_angle - groups[g]->angle);
+      if (diff > M_PI) {
+        diff = fabs(diff - 2.0 * M_PI);
+      }
+      if (diff < nearest_diff) {
+        nearest_diff = diff;
+        nearest = g;
+      }
+    }
+
+    if (nearest_diff > kMaxAngleDiff) {
+      // Nothing close enough: this block starts a group of its own.
+      groups.push_back(new BlockGroup(block));
+      continue;
+    }
+    BlockGroup* match = groups[nearest];
+    match->blocks.push_back(block);
+    match->bounding_box += block->pdblk.bounding_box();
+    const float x_height = block->x_height();
+    if (x_height < match->min_xheight) {
+      match->min_xheight = x_height;
+    }
+  }
+
+  // Stage 2: per group, put all words in a grid and hand out the diacritics.
+  // word_ptrs owns the WordWithBox wrappers and deletes them at the end.
+  PointerVector<WordWithBox> word_ptrs;
+  for (int g = 0; g < groups.size(); ++g) {
+    const BlockGroup& group = *groups[g];
+    if (group.bounding_box.null_box()) {
+      continue;
+    }
+    WordGrid word_grid(group.min_xheight, group.bounding_box.botleft(),
+                       group.bounding_box.topright());
+    for (int b = 0; b < group.blocks.size(); ++b) {
+      ROW_IT rows(group.blocks[b]->row_list());
+      for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
+        WERD_IT words(rows.data()->word_list());
+        for (words.mark_cycle_pt(); !words.cycled_list(); words.forward()) {
+          auto* boxed_word = new WordWithBox(words.data());
+          word_grid.InsertBBox(true, true, boxed_word);
+          word_ptrs.push_back(boxed_word);
+        }
+      }
+    }
+    // Negate y to get the forward rotation that maps blob coords to block.
+    FCOORD forward_rotation = group.rotation;
+    forward_rotation.set_y(-forward_rotation.y());
+    TransferDiacriticsToWords(diacritic_blobs, forward_rotation, &word_grid);
+  }
+}
+
+// Places a copy of blobs that are near a word (after applying rotation to the
+// blob) in the most appropriate word, unless there is doubt, in which case a
+// blob can end up in two words. Source blobs are not touched.
+void Textord::TransferDiacriticsToWords(BLOBNBOX_LIST* diacritic_blobs,
+                                        const FCOORD& rotation,
+                                        WordGrid* word_grid) {
+  // Appends a rotated deep copy of the diacritic to the reject blobs list of
+  // the target word. The source blob itself is left alone.
+  const auto add_rotated_copy = [&rotation](BLOBNBOX* source,
+                                            WordWithBox* target) {
+    C_BLOB* copy = C_BLOB::deep_copy(source->cblob());
+    copy->rotate(rotation);
+    C_BLOB_IT rej_it(target->RejBlobs());
+    rej_it.add_to_end(copy);
+  };
+
+  WordSearch search(word_grid);
+  BLOBNBOX_IT diacritic_it(diacritic_blobs);
+  // Each blob box is rotated before the search, so only above/below placement
+  // has to be considered: all text is horizontal after the rotation.
+  for (diacritic_it.mark_cycle_pt(); !diacritic_it.cycled_list();
+       diacritic_it.forward()) {
+    BLOBNBOX* diacritic = diacritic_it.data();
+    TBOX blob_box = diacritic->bounding_box();
+    blob_box.rotate(rotation);
+    search.StartRectSearch(blob_box);
+
+    // "Above"/"below" is the position of the word relative to the diacritic.
+    // Some scripts (eg Kannada/Telugu) put diacritics below words, others
+    // (eg Thai/Vietnamese/Latin) mostly above, so track the best of each.
+    WordWithBox* word_above = nullptr;
+    WordWithBox* word_below = nullptr;
+    int distance_above = 0;
+    int distance_below = 0;
+    WordWithBox* candidate = search.NextRectSearch();
+    for (; candidate != nullptr; candidate = search.NextRectSearch()) {
+      if (candidate->word()->flag(W_REP_CHAR)) {
+        continue;
+      }
+      TBOX word_box = candidate->true_bounding_box();
+      int x_gap = blob_box.x_gap(word_box);
+      int distance = blob_box.y_gap(word_box);
+      if (x_gap > 0) {
+        // Halve the horizontal gap when there is a major y overlap and the
+        // word lies to the left of the diacritic. For a dropped, broken
+        // character between two words this helps keep all the pieces with
+        // one word instead of splitting them over both.
+        const bool word_on_left = blob_box.left() > word_box.right();
+        if (word_box.major_y_overlap(blob_box) && word_on_left) {
+          x_gap /= 2;
+        }
+        distance += x_gap;
+      }
+      const auto word_middle = word_box.y_middle();
+      const auto blob_middle = blob_box.y_middle();
+      if (word_middle > blob_middle &&
+          (word_above == nullptr || distance < distance_above)) {
+        word_above = candidate;
+        distance_above = distance;
+      }
+      if (word_middle <= blob_middle &&
+          (word_below == nullptr || distance < distance_below)) {
+        word_below = candidate;
+        distance_below = distance;
+      }
+    }
+
+    // A side is good when it exists and is not worse than the other side by
+    // a blob height or more; both sides can be good at the same time.
+    const int tolerance = blob_box.height();
+    const bool above_good =
+        word_above != nullptr &&
+        (word_below == nullptr ||
+         distance_above < distance_below + tolerance);
+    const bool below_good =
+        word_below != nullptr && word_below != word_above &&
+        (word_above == nullptr ||
+         distance_below < distance_above + tolerance);
+    if (below_good) {
+      add_rotated_copy(diacritic, word_below);
+    }
+    if (above_good) {
+      add_rotated_copy(diacritic, word_above);
+    }
+  }
+}
+
+/**********************************************************************
+ * tweak_row_baseline
+ *
+ * Shift baseline to fit the blobs more accurately where they are
+ * close enough.
+ **********************************************************************/
+
+void tweak_row_baseline(ROW *row,
+                        double blshift_maxshift,
+                        double blshift_xfraction) {
+  // Rebuilds row->baseline as a new QSPLINE. A blob qualifies for a shift
+  // when its bottom lies within blshift_maxshift x-heights of the current
+  // baseline and its height exceeds blshift_xfraction x-heights; such a
+  // blob gets a flat segment at its own bottom. Elsewhere the segments of
+  // the existing spline are copied across.
+  // Does nothing if the row has no blobs.
+  TBOX blob_box;                 //bounding box
+  C_BLOB *blob;                  //current blob
+  WERD *word;                    //current word
+  int32_t blob_count;              //no of blobs
+  int32_t src_index;               //source segment
+  int32_t dest_index;              //destination segment
+  float ydiff;                   //baseline error
+  float x_centre;                //centre of blob
+                                 //words of row
+  WERD_IT word_it = row->word_list ();
+  C_BLOB_IT blob_it;             //blob iterator
+
+  // Count the blobs in the row; this sizes the output arrays below.
+  blob_count = 0;
+  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
+    word = word_it.data ();      //current word
+                                 //get total blobs
+    blob_count += word->cblob_list ()->length ();
+  }
+  if (blob_count == 0)
+    return;
+  // spline segments: room for one per blob plus every existing segment,
+  // with one extra entry for the final x coordinate
+  std::vector<int32_t> xstarts(blob_count + row->baseline.segments + 1);
+  // spline coeffs: three (a, b, c) per segment
+  std::vector<double> coeffs((blob_count + row->baseline.segments) * 3);
+
+  src_index = 0;
+  dest_index = 0;
+  // The new spline starts where the old one does.
+  xstarts[0] = row->baseline.xcoords[0];
+  for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
+    word = word_it.data ();      //current word
+                                 //blobs in word
+    blob_it.set_to_list (word->cblob_list ());
+    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+    blob_it.forward ()) {
+      blob = blob_it.data ();
+      blob_box = blob->bounding_box ();
+      x_centre = (blob_box.left () + blob_box.right ()) / 2.0;
+      // ydiff becomes the absolute baseline error at the blob centre, in
+      // units of the row x-height.
+      ydiff = blob_box.bottom () - row->base_line (x_centre);
+      if (ydiff < 0)
+        ydiff = -ydiff / row->x_height ();
+      else
+        ydiff = ydiff / row->x_height ();
+      if (ydiff < blshift_maxshift
+        && blob_box.height () / row->x_height () > blshift_xfraction) {
+        // Qualifying blob: emit a flat segment (a = b = 0) at the blob
+        // bottom. If the pending segment start is at or beyond the blob
+        // centre, it is reset to the blob's left edge first.
+        if (xstarts[dest_index] >= x_centre)
+          xstarts[dest_index] = blob_box.left ();
+        coeffs[dest_index * 3] = 0;
+        coeffs[dest_index * 3 + 1] = 0;
+        coeffs[dest_index * 3 + 2] = blob_box.bottom ();
+        //shift it
+        dest_index++;
+        // The next segment starts just past the right edge of this blob.
+        xstarts[dest_index] = blob_box.right () + 1;
+      }
+      else {
+        // Non-qualifying blob: keep the old baseline here. Only needed if
+        // the pending segment does not already start beyond the blob centre.
+        if (xstarts[dest_index] <= x_centre) {
+          // Advance through the source segments that end at or before the
+          // blob centre, copying each one that ends after the pending
+          // start. src_index never moves past the last source segment.
+          while (row->baseline.xcoords[src_index + 1] <= x_centre
+          && src_index < row->baseline.segments - 1) {
+            if (row->baseline.xcoords[src_index + 1] >
+            xstarts[dest_index]) {
+              coeffs[dest_index * 3] =
+                row->baseline.quadratics[src_index].a;
+              coeffs[dest_index * 3 + 1] =
+                row->baseline.quadratics[src_index].b;
+              coeffs[dest_index * 3 + 2] =
+                row->baseline.quadratics[src_index].c;
+              dest_index++;
+              xstarts[dest_index] =
+                row->baseline.xcoords[src_index + 1];
+            }
+            src_index++;
+          }
+          // Copy the source segment that the loop above stopped on.
+          coeffs[dest_index * 3] =
+            row->baseline.quadratics[src_index].a;
+          coeffs[dest_index * 3 + 1] =
+            row->baseline.quadratics[src_index].b;
+          coeffs[dest_index * 3 + 2] =
+            row->baseline.quadratics[src_index].c;
+          dest_index++;
+          xstarts[dest_index] = row->baseline.xcoords[src_index + 1];
+        }
+      }
+    }
+  }
+  // Skip any source segments that end at or before the pending start...
+  while (src_index < row->baseline.segments
+    && row->baseline.xcoords[src_index + 1] <= xstarts[dest_index])
+    src_index++;
+  // ...and copy whatever source segments remain after the last blob.
+  while (src_index < row->baseline.segments) {
+    coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a;
+    coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b;
+    coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c;
+    dest_index++;
+    src_index++;
+    xstarts[dest_index] = row->baseline.xcoords[src_index];
+  }
+                                 //turn to spline
+  // dest_index is now the number of segments written.
+  row->baseline = QSPLINE(dest_index, &xstarts[0], &coeffs[0]);
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/tordmain.h b/tesseract/src/textord/tordmain.h
new file mode 100644
index 00000000..2a6e31fa
--- /dev/null
+++ b/tesseract/src/textord/tordmain.h
@@ -0,0 +1,45 @@
+/**********************************************************************
+ * File:        tordmain.h  (Formerly textordp.h)
+ * Description: C++ top level textord code.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           TORDMAIN_H
+#define           TORDMAIN_H
+
+#include          "params.h"
+#include          "ocrblock.h"
+#include          "blobs.h"
+#include          "blobbox.h"
+
+#include          <ctime>
+
+struct Pix;
+
+namespace tesseract {
+
+class Tesseract;
+
+void SetBlobStrokeWidth(Pix* pix, BLOBNBOX* blob);
+void assign_blobs_to_blocks2(Pix* pix, BLOCK_LIST *blocks,
+                             TO_BLOCK_LIST *port_blocks);
+
+void tweak_row_baseline(ROW *row,                   // row whose baseline spline is rebuilt
+                        double blshift_maxshift,    // max baseline error, in x-heights
+                        double blshift_xfraction);  // min blob height, in x-heights
+
+}  // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/tospace.cpp b/tesseract/src/textord/tospace.cpp
new file mode 100644
index 00000000..6ab17a64
--- /dev/null
+++ b/tesseract/src/textord/tospace.cpp
@@ -0,0 +1,1894 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+/**********************************************************************
+ * tospace.cpp
+ *
+ * Compute fuzzy word spacing thresholds for each row.
+ * I.e. set :   max_nonspace
+ *              space_threshold
+ *              min_space
+ *              kern_size
+ *              space_size
+ * for each row.
+ * ONLY FOR PROPORTIONAL BLOCKS - FIXED PITCH IS ASSUMED ALREADY DONE
+ *
+ * Note: functions in this file were originally not members of any
+ * class or enclosed by any namespace. Now they are all static members
+ * of the Textord class.
+ *
+ **********************************************************************/
+
+#include "drawtord.h"
+#include "statistc.h"
+#include "textord.h"
+#include "tovars.h"
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include <algorithm>
+#include <memory>
+
+#define MAXSPACING      128      /*max expected spacing in pix */
+
+namespace tesseract {
+void Textord::to_spacing(
+    ICOORD page_tr,        // top right of page (not referenced in the body)
+    TO_BLOCK_LIST *blocks  // blocks on page
+                         ) {
+  // For every block: estimate the block-wide space and non-space gap widths,
+  // then derive per-row thresholds for each proportional row. Rows with any
+  // other pitch decision are only reported in the debug output.
+  int16_t space_gap_est;      // estimated width of real spaces in the block
+  int16_t non_space_gap_est;  // estimated width of non space gaps in the block
+  bool was_proportional;      // old fixed/prop result
+
+  TO_BLOCK_IT blk_it;
+  blk_it.set_to_list(blocks);
+  int blk_no = 1;  // block number, counted from 1
+  for (blk_it.mark_cycle_pt(); !blk_it.cycled_list(); blk_it.forward()) {
+    TO_BLOCK *blk = blk_it.data();
+    // Map of big vert gaps in blk; released at the end of each iteration.
+    std::unique_ptr<GAPMAP> gapmap(new GAPMAP(blk));
+    // Block-wide estimates; all three outputs are set by the call.
+    block_spacing_stats(blk,
+                        gapmap.get(),
+                        was_proportional,
+                        space_gap_est,
+                        non_space_gap_est);
+
+    // Make sure relative values of block-level space and non-space gap
+    // widths are reasonable. The ratio of 1:3 is also used in
+    // block_spacing_stats, to correct the block_space_gap_width.
+    // Useful for arabic and hindi, when the non-space gap width is
+    // often over-estimated and should not be trusted.
+    if (tosp_old_to_method && tosp_old_to_constrain_sp_kn &&
+        static_cast<float>(space_gap_est) / non_space_gap_est < 3.0) {
+      non_space_gap_est = static_cast<int16_t>(floor(space_gap_est / 3.0));
+    }
+
+    // Per-row thresholds, measured for proportional rows only.
+    TO_ROW_IT row_it(blk->get_rows());
+    int row_no = 1;  // row number within the block, counted from 1
+    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+      TO_ROW *row = row_it.data();
+      const bool debug = tosp_debug_level > 0;
+      const bool is_proportional = row->pitch_decision == PITCH_DEF_PROP ||
+                                   row->pitch_decision == PITCH_CORR_PROP;
+      if (is_proportional) {
+        if (debug && !was_proportional) {
+          tprintf("Block %d Row %d: Now Proportional\n", blk_no, row_no);
+        }
+        row_spacing_stats(row,
+                          gapmap.get(),
+                          blk_no,
+                          row_no,
+                          space_gap_est,
+                          non_space_gap_est);
+      } else if (debug && was_proportional) {
+        tprintf("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
+                blk_no, row_no, row->pitch_decision, row->fixed_pitch);
+      }
+#ifndef GRAPHICS_DISABLED
+      // Plotting is compiled in only when graphics are not disabled.
+      if (textord_show_initial_words) {
+        plot_word_decisions(to_win, static_cast<int16_t>(row->fixed_pitch),
+                            row);
+      }
+#endif
+      ++row_no;
+    }
+    ++blk_no;
+  }
+}
+
+
+/*************************************************************************
+ * block_spacing_stats()
+ * Pass 1: the median of all usable gaps gives block_non_space_gap_width.
+ * Pass 2: the median of the "wide" gaps gives block_space_gap_width (or -1).
+ *************************************************************************/
+void Textord::block_spacing_stats(
+        TO_BLOCK* block,                    // block whose rows are measured
+        GAPMAP* gapmap,                     // passed on to ignore_big_gap()
+        bool& old_text_ord_proportional,    // out: debug-only fixed/prop guess
+        int16_t& block_space_gap_width,     // out: space estimate, -1 if none
+        int16_t& block_non_space_gap_width  // out: non-space gap estimate
+) {
+  TO_ROW *row;                   // current row
+  BLOBNBOX_IT blob_it;           // iterator
+
+  STATS centre_to_centre_stats (0, MAXSPACING);
+  // DEBUG USE ONLY
+  STATS all_gap_stats (0, MAXSPACING);
+  STATS space_gap_stats (0, MAXSPACING);
+  int16_t minwidth = MAXSPACING;    // narrowest blob
+  TBOX blob_box;
+  TBOX prev_blob_box;
+  int16_t centre_to_centre;
+  int16_t gap_width;
+  float real_space_threshold;
+  float iqr_centre_to_centre;    // DEBUG USE ONLY
+  float iqr_all_gap_stats;       // DEBUG USE ONLY
+  int32_t end_of_row;
+  int32_t row_length;
+
+  // Pass 1: collect every gap that is not ignored, from the rows in use
+  TO_ROW_IT row_it(block->get_rows());
+  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+    row = row_it.data ();
+    if (!row->blob_list ()->empty () &&
+      (!tosp_only_use_prop_rows ||
+      (row->pitch_decision == PITCH_DEF_PROP) ||
+    (row->pitch_decision == PITCH_CORR_PROP))) {
+      blob_it.set_to_list (row->blob_list ());
+      blob_it.mark_cycle_pt ();
+      end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
+      if (tosp_use_pre_chopping)
+        blob_box = box_next_pre_chopped (&blob_it);
+      else if (tosp_stats_use_xht_gaps)
+        blob_box = reduced_box_next (row, &blob_it);
+      else
+        blob_box = box_next (&blob_it);
+      row_length = end_of_row - blob_box.left ();
+      if (blob_box.width () < minwidth)
+        minwidth = blob_box.width ();
+      prev_blob_box = blob_box;
+      while (!blob_it.cycled_list ()) {
+        if (tosp_use_pre_chopping)
+          blob_box = box_next_pre_chopped (&blob_it);
+        else if (tosp_stats_use_xht_gaps)
+          blob_box = reduced_box_next (row, &blob_it);
+        else
+          blob_box = box_next (&blob_it);
+        if (blob_box.width () < minwidth)
+          minwidth = blob_box.width ();
+        int16_t left = prev_blob_box.right();
+        int16_t right = blob_box.left();
+        gap_width = right - left;
+        if (!ignore_big_gap(row, row_length, gapmap, left, right)) {
+          all_gap_stats.add (gap_width, 1);
+
+          centre_to_centre = (right + blob_box.right () -
+            (prev_blob_box.left () + left)) / 2;
+          centre_to_centre_stats.add (centre_to_centre, 1);
+          // centre_to_centre_stats only feeds the debug-only fixed/prop guess
+        }
+        prev_blob_box = blob_box;
+      }
+    }
+  }
+
+                                 //Inadequate samples
+  if (all_gap_stats.get_total () <= 1) {
+    block_non_space_gap_width = minwidth;
+    block_space_gap_width = -1;  //No est. space width
+                                 //DEBUG
+    old_text_ord_proportional = true;
+  }
+  else {
+    /* For debug only ..... */
+    iqr_centre_to_centre = centre_to_centre_stats.ile (0.75) -
+      centre_to_centre_stats.ile (0.25);
+    iqr_all_gap_stats = all_gap_stats.ile (0.75) - all_gap_stats.ile (0.25);
+    old_text_ord_proportional =
+      iqr_centre_to_centre * 2 > iqr_all_gap_stats;
+    /* .......For debug only */
+
+    /*
+    The median of the gaps is used as an estimate of the NON-SPACE gap width.
+    This RELIES on the assumption that there are more gaps WITHIN words than
+    BETWEEN words in a block
+
+    Now try to estimate the width of a real space for all real spaces in the
+    block. Do this by using a crude threshold to ignore "narrow" gaps, then
+    find the median of the "wide" gaps and use this.
+    */
+    block_non_space_gap_width = static_cast<int16_t>(floor (all_gap_stats.median ()));
+    // median gap
+
+    row_it.set_to_list (block->get_rows ());
+    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+      row = row_it.data ();
+      if (!row->blob_list ()->empty () &&
+        (!tosp_only_use_prop_rows ||
+        (row->pitch_decision == PITCH_DEF_PROP) ||
+      (row->pitch_decision == PITCH_CORR_PROP))) {
+        real_space_threshold =
+                std::max (tosp_init_guess_kn_mult * block_non_space_gap_width,
+          tosp_init_guess_xht_mult * row->xheight);
+        blob_it.set_to_list (row->blob_list ());
+        blob_it.mark_cycle_pt ();
+        end_of_row =
+          blob_it.data_relative (-1)->bounding_box ().right ();
+        if (tosp_use_pre_chopping)
+          blob_box = box_next_pre_chopped (&blob_it);
+        else if (tosp_stats_use_xht_gaps)
+          blob_box = reduced_box_next (row, &blob_it);
+        else
+          blob_box = box_next (&blob_it);
+        row_length = end_of_row - blob_box.left ();  // same sign as in pass 1
+        prev_blob_box = blob_box;
+        while (!blob_it.cycled_list ()) {
+          if (tosp_use_pre_chopping)
+            blob_box = box_next_pre_chopped (&blob_it);
+          else if (tosp_stats_use_xht_gaps)
+            blob_box = reduced_box_next (row, &blob_it);
+          else
+            blob_box = box_next (&blob_it);
+          int16_t left = prev_blob_box.right();
+          int16_t right = blob_box.left();
+          gap_width = right - left;
+          if ((gap_width > real_space_threshold) &&
+              !ignore_big_gap(row, row_length, gapmap, left, right)) {
+            /*
+            If tosp_block_use_cert_spaces is enabled, the estimate of the space
+            gap is restricted to obvious spaces - those wider than half the xht
+            or those with wide blobs on both sides - i.e not things that are
+            suspect 1's or punctuation that is sometimes widely spaced.
+            */
+            if (!tosp_block_use_cert_spaces ||
+              (gap_width >
+              tosp_fuzzy_space_factor2 * row->xheight)
+              ||
+              ((gap_width >
+              tosp_fuzzy_space_factor1 * row->xheight)
+              && (!tosp_narrow_blobs_not_cert
+              || (!narrow_blob (row, prev_blob_box)
+              && !narrow_blob (row, blob_box))))
+              || (wide_blob (row, prev_blob_box)
+              && wide_blob (row, blob_box)))
+              space_gap_stats.add (gap_width, 1);
+          }
+          prev_blob_box = blob_box;
+        }
+      }
+    }
+                                 //Inadequate samples
+    if (space_gap_stats.get_total () <= 2)
+      block_space_gap_width = -1;//No est. space width
+    else
+      block_space_gap_width =    // never below 3x the non-space estimate
+              std::max(static_cast<int16_t>(floor(space_gap_stats.median())),
+                       static_cast<int16_t>(3 * block_non_space_gap_width));
+  }
+}
+
+
+/*************************************************************************
+ * row_spacing_stats()
+ * Sets the row's kern_size, space_size, space_threshold, min_space and
+ * max_nonspace from the row's own gaps, with the block values as fallback.
+ *************************************************************************/
+void Textord::row_spacing_stats(
+    TO_ROW *row,                      // row to measure; results stored in it
+    GAPMAP *gapmap,                   // passed on to ignore_big_gap()
+    int16_t block_idx,                // block number, shown in debug output
+    int16_t row_idx,                  // row number, shown in debug output
+    int16_t block_space_gap_width,    //estimate for block
+    int16_t block_non_space_gap_width //estimate for block
+                                ) {
+  // blob iterator over the row
+  BLOBNBOX_IT blob_it = row->blob_list ();
+  STATS all_gap_stats (0, MAXSPACING);
+  STATS cert_space_gap_stats (0, MAXSPACING);
+  STATS all_space_gap_stats (0, MAXSPACING);
+  STATS small_gap_stats (0, MAXSPACING);
+  TBOX blob_box;
+  TBOX prev_blob_box;
+  int16_t gap_width;
+  int16_t real_space_threshold = 0;
+  int16_t max = 0;                 // largest pile count seen so far
+  int16_t index;
+  int16_t large_gap_count = 0;     // gaps rejected by ignore_big_gap()
+  bool suspected_table;
+  int32_t max_max_nonspace;        //upper bound
+  bool good_block_space_estimate = block_space_gap_width > 0;
+  int32_t end_of_row;
+  int32_t row_length = 0;
+  float sane_space;
+  int32_t sane_threshold;
+
+  /* Collect first pass stats for row */
+
+  if (!good_block_space_estimate)  // no block estimate: use half the xheight
+    block_space_gap_width = int16_t (floor (row->xheight / 2));
+  if (!row->blob_list ()->empty ()) {
+    if (tosp_threshold_bias1 > 0)
+      real_space_threshold =
+        block_non_space_gap_width +
+        int16_t (floor (0.5 +
+        tosp_threshold_bias1 * (block_space_gap_width -
+                                block_non_space_gap_width)));
+    else
+      real_space_threshold =     //Old TO method
+        (block_space_gap_width + block_non_space_gap_width) / 2;
+    blob_it.set_to_list (row->blob_list ());
+    blob_it.mark_cycle_pt ();
+    end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
+    if (tosp_use_pre_chopping)
+      blob_box = box_next_pre_chopped (&blob_it);
+    else if (tosp_stats_use_xht_gaps)
+      blob_box = reduced_box_next (row, &blob_it);
+    else
+      blob_box = box_next (&blob_it);
+    row_length = end_of_row - blob_box.left ();
+    prev_blob_box = blob_box;
+    while (!blob_it.cycled_list ()) {
+      if (tosp_use_pre_chopping)
+        blob_box = box_next_pre_chopped (&blob_it);
+      else if (tosp_stats_use_xht_gaps)
+        blob_box = reduced_box_next (row, &blob_it);
+      else
+        blob_box = box_next (&blob_it);
+      int16_t left = prev_blob_box.right();
+      int16_t right = blob_box.left();
+      gap_width = right - left;
+      if (ignore_big_gap(row, row_length, gapmap, left, right)) {
+        large_gap_count++;
+      } else {
+        if (gap_width >= real_space_threshold) {
+          if (!tosp_row_use_cert_spaces ||
+            (gap_width > tosp_fuzzy_space_factor2 * row->xheight) ||
+            ((gap_width > tosp_fuzzy_space_factor1 * row->xheight)
+            && (!tosp_narrow_blobs_not_cert
+            || (!narrow_blob (row, prev_blob_box)
+            && !narrow_blob (row, blob_box))))
+            || (wide_blob (row, prev_blob_box)
+            && wide_blob (row, blob_box)))
+            cert_space_gap_stats.add (gap_width, 1);
+          all_space_gap_stats.add (gap_width, 1);
+        }
+        else
+          small_gap_stats.add (gap_width, 1);
+        all_gap_stats.add (gap_width, 1);
+      }
+      prev_blob_box = blob_box;
+    }
+  }
+  suspected_table = (large_gap_count > 1) ||
+      ((large_gap_count > 0) &&
+       (all_gap_stats.get_total () <= tosp_few_samples));
+
+  /* Now determine row kern size, space size and threshold */
+
+  if ((cert_space_gap_stats.get_total () >=
+    tosp_enough_space_samples_for_median) ||
+    ((suspected_table ||
+    all_gap_stats.get_total () <= tosp_short_row) &&
+    cert_space_gap_stats.get_total () > 0)) {
+    old_to_method(row,
+                  &all_gap_stats,
+                  &cert_space_gap_stats,
+                  &small_gap_stats,
+                  block_space_gap_width,
+                  block_non_space_gap_width);
+  } else {
+    if (!tosp_recovery_isolated_row_stats ||
+        !isolated_row_stats (row, gapmap, &all_gap_stats, suspected_table,
+                             block_idx, row_idx)) {
+      if (tosp_row_use_cert_spaces && (tosp_debug_level > 5))
+        tprintf ("B:%d R:%d -- Inadequate certain spaces.\n",
+          block_idx, row_idx);
+      if (tosp_row_use_cert_spaces1 && good_block_space_estimate) {
+                                 //Use block default
+        row->space_size = block_space_gap_width;
+        if (all_gap_stats.get_total () > tosp_redo_kern_limit)
+          row->kern_size = all_gap_stats.median ();
+        else
+          row->kern_size = block_non_space_gap_width;
+        row->space_threshold =
+          int32_t (floor ((row->space_size + row->kern_size) /
+                        tosp_old_sp_kn_th_factor));
+      }
+      else
+        old_to_method(row,
+                      &all_gap_stats,
+                      &all_space_gap_stats,
+                      &small_gap_stats,
+                      block_space_gap_width,
+                      block_non_space_gap_width);
+    }
+  }
+
+  if (tosp_improve_thresh && !suspected_table)
+    improve_row_threshold(row, &all_gap_stats);
+
+  /* Now let's try to be careful not to do anything silly with tables when we
+  are ignoring big gaps */
+  if (tosp_sanity_method == 0) {
+    if (suspected_table &&
+    (row->space_size < tosp_table_kn_sp_ratio * row->kern_size)) {
+      if (tosp_debug_level > 5)
+        tprintf("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f.\n", block_idx,
+                row_idx, row->kern_size, row->space_threshold, row->space_size);
+      row->space_threshold =
+        static_cast<int32_t>(tosp_table_kn_sp_ratio * row->kern_size);
+      row->space_size = std::max(row->space_threshold + 1.0f, row->xheight);
+    }
+  }
+  else if (tosp_sanity_method == 1) {
+    sane_space = row->space_size;
+    /* NEVER let space size get too close to kern size */
+    if ((row->space_size < tosp_min_sane_kn_sp * std::max(row->kern_size, 2.5f))
+      || ((row->space_size - row->kern_size) <
+    (tosp_silly_kn_sp_gap * row->xheight))) {
+      if (good_block_space_estimate &&
+        (block_space_gap_width >= tosp_min_sane_kn_sp * row->kern_size))
+        sane_space = block_space_gap_width;
+      else
+        sane_space =
+                std::max(static_cast<float>(tosp_min_sane_kn_sp) * std::max(row->kern_size, 2.5f),
+          row->xheight / 2.0f);
+      if (tosp_debug_level > 5)
+        tprintf("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n",
+                block_idx, row_idx, row->kern_size, row->space_threshold,
+                row->space_size, sane_space);
+      row->space_size = sane_space;
+      row->space_threshold =
+        int32_t (floor ((row->space_size + row->kern_size) /
+                      tosp_old_sp_kn_th_factor));
+    }
+    /* NEVER let threshold get VERY far away from kern */
+    sane_threshold = int32_t (floor (tosp_max_sane_kn_thresh *
+                                             std::max(row->kern_size, 2.5f)));
+    if (row->space_threshold > sane_threshold) {
+      if (tosp_debug_level > 5)
+        tprintf("B:%d R:%d -- DON'T BELIEVE THRESH %3.2f %d %3.2f->%d.\n",
+                block_idx, row_idx, row->kern_size, row->space_threshold,
+                row->space_size, sane_threshold);
+      row->space_threshold = sane_threshold;
+      if (row->space_size <= sane_threshold)
+        row->space_size = row->space_threshold + 1.0f;
+    }
+    /* Beware of tables - there may be NO spaces */
+    if (suspected_table) {
+      sane_space = std::max(tosp_table_kn_sp_ratio * row->kern_size,
+        tosp_table_xht_sp_ratio * row->xheight);
+      sane_threshold = int32_t (floor ((sane_space + row->kern_size) / 2));
+
+      if ((row->space_size < sane_space) ||
+      (row->space_threshold < sane_threshold)) {
+        if (tosp_debug_level > 5)
+          tprintf ("B:%d R:%d -- SUSPECT NO SPACES %3.2f %d %3.2f.\n",
+            block_idx, row_idx,
+            row->kern_size,
+            row->space_threshold, row->space_size);
+                                 //the minimum sane value
+        row->space_threshold = static_cast<int32_t>(sane_space);
+        row->space_size = std::max(row->space_threshold + 1.0f, row->xheight);
+      }
+    }
+  }
+
+  /* Now let's try to put some error limits on the threshold */
+
+  if (tosp_old_to_method) {
+    /* Old textord made a space if gap >= threshold */
+                                 //NO FUZZY SPACES YET
+    row->max_nonspace = row->space_threshold;
+                                 //NO FUZZY SPACES       YET
+    row->min_space = row->space_threshold + 1;
+  }
+  else {
+    /* Any gap greater than 0.6 x-ht is bound to be a space (isn't it:-) */
+    row->min_space =
+            std::min(int32_t (ceil (tosp_fuzzy_space_factor * row->xheight)),
+      int32_t (row->space_size));
+    if (row->min_space <= row->space_threshold)
+      // Don't be silly: min_space must stay above the threshold
+      row->min_space = row->space_threshold + 1;
+    /*
+    Let's try to guess the max certain kern gap by looking at the cluster of
+    kerns for the row. The row is proportional so the kerns should cluster
+    tightly at the bottom of the distribution. We also expect most gaps to be
+    kerns. Find the maximum of the kern piles between 0 and twice the kern
+    estimate. Piles before the first one with less than 1/10 the maximum
+    number of samples can be taken as certain kerns.
+
+      Of course, there are some cases where the kern peak and space peaks merge,
+      so we will put an UPPER limit on the max certain kern gap of some fraction
+      below the threshold.
+    */
+
+    max_max_nonspace = int32_t ((row->space_threshold + row->kern_size) / 2);
+
+                                 //default
+    row->max_nonspace = max_max_nonspace;
+    for (index = 0; index <= max_max_nonspace; index++) {
+      if (all_gap_stats.pile_count (index) > max)
+        max = all_gap_stats.pile_count (index);
+      if ((index > row->kern_size) &&
+      (all_gap_stats.pile_count (index) < 0.1 * max)) {
+        row->max_nonspace = index;
+        break;
+      }
+    }
+  }
+
+  /* Yet another algorithm - simpler this time - just choose a fraction of the
+  threshold to space range */
+
+  if ((tosp_fuzzy_sp_fraction > 0) &&
+    (row->space_size > row->space_threshold))
+    row->min_space = std::max(row->min_space,
+      static_cast<int32_t>(ceil (row->space_threshold +
+      tosp_fuzzy_sp_fraction *
+      (row->space_size -
+      row->space_threshold))));
+
+  /* Ensure that ANY space less than some multiplier times the kern size is
+  fuzzy.  In tables there is a risk of erroneously setting a small space size
+  when there are no real spaces. Sometimes tables have text squashed into
+  columns so that the kn->sp ratio is small anyway - this means that we can't
+  use this to force a wider separation - hence we rely on context to join any
+  dubious breaks. */
+
+  if ((tosp_table_fuzzy_kn_sp_ratio > 0) &&
+    (suspected_table || tosp_fuzzy_limit_all))
+    row->min_space = std::max(row->min_space,
+      static_cast<int32_t>(ceil (tosp_table_fuzzy_kn_sp_ratio *
+      row->kern_size)));
+
+  if ((tosp_fuzzy_kn_fraction > 0) && (row->kern_size < row->space_threshold)) {
+    row->max_nonspace = static_cast<int32_t>(floor (0.5 + row->kern_size +
+      tosp_fuzzy_kn_fraction *
+      (row->space_threshold -
+      row->kern_size)));
+  }
+  if (row->max_nonspace > row->space_threshold) {
+    // Don't be silly: max_nonspace may not exceed the threshold
+    row->max_nonspace = row->space_threshold;
+  }
+
+  if (tosp_debug_level > 5)
+    tprintf
+      ("B:%d R:%d L:%d-- Kn:%d Sp:%d Thr:%d -- Kn:%3.2f (%d) Thr:%d (%d) Sp:%3.2f\n",
+      block_idx, row_idx, row_length, block_non_space_gap_width,
+      block_space_gap_width, real_space_threshold, row->kern_size,
+      row->max_nonspace, row->space_threshold, row->min_space,
+      row->space_size);
+  if (tosp_debug_level > 10)
+    tprintf("row->kern_size = %3.2f, row->space_size = %3.2f, "
+            "row->space_threshold = %d\n",
+            row->kern_size, row->space_size, row->space_threshold);
+}
+
+void Textord::old_to_method(
+    TO_ROW *row,
+    STATS *all_gap_stats,
+    STATS *space_gap_stats,
+    STATS *small_gap_stats,
+    int16_t block_space_gap_width,     //estimate for block
+    int16_t block_non_space_gap_width  //estimate for block
+                            ) {
+  // Step 1: estimate the row space size. With enough samples the median of
+  // the space gaps is used (the old TO condition was > 2 samples), with
+  // fewer samples the mean, and with no samples at all the block estimate.
+  const bool enough_for_median =
+      space_gap_stats->get_total() >= tosp_enough_space_samples_for_median;
+  if (enough_for_median || space_gap_stats->get_total() >= 1) {
+    if (enough_for_median) {
+      row->space_size = space_gap_stats->median();
+    } else {
+      // Too few samples for a median, hence mean
+      row->space_size = space_gap_stats->mean();
+    }
+    // Limit the estimate if it seems wildly out compared with the block.
+    if (row->space_size > block_space_gap_width * 1.5) {
+      if (tosp_old_to_bug_fix) {
+        row->space_size = block_space_gap_width * 1.5;
+      } else {
+        // BUG??? should be *1.5
+        row->space_size = block_space_gap_width;
+      }
+    }
+    // Keep the space size above a multiple of the block non-space gap:
+    // twice the gap (+1) after a median, three times (+1) after a mean.
+    const int min_space_size =
+        (block_non_space_gap_width * (enough_for_median ? 2 : 3)) + 1;
+    if (row->space_size < min_space_size) {
+      row->space_size = min_space_size;
+    }
+  } else {
+    // Use block default
+    row->space_size = block_space_gap_width;
+  }
+
+  // Step 2: estimate the row kern size from the small gaps, from all gaps,
+  // or from the block estimate, depending on how many samples there are.
+  const bool use_small_gaps =
+      tosp_only_small_gaps_for_kern &&
+      small_gap_stats->get_total() > tosp_redo_kern_limit;
+  if (use_small_gaps) {
+    row->kern_size = small_gap_stats->median();
+  } else if (all_gap_stats->get_total() > tosp_redo_kern_limit) {
+    row->kern_size = all_gap_stats->median();
+  } else {
+    // old TO - same for all rows
+    row->kern_size = block_non_space_gap_width;
+  }
+
+  // Step 3: estimate the row space threshold.
+  if (tosp_threshold_bias2 > 0) {
+    row->space_threshold = static_cast<int32_t>(
+        floor(0.5 + row->kern_size +
+              tosp_threshold_bias2 * (row->space_size - row->kern_size)));
+  } else {
+    // NOTE: old textord used (space_size + kern_size + 1) / 2 as the
+    // threshold, held it in a float and tested it with >=. New textord
+    // uses an integer threshold and a > test, which comes to the same
+    // thing (though old textord had integer space_size and kern_size).
+    row->space_threshold =
+        static_cast<int32_t>(floor((row->space_size + row->kern_size) / 2));
+  }
+
+  // Step 4: apply the same logic and ratios as in row_spacing_stats to
+  // restrict relative values of space_size, kern_size and space_threshold.
+  if (tosp_old_to_constrain_sp_kn && tosp_sanity_method == 1) {
+    const bool space_too_near_kern =
+        row->space_size < tosp_min_sane_kn_sp * std::max(row->kern_size, 2.5f);
+    if (space_too_near_kern ||
+        (row->space_size - row->kern_size) <
+            tosp_silly_kn_sp_gap * row->xheight) {
+      if (row->kern_size > 2.5) {
+        row->kern_size = row->space_size / tosp_min_sane_kn_sp;
+      }
+      row->space_threshold = static_cast<int32_t>(
+          floor((row->space_size + row->kern_size) / tosp_old_sp_kn_th_factor));
+    }
+  }
+}
+
+
+/*************************************************************************
+ * isolated_row_stats()
+ * Set row space_size, kern_size and space_threshold from row stats only
+ *************************************************************************/
+bool Textord::isolated_row_stats(TO_ROW* row,
+                                 GAPMAP* gapmap,
+                                 STATS* all_gap_stats,
+                                 bool suspected_table,
+                                 int16_t block_idx,
+                                 int16_t row_idx) {
+  // Crude kern / space split: gaps wider than a multiple of the median gap
+  // or of the x-height (whichever is larger) are candidate spaces.
+  const float kern_estimate = all_gap_stats->median();
+  const float crude_threshold_estimate =
+      std::max(tosp_init_guess_kn_mult * kern_estimate,
+               tosp_init_guess_xht_mult * row->xheight);
+  const int16_t small_gaps_count = stats_count_under(
+      all_gap_stats, static_cast<int16_t>(ceil(crude_threshold_estimate)));
+  const int16_t total = all_gap_stats->get_total();
+
+  // Give up when there are too few gaps, too small a fraction of them is
+  // small, or no gap reaches the crude threshold.
+  if ((total <= tosp_redo_kern_limit) ||
+      ((small_gaps_count / static_cast<float>(total)) <
+       tosp_enough_small_gaps) ||
+      (total - small_gaps_count < 1)) {
+    if (tosp_debug_level > 5) {
+      tprintf("B:%d R:%d -- Can't do isolated row stats.\n", block_idx,
+              row_idx);
+    }
+    return false;
+  }
+
+  STATS cert_space_gap_stats(0, MAXSPACING);  // gaps accepted as certain spaces
+  STATS all_space_gap_stats(0, MAXSPACING);   // all gaps above the threshold
+  STATS small_gap_stats(0, MAXSPACING);       // all gaps below the threshold
+
+  BLOBNBOX_IT blob_it;
+  blob_it.set_to_list(row->blob_list());
+  blob_it.mark_cycle_pt();
+  const int32_t end_of_row =
+      blob_it.data_relative(-1)->bounding_box().right();
+  // Fetches the next box with whichever extraction method is configured.
+  auto next_box = [&]() -> TBOX {
+    if (tosp_use_pre_chopping) return box_next_pre_chopped(&blob_it);
+    if (tosp_stats_use_xht_gaps) return reduced_box_next(row, &blob_it);
+    return box_next(&blob_it);
+  };
+
+  TBOX prev_box = next_box();
+  int32_t row_length = end_of_row - prev_box.left();
+  while (!blob_it.cycled_list()) {
+    TBOX cur_box = next_box();
+    int16_t left = prev_box.right();
+    int16_t right = cur_box.left();
+    const int16_t gap_width = right - left;
+    if (!ignore_big_gap(row, row_length, gapmap, left, right) &&
+        (gap_width > crude_threshold_estimate)) {
+      // Certain spaces: very wide gaps, fairly wide gaps (optionally only
+      // between blobs that are not narrow), or gaps between two wide blobs.
+      if ((gap_width > tosp_fuzzy_space_factor2 * row->xheight) ||
+          ((gap_width > tosp_fuzzy_space_factor1 * row->xheight) &&
+           (!tosp_narrow_blobs_not_cert ||
+            (!narrow_blob(row, prev_box) && !narrow_blob(row, cur_box)))) ||
+          (wide_blob(row, prev_box) && wide_blob(row, cur_box))) {
+        cert_space_gap_stats.add(gap_width, 1);
+      }
+      all_space_gap_stats.add(gap_width, 1);
+    }
+    if (gap_width < crude_threshold_estimate) {
+      small_gap_stats.add(gap_width, 1);
+    }
+    prev_box = cur_box;
+  }
+
+  // Space size: prefer the certain spaces, then all candidate spaces; use
+  // the median when there are enough samples, else the mean.
+  if (cert_space_gap_stats.get_total() >=
+      tosp_enough_space_samples_for_median) {
+    row->space_size = cert_space_gap_stats.median();
+  } else if (suspected_table && (cert_space_gap_stats.get_total() > 0)) {
+    // to avoid spaced 1's in tables
+    row->space_size = cert_space_gap_stats.mean();
+  } else if (all_space_gap_stats.get_total() >=
+             tosp_enough_space_samples_for_median) {
+    row->space_size = all_space_gap_stats.median();
+  } else {
+    row->space_size = all_space_gap_stats.mean();
+  }
+
+  row->kern_size = tosp_only_small_gaps_for_kern ? small_gap_stats.median()
+                                                 : all_gap_stats->median();
+  row->space_threshold =
+      static_cast<int32_t>(floor((row->space_size + row->kern_size) / 2));
+
+  // Sanity check: reject (and zero) the estimates unless
+  // kern_size < space_threshold < space_size and the threshold is positive.
+  const bool insane = (row->kern_size >= row->space_threshold) ||
+                      (row->space_threshold >= row->space_size) ||
+                      (row->space_threshold <= 0);
+  if (insane) {
+    if (tosp_debug_level > 5) {
+      tprintf("B:%d R:%d -- Isolated row stats SANITY FAILURE: %f %d %f\n",
+              block_idx, row_idx, row->kern_size, row->space_threshold,
+              row->space_size);
+    }
+    row->kern_size = 0.0f;
+    row->space_threshold = 0;
+    row->space_size = 0.0f;
+    return false;
+  }
+
+  if (tosp_debug_level > 5) {
+    tprintf("B:%d R:%d -- Isolated row stats: %f %d %f\n", block_idx, row_idx,
+            row->kern_size, row->space_threshold, row->space_size);
+  }
+  return true;
+}
+
+int16_t Textord::stats_count_under(STATS *stats, int16_t threshold) {
+  // Adds up the pile counts of every value in [0, threshold).
+  int16_t count = 0;
+  for (int16_t value = 0; value < threshold; ++value) {
+    count += stats->pile_count(value);
+  }
+  return count;
+}
+
+
+/*************************************************************************
+ * improve_row_threshold()
+ *    Try to recognise a "normal line" -
+ *           > 25 gaps
+ *     &&    space > 3 * kn  && space > 10
+ *              (I.e. reasonably large space and kn:sp ratio)
+ *     &&    > 3/4 # gaps < kn + (sp - kn)/3
+ *              (I.e. most gaps are well away from space estimate)
+ *     &&    a gap of max(3, (sp - kn) / 3) empty histogram positions is found
+ *           somewhere in the histogram between kn and sp
+ *     THEN set the threshold and fuzzy limits to this gap - ie NO fuzzies
+ *          NO!!!!! the bristol line has "11" with a gap of 12 between the 1's!!!
+ *          try moving the default threshold to within this band but leave the
+ *          fuzzy limit calculation as at present.
+ *************************************************************************/
+void Textord::improve_row_threshold(TO_ROW *row, STATS *all_gap_stats) {
+  // Moves row->space_threshold into a wide run of empty histogram piles lying
+  // between the kern and space estimates, if the row looks like a normal line.
+  // Only row->space_threshold is modified; all_gap_stats is only read.
+  const float sp = row->space_size;  // space size estimate for the row
+  const float kn = row->kern_size;   // kern size estimate for the row
+  int16_t zero_width = 0;            // length of the current run of empty piles
+  int16_t zero_start = 0;            // histogram index where that run begins
+
+  if (tosp_debug_level > 10)
+    tprintf ("Improve row threshold 0");
+  // Bail out unless there are > 25 gaps, sp > 10, sp > 3 * kn, and at least 3/4
+  // of all gaps fall below ceil(kn + (sp - kn) / 3 + 0.5).
+  if ((all_gap_stats->get_total () <= 25) ||
+    (sp <= 10) ||
+    (sp <= 3 * kn) ||
+    (stats_count_under (all_gap_stats,
+    static_cast<int16_t>(ceil (kn + (sp - kn) / 3 + 0.5))) <
+    (0.75 * all_gap_stats->get_total ())))
+    return;
+  if (tosp_debug_level > 10)
+    tprintf (" 1");
+  // Look for the first run of empty piles, starting between kn and sp, that is
+  // at least max(3, round((sp - kn) / 3)) long.
+  int16_t reqd_zero_width = static_cast<int16_t>(floor ((sp - kn) / 3 + 0.5));
+  if (reqd_zero_width < 3)
+    reqd_zero_width = 3;
+
+  int16_t index;  // histogram index; inspected after the loop, so not loop-local
+  for (index = int16_t (ceil (kn)); index < int16_t (floor (sp)); index++) {
+    if (all_gap_stats->pile_count (index) == 0) {
+      if (zero_width == 0)
+        zero_start = index;
+      zero_width++;
+    } else if (zero_width >= reqd_zero_width) {
+      break;  // the run that just ended is wide enough
+    } else {
+      zero_width = 0;  // run too short: start again
+    }
+  }
+  index--;  // if a wide-enough run was found, index is now its last empty pile
+  if (tosp_debug_level > 10)
+    tprintf (" reqd_z_width: %d found %d 0's, starting %d; thresh: %d\n",
+      reqd_zero_width, zero_width, zero_start, row->space_threshold);
+  // Nothing to do if no wide-enough run exists or the threshold is already in it.
+  if ((zero_width < reqd_zero_width) ||
+    ((row->space_threshold >= zero_start) &&
+    (row->space_threshold <= index)))
+    return;
+  if (tosp_debug_level > 10)
+    tprintf (" 2");
+  // Pull the threshold to the nearer edge of the run.
+  if (row->space_threshold < zero_start) {
+    if (tosp_debug_level > 5)
+      tprintf
+        ("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d  thresh:%d -> %d\n",
+        kn, sp, zero_start, index, row->space_threshold, zero_start);
+    row->space_threshold = zero_start;
+  }
+  if (row->space_threshold > index) {
+    if (tosp_debug_level > 5)
+      tprintf
+        ("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d  thresh:%d -> %d\n",
+        kn, sp, zero_start, index, row->space_threshold, index);
+    row->space_threshold = index;
+  }
+}
+
+
+/**********************************************************************
+ * make_prop_words
+ *
+ * Convert a TO_ROW to a ROW.
+ **********************************************************************/
+ROW *Textord::make_prop_words(
+    TO_ROW *row,     // row to make (source of the blobs and of the rep_words)
+    FCOORD rotation  // for drawing (not referenced in this function body)
+                              ) {  // returns the new ROW, or nullptr if the row has no blobs
+  bool bol;                     // start of line
+  /* prev_ values are for start of word being built. non prev_ values are for
+  the gap between the word being built and the next one. */
+  bool prev_fuzzy_sp;           // probably space
+  bool prev_fuzzy_non;          // probably not
+  uint8_t prev_blanks;             // in front of word
+  bool fuzzy_sp = false;        // probably space
+  bool fuzzy_non = false;       // probably not
+  uint8_t blanks = 0;              // in front of word
+  bool prev_gap_was_a_space = false;  // state kept between make_a_word_break calls
+  bool break_at_next_gap = false;     // state kept between make_a_word_break calls
+  ROW *real_row;                 // output row
+  C_OUTLINE_IT cout_it;          // used to append outlines of joined blobs
+  C_BLOB_LIST cblobs;            // blobs collected for the word being built
+  C_BLOB_IT cblob_it = &cblobs;
+  WERD_LIST words;               // words made so far, in row order
+  WERD *word;                    // new word
+  int32_t next_rep_char_word_right = INT32_MAX;  // right edge of next rep word; INT32_MAX if none
+  float repetition_spacing;      // gap between repetitions
+  int32_t xstarts[2];              // row ends
+  int32_t prev_x;                  // end of prev blob
+  BLOBNBOX *bblob;               // current blob
+  TBOX blob_box;                 // bounding box
+  BLOBNBOX_IT box_it;            // iterator
+  TBOX prev_blob_box;
+  TBOX next_blob_box;
+  int16_t prev_gap = INT16_MAX;
+  int16_t current_gap = INT16_MAX;
+  int16_t next_gap = INT16_MAX;
+  int16_t prev_within_xht_gap = INT16_MAX;
+  int16_t current_within_xht_gap = INT16_MAX;
+  int16_t next_within_xht_gap = INT16_MAX;
+  int16_t word_count = 0;        // words built from blobs (reported in the debug print)
+
+  // repeated char words
+  WERD_IT rep_char_it(&(row->rep_words));
+  if (!rep_char_it.empty ()) {
+    next_rep_char_word_right =
+      rep_char_it.data ()->bounding_box ().right ();
+  }
+
+  prev_x = -INT16_MAX;
+  cblob_it.set_to_list (&cblobs);
+  box_it.set_to_list (row->blob_list ());
+  // new words
+  WERD_IT word_it(&words);
+  bol = true;
+  prev_blanks = 0;
+  prev_fuzzy_sp = false;
+  prev_fuzzy_non = false;
+  if (!box_it.empty ()) {
+    xstarts[0] = box_it.data ()->bounding_box ().left ();
+    if (xstarts[0] > next_rep_char_word_right) {
+      /* We need to insert a repeated char word at the start of the row */
+      word = rep_char_it.extract ();
+      word_it.add_after_then_move (word);
+      /* Set spaces before repeated char word */
+      word->set_flag (W_BOL, true);
+      bol = false;
+      word->set_blanks (0);
+                                 //NO uncertainty
+      word->set_flag (W_FUZZY_SP, false);
+      word->set_flag (W_FUZZY_NON, false);
+      xstarts[0] = word->bounding_box ().left ();
+      /* Set spaces after repeated char word (and leave current word set) */
+      repetition_spacing = find_mean_blob_spacing (word);
+      current_gap = box_it.data ()->bounding_box ().left () -
+        next_rep_char_word_right;
+      current_within_xht_gap = current_gap;
+      if (current_gap > tosp_rep_space * repetition_spacing) {
+        prev_blanks = static_cast<uint8_t>(floor (current_gap / row->space_size));
+        if (prev_blanks < 1)
+          prev_blanks = 1;
+      }
+      else
+        prev_blanks = 0;
+      if (tosp_debug_level > 5)
+        tprintf ("Repch wd at BOL(%d, %d). rep spacing %5.2f;  Rgap:%d  ",
+          box_it.data ()->bounding_box ().left (),
+          box_it.data ()->bounding_box ().bottom (),
+          repetition_spacing, current_gap);
+      prev_fuzzy_sp = false;
+      prev_fuzzy_non = false;
+      if (rep_char_it.empty ()) {
+        next_rep_char_word_right = INT32_MAX;
+      }
+      else {
+        rep_char_it.forward ();
+        next_rep_char_word_right =
+          rep_char_it.data ()->bounding_box ().right ();
+      }
+    }
+    // Prime the look-ahead box and gaps from the current iterator position.
+    peek_at_next_gap(row,
+                     box_it,
+                     next_blob_box,
+                     next_gap,
+                     next_within_xht_gap);
+    do {  // once per blob, until the iterator is back at the first blob
+      bblob = box_it.data ();
+      blob_box = bblob->bounding_box ();
+      if (bblob->joined_to_prev ()) {  // merge into the blob collected last
+        if (bblob->cblob () != nullptr) {
+          cout_it.set_to_list (cblob_it.data ()->out_list ());
+          cout_it.move_to_last ();
+          cout_it.add_list_after (bblob->cblob ()->out_list ());
+          delete bblob->cblob ();  // its outline list was appended above
+        }
+      } else {
+        if (bblob->cblob() != nullptr)
+          cblob_it.add_after_then_move (bblob->cblob ());
+        prev_x = blob_box.right ();
+      }
+      box_it.forward ();         //next one
+      bblob = box_it.data ();
+      blob_box = bblob->bounding_box ();
+      // bblob / blob_box now describe the blob after the one handled above.
+      if (!bblob->joined_to_prev() && bblob->cblob() != nullptr) {
+        /* Real Blob - not multiple outlines or pre-chopped */
+        prev_gap = current_gap;
+        prev_within_xht_gap = current_within_xht_gap;
+        prev_blob_box = next_blob_box;
+        current_gap = next_gap;
+        current_within_xht_gap = next_within_xht_gap;
+        peek_at_next_gap(row,
+                         box_it,
+                         next_blob_box,
+                         next_gap,
+                         next_within_xht_gap);
+        // Neighbouring gaps passed on: the xht versions if tosp_only_use_xht_gaps.
+        int16_t prev_gap_arg = prev_gap;
+        int16_t next_gap_arg = next_gap;
+        if (tosp_only_use_xht_gaps) {
+          prev_gap_arg = prev_within_xht_gap;
+          next_gap_arg = next_within_xht_gap;
+        }
+        // Decide if a word-break should be inserted
+        if (blob_box.left () > next_rep_char_word_right ||
+            make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box,
+                              current_gap, current_within_xht_gap,
+                              next_blob_box, next_gap_arg,
+                              blanks, fuzzy_sp, fuzzy_non,
+                              prev_gap_was_a_space,
+                              break_at_next_gap) ||
+            box_it.at_first()) {
+          /* Form a new word out of the blobs collected */
+          word = new WERD (&cblobs, prev_blanks, nullptr);
+          word_count++;
+          word_it.add_after_then_move (word);
+          if (bol) {
+            word->set_flag (W_BOL, true);
+            bol = false;
+          }
+          if (prev_fuzzy_sp)
+                                 //probably space
+            word->set_flag (W_FUZZY_SP, true);
+          else if (prev_fuzzy_non)
+            word->set_flag (W_FUZZY_NON, true);
+          //probably not
+
+          if (blob_box.left () > next_rep_char_word_right) {
+            /* We need to insert a repeated char word */
+            word = rep_char_it.extract ();
+            word_it.add_after_then_move (word);
+
+            /* Set spaces before repeated char word */
+            repetition_spacing = find_mean_blob_spacing (word);
+            current_gap = word->bounding_box ().left () - prev_x;
+            current_within_xht_gap = current_gap;
+            if (current_gap > tosp_rep_space * repetition_spacing) {
+              blanks =
+                static_cast<uint8_t>(floor (current_gap / row->space_size));
+              if (blanks < 1)
+                blanks = 1;
+            }
+            else
+              blanks = 0;
+            if (tosp_debug_level > 5)
+              tprintf
+                ("Repch wd (%d,%d) rep gap %5.2f;  Lgap:%d (%d blanks);",
+                word->bounding_box ().left (),
+                word->bounding_box ().bottom (),
+                repetition_spacing, current_gap, blanks);
+            word->set_blanks (blanks);
+                                 //NO uncertainty
+            word->set_flag (W_FUZZY_SP, false);
+            word->set_flag (W_FUZZY_NON, false);
+
+            /* Set spaces after repeated char word (and leave current word set) */
+            current_gap =
+              blob_box.left () - next_rep_char_word_right;
+            if (current_gap > tosp_rep_space * repetition_spacing) {
+              blanks = static_cast<uint8_t>(current_gap / row->space_size);
+              if (blanks < 1)
+                blanks = 1;
+            }
+            else
+              blanks = 0;
+            if (tosp_debug_level > 5)
+              tprintf (" Rgap:%d (%d blanks)\n",
+                current_gap, blanks);
+            fuzzy_sp = false;
+            fuzzy_non = false;
+
+            if (rep_char_it.empty ()) {
+              next_rep_char_word_right = INT32_MAX;
+            }
+            else {
+              rep_char_it.forward ();
+              next_rep_char_word_right =
+                rep_char_it.data ()->bounding_box ().right ();
+            }
+          }
+          // Either close the line here, or carry the gap results over to the next word.
+          if (box_it.at_first () && rep_char_it.empty ()) {
+                                 //at end of line
+            word->set_flag (W_EOL, true);
+            xstarts[1] = prev_x;
+          }
+          else {
+            prev_blanks = blanks;
+            prev_fuzzy_sp = fuzzy_sp;
+            prev_fuzzy_non = fuzzy_non;
+          }
+        }
+      }
+    }
+    while (!box_it.at_first ()); //until back at start
+
+    /* Insert any further repeated char words */
+    while (!rep_char_it.empty ()) {
+      word = rep_char_it.extract ();
+      word_it.add_after_then_move (word);
+
+      /* Set spaces before repeated char word */
+      repetition_spacing = find_mean_blob_spacing (word);
+      current_gap = word->bounding_box ().left () - prev_x;
+      if (current_gap > tosp_rep_space * repetition_spacing) {
+        blanks = static_cast<uint8_t>(floor (current_gap / row->space_size));
+        if (blanks < 1)
+          blanks = 1;
+      }
+      else
+        blanks = 0;
+      if (tosp_debug_level > 5)
+        tprintf(
+            "Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n",
+            word->bounding_box().left(), word->bounding_box().bottom(),
+            repetition_spacing, current_gap, blanks);
+      word->set_blanks (blanks);
+                                 //NO uncertainty
+      word->set_flag (W_FUZZY_SP, false);
+      word->set_flag (W_FUZZY_NON, false);
+      prev_x = word->bounding_box ().right ();
+      if (rep_char_it.empty ()) {
+                                 //at end of line
+        word->set_flag (W_EOL, true);
+        xstarts[1] = prev_x;
+      }
+      else {
+        rep_char_it.forward ();
+      }
+    }
+    real_row = new ROW (row,  // kern and space sizes are passed converted to int16_t
+      static_cast<int16_t>(row->kern_size), static_cast<int16_t>(row->space_size));
+    word_it.set_to_list (real_row->word_list ());
+                                 //put words in row
+    word_it.add_list_after (&words);
+    real_row->recalc_bounding_box ();
+
+    if (tosp_debug_level > 4) {
+      tprintf ("Row: Made %d words in row ((%d,%d)(%d,%d))\n",
+        word_count,
+        real_row->bounding_box ().left (),
+        real_row->bounding_box ().bottom (),
+        real_row->bounding_box ().right (),
+        real_row->bounding_box ().top ());
+    }
+    return real_row;
+  }
+  return nullptr;  // the row has no blobs
+}
+
+/**********************************************************************
+ * make_blob_words
+ *
+ * Converts a TO_ROW to a ROW in which every blob becomes a word of its own,
+ * so that each word is (intended to be) a single character. Used for chopper test.
+ **********************************************************************/
+ROW *Textord::make_blob_words(
+    TO_ROW *row,     // row whose blobs are turned into words
+    FCOORD rotation  // for drawing; not referenced in this body
+                              ) {
+  // Makes one word per blob (continuation pieces are merged into the blob they
+  // are joined to) and returns them in a new ROW, or nullptr for an empty row.
+  C_OUTLINE_IT outline_it;
+  C_BLOB_LIST cblobs;
+  C_BLOB_IT collected_it = &cblobs;
+  WERD_LIST words;
+  TBOX current_box;  // box of the blob under the iterator (assigned, never read)
+  BLOBNBOX_IT blob_it;
+  int16_t words_made = 0;
+
+  collected_it.set_to_list(&cblobs);
+  blob_it.set_to_list(row->blob_list());
+  WERD_IT word_it(&words);
+  if (blob_it.empty()) {
+    return nullptr;  // a row without blobs yields no ROW
+  }
+
+  bool at_line_start = true;  // the next word made is the first of the line
+  do {
+    BLOBNBOX *blob = blob_it.data();
+    current_box = blob->bounding_box();
+    if (!blob->joined_to_prev()) {
+      // Independent blob: queue its C_BLOB (if it has one) for the next word.
+      if (blob->cblob() != nullptr) {
+        collected_it.add_after_then_move(blob->cblob());
+      }
+    } else if (blob->cblob() != nullptr) {
+      // Continuation piece: append its outlines to the blob queued last, then
+      // delete the C_BLOB those outlines came from.
+      outline_it.set_to_list(collected_it.data()->out_list());
+      outline_it.move_to_last();
+      outline_it.add_list_after(blob->cblob()->out_list());
+      delete blob->cblob();
+    }
+
+    blob_it.forward();  // look at the following blob
+    blob = blob_it.data();
+    current_box = blob->bounding_box();
+
+    // A following blob that is not joined to its predecessor closes the pending word.
+    if (blob->joined_to_prev() || cblobs.empty()) {
+      continue;
+    }
+    WERD *word = new WERD(&cblobs, 1, nullptr);
+    ++words_made;
+    word_it.add_after_then_move(word);
+    if (at_line_start) {
+      word->set_flag(W_BOL, true);
+      at_line_start = false;
+    }
+    if (blob_it.at_first()) {  // wrapped round to the first blob: end of line
+      word->set_flag(W_EOL, true);
+    }
+  } while (!blob_it.at_first());  // stop once the iterator is back at the start
+
+  // Create the output row and hand the new words over to it.
+  ROW *real_row = new ROW(row, static_cast<int16_t>(row->kern_size),
+                          static_cast<int16_t>(row->space_size));
+  word_it.set_to_list(real_row->word_list());
+  word_it.add_list_after(&words);
+  real_row->recalc_bounding_box();
+  if (tosp_debug_level > 4) {
+    tprintf ("Row:Made %d words in row ((%d,%d)(%d,%d))\n",
+             words_made,
+             real_row->bounding_box().left(), real_row->bounding_box().bottom(),
+             real_row->bounding_box().right(), real_row->bounding_box().top());
+  }
+  return real_row;
+}
+
+bool Textord::make_a_word_break(  // returns true if the current gap is to break the word
+        TO_ROW* row,   // row being made
+        TBOX blob_box, // box of the blob following the gap being judged
+        int16_t prev_gap,       // gap on the far side of prev_blob_box
+        TBOX prev_blob_box,     // blob before the gap; a null box at the beginning of the row
+        int16_t real_current_gap,        // the gap being judged, as measured
+        int16_t within_xht_current_gap,  // the same gap in its reduced ("within xht") form
+        TBOX next_blob_box,     // look-ahead box supplied by the caller
+        int16_t next_gap,       // gap on the far side of next_blob_box
+        uint8_t& blanks,        // out: number of blanks for the break
+        bool& fuzzy_sp,         // out: break is probably a space
+        bool& fuzzy_non,        // out: break is probably not a space
+        bool& prev_gap_was_a_space,  // in/out: updated on exit of the new method
+        bool& break_at_next_gap) {   // in/out: when set on entry, cleared and true returned
+  bool space;
+  int16_t current_gap;
+  float fuzzy_sp_to_kn_limit;
+  // A break requested by the previous call (punctuation rule below) is honoured first.
+  if (break_at_next_gap) {
+    break_at_next_gap = false;
+    return true;
+  }
+  /* Inhibit using the reduced gap if
+    The kerning is large - chars are not kerned and reducing "f"s can cause
+    erroneous blanks
+  OR  The real gap is less than 0
+  OR  The real gap is less than the kerning estimate
+  */
+  if ((row->kern_size > tosp_large_kerning * row->xheight) ||
+      ((tosp_dont_fool_with_small_kerns >= 0) &&
+       (real_current_gap < tosp_dont_fool_with_small_kerns * row->kern_size)))
+                                 //Ignore the difference
+    within_xht_current_gap = real_current_gap;
+
+  if (tosp_use_xht_gaps && tosp_only_use_xht_gaps)
+    current_gap = within_xht_current_gap;
+  else
+    current_gap = real_current_gap;
+
+  if (tosp_old_to_method) {
+                                 //Boring old method
+    space = current_gap > row->max_nonspace;
+    if (space && (current_gap < INT16_MAX)) {
+      if (current_gap < row->min_space) {
+        if (current_gap > row->space_threshold) {
+          blanks = 1;
+          fuzzy_sp = true;
+          fuzzy_non = false;
+        }
+        else {
+          blanks = 0;
+          fuzzy_sp = false;
+          fuzzy_non = true;
+        }
+      }
+      else {
+        blanks = static_cast<uint8_t>(current_gap / row->space_size);
+        if (blanks < 1)
+          blanks = 1;
+        fuzzy_sp = false;
+        fuzzy_non = false;
+      }
+    }
+    return space;
+  }
+  else {
+  /* New exciting heuristic method */
+    if (prev_blob_box.null_box ())  // Beginning of row
+      prev_gap_was_a_space = true;
+
+                                 //Default as old TO
+    space = current_gap > row->space_threshold;
+
+    /* Set defaults for the word break in case we find one.  Currently there are
+    no fuzzy spaces. Depending on the reliability of the different heuristics
+    we may need to set PARTICULAR spaces to fuzzy or not. The values will ONLY
+    be used if the function returns true - ie the word is to be broken.
+    */
+    int num_blanks = current_gap;
+    if (row->space_size > 1.0f)
+      num_blanks = IntCastRounded(current_gap / row->space_size);
+    blanks = static_cast<uint8_t>(ClipToRange<int>(num_blanks, 1, UINT8_MAX));
+    fuzzy_sp = false;
+    fuzzy_non = false;
+    /*
+    If xht measure causes gap to flip one of the 3 thresholds act accordingly -
+    despite any other heuristics - the MINIMUM action is to pass a fuzzy kern to
+    context.
+    */
+    if (tosp_use_xht_gaps &&
+      (real_current_gap <= row->max_nonspace) &&
+    (within_xht_current_gap > row->max_nonspace)) {
+      space = true;
+      fuzzy_non = true;
+#ifndef GRAPHICS_DISABLED
+      mark_gap (blob_box, 20,
+        prev_gap, prev_blob_box.width (),
+        current_gap, next_blob_box.width (), next_gap);
+#endif
+    }
+    else if (tosp_use_xht_gaps &&
+      (real_current_gap <= row->space_threshold) &&
+    (within_xht_current_gap > row->space_threshold)) {
+      space = true;
+      if (tosp_flip_fuzz_kn_to_sp)
+        fuzzy_sp = true;
+      else
+        fuzzy_non = true;
+#ifndef GRAPHICS_DISABLED
+      mark_gap (blob_box, 21,
+        prev_gap, prev_blob_box.width (),
+        current_gap, next_blob_box.width (), next_gap);
+#endif
+    }
+    else if (tosp_use_xht_gaps &&
+      (real_current_gap < row->min_space) &&
+    (within_xht_current_gap >= row->min_space)) {
+      space = true;
+#ifndef GRAPHICS_DISABLED
+      mark_gap (blob_box, 22,
+        prev_gap, prev_blob_box.width (),
+        current_gap, next_blob_box.width (), next_gap);
+#endif
+    }
+    else if (tosp_force_wordbreak_on_punct &&
+             !suspected_punct_blob(row, prev_blob_box) &&
+             suspected_punct_blob(row, blob_box)) {
+      break_at_next_gap = true;
+    }
+    /* Now continue with normal heuristics */
+    else if ((current_gap < row->min_space) &&
+    (current_gap > row->space_threshold)) {
+      /* Heuristics to turn dubious spaces to kerns */
+      if (tosp_pass_wide_fuzz_sp_to_context > 0)
+        fuzzy_sp_to_kn_limit = row->kern_size +
+          tosp_pass_wide_fuzz_sp_to_context *
+          (row->space_size - row->kern_size);
+      else
+        fuzzy_sp_to_kn_limit = 99999.0f;
+
+      /* If current gap is significantly smaller than the previous space the other
+      side of a narrow blob then this gap is a kern. */
+      if ((prev_blob_box.width () > 0) &&
+        narrow_blob (row, prev_blob_box) &&
+        prev_gap_was_a_space &&
+      (current_gap <= tosp_gap_factor * prev_gap)) {
+        if ((tosp_all_flips_fuzzy) ||
+        (current_gap > fuzzy_sp_to_kn_limit)) {
+          if (tosp_flip_fuzz_sp_to_kn)
+            fuzzy_non = true;
+          else
+            fuzzy_sp = true;
+        }
+        else
+          space = false;
+#ifndef GRAPHICS_DISABLED
+        mark_gap (blob_box, 1,
+          prev_gap, prev_blob_box.width (),
+          current_gap, next_blob_box.width (), next_gap);
+#endif
+      }
+      /* If current gap not much bigger than the previous kern the other side of a
+      narrow blob then this gap is a kern as well */
+      else if ((prev_blob_box.width () > 0) &&
+        narrow_blob (row, prev_blob_box) &&
+        !prev_gap_was_a_space &&
+      (current_gap * tosp_gap_factor <= prev_gap)) {
+        if ((tosp_all_flips_fuzzy) ||
+        (current_gap > fuzzy_sp_to_kn_limit)) {
+          if (tosp_flip_fuzz_sp_to_kn)
+            fuzzy_non = true;
+          else
+            fuzzy_sp = true;
+        }
+        else
+          space = false;
+#ifndef GRAPHICS_DISABLED
+        mark_gap (blob_box, 2,
+          prev_gap, prev_blob_box.width (),
+          current_gap, next_blob_box.width (), next_gap);
+#endif
+      }
+      else if ((next_blob_box.width () > 0) &&
+        narrow_blob (row, next_blob_box) &&
+        (next_gap > row->space_threshold) &&
+      (current_gap <= tosp_gap_factor * next_gap)) {
+        if ((tosp_all_flips_fuzzy) ||
+        (current_gap > fuzzy_sp_to_kn_limit)) {
+          if (tosp_flip_fuzz_sp_to_kn)
+            fuzzy_non = true;
+          else
+            fuzzy_sp = true;
+        }
+        else
+          space = false;
+#ifndef GRAPHICS_DISABLED
+        mark_gap (blob_box, 3,
+          prev_gap, prev_blob_box.width (),
+          current_gap, next_blob_box.width (), next_gap);
+#endif
+      }
+      else if ((next_blob_box.width () > 0) &&
+        narrow_blob (row, next_blob_box) &&
+        (next_gap <= row->space_threshold) &&
+      (current_gap * tosp_gap_factor <= next_gap)) {
+        if ((tosp_all_flips_fuzzy) ||
+        (current_gap > fuzzy_sp_to_kn_limit)) {
+          if (tosp_flip_fuzz_sp_to_kn)
+            fuzzy_non = true;
+          else
+            fuzzy_sp = true;
+        }
+        else
+          space = false;
+#ifndef GRAPHICS_DISABLED
+        mark_gap (blob_box, 4,
+          prev_gap, prev_blob_box.width (),
+          current_gap, next_blob_box.width (), next_gap);
+#endif
+      }
+      else if ((((next_blob_box.width () > 0) &&
+        narrow_blob (row, next_blob_box)) ||
+        ((prev_blob_box.width () > 0) &&
+      narrow_blob (row, prev_blob_box)))) {
+        fuzzy_sp = true;
+#ifndef GRAPHICS_DISABLED
+        mark_gap (blob_box, 6,
+          prev_gap, prev_blob_box.width (),
+          current_gap, next_blob_box.width (), next_gap);
+#endif
+      }
+    }
+    else if ((current_gap > row->max_nonspace) &&
+             (current_gap <= row->space_threshold)) {
+
+      /* Heuristics to turn dubious kerns to spaces */
+      /* TRIED THIS BUT IT MADE THINGS WORSE
+          if (prev_gap == INT16_MAX)
+            prev_gap = 0;  // start of row
+          if (next_gap == INT16_MAX)
+            next_gap = 0;  // end of row
+      */
+      if ((prev_blob_box.width () > 0) &&
+        (next_blob_box.width () > 0) &&
+        (current_gap >=
+        tosp_kern_gap_factor1 * std::max(prev_gap, next_gap)) &&
+        wide_blob (row, prev_blob_box) &&
+      wide_blob (row, next_blob_box)) {
+
+        space = true;
+        /*
+        tosp_flip_caution is an attempt to stop the default changing in cases
+        where there is a large difference between the kern and space estimates.
+          See problem in 'chiefs' where "have" gets split in the quotation.
+        */
+        if ((tosp_flip_fuzz_kn_to_sp) &&
+          ((tosp_flip_caution <= 0) ||
+          (tosp_flip_caution * row->kern_size > row->space_size)))
+          fuzzy_sp = true;
+        else
+          fuzzy_non = true;
+#ifndef GRAPHICS_DISABLED
+        mark_gap (blob_box, 7,
+          prev_gap, prev_blob_box.width (),
+          current_gap, next_blob_box.width (), next_gap);
+#endif
+      } else if (prev_blob_box.width() > 0 &&
+                 next_blob_box.width() > 0 &&
+                 current_gap > 5 &&  // Rule 9 handles small gap, big ratio.
+                 current_gap >=
+                   tosp_kern_gap_factor2 * std::max(prev_gap, next_gap) &&
+                 !(narrow_blob(row, prev_blob_box) ||
+                   suspected_punct_blob(row, prev_blob_box)) &&
+                 !(narrow_blob(row, next_blob_box) ||
+                   suspected_punct_blob(row, next_blob_box))) {
+        space = true;
+        fuzzy_non = true;
+#ifndef GRAPHICS_DISABLED
+        mark_gap (blob_box, 8,
+          prev_gap, prev_blob_box.width (),
+          current_gap, next_blob_box.width (), next_gap);
+#endif
+      }
+      else if ((tosp_kern_gap_factor3 > 0) &&
+               (prev_blob_box.width () > 0) &&
+               (next_blob_box.width () > 0) &&
+               (current_gap >= tosp_kern_gap_factor3 * std::max(prev_gap, next_gap)) &&
+               (!tosp_rule_9_test_punct ||
+                (!suspected_punct_blob (row, prev_blob_box) &&
+                 !suspected_punct_blob (row, next_blob_box)))) {
+        space = true;
+        fuzzy_non = true;
+#ifndef GRAPHICS_DISABLED
+        mark_gap (blob_box, 9,
+          prev_gap, prev_blob_box.width (),
+          current_gap, next_blob_box.width (), next_gap);
+#endif
+      }
+    }
+    if (tosp_debug_level > 10)
+      tprintf("word break = %d current_gap = %d, prev_gap = %d, "
+              "next_gap = %d\n", space ? 1 : 0, current_gap,
+              prev_gap, next_gap);
+    prev_gap_was_a_space = space && !(fuzzy_non);  // remembered for the next call
+    return space;
+  }
+}
+
+bool Textord::narrow_blob(TO_ROW* row, TBOX blob_box) {
+  // Narrow = small width relative to the row x-height, or a small width/height ratio.
+  if (blob_box.width () <= tosp_narrow_fraction * row->xheight) {
+    return true;
+  }
+  return static_cast<float>(blob_box.width ()) / blob_box.height () <= tosp_narrow_aspect_ratio;
+}
+
+bool Textord::wide_blob(TO_ROW* row, TBOX blob_box) {
+  // With the width-fraction test disabled, "wide" simply means "not narrow".
+  if (!(tosp_wide_fraction > 0)) {
+    return !narrow_blob (row, blob_box);
+  }
+  // Otherwise the width must reach the given fraction of the row x-height ...
+  if (!(blob_box.width () >= tosp_wide_fraction * row->xheight)) {
+    return false;
+  }
+  // ... and, when the aspect-ratio test is enabled, exceed that ratio as well.
+  if (!(tosp_wide_aspect_ratio > 0)) return true;
+  return static_cast<float>(blob_box.width ()) / blob_box.height () >
+         tosp_wide_aspect_ratio;
+}
+
+bool Textord::suspected_punct_blob(TO_ROW* row, TBOX box) {
+  // Baseline height under the horizontal centre of the box.
+  const float centre_x = (box.right () + box.left ()) / 2.0;
+  const float base_y = row->baseline.y (centre_x);
+  // Level half an x-height above that baseline.
+  const double half_xht_level = base_y + row->xheight / 2.0;
+
+  // Suspect punctuation if the box is short, or lies wholly below or wholly
+  // above the half x-height level.
+  if (box.height () <= 0.66 * row->xheight) return true;
+  if (box.top () < half_xht_level) return true;
+  return box.bottom () > half_xht_level;
+}
+
+
+void Textord::peek_at_next_gap(TO_ROW *row,
+                               BLOBNBOX_IT box_it,
+                               TBOX &next_blob_box,
+                               int16_t &next_gap,
+                               int16_t &next_within_xht_gap) {
+  // box_it is passed by value, so only this local copy of the iterator is used.
+  BLOBNBOX_IT reduced_it = box_it;  // second cursor, for the reduced boxes
+
+  // Full and reduced box at the cursor (the helpers are expected to advance it).
+  next_blob_box = box_next (&box_it);
+  const TBOX reduced_box = reduced_box_next (row, &reduced_it);
+  if (box_it.at_first ()) {
+    // Back at the start of the list: nothing follows, so report "no gap".
+    next_gap = INT16_MAX;
+    next_within_xht_gap = INT16_MAX;
+    return;
+  }
+  // Gap to whatever follows, measured on the full and on the reduced boxes.
+  const TBOX following_box = box_it.data ()->bounding_box ();
+  next_gap = following_box.left () - next_blob_box.right ();
+  const TBOX following_reduced = reduced_box_next (row, &reduced_it);
+  next_within_xht_gap = following_reduced.left () - reduced_box.right ();
+}
+
+
+#ifndef GRAPHICS_DISABLED
+void Textord::mark_gap(
+    TBOX blob,   // blob following gap
+    int16_t rule,  // heuristic id
+    int16_t prev_gap,
+    int16_t prev_blob_width,
+    int16_t current_gap,
+    int16_t next_blob_width,
+    int16_t next_gap) {
+  // Debug aid for the space/kern heuristics. If textord_show_initial_words is set,
+  // an ellipse is drawn over the gap in front of blob, in a colour picked by the
+  // rule id; if tosp_debug_level > 5 the rule and its gap/width data are logged.
+  // prev_gap, prev_blob_width, next_blob_width and next_gap only feed the log;
+  // current_gap also sizes and positions the ellipse.
+
+  // Rule ids without a colour of their own (4, 9 and unknown ids) stay black.
+  ScrollView::Color pen_colour = ScrollView::BLACK;
+  switch (rule) {
+    case 1:
+      pen_colour = ScrollView::RED;
+      break;
+
+    case 2:
+    case 20:
+      pen_colour = ScrollView::CYAN;
+      break;
+
+    case 3:
+    case 21:
+      pen_colour = ScrollView::GREEN;
+      break;
+
+    case 5:
+    case 22:
+      pen_colour = ScrollView::MAGENTA;
+      break;
+
+    case 6:
+      pen_colour = ScrollView::BLUE;
+      break;
+
+    case 7:
+      pen_colour = ScrollView::WHITE;
+      break;
+
+    case 8:
+      pen_colour = ScrollView::YELLOW;
+      break;
+
+    default:
+      break;
+  }
+
+  if (textord_show_initial_words) {
+    to_win->Pen(pen_colour);
+
+    // Ellipse centred on the gap: x radius is half the gap, y radius is half the
+    // blob height, centre is half a gap left of the blob at mid blob height.
+    const float x_radius = current_gap / 2.0f;
+    const float y_radius = blob.height () / 2.0f;
+    const float x_centre = blob.left () - current_gap / 2.0f;
+    const float y_centre = blob.bottom () + blob.height () / 2.0f;
+    to_win->Ellipse (x_radius, y_radius, x_centre, y_centre);
+  }
+
+  // Log the mark position, the rule id and the surrounding gaps and blob widths.
+  if (tosp_debug_level > 5) {
+    tprintf("  (%d,%d) Sp<->Kn Rule %d %d %d %d %d %d\n",
+            blob.left() - current_gap / 2, blob.bottom(), rule, prev_gap,
+            prev_blob_width, current_gap, next_blob_width, next_gap);
+  }
+}
+#endif
+
+// Returns the mean gap between consecutive blobs of word (in list order):
+// the left edge of each blob minus the right edge of the blob before it.
+// Returns 0 when the word has fewer than two blobs.
+float Textord::find_mean_blob_spacing(WERD *word) {
+  C_BLOB_IT blob_it;
+  blob_it.set_to_list(word->cblob_list());
+  if (blob_it.empty()) {
+    return 0.0f;  // no blobs, so no gaps
+  }
+  int32_t total_gap = 0;
+  int16_t num_gaps = 0;
+  blob_it.mark_cycle_pt();
+  // The first blob only seeds the running right-hand edge.
+  int16_t last_right = blob_it.data()->bounding_box().right();
+  for (blob_it.forward(); !blob_it.cycled_list(); blob_it.forward()) {
+    const TBOX box = blob_it.data()->bounding_box();
+    total_gap += box.left() - last_right;
+    ++num_gaps;
+    last_right = box.right();
+  }
+  if (num_gaps > 0) {
+    return total_gap / static_cast<float>(num_gaps);
+  }
+  return 0.0f;  // a single blob has no gaps
+}
+
+
+// Returns true if the gap spanning left..right on row is wide enough to be
+// ignored. The policy is selected by tosp_ignore_big_gaps:
+//   > 999 : never ignore a gap;
+//   > 0   : ignore gaps wider than that multiple of the row xheight;
+//   == 0  : ignore wide gaps on long rows, or gaps the gapmap calls tables;
+//   < 0   : ignore only gaps the gapmap calls tables.
+// In the last two cases any gap above tosp_ignore_very_big_gaps * xheight
+// is ignored regardless.
+bool Textord::ignore_big_gap(TO_ROW* row,
+                             int32_t row_length,
+                             GAPMAP* gapmap,
+                             int16_t left,
+                             int16_t right) {
+  const int16_t gap_width = right - left + 1;
+  const auto xht = row->xheight;
+
+  if (tosp_ignore_big_gaps > 999) return false;  // Don't ignore
+  if (tosp_ignore_big_gaps > 0) return gap_width > tosp_ignore_big_gaps * xht;
+  if (gap_width > tosp_ignore_very_big_gaps * xht) return true;
+
+  if (tosp_ignore_big_gaps == 0) {
+    if (gap_width > 2.1 * xht && row_length > 20 * xht) return true;
+    return gap_width > 1.75 * xht &&
+           (row_length > 35 * xht || gapmap->table_gap(left, right));
+  }
+  // Below the very-big limit, a gap is only ignored when it is part of a table.
+  return gap_width > gapmap_big_gaps * xht && gapmap->table_gap(left, right);
+}
+
+/**********************************************************************
+ * reduced_box_next
+ *
+ * Compute the bounding box of this blob with merging of x overlaps
+ * but no pre-chopping.
+ * Then move the iterator on to the start of the next blob.
+ * DON'T reduce the box for small things - eg punctuation.
+ **********************************************************************/
+TBOX Textord::reduced_box_next(
+    TO_ROW *row,     // current row
+    BLOBNBOX_IT *it  // iterator to blobs
+                               ) {
+  BLOBNBOX *cur = it->data();
+
+  // The box was already computed and stored on this blob: just step the
+  // iterator past the blob and its continuation pieces.
+  if (cur->red_box_set()) {
+    const TBOX stored_box = cur->reduced_box();
+    do {
+      it->forward();
+      cur = it->data();
+    } while (cur->cblob() == nullptr || cur->joined_to_prev());
+    return stored_box;
+  }
+
+  BLOBNBOX *const first_blob = cur;  // the result is stored on this blob
+  TBOX whole_box = cur->bounding_box();
+  int16_t above_xht_left;  // leftmost x of the part ABOVE the xheight
+  TBOX result = reduced_box_for_blob(cur, row, &above_xht_left);
+  // Absorb the following pieces until the next real blob is reached.
+  for (;;) {
+    it->forward();
+    cur = it->data();
+    if (cur->cblob() == nullptr) {
+      whole_box += cur->bounding_box();  // was pre-chopped
+      continue;
+    }
+    if (!cur->joined_to_prev()) break;  // next real blob
+    int16_t piece_left;
+    result += reduced_box_for_blob(cur, row, &piece_left);
+    above_xht_left = std::min(above_xht_left, piece_left);
+  }
+
+  // Keep the reduced box only if it is non-empty, tall enough and starts
+  // well to the left of the part above the xheight; else use the full box.
+  const bool keep_reduced =
+      result.width() > 0 &&
+      result.left() + tosp_near_lh_edge * result.width() < above_xht_left &&
+      result.height() > 0.7 * row->xheight;
+  if (keep_reduced) {
+#ifndef GRAPHICS_DISABLED
+    if (textord_show_initial_words)
+      result.plot(to_win, ScrollView::YELLOW, ScrollView::YELLOW);
+#endif
+  } else {
+    result = whole_box;
+  }
+  first_blob->set_reduced_box(result);
+  return result;
+}
+
+
+/*************************************************************************
+ * reduced_box_for_blob()
+ * Find box for blob which is the same height and y position as the whole blob,
+ * but whose left limit is the left most position of the blob ABOVE the
+ * baseline and whose right limit is the right most position of the blob BELOW
+ * the xheight.
+ *
+ * left_above_xht receives the left most x above 1.1 * xheight, or INT16_MAX.
+ * !!!!!!! WON'T WORK WITH LARGE UPPER CASE CHARS - T F V W - look at examples on
+ *         "home".  Perhaps we need something which says: if the width ABOVE the
+ *         xht alone includes the whole of the reduced width, then use the full
+ *         blob box - Might still fail on italic F
+ *
+ *         Alternatively we could be a little less severe and only reduce the
+ *         left and right edges by half the difference between the full box and
+ *         the reduced box.
+ *
+ * NOTE that we need to rotate all the coordinates as
+ * find_blob_limits finds the y min and max within a specified x band
+ *************************************************************************/
+TBOX Textord::reduced_box_for_blob(
+    BLOBNBOX *blob,
+    TO_ROW *row,
+    int16_t *left_above_xht) {
+  // Start values for a min/max pair. A pair that is still inverted
+  // (min > max) after find_cblob_hlimits is treated as an empty band.
+  const float kUnsetMin = static_cast<float>(INT32_MAX);
+  const float kUnsetMax = static_cast<float>(-INT32_MAX);
+  const float kBandTop = static_cast<float>(INT16_MAX);
+  const float kBandBottom = static_cast<float>(-INT16_MAX);
+
+  /* Find baseline of centre of blob */
+  const TBOX box = blob->bounding_box();
+  const float centre_x = (box.left() + box.right()) / 2.0;
+  const float base_y = row->baseline.y(centre_x);
+
+  /*
+  Find LH limit of blob ABOVE the xht. This is so that we can detect certain
+  caps ht chars which should NOT have their box reduced: T, Y, V, W etc
+  */
+  float above_xht_min = kUnsetMin;
+  float above_xht_max = kUnsetMax;
+  find_cblob_hlimits(blob->cblob(), (base_y + 1.1 * row->xheight), kBandTop,
+                     above_xht_min, above_xht_max);
+  if (above_xht_min > above_xht_max) {
+    *left_above_xht = INT16_MAX;  // No area above xht
+  } else {
+    *left_above_xht = static_cast<int16_t>(floor(above_xht_min));
+  }
+
+  /*
+  Find reduced LH limit of blob - the left extent of the region ABOVE the
+  baseline.
+  */
+  float reduced_left = kUnsetMin;
+  float unused_max = kUnsetMax;
+  find_cblob_hlimits(blob->cblob(), base_y, kBandTop,
+                     reduced_left, unused_max);
+  if (reduced_left > unused_max) {
+    return TBOX();  // no area above the baseline so return empty box
+  }
+
+  // Find reduced RH limit of blob - the right extent of the region BELOW xht.
+  float unused_min = kUnsetMin;
+  float reduced_right = kUnsetMax;
+  find_cblob_hlimits(blob->cblob(), kBandBottom, (base_y + row->xheight),
+                     unused_min, reduced_right);
+  if (unused_min > reduced_right) {
+    return TBOX();  // no area below the xht so return empty box
+  }
+  return TBOX(ICOORD(static_cast<int16_t>(floor(reduced_left)), box.bottom()),
+              ICOORD(static_cast<int16_t>(ceil(reduced_right)), box.top()));
+}
+}  // namespace tesseract
diff --git a/tesseract/src/textord/tovars.cpp b/tesseract/src/textord/tovars.cpp
new file mode 100644
index 00000000..22e838f0
--- /dev/null
+++ b/tesseract/src/textord/tovars.cpp
@@ -0,0 +1,85 @@
+/**********************************************************************
+ * File:        tovars.cpp  (Formerly to_vars.c)
+ * Description: Variables used by textord.
+ * Author:    Ray Smith
+ * Created:   Tue Aug 24 16:55:02 BST 1993
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "tovars.h"
+#include "params.h"
+
+namespace tesseract {
+
+// Definitions of the textord parameters declared in tovars.h.
+// Each entry is (name, default value, help text).
+BOOL_VAR(textord_show_initial_words, false, "Display separate words");
+BOOL_VAR(textord_show_new_words, false, "Display separate words");
+BOOL_VAR(textord_show_fixed_words, false, "Display forced fixed pitch words");
+BOOL_VAR(textord_blocksall_fixed, false, "Moan about prop blocks");
+BOOL_VAR(textord_blocksall_prop, false, "Moan about fixed pitch blocks");
+BOOL_VAR(textord_blocksall_testing, false, "Dump stats when moaning");
+BOOL_VAR(textord_test_mode, false, "Do current test");
+INT_VAR(textord_dotmatrix_gap, 3, "Max pixel gap for broken pixed pitch");
+INT_VAR(textord_debug_block, 0, "Block to do debug on");
+INT_VAR(textord_pitch_range, 2, "Max range test on pitch");
+double_VAR(textord_wordstats_smooth_factor, 0.05,
+           "Smoothing gap stats");
+double_VAR(textord_width_smooth_factor, 0.10,
+           "Smoothing width stats");
+double_VAR(textord_words_width_ile, 0.4,
+           "Ile of blob widths for space est");
+double_VAR(textord_words_maxspace, 4.0, "Multiple of xheight");
+double_VAR(textord_words_default_maxspace, 3.5,
+           "Max believable third space");
+double_VAR(textord_words_default_minspace, 0.6,
+           "Fraction of xheight");
+double_VAR(textord_words_min_minspace, 0.3, "Fraction of xheight");
+double_VAR(textord_words_default_nonspace, 0.2,
+           "Fraction of xheight");
+double_VAR(textord_words_initial_lower, 0.25,
+           "Max initial cluster size");
+double_VAR(textord_words_initial_upper, 0.15,
+           "Min initial cluster spacing");
+double_VAR(textord_words_minlarge, 0.75,
+           "Fraction of valid gaps needed");
+double_VAR(textord_words_pitchsd_threshold, 0.040,
+           "Pitch sync threshold");
+double_VAR(textord_words_def_fixed, 0.016,
+           "Threshold for definite fixed");
+double_VAR(textord_words_def_prop, 0.090,
+           "Threshold for definite prop");
+INT_VAR(textord_words_veto_power, 5,
+        "Rows required to outvote a veto");
+double_VAR(textord_pitch_rowsimilarity, 0.08,
+           "Fraction of xheight for sameness");
+BOOL_VAR(textord_pitch_scalebigwords, false,
+         "Scale scores on big words");
+// The next five parameters are named without the textord_ prefix.
+double_VAR(words_initial_lower, 0.5, "Max initial cluster size");
+double_VAR(words_initial_upper, 0.15, "Min initial cluster spacing");
+double_VAR(words_default_prop_nonspace, 0.25, "Fraction of xheight");
+double_VAR(words_default_fixed_space, 0.75, "Fraction of xheight");
+double_VAR(words_default_fixed_limit, 0.6, "Allowed size variance");
+double_VAR(textord_words_definite_spread, 0.30,
+           "Non-fuzzy spacing region");
+double_VAR(textord_spacesize_ratiofp, 2.8,
+           "Min ratio space/nonspace");
+double_VAR(textord_spacesize_ratioprop, 2.0,
+           "Min ratio space/nonspace");
+double_VAR(textord_fpiqr_ratio, 1.5, "Pitch IQR/Gap IQR threshold");
+double_VAR(textord_max_pitch_iqr, 0.20, "Xh fraction noise in pitch");
+double_VAR(textord_fp_min_width, 0.5, "Min width of decent blobs");
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/tovars.h b/tesseract/src/textord/tovars.h
new file mode 100644
index 00000000..79d297a4
--- /dev/null
+++ b/tesseract/src/textord/tovars.h
@@ -0,0 +1,94 @@
+/**********************************************************************
+ * File:        tovars.h  (Formerly to_vars.h)
+ * Description: Variables used by textord.
+ * Author:    Ray Smith
+ * Created:   Tue Aug 24 16:55:02 BST 1993
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           TOVARS_H
+#define           TOVARS_H
+
+#include          "params.h"
+
+namespace tesseract {
+
+// Declarations of the textord parameters defined in tovars.cpp. The default
+// values repeated here must be kept in step with the definitions there.
+extern BOOL_VAR_H(textord_show_initial_words, false, "Display separate words");
+extern BOOL_VAR_H(textord_show_new_words, false, "Display separate words");
+extern BOOL_VAR_H(textord_show_fixed_words, false,
+                  "Display forced fixed pitch words");
+extern BOOL_VAR_H(textord_blocksall_fixed, false, "Moan about prop blocks");
+extern BOOL_VAR_H(textord_blocksall_prop, false,
+                  "Moan about fixed pitch blocks");
+extern BOOL_VAR_H(textord_blocksall_testing, false, "Dump stats when moaning");
+extern BOOL_VAR_H(textord_test_mode, false, "Do current test");
+extern INT_VAR_H(textord_dotmatrix_gap, 3,
+                 "Max pixel gap for broken pixed pitch");
+extern INT_VAR_H(textord_debug_block, 0, "Block to do debug on");
+extern INT_VAR_H(textord_pitch_range, 2, "Max range test on pitch");
+extern double_VAR_H(textord_wordstats_smooth_factor, 0.05,
+                    "Smoothing gap stats");
+extern double_VAR_H(textord_width_smooth_factor, 0.10,
+                    "Smoothing width stats");
+extern double_VAR_H(textord_words_width_ile, 0.4,
+                    "Ile of blob widths for space est");
+extern double_VAR_H(textord_words_maxspace, 4.0, "Multiple of xheight");
+extern double_VAR_H(textord_words_default_maxspace, 3.5,
+                    "Max believable third space");
+extern double_VAR_H(textord_words_default_minspace, 0.6,
+                    "Fraction of xheight");
+extern double_VAR_H(textord_words_min_minspace, 0.3, "Fraction of xheight");
+extern double_VAR_H(textord_words_default_nonspace, 0.2,
+                    "Fraction of xheight");
+extern double_VAR_H(textord_words_initial_lower, 0.25,
+                    "Max initial cluster size");
+extern double_VAR_H(textord_words_initial_upper, 0.15,
+                    "Min initial cluster spacing");
+extern double_VAR_H(textord_words_minlarge, 0.75,
+                    "Fraction of valid gaps needed");
+extern double_VAR_H(textord_words_pitchsd_threshold, 0.040,
+                    "Pitch sync threshold");
+extern double_VAR_H(textord_words_def_fixed, 0.016,
+                    "Threshold for definite fixed");
+extern double_VAR_H(textord_words_def_prop, 0.090,
+                    "Threshold for definite prop");
+extern INT_VAR_H(textord_words_veto_power, 5,
+                 "Rows required to outvote a veto");
+extern double_VAR_H(textord_pitch_rowsimilarity, 0.08,
+                    "Fraction of xheight for sameness");
+extern BOOL_VAR_H(textord_pitch_scalebigwords, false,
+                  "Scale scores on big words");
+extern double_VAR_H(words_initial_lower, 0.5, "Max initial cluster size");
+extern double_VAR_H(words_initial_upper, 0.15,
+                    "Min initial cluster spacing");
+extern double_VAR_H(words_default_prop_nonspace, 0.25,
+                    "Fraction of xheight");
+extern double_VAR_H(words_default_fixed_space, 0.75, "Fraction of xheight");
+extern double_VAR_H(words_default_fixed_limit, 0.6, "Allowed size variance");
+extern double_VAR_H(textord_words_definite_spread, 0.30,
+                    "Non-fuzzy spacing region");
+extern double_VAR_H(textord_spacesize_ratiofp, 2.8,
+                    "Min ratio space/nonspace");
+extern double_VAR_H(textord_spacesize_ratioprop, 2.0,
+                    "Min ratio space/nonspace");
+extern double_VAR_H(textord_fpiqr_ratio, 1.5, "Pitch IQR/Gap IQR threshold");
+extern double_VAR_H(textord_max_pitch_iqr, 0.20,
+                    "Xh fraction noise in pitch");
+extern double_VAR_H(textord_fp_min_width, 0.5, "Min width of decent blobs");
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/underlin.cpp b/tesseract/src/textord/underlin.cpp
new file mode 100644
index 00000000..6a732f27
--- /dev/null
+++ b/tesseract/src/textord/underlin.cpp
@@ -0,0 +1,278 @@
+/**********************************************************************
+ * File:        underlin.cpp  (Formerly undrline.c)
+ * Description: Code to chop blobs apart from underlines.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1994, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "underlin.h"
+
+namespace tesseract {
+
+double_VAR(textord_underline_offset, 0.1, "Fraction of x to ignore");
+BOOL_VAR(textord_restore_underlines, true, "Chop underlines & put back");
+
+/**********************************************************************
+ * restore_underlined_blobs
+ *
+ * Find underlined blobs and put them back in the row.
+ * Each blob in block->underlines is cut at the x ranges reported by
+ * find_underlined_blobs: the cut-out pieces are inserted into the most
+ * overlapping row, the remaining pieces go back to block->underlines.
+ **********************************************************************/
+
+void restore_underlined_blobs(                 //get chop points
+                              TO_BLOCK *block  //block to do
+                             ) {
+  ICOORDELT_LIST chop_cells;          //x ranges to cut out
+  BLOBNBOX_LIST residual_underlines;  //real underlines
+  C_OUTLINE_LIST left_coutlines;
+  C_OUTLINE_LIST right_coutlines;
+  ICOORDELT_IT cell_it = &chop_cells;
+  BLOBNBOX_IT under_it = &block->underlines;  //under lines
+  BLOBNBOX_IT ru_it = &residual_underlines;
+
+  if (block->get_rows()->empty())
+    return;  // Don't crash if there are no rows.
+  for (under_it.mark_cycle_pt(); !under_it.cycled_list(); under_it.forward()) {
+    // Find the row BEFORE taking the blob out of the list: if there is no
+    // row the blob stays owned by block->underlines instead of leaking, and
+    // the residual pieces collected so far are still returned below.
+    BLOBNBOX *u_line = under_it.data();  //underline bit
+    TO_ROW *row = most_overlapping_row(block->get_rows(), u_line);
+    if (row == nullptr)
+      break;  // Don't crash if there is no row.
+    under_it.extract();
+    const TBOX blob_box = u_line->bounding_box();  //of underline
+    find_underlined_blobs(u_line, &row->baseline, row->xheight,
+                          row->xheight * textord_underline_offset,
+                          &chop_cells);
+    cell_it.set_to_list(&chop_cells);
+    for (cell_it.mark_cycle_pt(); !cell_it.cycled_list();
+         cell_it.forward()) {
+      int16_t chop_coord = cell_it.data()->x();  //chop boundary
+      if (cell_it.data()->y() - chop_coord > textord_fp_chop_error + 1) {
+        // Cut at the start of the range: the part to its left is kept as
+        // residual underline.
+        split_to_blob(u_line, chop_coord, textord_fp_chop_error + 0.5,
+                      &left_coutlines, &right_coutlines);
+        if (!left_coutlines.empty())
+          ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
+        // Cut at the end of the range: this piece is inserted into the row.
+        chop_coord = cell_it.data()->y();
+        split_to_blob(nullptr, chop_coord, textord_fp_chop_error + 0.5,
+                      &left_coutlines, &right_coutlines);
+        if (!left_coutlines.empty()) {
+          row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines)));
+        }
+        u_line = nullptr;  //no more blobs to add
+      }
+      delete cell_it.extract();
+    }
+    if (!right_coutlines.empty()) {
+      split_to_blob(nullptr, blob_box.right(), textord_fp_chop_error + 0.5,
+                    &left_coutlines, &right_coutlines);
+      if (!left_coutlines.empty())
+        ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
+    }
+    if (u_line != nullptr) {
+      delete u_line->cblob();
+      delete u_line;
+    }
+  }
+  if (!ru_it.empty()) {
+    ru_it.move_to_first();
+    for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) {
+      under_it.add_after_then_move(ru_it.extract());
+    }
+  }
+}
+
+
+/**********************************************************************
+ * most_overlapping_row
+ *
+ * Return the row which most overlaps the blob, or the nearest row if no
+ * row overlaps it. Returns nullptr if the list of rows is empty.
+ **********************************************************************/
+
+TO_ROW *most_overlapping_row(                    //find best row
+                             TO_ROW_LIST *rows,  //list of rows
+                             BLOBNBOX *blob      //blob to place
+                            ) {
+  const TBOX box = blob->bounding_box();
+  int16_t x = (box.left() + box.right()) / 2;  //column to sample baselines
+  TO_ROW_IT row_it = rows;       //row iterator
+  TO_ROW *best_row = nullptr;    //output row
+  float bestover = static_cast<float>(-INT32_MAX);  //best overlap
+
+  if (row_it.empty())
+    return nullptr;
+  TO_ROW *row = row_it.data();   //current row
+  row_it.mark_cycle_pt();
+  // Rows wholly above the blob (rows are assumed to be listed top down):
+  // remember the last one and the (negative) gap from blob top to row bottom.
+  while (row->baseline.y(x) + row->descdrop > box.top()
+         && !row_it.cycled_list()) {
+    best_row = row;
+    bestover = box.top() - (row->baseline.y(x) + row->descdrop);
+    row_it.forward();
+    row = row_it.data();
+  }
+  // Rows that vertically overlap the blob: keep the largest overlap.
+  while (row->baseline.y(x) + row->xheight + row->ascrise >= box.bottom()
+         && !row_it.cycled_list()) {
+    float overlap = row->baseline.y(x) + row->xheight + row->ascrise;
+    if (box.top() < overlap)
+      overlap = box.top();
+    if (box.bottom() > row->baseline.y(x) + row->descdrop)
+      overlap -= box.bottom();
+    else
+      overlap -= row->baseline.y(x) + row->descdrop;
+    if (overlap > bestover) {
+      bestover = overlap;
+      best_row = row;
+    }
+    row_it.forward();
+    row = row_it.data();
+  }
+  // Nothing overlapped: prefer the next row below if it is nearer. Once the
+  // iterator has cycled, row is the first row again and must not be used.
+  if (bestover < 0 && !row_it.cycled_list()
+      && row->baseline.y(x) + row->xheight + row->ascrise
+      - box.bottom() > bestover)
+    best_row = row;
+  return best_row;
+}
+
+
+/**********************************************************************
+ * find_underlined_blobs
+ *
+ * Find the start and end coords of blobs in the underline.
+ **********************************************************************/
+
+void find_underlined_blobs(                            //get chop points
+                           BLOBNBOX *u_line,           //underlined unit
+                           QSPLINE *baseline,          //actual baseline
+                           float xheight,              //height of line
+                           float baseline_offset,      //amount to shrink it
+                           ICOORDELT_LIST *chop_cells  //places to chop
+                          ) {
+  const TBOX blob_box = u_line->bounding_box();
+  ICOORDELT_IT cell_it = chop_cells;  //cell iterator
+  STATS upper_proj(blob_box.left(), blob_box.right() + 1);
+  STATS middle_proj(blob_box.left(), blob_box.right() + 1);
+  STATS lower_proj(blob_box.left(), blob_box.right() + 1);
+  ASSERT_HOST(u_line->cblob() != nullptr);
+  // Project every outline of the blob onto the three bands.
+  C_OUTLINE_IT out_it;
+  out_it.set_to_list(u_line->cblob()->out_list());
+  for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
+    vertical_cunderline_projection(out_it.data(), baseline, xheight,
+                                   baseline_offset, &lower_proj, &middle_proj,
+                                   &upper_proj);
+  }
+  // Each run of columns with a positive middle projection becomes one chop
+  // cell holding (first column of the run, column where the run ended).
+  int16_t x = blob_box.left();
+  while (x < blob_box.right()) {
+    if (middle_proj.pile_count(x) <= 0) {
+      ++x;
+      continue;
+    }
+    int16_t run_end = x + 1;
+    while (run_end < blob_box.right() && middle_proj.pile_count(run_end) > 0)
+      ++run_end;
+    cell_it.add_after_then_move(new ICOORDELT(ICOORD(x, run_end)));
+    x = run_end + 1;
+  }
+}
+
+
+/**********************************************************************
+ * vertical_cunderline_projection
+ *
+ * Compute the vertical projection of an outline (and, recursively, of its
+ * child outlines) and add it to the given STATS. Each column's share is
+ * split at the shifted baseline and at the shifted baseline + xheight.
+ * Steps to the right subtract their height from the column's counts and
+ * steps to the left add it; vertical steps contribute nothing.
+ **********************************************************************/
+
+void vertical_cunderline_projection(                        //project outlines
+                                    C_OUTLINE *outline,     //outline to project
+                                    QSPLINE *baseline,      //actual baseline
+                                    float xheight,          //height of line
+                                    float baseline_offset,  //amount to shrink it
+                                    STATS *lower_proj,      //below baseline
+                                    STATS *middle_proj,     //centre region
+                                    STATS *upper_proj       //top region
+                                   ) {
+  int16_t lower_y, upper_y;        //region limits
+  ICOORD pos = outline->start_pos();             //current point
+  const int32_t length = outline->pathlength();  //of outline
+  C_OUTLINE_IT out_it = outline->child();
+
+  // The index has the same width as length: a 16 bit counter would
+  // overflow before reaching the end of an outline longer than INT16_MAX.
+  for (int32_t stepindex = 0; stepindex < length; stepindex++) {
+    const ICOORD step = outline->step(stepindex);  //edge step
+    if (step.x() > 0) {
+      // Step to the right: the column starts at pos.x().
+      lower_y = static_cast<int16_t>(
+          floor(baseline->y(pos.x()) + baseline_offset + 0.5));
+      upper_y = static_cast<int16_t>(
+          floor(baseline->y(pos.x()) + baseline_offset + xheight + 0.5));
+      if (pos.y() >= lower_y) {
+        lower_proj->add(pos.x(), -lower_y);
+        if (pos.y() >= upper_y) {
+          middle_proj->add(pos.x(), lower_y - upper_y);
+          upper_proj->add(pos.x(), upper_y - pos.y());
+        }
+        else
+          middle_proj->add(pos.x(), lower_y - pos.y());
+      }
+      else
+        lower_proj->add(pos.x(), -pos.y());
+    }
+    else if (step.x() < 0) {
+      // Step to the left: the column starts at pos.x() - 1.
+      lower_y = static_cast<int16_t>(
+          floor(baseline->y(pos.x() - 1) + baseline_offset + 0.5));
+      upper_y = static_cast<int16_t>(
+          floor(baseline->y(pos.x() - 1) + baseline_offset + xheight + 0.5));
+      if (pos.y() >= lower_y) {
+        lower_proj->add(pos.x() - 1, lower_y);
+        if (pos.y() >= upper_y) {
+          middle_proj->add(pos.x() - 1, upper_y - lower_y);
+          upper_proj->add(pos.x() - 1, pos.y() - upper_y);
+        }
+        else
+          middle_proj->add(pos.x() - 1, pos.y() - lower_y);
+      }
+      else
+        lower_proj->add(pos.x() - 1, pos.y());
+    }
+    pos += step;
+  }
+
+  for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
+    vertical_cunderline_projection(out_it.data(), baseline, xheight,
+                                   baseline_offset, lower_proj, middle_proj,
+                                   upper_proj);
+  }
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/underlin.h b/tesseract/src/textord/underlin.h
new file mode 100644
index 00000000..09be1b40
--- /dev/null
+++ b/tesseract/src/textord/underlin.h
@@ -0,0 +1,56 @@
+/**********************************************************************
+ * File:        underlin.h  (Formerly undrline.h)
+ * Description: Code to chop blobs apart from underlines.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1994, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           UNDERLIN_H
+#define           UNDERLIN_H
+
+#include          "fpchop.h"
+
+namespace tesseract {
+
+extern double_VAR_H(textord_underline_offset, 0.1, "Fraction of x to ignore");
+// The default shown below mirrors the definition in underlin.cpp.
+extern BOOL_VAR_H(textord_restore_underlines, true,
+                  "Chop underlines & put back");
+// Chops the blobs in block->underlines at the places where they run into
+// text, inserts the text pieces into the best matching row of the block
+// and returns the remaining underline pieces to block->underlines.
+void restore_underlined_blobs(TO_BLOCK *block);
+
+// Returns the row in rows that overlaps blob the most vertically, sampling
+// each row's baseline at the horizontal centre of the blob. Returns
+// nullptr if rows is empty.
+TO_ROW *most_overlapping_row(TO_ROW_LIST *rows, BLOBNBOX *blob);
+
+// Appends to chop_cells the x ranges at which u_line should be chopped.
+// baseline and xheight describe the text line; baseline_offset is added
+// to the baseline before the blob is split into bands.
+void find_underlined_blobs(BLOBNBOX *u_line, QSPLINE *baseline, float xheight,
+                           float baseline_offset, ICOORDELT_LIST *chop_cells);
+
+// Adds the vertical projection of outline and of its child outlines to
+// the three STATS: lower_proj (below the offset baseline), middle_proj
+// (from there up to xheight higher) and upper_proj (above that).
+void vertical_cunderline_projection(C_OUTLINE *outline, QSPLINE *baseline,
+                                    float xheight, float baseline_offset,
+                                    STATS *lower_proj, STATS *middle_proj,
+                                    STATS *upper_proj);
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/wordseg.cpp b/tesseract/src/textord/wordseg.cpp
new file mode 100644
index 00000000..d8b5516e
--- /dev/null
+++ b/tesseract/src/textord/wordseg.cpp
@@ -0,0 +1,625 @@
+/**********************************************************************
+ * File:        wordseg.cpp  (Formerly wspace.c)
+ * Description: Code to segment the blobs into words.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+ // Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "wordseg.h"
+
+#include "blobbox.h"
+#include "statistc.h"
+#include "drawtord.h"
+#include "makerow.h"
+#include "pitsync1.h"
+#include "tovars.h"
+#include "topitch.h"
+#include "cjkpitch.h"
+#include "textord.h"
+#include "fpchop.h"
+
+namespace tesseract {
+
+BOOL_VAR(textord_fp_chopping, true, "Do fixed pitch chopping");
+BOOL_VAR(textord_force_make_prop_words, false,
+                "Force proportional word segmentation on all rows");  // read by make_real_words
+BOOL_VAR(textord_chopper_test, false,
+                "Chopper is being tested.");  // make_real_words then calls make_blob_words
+
+#define BLOCK_STATS_CLUSTERS  10  // max clusters requested by row_words2; sizes its arrays
+
+
+/**
+ * @name make_single_word
+ *
+ * For each row, arrange the blobs into one word. There is no fixed
+ * pitch detection. With one_blob set, every outline is merged into a
+ * single blob and the word is flagged W_DONT_CHOP.
+ */
+void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows) {
+  TO_ROW_IT to_row_it(rows);
+  ROW_IT row_it(real_rows);
+  for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list();
+       to_row_it.forward()) {
+    TO_ROW* row = to_row_it.data();
+    // The blobs have to come out of the BLOBNBOX into the C_BLOB_LIST ready
+    // to create the word.
+    C_BLOB_LIST cblobs;
+    C_BLOB_IT cblob_it(&cblobs);
+    BLOBNBOX_IT box_it(row->blob_list());
+    for (;!box_it.empty(); box_it.forward()) {
+      BLOBNBOX* bblob= box_it.extract();
+      // Merging needs an earlier blob; without one, data() would be invalid.
+      if (!cblob_it.empty() && (bblob->joined_to_prev() || one_blob)) {
+        if (bblob->cblob() != nullptr) {
+          C_OUTLINE_IT cout_it(cblob_it.data()->out_list());
+          cout_it.move_to_last();
+          cout_it.add_list_after(bblob->cblob()->out_list());
+          delete bblob->cblob();
+        }
+      } else if (bblob->cblob() != nullptr) {
+        cblob_it.add_after_then_move(bblob->cblob());
+      }
+      delete bblob;
+    }
+    // Convert the TO_ROW to a ROW.
+    ROW* real_row = new ROW(row, static_cast<int16_t>(row->kern_size),
+                            static_cast<int16_t>(row->space_size));
+    WERD_IT word_it(real_row->word_list());
+    WERD* word = new WERD(&cblobs, 0, nullptr);
+    word->set_flag(W_BOL, true);
+    word->set_flag(W_EOL, true);
+    word->set_flag(W_DONT_CHOP, one_blob);
+    word_it.add_after_then_move(word);
+    row_it.add_after_then_move(real_row);
+  }
+}
+
+/**
+ * make_words
+ *
+ * Runs fixed-pitch analysis (CJK variant if the Textord asks for it) and
+ * Textord::to_spacing on port_blocks, then make_real_words on each block.
+ */
+void make_words(tesseract::Textord *textord,
+                ICOORD page_tr,                // top right
+                float gradient,                // page skew
+                BLOCK_LIST *blocks,            // block list
+                TO_BLOCK_LIST *port_blocks) {  // output list
+  // Pitch analysis first: the CJK model has its own entry point.
+  const bool cjk_model = textord->use_cjk_fp_model();
+  if (!cjk_model) {
+    const bool not_landscape = !bool(textord_test_landscape);
+    compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f),
+                        not_landscape);
+  } else {
+    compute_fixed_pitch_cjk(page_tr, port_blocks);
+  }
+  textord->to_spacing(page_tr, port_blocks);
+  // Word construction, one block at a time.
+  TO_BLOCK_IT block_it(port_blocks);
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward())
+    make_real_words(textord, block_it.data(), FCOORD(1.0f, 0.0f));
+}
+
+
+/**
+ * @name set_row_spaces
+ *
+ * For each row of the block whose fixed_pitch is 0, derive min_space,
+ * max_nonspace, space_threshold, space_size and kern_size from the row's
+ * pr_space and pr_nonsp so that the blobs can be arranged into words.
+ */
+
+void set_row_spaces(                  //find space sizes
+        TO_BLOCK* block,  //block to do
+        FCOORD rotation,  //for drawing
+        bool testing_on  //correct orientation
+) {
+  TO_ROW_IT row_it = block->get_rows ();
+  if (row_it.empty ()) {
+    return;                      //empty block
+  }
+
+  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+    TO_ROW *const row = row_it.data ();
+    const bool debug = testing_on && textord_show_initial_words;
+    if (row->fixed_pitch == 0) {
+      // The same inset is applied inwards from both the space size and the
+      // non-space size to get the two thresholds.
+      const double inset =
+        (row->pr_space - row->pr_nonsp) * textord_words_definite_spread;
+      row->min_space = static_cast<int32_t>(ceil (row->pr_space - inset));
+      row->max_nonspace = static_cast<int32_t>(floor (row->pr_nonsp + inset));
+      if (debug) {
+        tprintf ("Assigning defaults %d non, %d space to row at %g\n",
+          row->max_nonspace, row->min_space, row->intercept ());
+      }
+      row->space_threshold = (row->max_nonspace + row->min_space) / 2;
+      row->space_size = row->pr_space;
+      row->kern_size = row->pr_nonsp;
+    }
+#ifndef GRAPHICS_DISABLED
+    if (debug) {
+      plot_word_decisions (to_win, static_cast<int16_t>(row->fixed_pitch), row);
+    }
+#endif
+  }
+}
+
+
+/**
+ * @name row_words
+ * Cluster the gaps between the row's blobs to set min_space/max_nonspace.
+ * Returns 0 when there is no evidence, else cluster_stats[2].get_total().
+ */
+
+int32_t row_words(                  //compute space size
+        TO_BLOCK* block,  //block it came from
+        TO_ROW* row,      //row to operate on
+        int32_t maxwidth,   //max expected space size
+        FCOORD rotation,  //for drawing
+        bool testing_on  //for debug
+) {
+  bool testing_row;             //contains testpt
+  bool prev_valid;              //if decent size
+  int32_t prev_x;                //end of prev blob
+  int32_t cluster_count;         //no of clusters
+  int32_t gap_index;             //which cluster
+  int32_t smooth_factor;         //for smoothing stats
+  BLOBNBOX *blob;                //current blob
+  float lower, upper;            //clustering parameters
+  float gaps[3];                 //median gap of each cluster
+  ICOORD testpt;
+  TBOX blob_box;                  //bounding box
+                                 //iterator
+  BLOBNBOX_IT blob_it = row->blob_list ();
+  STATS gap_stats (0, maxwidth);
+  STATS cluster_stats[4];        //clusters; entries 1..3 are read below
+
+  testpt = ICOORD (textord_test_x, textord_test_y);
+  smooth_factor =
+    static_cast<int32_t>(block->xheight * textord_wordstats_smooth_factor + 1.5);
+  //      if (testing_on)
+  //              tprintf("Row smooth factor=%d\n",smooth_factor);
+  prev_valid = false;
+  prev_x = -INT32_MAX;
+  testing_row = false;
+  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
+    blob = blob_it.data ();
+    blob_box = blob->bounding_box ();
+    if (blob_box.contains (testpt))
+      testing_row = true;
+    gap_stats.add (blob_box.width (), 1);  //discarded by clear() just below
+  }
+  gap_stats.clear ();
+  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
+    blob = blob_it.data ();
+    if (!blob->joined_to_prev ()) {
+      blob_box = blob->bounding_box ();
+      if (prev_valid && blob_box.left () - prev_x < maxwidth) {
+        gap_stats.add (blob_box.left () - prev_x, 1);
+      }
+      prev_valid = true;
+      prev_x = blob_box.right ();
+    }
+  }
+  if (gap_stats.get_total () == 0) {
+    row->min_space = 0;          //no evidence
+    row->max_nonspace = 0;
+    return 0;
+  }
+  gap_stats.smooth (smooth_factor);
+  lower = row->xheight * textord_words_initial_lower;
+  upper = row->xheight * textord_words_initial_upper;
+  cluster_count = gap_stats.cluster (lower, upper,
+    textord_spacesize_ratioprop, 3,
+    cluster_stats);
+  while (cluster_count < 2 && ceil (lower) < floor (upper)) {
+                                 //shrink gap
+    upper = (upper * 3 + lower) / 4;
+    lower = (lower * 3 + upper) / 4;  //note: uses the upper just updated
+    cluster_count = gap_stats.cluster (lower, upper,
+      textord_spacesize_ratioprop, 3,
+      cluster_stats);
+  }
+  if (cluster_count < 2) {
+    row->min_space = 0;          //no evidence
+    row->max_nonspace = 0;
+    return 0;
+  }
+  for (gap_index = 0; gap_index < cluster_count; gap_index++)
+    gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
+  //get the median of each cluster
+  if (cluster_count > 2) {
+    if (testing_on && textord_show_initial_words) {
+      tprintf ("Row at %g has 3 sizes of gap:%g,%g,%g\n",
+        row->intercept (),
+        cluster_stats[1].ile (0.5),
+        cluster_stats[2].ile (0.5), cluster_stats[3].ile (0.5));
+    }
+    lower = gaps[0];
+    if (gaps[1] > lower) {
+      upper = gaps[1];           //prefer most frequent
+      if (upper < block->xheight * textord_words_min_minspace
+      && gaps[2] > gaps[1]) {
+        upper = gaps[2];
+      }
+    }
+    else if (gaps[2] > lower
+      && gaps[2] >= block->xheight * textord_words_min_minspace)
+      upper = gaps[2];
+    else if (lower >= block->xheight * textord_words_min_minspace) {
+      upper = lower;             //not nice
+      lower = gaps[1];
+      if (testing_on && textord_show_initial_words) {
+        tprintf ("Had to switch most common from lower to upper!!\n");
+        gap_stats.print();
+      }
+    }
+    else {
+      row->min_space = 0;        //no evidence
+      row->max_nonspace = 0;
+      return 0;
+    }
+  }
+  else {
+    if (gaps[1] < gaps[0]) {
+      if (testing_on && textord_show_initial_words) {
+        tprintf ("Had to switch most common from lower to upper!!\n");
+        gap_stats.print();
+      }
+      lower = gaps[1];
+      upper = gaps[0];
+    }
+    else {
+      upper = gaps[1];
+      lower = gaps[0];
+    }
+  }
+  if (upper < block->xheight * textord_words_min_minspace) {
+    row->min_space = 0;          //no evidence
+    row->max_nonspace = 0;
+    return 0;
+  }
+  if (upper * 3 < block->min_space * 2 + block->max_nonspace
+  || lower * 3 > block->min_space * 2 + block->max_nonspace) {
+    if (testing_on && textord_show_initial_words) {
+      tprintf ("Disagreement between block and row at %g!!\n",
+        row->intercept ());
+      tprintf ("Lower=%g, upper=%g, Stats:\n", lower, upper);
+      gap_stats.print();
+    }
+  }
+  row->min_space =
+    static_cast<int32_t>(ceil (upper - (upper - lower) * textord_words_definite_spread));
+  row->max_nonspace =
+    static_cast<int32_t>(floor (lower + (upper - lower) * textord_words_definite_spread));
+  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
+  row->space_size = upper;
+  row->kern_size = lower;
+  if (testing_on && textord_show_initial_words) {
+    if (testing_row) {
+      tprintf ("GAP STATS\n");
+      gap_stats.print();
+      tprintf ("SPACE stats\n");
+      cluster_stats[2].print_summary();
+      tprintf ("NONSPACE stats\n");
+      cluster_stats[1].print_summary();
+    }
+    tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
+      row->intercept (), row->min_space, upper,
+      row->max_nonspace, lower);
+  }
+  return cluster_stats[2].get_total ();
+}
+
+
+/**
+ * @name row_words2
+ * Cluster the row's gaps and pick the space/non-space sizes either side of
+ * block->max_nonspace. Returns 0 when there is no evidence, otherwise 1.
+ */
+
+int32_t row_words2(                  //compute space size
+        TO_BLOCK* block,  //block it came from
+        TO_ROW* row,      //row to operate on
+        int32_t maxwidth,   //max expected space size
+        FCOORD rotation,  //for drawing
+        bool testing_on  //for debug
+) {
+  bool prev_valid;              //if decent size
+  bool this_valid;              //current blob big enough
+  int32_t prev_x;                  //end of prev blob
+  int32_t min_width;               //min interesting width
+  int32_t valid_count;             //good gaps
+  int32_t total_count;             //total gaps
+  int32_t cluster_count;           //no of clusters
+  int32_t prev_count;              //previous cluster_count
+  int32_t gap_index;               //which cluster
+  int32_t smooth_factor;           //for smoothing stats
+  BLOBNBOX *blob;                //current blob
+  float lower, upper;            //clustering parameters
+  ICOORD testpt;                 //assigned below but not otherwise used here
+  TBOX blob_box;                  //bounding box
+                                 //iterator
+  BLOBNBOX_IT blob_it = row->blob_list ();
+  STATS gap_stats (0, maxwidth);
+                                 //median gap of each cluster
+  float gaps[BLOCK_STATS_CLUSTERS];
+  STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
+  //clusters; entries from index 1 upwards are read below
+
+  testpt = ICOORD (textord_test_x, textord_test_y);
+  smooth_factor =
+    static_cast<int32_t>(block->xheight * textord_wordstats_smooth_factor + 1.5);
+  //      if (testing_on)
+  //              tprintf("Row smooth factor=%d\n",smooth_factor);
+  prev_valid = false;
+  prev_x = -INT16_MAX;
+  const bool testing_row = false;  //never true: the stats dump below is skipped
+                                 //min blob size
+  min_width = static_cast<int32_t>(block->pr_space);
+  total_count = 0;
+  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
+    blob = blob_it.data ();
+    if (!blob->joined_to_prev ()) {
+      blob_box = blob->bounding_box ();
+      this_valid = blob_box.width () >= min_width;
+      if (this_valid && prev_valid
+      && blob_box.left () - prev_x < maxwidth) {
+        gap_stats.add (blob_box.left () - prev_x, 1);
+      }
+      total_count++;             //count possibles
+      prev_x = blob_box.right ();
+      prev_valid = this_valid;
+    }
+  }
+  valid_count = gap_stats.get_total ();
+  if (valid_count < total_count * textord_words_minlarge) {
+    gap_stats.clear ();          //too few wide-blob gaps: redo with every blob
+    prev_x = -INT16_MAX;
+    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+    blob_it.forward ()) {
+      blob = blob_it.data ();
+      if (!blob->joined_to_prev ()) {
+        blob_box = blob->bounding_box ();
+        if (blob_box.left () - prev_x < maxwidth) {
+          gap_stats.add (blob_box.left () - prev_x, 1);
+        }
+        prev_x = blob_box.right ();
+      }
+    }
+  }
+  if (gap_stats.get_total () == 0) {
+    row->min_space = 0;          //no evidence
+    row->max_nonspace = 0;
+    return 0;
+  }
+
+  cluster_count = 0;
+  lower = block->xheight * words_initial_lower;
+  upper = block->xheight * words_initial_upper;
+  gap_stats.smooth (smooth_factor);
+  do {                           //re-cluster while the cluster count keeps growing
+    prev_count = cluster_count;
+    cluster_count = gap_stats.cluster (lower, upper,
+      textord_spacesize_ratioprop,
+      BLOCK_STATS_CLUSTERS, cluster_stats);
+  }
+  while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
+  if (cluster_count < 1) {
+    row->min_space = 0;
+    row->max_nonspace = 0;
+    return 0;
+  }
+  for (gap_index = 0; gap_index < cluster_count; gap_index++)
+    gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
+  //get the median of each cluster
+  if (testing_on) {
+    tprintf ("cluster_count=%d:", cluster_count);
+    for (gap_index = 0; gap_index < cluster_count; gap_index++)
+      tprintf (" %g(%d)", gaps[gap_index],
+        cluster_stats[gap_index + 1].get_total ());
+    tprintf ("\n");
+  }
+
+  //Try to find proportional non-space and space for row.
+  for (gap_index = 0; gap_index < cluster_count
+    && gaps[gap_index] > block->max_nonspace; gap_index++);  //empty body: stop at first median <= max_nonspace
+  if (gap_index < cluster_count)
+    lower = gaps[gap_index];     //most frequent below
+  else {
+    if (testing_on)
+      tprintf ("No cluster below block threshold!, using default=%g\n",
+        block->pr_nonsp);
+    lower = block->pr_nonsp;
+  }
+  for (gap_index = 0; gap_index < cluster_count
+    && gaps[gap_index] <= block->max_nonspace; gap_index++);  //empty body: stop at first median > max_nonspace
+  if (gap_index < cluster_count)
+    upper = gaps[gap_index];     //most frequent above
+  else {
+    if (testing_on)
+      tprintf ("No cluster above block threshold!, using default=%g\n",
+        block->pr_space);
+    upper = block->pr_space;
+  }
+  row->min_space =
+    static_cast<int32_t>(ceil (upper - (upper - lower) * textord_words_definite_spread));
+  row->max_nonspace =
+    static_cast<int32_t>(floor (lower + (upper - lower) * textord_words_definite_spread));
+  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
+  row->space_size = upper;
+  row->kern_size = lower;
+  if (testing_on) {
+    if (testing_row) {
+      tprintf ("GAP STATS\n");
+      gap_stats.print();
+      tprintf ("SPACE stats\n");
+      cluster_stats[2].print_summary();
+      tprintf ("NONSPACE stats\n");
+      cluster_stats[1].print_summary();
+    }
+    tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
+      row->intercept (), row->min_space, upper,
+      row->max_nonspace, lower);
+  }
+  return 1;
+}
+
+
+/**
+ * @name make_real_words
+ *
+ * Convert a TO_BLOCK to a BLOCK: every TO_ROW with blobs or repeated words
+ * yields a ROW in the BLOCK's row list; block pitch stats are set last.
+ */
+void make_real_words(
+                     tesseract::Textord *textord,
+                     TO_BLOCK *block,  //block to do
+                     FCOORD rotation   //for drawing
+                    ) {
+  TO_ROW_IT row_it = block->get_rows ();
+  ROW_IT real_row_it = block->block->row_list ();
+
+  if (row_it.empty ())
+    return;                      //empty block
+  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+    TO_ROW *row = row_it.data ();  //current row
+    // Start from nullptr for every row: a row with neither blobs nor
+    // repeated words must not re-insert the ROW made for the previous row.
+    ROW *real_row = nullptr;       //output row
+    if (row->blob_list ()->empty () && !row->rep_words.empty ()) {
+      real_row = make_rep_words (row, block);
+    } else if (!row->blob_list()->empty()) {
+      // In a fixed pitch document, some lines may be detected as fixed pitch
+      // while others don't, and will go through different path.
+      // For non-space delimited language like CJK, fixed pitch chop always
+      // leave the entire line as one word.  We can force consistent chopping
+      // with force_make_prop_words flag.
+      POLY_BLOCK* pb = block->block->pdblk.poly_block();
+      if (textord_chopper_test) {
+        real_row = textord->make_blob_words (row, rotation);
+      } else if (textord_force_make_prop_words ||
+                 (pb != nullptr && !pb->IsText()) ||
+                 row->pitch_decision == PITCH_DEF_PROP ||
+                 row->pitch_decision == PITCH_CORR_PROP) {
+        real_row = textord->make_prop_words (row, rotation);
+      } else if (row->pitch_decision == PITCH_DEF_FIXED ||
+                 row->pitch_decision == PITCH_CORR_FIXED) {
+        real_row = fixed_pitch_words (row, rotation);
+      } else {
+        ASSERT_HOST(false);
+      }
+    }
+    if (real_row != nullptr) {
+      real_row_it.add_after_then_move (real_row);  //put row in block
+    }
+  }
+  block->block->set_stats (block->fixed_pitch == 0, static_cast<int16_t>(block->kern_size),
+    static_cast<int16_t>(block->space_size),
+    static_cast<int16_t>(block->fixed_pitch));
+  block->block->check_pitch ();
+}
+
+
+/**
+ * @name make_rep_words
+ *
+ * Fabricate a real row from only the repeated blob words.
+ * Get the xheight from the block as it may be more meaningful.
+ *
+ * Returns nullptr when the row has no repeated words. Otherwise returns a
+ * newly allocated ROW whose word list received row->rep_words.
+ * Side effect: row->xheight is overwritten with block->xheight.
+ */
+
+ROW *make_rep_words(                 //make a row
+                    TO_ROW *row,     //row to convert
+                    TO_BLOCK *block  //block it lives in
+                   ) {
+  WERD_IT word_it = &row->rep_words;
+  if (word_it.empty ())
+    return nullptr;
+
+  // Update the xheight before the ROW is constructed from row.
+  row->xheight = block->xheight;
+  ROW *real_row = new ROW(row, static_cast<int16_t>(block->kern_size),
+                          static_cast<int16_t>(block->space_size));
+  // Point the iterator at the new row's word list and hand over all of
+  // the repeated words.
+  word_it.set_to_list (real_row->word_list ());
+  word_it.add_list_after (&row->rep_words);
+  real_row->recalc_bounding_box ();
+  return real_row;
+}
+
+
+/**
+ * @name make_real_word
+ *
+ * Construct a WERD from a given number of adjacent entries in a
+ * list of BLOBNBOXs. The entries used are extracted from the list and
+ * deleted; blanks is raised to at least 1.
+ */
+WERD *make_real_word(BLOBNBOX_IT *box_it,  //iterator
+                     int32_t blobcount,      //no of blobs to use
+                     bool bol,            //start of line
+                     uint8_t blanks          //no of blanks
+                    ) {
+  C_OUTLINE_IT cout_it;
+  C_BLOB_LIST cblobs;
+  C_BLOB_IT cblob_it = &cblobs;
+  WERD *word;                    // new word
+  BLOBNBOX *bblob;               // current blob
+  int32_t blobindex;               // in row
+
+  for (blobindex = 0; blobindex < blobcount; blobindex++) {
+    bblob = box_it->extract();
+    // A joined blob can only be merged if an earlier blob is already in the
+    // word; otherwise cblob_it.data() is invalid, so start a new blob.
+    if (bblob->joined_to_prev() && !cblob_it.empty()) {
+      if (bblob->cblob() != nullptr) {
+        cout_it.set_to_list(cblob_it.data()->out_list());
+        cout_it.move_to_last();
+        cout_it.add_list_after(bblob->cblob()->out_list());
+        delete bblob->cblob();
+      }
+    } else if (bblob->cblob() != nullptr) {
+      cblob_it.add_after_then_move(bblob->cblob());
+    }
+    delete bblob;
+    box_it->forward();          // next one
+  }
+
+  if (blanks < 1)
+    blanks = 1;
+
+  word = new WERD(&cblobs, blanks, nullptr);
+
+  if (bol)
+    word->set_flag(W_BOL, true);
+  if (box_it->at_first())
+    word->set_flag(W_EOL, true);  // at end of line
+
+  return word;
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/wordseg.h b/tesseract/src/textord/wordseg.h
new file mode 100644
index 00000000..88e9cfdc
--- /dev/null
+++ b/tesseract/src/textord/wordseg.h
@@ -0,0 +1,78 @@
+/**********************************************************************
+ * File:        wordseg.h  (Formerly wspace.h)
+ * Description: Code to segment the blobs into words.
+ * Author:      Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef           WORDSEG_H
+#define           WORDSEG_H
+
+#include          "params.h"
+#include          "blobbox.h"
+#include          "textord.h"
+
+namespace tesseract {
+class Tesseract;
+
+extern BOOL_VAR_H (textord_fp_chopping, true, "Do fixed pitch chopping");
+extern BOOL_VAR_H(textord_force_make_prop_words, false,
+                  "Force proportional word segmentation on all rows");
+extern BOOL_VAR_H (textord_chopper_test, false,
+                   "Chopper is being tested.");
+
+void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows);
+void make_words(tesseract::Textord *textord,
+                ICOORD page_tr,                // top right
+                float gradient,               // page skew
+                BLOCK_LIST *blocks,           // block list
+                TO_BLOCK_LIST *port_blocks);  // output list
+void set_row_spaces(                  //find space sizes
+        TO_BLOCK* block,  //block to do
+        FCOORD rotation,  //for drawing
+        bool testing_on  //enable debug output
+);
+int32_t row_words(                  //compute space size
+        TO_BLOCK* block,  //block it came from
+        TO_ROW* row,      //row to operate on
+        int32_t maxwidth,   //max expected space size
+        FCOORD rotation,  //for drawing
+        bool testing_on  //for debug
+);
+int32_t row_words2(                  //compute space size
+        TO_BLOCK* block,  //block it came from
+        TO_ROW* row,      //row to operate on
+        int32_t maxwidth,   //max expected space size
+        FCOORD rotation,  //for drawing
+        bool testing_on  //for debug
+);
+void make_real_words(
+                     tesseract::Textord *textord,
+                     TO_BLOCK *block,  //block to do
+                     FCOORD rotation   //for drawing
+                    );
+ROW *make_rep_words(                 //make a row
+                    TO_ROW *row,     //row to convert
+                    TO_BLOCK *block  //block it lives in
+                   );
+WERD *make_real_word(                      //make a WERD
+        BLOBNBOX_IT* box_it,  //iterator
+        int32_t blobcount,      //no of blobs to use
+        bool bol,            //start of line
+        uint8_t blanks          //no of blanks
+);
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/workingpartset.cpp b/tesseract/src/textord/workingpartset.cpp
new file mode 100644
index 00000000..97ce70ae
--- /dev/null
+++ b/tesseract/src/textord/workingpartset.cpp
@@ -0,0 +1,144 @@
+///////////////////////////////////////////////////////////////////////
+// File:        workingpartset.cpp
+// Description: Class to hold a working set of partitions of the page
+//              during construction of text/image regions.
+// Author:      Ray Smith
+// Created:     Tue Oct 28 17:21:01 PDT 2008
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "workingpartset.h"
+#include "colpartition.h"
+
+namespace tesseract {
+
+ELISTIZE(WorkingPartSet)
+
+// Appends part to this WorkingPartSet. Partitions normally keep the order
+// in which they arrive, but a partition that has a SingletonPartner is
+// inserted next to that partner so the pair stays together.
+void WorkingPartSet::AddPartition(ColPartition* part) {
+  ColPartition* const partner = part->SingletonPartner(true);
+  if (partner != nullptr) {
+    ASSERT_HOST(partner->SingletonPartner(false) == part);
+  }
+  if (partner == nullptr || latest_part_ == nullptr) {
+    // Nothing to stay next to: append at the end of the list.
+    part_it_.move_to_last();
+  } else if (latest_part_->SingletonPartner(false) != part) {
+    // Step from the front until the partner, or the last element, is current.
+    part_it_.move_to_first();
+    while (!part_it_.at_last() && part_it_.data() != partner)
+      part_it_.forward();
+  }
+  part_it_.add_after_then_move(part);
+  latest_part_ = part;
+}
+
+// Builds blocks from every partition held by this WorkingPartSet (via
+// MakeBlocks) and appends them to the end of blocks and to_blocks.
+// bleft, tright and resolution describe the bounds and resolution of the
+// source image and are passed on to MakeBlocks together with used_parts,
+// the list that receives the ColPartitions.
+void WorkingPartSet::ExtractCompletedBlocks(const ICOORD& bleft,
+                                            const ICOORD& tright,
+                                            int resolution,
+                                            ColPartition_LIST* used_parts,
+                                            BLOCK_LIST* blocks,
+                                            TO_BLOCK_LIST* to_blocks) {
+  MakeBlocks(bleft, tright, resolution, used_parts);
+  // Splice the finished BLOCKs, then the TO_BLOCKs, onto the callers' lists.
+  BLOCK_IT dest_block_it(blocks);
+  dest_block_it.move_to_last();
+  dest_block_it.add_list_after(&completed_blocks_);
+  TO_BLOCK_IT dest_to_block_it(to_blocks);
+  dest_to_block_it.move_to_last();
+  dest_to_block_it.add_list_after(&to_blocks_);
+}
+
+// Puts the given blocks and to_blocks in front of those already held in
+// completed_blocks_ and to_blocks_, so reading order can be preserved.
+void WorkingPartSet::InsertCompletedBlocks(BLOCK_LIST* blocks,
+                                           TO_BLOCK_LIST* to_blocks) {
+  BLOCK_IT done_it(&completed_blocks_);
+  done_it.add_list_before(blocks);
+  TO_BLOCK_IT to_done_it(&to_blocks_);
+  to_done_it.add_list_before(to_blocks);
+}
+
+// Drains part_it_'s list into runs of partitions and turns each run into
+// blocks: text runs go through ColPartition::LineSpacingBlocks, other runs
+// through ColPartition::MakeBlock. Output: completed_blocks_ and to_blocks_.
+void WorkingPartSet::MakeBlocks(const ICOORD& bleft, const ICOORD& tright,
+                                int resolution, ColPartition_LIST* used_parts) {
+  part_it_.move_to_first();
+  while (!part_it_.empty()) {
+    // Gather a list of ColPartitions in block_parts that will be split
+    // by linespacing into smaller blocks.
+    ColPartition_LIST block_parts;
+    ColPartition_IT block_it(&block_parts);
+    ColPartition* next_part = nullptr;
+    bool text_block = false;  // Becomes true once any gathered part is text.
+    do {
+      ColPartition* part = part_it_.extract();
+      if (part->blob_type() == BRT_UNKNOWN ||
+          (part->IsTextType() && part->type() != PT_TABLE))
+        text_block = true;
+      part->set_working_set(nullptr);
+      part_it_.forward();  // Step past the extracted element to the next part.
+      block_it.add_after_then_move(part);
+      next_part = part->SingletonPartner(false);
+      if (part_it_.empty() || next_part != part_it_.data()) {
+        // Sequences of partitions can get split by titles.
+        next_part = nullptr;
+      }
+      // Merge adjacent blocks that are of the same type and let the
+      // linespacing determine the real boundaries.
+      if (next_part == nullptr && !part_it_.empty()) {
+        ColPartition* next_block_part = part_it_.data();
+        const TBOX& part_box = part->bounding_box();
+        const TBOX& next_box = next_block_part->bounding_box();
+
+        // In addition to the same type, the next box must not be above the
+        // current box, nor (if image) too far below.
+        PolyBlockType type = part->type(), next_type = next_block_part->type();
+        if (ColPartition::TypesSimilar(type, next_type) &&
+            !part->IsLineType() && !next_block_part->IsLineType() &&
+            next_box.bottom() <= part_box.top() &&
+            (text_block || part_box.bottom() <= next_box.top()))
+          next_part = next_block_part;
+      }
+    } while (!part_it_.empty() && next_part != nullptr);
+    if (!text_block) {
+      TO_BLOCK* to_block = ColPartition::MakeBlock(bleft, tright,
+                                                   &block_parts, used_parts);
+      if (to_block != nullptr) {
+        TO_BLOCK_IT to_block_it(&to_blocks_);
+        to_block_it.add_to_end(to_block);
+        BLOCK_IT block_it(&completed_blocks_);  // Shadows the ColPartition_IT above.
+        block_it.add_to_end(to_block->block);
+      }
+    } else {
+      // Further sub-divide text blocks where linespacing changes.
+      ColPartition::LineSpacingBlocks(bleft, tright, resolution, &block_parts,
+                                      used_parts,
+                                      &completed_blocks_, &to_blocks_);
+    }
+  }
+  part_it_.set_to_list(&part_set_);  // Every part was extracted; reset the iterator.
+  latest_part_ = nullptr;
+  ASSERT_HOST(completed_blocks_.length() == to_blocks_.length());
+}
+
+}  // namespace tesseract.
diff --git a/tesseract/src/textord/workingpartset.h b/tesseract/src/textord/workingpartset.h
new file mode 100644
index 00000000..6fb342aa
--- /dev/null
+++ b/tesseract/src/textord/workingpartset.h
@@ -0,0 +1,88 @@
+///////////////////////////////////////////////////////////////////////
+// File:        workingpartset.h
+// Description: Class to hold a working set of partitions of the page
+//              during construction of text/image regions.
+// Author:      Ray Smith
+// Created:     Tue Oct 28 17:21:01 PDT 2008
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_WORKINGPARSET_H_
+#define TESSERACT_TEXTORD_WORKINGPARSET_H_
+
+#include "blobbox.h"       // For TO_BLOCK_LIST and BLOCK_LIST.
+#include "colpartition.h"  // For ColPartition_LIST.
+
+namespace tesseract {
+
+// WorkingPartSet holds a working set of ColPartitions during transformation
+// from the grid-based storage to regions in logical reading order, and is
+// therefore only used during construction of the regions.
+class WorkingPartSet : public ELIST_LINK {
+ public:
+  explicit WorkingPartSet(ColPartition* column)
+    : column_(column), latest_part_(nullptr), part_it_(&part_set_) {
+  }
+
+  // Get/set the column that this working set applies to.
+  ColPartition* column() const {
+    return column_;
+  }
+  void set_column(ColPartition* col) {
+    column_ = col;
+  }
+
+  // Add the partition to this WorkingPartSet. Partitions are generally
+  // stored in the order in which they are received, but if the partition
+  // has a SingletonPartner, make sure that it stays with its partner.
+  void AddPartition(ColPartition* part);
+
+  // Make blocks out of any partitions in this WorkingPartSet, and append
+  // them to the end of the blocks list. bleft, tright and resolution give
+  // the bounds and resolution of the source image, so that blocks can be
+  // made to fit in the bounds.
+  // All ColPartitions go in the used_parts list: they are no longer needed
+  // for building blocks, but they still need to be kept around.
+  void ExtractCompletedBlocks(const ICOORD& bleft, const ICOORD& tright,
+                              int resolution, ColPartition_LIST* used_parts,
+                              BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
+
+  // Insert the given blocks at the front of the completed_blocks_ list so
+  // they can be kept in the correct reading order.
+  void InsertCompletedBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
+
+ private:
+  // Convert the part_set_ into blocks, starting a new block at a break
+  // in partnerships, or a change in linespacing (for text).
+  void MakeBlocks(const ICOORD& bleft, const ICOORD& tright, int resolution,
+                  ColPartition_LIST* used_parts);
+
+  // The column that this working set applies to. Used by the caller.
+  ColPartition* column_;
+  // The most recently added partition; nullptr if none has been added yet.
+  ColPartition* latest_part_;
+  // All the partitions in the block that is currently under construction.
+  ColPartition_LIST part_set_;
+  // Iterator on part_set_ pointing to the most recent addition.
+  ColPartition_IT part_it_;
+  // Blocks made so far that precede the current block, in parallel lists.
+  BLOCK_LIST completed_blocks_;
+  TO_BLOCK_LIST to_blocks_;
+};
+
+ELISTIZEH(WorkingPartSet)
+
+}  // namespace tesseract.
+
+#endif  // TESSERACT_TEXTORD_WORKINGPARSET_H_