summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'tesseract/src/textord')
-rw-r--r--tesseract/src/textord/alignedblob.cpp535
-rw-r--r--tesseract/src/textord/alignedblob.h124
-rw-r--r--tesseract/src/textord/baselinedetect.cpp869
-rw-r--r--tesseract/src/textord/baselinedetect.h276
-rw-r--r--tesseract/src/textord/bbgrid.cpp285
-rw-r--r--tesseract/src/textord/bbgrid.h957
-rw-r--r--tesseract/src/textord/blkocc.cpp165
-rw-r--r--tesseract/src/textord/blkocc.h253
-rw-r--r--tesseract/src/textord/blobgrid.cpp45
-rw-r--r--tesseract/src/textord/blobgrid.h46
-rw-r--r--tesseract/src/textord/ccnontextdetect.cpp323
-rw-r--r--tesseract/src/textord/ccnontextdetect.h87
-rw-r--r--tesseract/src/textord/cjkpitch.cpp1070
-rw-r--r--tesseract/src/textord/cjkpitch.h75
-rw-r--r--tesseract/src/textord/colfind.cpp1642
-rw-r--r--tesseract/src/textord/colfind.h366
-rw-r--r--tesseract/src/textord/colpartition.cpp2597
-rw-r--r--tesseract/src/textord/colpartition.h927
-rw-r--r--tesseract/src/textord/colpartitiongrid.cpp1743
-rw-r--r--tesseract/src/textord/colpartitiongrid.h252
-rw-r--r--tesseract/src/textord/colpartitionset.cpp667
-rw-r--r--tesseract/src/textord/colpartitionset.h171
-rw-r--r--tesseract/src/textord/devanagari_processing.cpp502
-rw-r--r--tesseract/src/textord/devanagari_processing.h210
-rw-r--r--tesseract/src/textord/drawtord.cpp423
-rw-r--r--tesseract/src/textord/drawtord.h103
-rw-r--r--tesseract/src/textord/edgblob.cpp462
-rw-r--r--tesseract/src/textord/edgblob.h100
-rw-r--r--tesseract/src/textord/edgloop.cpp162
-rw-r--r--tesseract/src/textord/edgloop.h44
-rw-r--r--tesseract/src/textord/equationdetectbase.cpp64
-rw-r--r--tesseract/src/textord/equationdetectbase.h59
-rw-r--r--tesseract/src/textord/fpchop.cpp890
-rw-r--r--tesseract/src/textord/fpchop.h84
-rw-r--r--tesseract/src/textord/gap_map.cpp189
-rw-r--r--tesseract/src/textord/gap_map.h53
-rw-r--r--tesseract/src/textord/imagefind.cpp1366
-rw-r--r--tesseract/src/textord/imagefind.h159
-rw-r--r--tesseract/src/textord/linefind.cpp769
-rw-r--r--tesseract/src/textord/linefind.h149
-rw-r--r--tesseract/src/textord/makerow.cpp2673
-rw-r--r--tesseract/src/textord/makerow.h291
-rw-r--r--tesseract/src/textord/oldbasel.cpp1698
-rw-r--r--tesseract/src/textord/oldbasel.h164
-rw-r--r--tesseract/src/textord/pithsync.cpp693
-rw-r--r--tesseract/src/textord/pithsync.h136
-rw-r--r--tesseract/src/textord/pitsync1.cpp422
-rw-r--r--tesseract/src/textord/pitsync1.h125
-rw-r--r--tesseract/src/textord/scanedg.cpp405
-rw-r--r--tesseract/src/textord/scanedg.h38
-rw-r--r--tesseract/src/textord/sortflts.cpp81
-rw-r--r--tesseract/src/textord/sortflts.h76
-rw-r--r--tesseract/src/textord/strokewidth.cpp2030
-rw-r--r--tesseract/src/textord/strokewidth.h355
-rw-r--r--tesseract/src/textord/tabfind.cpp1438
-rw-r--r--tesseract/src/textord/tabfind.h384
-rw-r--r--tesseract/src/textord/tablefind.cpp2088
-rw-r--r--tesseract/src/textord/tablefind.h427
-rw-r--r--tesseract/src/textord/tablerecog.cpp1067
-rw-r--r--tesseract/src/textord/tablerecog.h378
-rw-r--r--tesseract/src/textord/tabvector.cpp982
-rw-r--r--tesseract/src/textord/tabvector.h429
-rw-r--r--tesseract/src/textord/textlineprojection.cpp779
-rw-r--r--tesseract/src/textord/textlineprojection.h206
-rw-r--r--tesseract/src/textord/textord.cpp349
-rw-r--r--tesseract/src/textord/textord.h403
-rw-r--r--tesseract/src/textord/topitch.cpp1847
-rw-r--r--tesseract/src/textord/topitch.h191
-rw-r--r--tesseract/src/textord/tordmain.cpp994
-rw-r--r--tesseract/src/textord/tordmain.h45
-rw-r--r--tesseract/src/textord/tospace.cpp1894
-rw-r--r--tesseract/src/textord/tovars.cpp85
-rw-r--r--tesseract/src/textord/tovars.h94
-rw-r--r--tesseract/src/textord/underlin.cpp278
-rw-r--r--tesseract/src/textord/underlin.h56
-rw-r--r--tesseract/src/textord/wordseg.cpp625
-rw-r--r--tesseract/src/textord/wordseg.h78
-rw-r--r--tesseract/src/textord/workingpartset.cpp144
-rw-r--r--tesseract/src/textord/workingpartset.h88
79 files changed, 43799 insertions, 0 deletions
diff --git a/tesseract/src/textord/alignedblob.cpp b/tesseract/src/textord/alignedblob.cpp
new file mode 100644
index 00000000..4c17584b
--- /dev/null
+++ b/tesseract/src/textord/alignedblob.cpp
@@ -0,0 +1,535 @@
+///////////////////////////////////////////////////////////////////////
+// File: alignedblob.cpp
+// Description: Subclass of BBGrid to find vertically aligned blobs.
+// Author: Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "alignedblob.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+// Debug parameters for tab finding. textord_debug_tabfind and the four
+// textord_testregion_* values are read by AlignedBlob::WithinTestRegion to
+// decide whether debug output applies to a given point.
+INT_VAR(textord_debug_tabfind, 0, "Debug tab finding");
+INT_VAR(textord_debug_bugs, 0, "Turn on output related to bugs in tab finding");
+static INT_VAR(textord_testregion_left, -1, "Left edge of debug reporting rectangle in Leptonica coords (bottom=0/top=height), with horizontal lines x/y-flipped");
+static INT_VAR(textord_testregion_top, INT32_MAX, "Top edge of debug reporting rectangle in Leptonica coords (bottom=0/top=height), with horizontal lines x/y-flipped");
+static INT_VAR(textord_testregion_right, INT32_MAX, "Right edge of debug rectangle in Leptonica coords (bottom=0/top=height), with horizontal lines x/y-flipped");
+static INT_VAR(textord_testregion_bottom, -1, "Bottom edge of debug rectangle in Leptonica coords (bottom=0/top=height), with horizontal lines x/y-flipped");
+BOOL_VAR(textord_debug_printable, false, "Make debug windows printable");
+
+// Fraction of resolution used as alignment tolerance for aligned tabs.
+const double kAlignedFraction = 0.03125;
+// Multiple of resolution used as alignment tolerance on the ragged side of
+// ragged tabs. (The value is greater than 1, so it is not a true fraction.)
+const double kRaggedFraction = 2.5;
+// Fraction of height used as a minimum gutter gap for aligned blobs.
+const double kAlignedGapFraction = 0.75;
+// Fraction of height used as a minimum gutter gap for ragged tabs.
+const double kRaggedGapFraction = 1.0;
+// Constant number of pixels used as alignment tolerance for line finding.
+// The starter blob width is used instead when it is larger.
+const int kVLineAlignment = 3;
+// Constant number of pixels used as gutter gap tolerance for line finding.
+const int kVLineGutter = 1;
+// Constant number of pixels used as the search size (max vertical gap) for
+// line finding.
+const int kVLineSearchSize = 150;
+// Min number of points to accept for a ragged tab stop.
+const int kMinRaggedTabs = 5;
+// Min number of points to accept for an aligned tab stop.
+const int kMinAlignedTabs = 4;
+// Constant number of pixels minimum height of a vertical line.
+const int kVLineMinLength = 300;
+// Minimum gradient for a vertical tab vector. Used to prune away junk
+// tab vectors with what would be a ridiculously large skew angle.
+// Value corresponds to tan(90 - max allowed skew angle)
+const double kMinTabGradient = 4.0;
+// Tolerance to skew on top of current estimate of skew. Divide x or y length
+// by kMaxSkewFactor to get the y or x skew distance.
+// If the angle is small, the angle in degrees is roughly 60/kMaxSkewFactor.
+const int kMaxSkewFactor = 15;
+
+// Builds the search parameters used when looking for aligned or ragged tab
+// stops.
+//   vertical_x, vertical_y: current estimate of the true vertical (up)
+//     direction in the image.
+//   height: height of the starter blob.
+//   v_gap_multiple: multiple of height tolerated as a vertical gap before
+//     the line is considered to have ended.
+//   min_gutter_width: lower bound applied to the computed min_gutter.
+//   resolution: original image resolution.
+//   align0: the type of tab stop to be found.
+AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y,
+                                     int height, int v_gap_multiple,
+                                     int min_gutter_width,
+                                     int resolution, TabAlignment align0)
+    : right_tab(align0 == TA_RIGHT_RAGGED || align0 == TA_RIGHT_ALIGNED),
+      ragged(align0 == TA_LEFT_RAGGED || align0 == TA_RIGHT_RAGGED),
+      alignment(align0),
+      confirmed_type(TT_CONFIRMED),
+      min_length(0) {
+  // The maximum vertical gap scales with the starter blob; the alignment
+  // tolerances scale with the image resolution.
+  max_v_gap = height * v_gap_multiple;
+  const int tight_tolerance =
+      static_cast<int>(resolution * kAlignedFraction + 0.5);
+  l_align_tolerance = tight_tolerance;
+  r_align_tolerance = tight_tolerance;
+  if (!ragged) {
+    gutter_fraction = kAlignedGapFraction;
+    min_points = kMinAlignedTabs;
+  } else {
+    // A ragged edge gets a far more generous tolerance on its ragged side,
+    // but in exchange needs a bigger gutter and more supporting points.
+    const int loose_tolerance =
+        static_cast<int>(resolution * kRaggedFraction + 0.5);
+    if (alignment == TA_RIGHT_RAGGED) {
+      l_align_tolerance = loose_tolerance;
+    } else {
+      r_align_tolerance = loose_tolerance;
+    }
+    gutter_fraction = kRaggedGapFraction;
+    min_points = kMinRaggedTabs;
+  }
+  // The gutter is a fraction of the blob height, but never narrower than the
+  // caller-supplied minimum.
+  min_gutter = std::max(static_cast<int>(height * gutter_fraction + 0.5),
+                        min_gutter_width);
+  // Fit the vertical vector into an ICOORD, which is 16 bit.
+  set_vertical(vertical_x, vertical_y);
+}
+
+// Builds the search parameters used when looking for vertical lines.
+//   vertical_x, vertical_y: current estimate of the true vertical (up)
+//     direction in the image.
+//   width: width of the starter blob.
+AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y,
+                                     int width)
+    : gutter_fraction(0.0),
+      right_tab(false),
+      ragged(false),
+      alignment(TA_SEPARATOR),
+      confirmed_type(TT_VLINE),
+      max_v_gap(kVLineSearchSize),
+      min_gutter(kVLineGutter),
+      min_points(1),
+      min_length(kVLineMinLength) {
+  // Both sides share one tolerance: the blob width, but never less than
+  // kVLineAlignment pixels.
+  const int tolerance = std::max(kVLineAlignment, width);
+  l_align_tolerance = tolerance;
+  r_align_tolerance = tolerance;
+
+  // Fit the vertical vector into an ICOORD, which is 16 bit.
+  set_vertical(vertical_x, vertical_y);
+}
+
+// Fit the vertical vector into an ICOORD, which is 16 bit.
+// Both components are divided by a common integer factor chosen so that the
+// larger magnitude of the two fits in 16 bits; the direction is preserved up
+// to integer rounding. Vectors that already fit are stored unchanged.
+//   vertical_x, vertical_y: the vertical direction estimate to store.
+void AlignedBlobParams::set_vertical(int vertical_x, int vertical_y) {
+  // Take magnitudes in a wider type so that negating INT_MIN cannot overflow.
+  const long long abs_x =
+      vertical_x < 0 ? -static_cast<long long>(vertical_x) : vertical_x;
+  const long long abs_y =
+      vertical_y < 0 ? -static_cast<long long>(vertical_y) : vertical_y;
+  const long long max_abs = abs_x > abs_y ? abs_x : abs_y;
+  int factor = 1;
+  // Scaling on vertical_y alone would let a large |vertical_x| or a large
+  // negative vertical_y be truncated when stored in the ICOORD.
+  if (max_abs > INT16_MAX)
+    factor = static_cast<int>(max_abs / INT16_MAX) + 1;
+  vertical.set_x(vertical_x / factor);
+  vertical.set_y(vertical_y / factor);
+}
+
+
+// Constructor. Forwards the grid size and the bottom-left/top-right corners
+// to the BlobGrid base class; no other state is set up here.
+AlignedBlob::AlignedBlob(int gridsize,
+ const ICOORD& bleft, const ICOORD& tright)
+ : BlobGrid(gridsize, bleft, tright) {
+}
+
+// Decides whether debug output should be produced for the point (x, y).
+// Returns true only when textord_debug_tabfind is at least detail_level and
+// the point lies inside (edges inclusive) the rectangle described by the
+// textord_testregion_* parameters.
+bool AlignedBlob::WithinTestRegion(int detail_level, int x, int y) {
+  if (textord_debug_tabfind < detail_level) {
+    return false;
+  }
+  const bool inside_x =
+      textord_testregion_left <= x && x <= textord_testregion_right;
+  const bool inside_y =
+      textord_testregion_bottom <= y && y <= textord_testregion_top;
+  return inside_x && inside_y;
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Draws the tab codes of every BLOBNBOX in this grid.
+// For each blob, a vertical line is drawn along its left and/or right box
+// edge when the corresponding tab type is not TT_NONE, coloured by tab type.
+//   window_name: name used if a new window has to be created.
+//   tab_win: window to draw into; when nullptr a new one is made.
+// Returns the window that was drawn into.
+ScrollView* AlignedBlob::DisplayTabs(const char* window_name,
+                                     ScrollView* tab_win) {
+  if (tab_win == nullptr) {
+    tab_win = MakeWindow(0, 50, window_name);
+  }
+  // Visit every blob in the grid.
+  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> grid_search(this);
+  grid_search.StartFullSearch();
+  for (BLOBNBOX* blob = grid_search.NextFullSearch(); blob != nullptr;
+       blob = grid_search.NextFullSearch()) {
+    const TBOX& bounds = blob->bounding_box();
+    const int x_left = bounds.left();
+    const int x_right = bounds.right();
+    const int y_top = bounds.top();
+    const int y_bottom = bounds.bottom();
+
+    // Left edge: blue / yellow / green / grey.
+    const TabType left_type = blob->left_tab_type();
+    if (left_type != TT_NONE) {
+      if (left_type == TT_MAYBE_ALIGNED) {
+        tab_win->Pen(ScrollView::BLUE);
+      } else if (left_type == TT_MAYBE_RAGGED) {
+        tab_win->Pen(ScrollView::YELLOW);
+      } else if (left_type == TT_CONFIRMED) {
+        tab_win->Pen(ScrollView::GREEN);
+      } else {
+        tab_win->Pen(ScrollView::GREY);
+      }
+      tab_win->Line(x_left, y_top, x_left, y_bottom);
+    }
+
+    // Right edge: magenta / orange / red / grey.
+    const TabType right_type = blob->right_tab_type();
+    if (right_type != TT_NONE) {
+      if (right_type == TT_MAYBE_ALIGNED) {
+        tab_win->Pen(ScrollView::MAGENTA);
+      } else if (right_type == TT_MAYBE_RAGGED) {
+        tab_win->Pen(ScrollView::ORANGE);
+      } else if (right_type == TT_CONFIRMED) {
+        tab_win->Pen(ScrollView::RED);
+      } else {
+        tab_win->Pen(ScrollView::GREY);
+      }
+      tab_win->Line(x_right, y_top, x_right, y_bottom);
+    }
+  }
+  tab_win->Update();
+  return tab_win;
+}
+
+#endif // !GRAPHICS_DISABLED
+
+// Helper: sums line_crossings() over every blob in the list and reports
+// whether that sum reaches 2.
+static bool AtLeast2LineCrossings(BLOBNBOX_CLIST* blobs) {
+  int crossing_sum = 0;
+  BLOBNBOX_C_IT blob_it(blobs);
+  blob_it.mark_cycle_pt();
+  while (!blob_it.cycled_list()) {
+    crossing_sum += blob_it.data()->line_crossings();
+    blob_it.forward();
+  }
+  return crossing_sum > 1;
+}
+
+// Destructor. Defaulted: it performs no work of its own.
+// It is defined here, out of line, so the compiler can create a single vtable
+// instead of weak vtables in every compilation unit.
+AlignedBlob::~AlignedBlob() = default;
+
+// Finds a vector corresponding to a set of vertically aligned blob edges
+// running through the given box. The type of vector returned and the
+// search parameters are determined by the AlignedBlobParams.
+//   align_params: search parameters and acceptance thresholds.
+//   bbox: the blob to start the search from.
+//   vertical_x, vertical_y: passed on to TabVector::FitVector, which
+//     updates them with an estimate of the real vertical direction
+//     (skew finding).
+// On success every blob on the vector has the relevant (left or right) tab
+// type set to align_params.confirmed_type.
+// Returns nullptr if no decent vector can be found, including the case where
+// the search collected no points at all.
+TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params,
+                                              BLOBNBOX* bbox,
+                                              int* vertical_x,
+                                              int* vertical_y) {
+  int ext_start_y, ext_end_y;
+  BLOBNBOX_CLIST good_points;
+  // Search up and then down from the starting bbox.
+  TBOX box = bbox->bounding_box();
+  bool debug = WithinTestRegion(2, box.left(), box.bottom());
+  int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y);
+  pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y);
+  BLOBNBOX_C_IT it(&good_points);
+  // AlignTabs only adds blobs that are tab candidates (or any blob for a
+  // ragged search), so the list can be empty. There is then no first/last
+  // element to read and no vector to build.
+  if (it.empty()) {
+    if (debug) {
+      tprintf("Tab vector search found no usable points\n");
+    }
+    return nullptr;
+  }
+  it.move_to_last();
+  box = it.data()->bounding_box();
+  int end_y = box.top();
+  int end_x = align_params.right_tab ? box.right() : box.left();
+  it.move_to_first();
+  box = it.data()->bounding_box();
+  int start_x = align_params.right_tab ? box.right() : box.left();
+  int start_y = box.bottom();
+  // Acceptable tab vectors must have a minimum number of points,
+  // have a minimum acceptable length, and have a minimum gradient.
+  // The gradient corresponds to the skew angle.
+  // Ragged tabs don't need to satisfy the gradient condition, as they
+  // will always end up parallel to the vertical direction.
+  // Independently of those tests, a total of 2 or more line crossings
+  // among the points is enough to accept.
+  bool at_least_2_crossings = AtLeast2LineCrossings(&good_points);
+  if ((pt_count >= align_params.min_points &&
+       end_y - start_y >= align_params.min_length &&
+       (align_params.ragged ||
+        end_y - start_y >= abs(end_x - start_x) * kMinTabGradient)) ||
+      at_least_2_crossings) {
+    int confirmed_points = 0;
+    // Count existing confirmed points to see if vector is acceptable.
+    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+      bbox = it.data();
+      if (align_params.right_tab) {
+        if (bbox->right_tab_type() == align_params.confirmed_type)
+          ++confirmed_points;
+      } else {
+        if (bbox->left_tab_type() == align_params.confirmed_type)
+          ++confirmed_points;
+      }
+    }
+    // Ragged vectors are not allowed to use too many already used points.
+    if (!align_params.ragged ||
+        confirmed_points + confirmed_points < pt_count) {
+      // Box of the last blob visited by the counting loop above; used only
+      // for the debug output. Named distinctly so it does not shadow box.
+      const TBOX& last_box = bbox->bounding_box();
+      if (debug) {
+        tprintf("Confirming tab vector of %d pts starting at %d,%d\n",
+                pt_count, last_box.left(), last_box.bottom());
+      }
+      // Flag all the aligned neighbours as confirmed .
+      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+        bbox = it.data();
+        if (align_params.right_tab) {
+          bbox->set_right_tab_type(align_params.confirmed_type);
+        } else {
+          bbox->set_left_tab_type(align_params.confirmed_type);
+        }
+        if (debug) {
+          bbox->bounding_box().print();
+        }
+      }
+      // Now make the vector and return it.
+      TabVector* result = TabVector::FitVector(align_params.alignment,
+                                               align_params.vertical,
+                                               ext_start_y, ext_end_y,
+                                               &good_points,
+                                               vertical_x, vertical_y);
+      // NOTE(review): FitVector is defined outside this file; guard against
+      // a nullptr return rather than dereferencing it - confirm whether it
+      // can fail.
+      if (result != nullptr) {
+        result->set_intersects_other_lines(at_least_2_crossings);
+      }
+      if (debug) {
+        tprintf("Box was %d, %d\n", last_box.left(), last_box.bottom());
+        if (result != nullptr) {
+          result->Print("After fitting");
+        }
+      }
+      return result;
+    } else if (debug) {
+      tprintf("Ragged tab used too many used points: %d out of %d\n",
+              confirmed_points, pt_count);
+    }
+  } else if (debug) {
+    tprintf("Tab vector failed basic tests: pt count %d vs min %d, "
+            "length %d vs min %d, min grad %g\n",
+            pt_count, align_params.min_points, end_y - start_y,
+            align_params.min_length, abs(end_x - start_x) * kMinTabGradient);
+  }
+  return nullptr;
+}
+
+// Collects the chain of blobs aligned with bbox in one vertical direction.
+// Starting from bbox, repeatedly asks FindAlignedBlob for the next aligned
+// blob and adds each qualifying blob to good_points.
+//   params: search parameters.
+//   top_to_bottom: true to search downwards; new blobs are then inserted
+//     before the current list position, otherwise after it.
+//   bbox: the starting blob.
+//   good_points: list that receives the aligned blobs.
+//   end_y: output, written by FindAlignedBlob.
+// Returns the number of blobs added to good_points by this call.
+int AlignedBlob::AlignTabs(const AlignedBlobParams& params,
+                           bool top_to_bottom, BLOBNBOX* bbox,
+                           BLOBNBOX_CLIST* good_points, int* end_y) {
+  BLOBNBOX_C_IT point_it(good_points);
+
+  TBOX current_box = bbox->bounding_box();
+  const bool debug =
+      WithinTestRegion(2, current_box.left(), current_box.bottom());
+  if (debug) {
+    tprintf("Starting alignment run at blob:");
+    current_box.print();
+  }
+  int x_start = params.right_tab ? current_box.right() : current_box.left();
+  int num_added = 0;
+  BLOBNBOX* blob = bbox;
+  while (blob != nullptr) {
+    // A blob qualifies if the relevant side is a tab candidate, or always
+    // when a ragged tab is sought. Skip it if it is already the current
+    // list entry.
+    const TabType side_type = params.right_tab ? blob->right_tab_type()
+                                               : blob->left_tab_type();
+    const bool qualifies =
+        params.ragged || (side_type != TT_NONE && side_type != TT_MAYBE_RAGGED);
+    if (qualifies && (point_it.empty() || point_it.data() != blob)) {
+      if (top_to_bottom) {
+        point_it.add_before_then_move(blob);
+      } else {
+        point_it.add_after_then_move(blob);
+      }
+      ++num_added;
+    }
+    // Find the next blob that is aligned with the current one.
+    // FindAlignedBlob guarantees that forward progress will be made in the
+    // top_to_bottom direction, and therefore eventually it will return
+    // nullptr, which terminates this loop.
+    blob = FindAlignedBlob(params, top_to_bottom, blob, x_start, end_y);
+    if (blob == nullptr) {
+      break;
+    }
+    current_box = blob->bounding_box();
+    // For aligned (non-ragged) tabs the reference x follows the latest blob.
+    if (!params.ragged) {
+      x_start = params.right_tab ? current_box.right() : current_box.left();
+    }
+  }
+  if (debug) {
+    tprintf("Alignment run ended with %d pts at blob:", num_added);
+    current_box.print();
+  }
+  return num_added;
+}
+
+// Search vertically for a blob that is aligned with the input bbox.
+// The search parameters are determined by AlignedBlobParams.
+// p: the search parameters (tolerances, gutter, max vertical gap, vertical).
+// top_to_bottom tells whether to search down or up.
+// bbox: the blob to search from. Its left/right tab type may be changed to
+// TT_DELETED if a blob is found in the gutter.
+// x_start: x coordinate of the alignment edge, taken to apply at the
+// leading y edge of bbox.
+// end_y: output. Always written: first to the far edge of the search range,
+// and overwritten with the leading edge of the gutter blob if one is found.
+// The return value is nullptr if nothing was found in the search box
+// or if a blob was found in the gutter. On a nullptr return, end_y
+// is set to the edge of the search box or the leading edge of the
+// gutter blob if one was found.
+// If no marked tab candidate is found, an aligned non-candidate blob (the
+// backup result) may be returned instead.
+BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p,
+ bool top_to_bottom, BLOBNBOX* bbox,
+ int x_start, int* end_y) {
+ TBOX box = bbox->bounding_box();
+ // If there are separator lines, get the column edges.
+ // (In this function they are only used for the debug output below.)
+ int left_column_edge = bbox->left_rule();
+ int right_column_edge = bbox->right_rule();
+ // start_y is used to guarantee that forward progress is made and the
+ // search does not go into an infinite loop. New blobs must extend the
+ // line beyond start_y.
+ int start_y = top_to_bottom ? box.bottom() : box.top();
+ if (WithinTestRegion(2, x_start, start_y)) {
+ tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n",
+ box.left(), box.top(), box.right(), box.bottom(),
+ left_column_edge, right_column_edge);
+ }
+ // Compute skew tolerance.
+ int skew_tolerance = p.max_v_gap / kMaxSkewFactor;
+ // Calculate xmin and xmax of the search box so that it contains
+ // all possibly relevant boxes up to p.max_v_gap above or below according
+ // to top_to_bottom.
+ // Start with a notion of vertical with the current estimate.
+ // x2 is the (rounded) x displacement along the vertical direction over a
+ // y distance of p.max_v_gap. Note the division by p.vertical.y().
+ int x2 = (p.max_v_gap * p.vertical.x() + p.vertical.y()/2) / p.vertical.y();
+ if (top_to_bottom) {
+ x2 = x_start - x2;
+ *end_y = start_y - p.max_v_gap;
+ } else {
+ x2 = x_start + x2;
+ *end_y = start_y + p.max_v_gap;
+ }
+ // Expand the box by an additional skew tolerance
+ int xmin = std::min(x_start, x2) - skew_tolerance;
+ int xmax = std::max(x_start, x2) + skew_tolerance;
+ // Now add direction-specific tolerances.
+ // The gutter lies to the right of a right tab and to the left of a left tab.
+ if (p.right_tab) {
+ xmax += p.min_gutter;
+ xmin -= p.l_align_tolerance;
+ } else {
+ xmax += p.r_align_tolerance;
+ xmin -= p.min_gutter;
+ }
+ // Setup a vertical search for an aligned blob.
+ GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(this);
+ if (WithinTestRegion(2, x_start, start_y))
+ tprintf("Starting %s %s search at %d-%d,%d, search_size=%d, gutter=%d\n",
+ p.ragged ? "Ragged" : "Aligned", p.right_tab ? "Right" : "Left",
+ xmin, xmax, start_y, p.max_v_gap, p.min_gutter);
+ vsearch.StartVerticalSearch(xmin, xmax, start_y);
+ // result stores the best real return value.
+ BLOBNBOX* result = nullptr;
+ // The backup_result is not a tab candidate and can be used if no
+ // real tab candidate result is found.
+ BLOBNBOX* backup_result = nullptr;
+ // neighbour is the blob that is currently being investigated.
+ BLOBNBOX* neighbour = nullptr;
+ while ((neighbour = vsearch.NextVerticalSearch(top_to_bottom)) != nullptr) {
+ if (neighbour == bbox)
+ continue;
+ TBOX nbox = neighbour->bounding_box();
+ // n_y is the vertical mid-point of the neighbour.
+ int n_y = (nbox.top() + nbox.bottom()) / 2;
+ if ((!top_to_bottom && n_y > start_y + p.max_v_gap) ||
+ (top_to_bottom && n_y < start_y - p.max_v_gap)) {
+ if (WithinTestRegion(2, x_start, start_y))
+ tprintf("Neighbour too far at (%d,%d)->(%d,%d)\n",
+ nbox.left(), nbox.bottom(), nbox.right(), nbox.top());
+ break; // Gone far enough.
+ }
+ // It is CRITICAL to ensure that forward progress is made, (strictly
+ // in/decreasing n_y) or the caller could loop infinitely, while
+ // waiting for a sequence of blobs in a line to end.
+ // NextVerticalSearch alone does not guarantee this, as there may be
+ // more than one blob in a grid cell. See comment in AlignTabs.
+ if ((n_y < start_y) != top_to_bottom || nbox.y_overlap(box))
+ continue; // Only look in the required direction.
+ // Once the current neighbour is more than a grid cell away vertically
+ // from the best candidate so far, that candidate is returned.
+ if (result != nullptr && result->bounding_box().y_gap(nbox) > gridsize())
+ return result; // This result is clear.
+ if (backup_result != nullptr && p.ragged && result == nullptr &&
+ backup_result->bounding_box().y_gap(nbox) > gridsize())
+ return backup_result; // This result is clear.
+
+ // If the neighbouring blob is the wrong side of a separator line, then it
+ // "doesn't exist" as far as we are concerned.
+ // x_at_n_y is x_start projected along the vertical direction to y = n_y.
+ int x_at_n_y = x_start + (n_y - start_y) * p.vertical.x() / p.vertical.y();
+ if (x_at_n_y < neighbour->left_crossing_rule() ||
+ x_at_n_y > neighbour->right_crossing_rule())
+ continue; // Separator line in the way.
+ int n_left = nbox.left();
+ int n_right = nbox.right();
+ // n_x is the neighbour's edge on the side being searched.
+ int n_x = p.right_tab ? n_right : n_left;
+ if (WithinTestRegion(2, x_start, start_y))
+ tprintf("neighbour at (%d,%d)->(%d,%d), n_x=%d, n_y=%d, xatn=%d\n",
+ nbox.left(), nbox.bottom(), nbox.right(), nbox.top(),
+ n_x, n_y, x_at_n_y);
+ // Right tab: the neighbour intrudes into the gutter to the right of the
+ // projected edge.
+ if (p.right_tab &&
+ n_left < x_at_n_y + p.min_gutter &&
+ n_right > x_at_n_y + p.r_align_tolerance &&
+ (p.ragged || n_left < x_at_n_y + p.gutter_fraction * nbox.height())) {
+ // In the gutter so end of line.
+ if (bbox->right_tab_type() >= TT_MAYBE_ALIGNED)
+ bbox->set_right_tab_type(TT_DELETED);
+ *end_y = top_to_bottom ? nbox.top() : nbox.bottom();
+ if (WithinTestRegion(2, x_start, start_y))
+ tprintf("gutter\n");
+ return nullptr;
+ }
+ // Left tab: the mirror-image test for the gutter to the left.
+ if (!p.right_tab &&
+ n_left < x_at_n_y - p.l_align_tolerance &&
+ n_right > x_at_n_y - p.min_gutter &&
+ (p.ragged || n_right > x_at_n_y - p.gutter_fraction * nbox.height())) {
+ // In the gutter so end of line.
+ if (bbox->left_tab_type() >= TT_MAYBE_ALIGNED)
+ bbox->set_left_tab_type(TT_DELETED);
+ *end_y = top_to_bottom ? nbox.top() : nbox.bottom();
+ if (WithinTestRegion(2, x_start, start_y))
+ tprintf("gutter\n");
+ return nullptr;
+ }
+ if ((p.right_tab && neighbour->leader_on_right()) ||
+ (!p.right_tab && neighbour->leader_on_left()))
+ continue; // Neighbours of leaders are not allowed to be used.
+ if (n_x <= x_at_n_y + p.r_align_tolerance &&
+ n_x >= x_at_n_y - p.l_align_tolerance) {
+ // Aligned so keep it. If it is a marked tab save it as result,
+ // otherwise keep it as backup_result to return in case of later failure.
+ if (WithinTestRegion(2, x_start, start_y))
+ tprintf("aligned, seeking%d, l=%d, r=%d\n",
+ p.right_tab, neighbour->left_tab_type(),
+ neighbour->right_tab_type());
+ TabType n_type = p.right_tab ? neighbour->right_tab_type()
+ : neighbour->left_tab_type();
+ if (n_type != TT_NONE && (p.ragged || n_type != TT_MAYBE_RAGGED)) {
+ if (result == nullptr) {
+ result = neighbour;
+ } else {
+ // Keep the closest neighbour by Euclidean distance.
+ // This prevents it from picking a tab blob in another column.
+ // Squared distances are compared, so no square root is needed.
+ const TBOX& old_box = result->bounding_box();
+ int x_diff = p.right_tab ? old_box.right() : old_box.left();
+ x_diff -= x_at_n_y;
+ int y_diff = (old_box.top() + old_box.bottom()) / 2 - start_y;
+ int old_dist = x_diff * x_diff + y_diff * y_diff;
+ x_diff = n_x - x_at_n_y;
+ y_diff = n_y - start_y;
+ int new_dist = x_diff * x_diff + y_diff * y_diff;
+ if (new_dist < old_dist)
+ result = neighbour;
+ }
+ } else if (backup_result == nullptr) {
+ if (WithinTestRegion(2, x_start, start_y))
+ tprintf("Backup\n");
+ backup_result = neighbour;
+ } else {
+ // Prefer the backup whose edge is further out on the searched side:
+ // further right for a right tab, further left for a left tab.
+ TBOX backup_box = backup_result->bounding_box();
+ if ((p.right_tab && backup_box.right() < nbox.right()) ||
+ (!p.right_tab && backup_box.left() > nbox.left())) {
+ if (WithinTestRegion(2, x_start, start_y))
+ tprintf("Better backup\n");
+ backup_result = neighbour;
+ }
+ }
+ }
+ }
+ return result != nullptr ? result : backup_result;
+}
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/alignedblob.h b/tesseract/src/textord/alignedblob.h
new file mode 100644
index 00000000..e69b3354
--- /dev/null
+++ b/tesseract/src/textord/alignedblob.h
@@ -0,0 +1,124 @@
+///////////////////////////////////////////////////////////////////////
+// File: alignedblob.h
+// Description: A class to find vertically aligned blobs in a BBGrid,
+// and a struct to hold control parameters.
+// Author: Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_ALIGNEDBLOB_H_
+#define TESSERACT_TEXTORD_ALIGNEDBLOB_H_
+
+#include "bbgrid.h"
+#include "blobbox.h"
+#include "tabvector.h"
+
+namespace tesseract {
+
+// Declarations of the tab-finding debug parameters that are defined in
+// alignedblob.cpp. The default values shown here mirror those definitions.
+extern INT_VAR_H(textord_debug_bugs, 0,
+                 "Turn on output related to bugs in tab finding");
+extern INT_VAR_H(textord_debug_tabfind, 0, "Debug tab finding");
+extern BOOL_VAR_H(textord_debug_printable, false,
+                  "Make debug windows printable");
+
+// Simple structure to hold the search parameters for AlignedBlob.
+// The members are mostly derived from constants, which are
+// conditioned on the alignment parameter.
+// For finding vertical lines, a different set of constants are
+// used, conditioned on the different constructor.
+struct AlignedBlobParams {
+ // Constructor to set the parameters for finding aligned and ragged tabs.
+ // Vertical_x and vertical_y are the current estimates of the true vertical
+ // direction (up) in the image. Height is the height of the starter blob.
+ // v_gap_multiple is the multiple of height that will be used as a limit
+ // on vertical gap before giving up and calling the line ended.
+ // min_gutter_width is a lower bound on the resulting min_gutter.
+ // resolution is the original image resolution, and alignment0 indicates the
+ // type of tab stop to be found.
+ AlignedBlobParams(int vertical_x, int vertical_y, int height,
+ int v_gap_multiple, int min_gutter_width, int resolution,
+ TabAlignment alignment0);
+ // Constructor to set the parameters for finding vertical lines.
+ // Vertical_x and vertical_y are the current estimates of the true vertical
+ // direction (up) in the image. Width is the width of the starter blob.
+ AlignedBlobParams(int vertical_x, int vertical_y, int width);
+
+ // Fit the vertical vector into an ICOORD, which is 16 bit.
+ // Stores the (possibly scaled-down) result in the vertical member.
+ void set_vertical(int vertical_x, int vertical_y);
+
+ double gutter_fraction; // Multiple of height used for min_gutter.
+ bool right_tab; // We are looking at right edges.
+ bool ragged; // We are looking for a ragged (vs aligned) edge.
+ TabAlignment alignment; // The type we are trying to produce.
+ TabType confirmed_type; // Type to flag blobs if accepted.
+ int max_v_gap; // Max vertical gap to be tolerated.
+ int min_gutter; // Minimum gutter between columns.
+ // Tolerances allowed on horizontal alignment of aligned edges.
+ int l_align_tolerance; // Left edges.
+ int r_align_tolerance; // Right edges.
+ // Conditions for accepting a line.
+ int min_points; // Minimum number of points to be OK.
+ int min_length; // Min length of completed line.
+
+ ICOORD vertical; // Current estimate of logical vertical.
+};
+
+// The AlignedBlob class contains code to find vertically aligned blobs.
+// This is factored out into a separate class, so it can be used by both
+// vertical line finding (LineFind) and tabstop finding (TabFind).
+class TESS_API AlignedBlob : public BlobGrid {
+ public:
+ // Constructs the grid; all three arguments are passed to BlobGrid.
+ AlignedBlob(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+ ~AlignedBlob() override;
+
+ // Return true if the given coordinates are within the test rectangle
+ // and the debug level is at least the given detail level.
+ static bool WithinTestRegion(int detail_level, int x, int y);
+
+ // Display the tab codes of the BLOBNBOXes in this grid.
+ // If tab_win is nullptr a new window named window_name is created.
+ // Returns the window that was drawn into.
+ // Note: the definition in alignedblob.cpp is compiled only when
+ // GRAPHICS_DISABLED is not defined.
+ ScrollView* DisplayTabs(const char* window_name, ScrollView* tab_win);
+
+ // Finds a vector corresponding to a set of vertically aligned blob edges
+ // running through the given box. The type of vector returned and the
+ // search parameters are determined by the AlignedBlobParams.
+ // vertical_x and y are updated with an estimate of the real
+ // vertical direction. (skew finding.)
+ // Returns nullptr if no decent vector can be found.
+ TabVector* FindVerticalAlignment(AlignedBlobParams align_params,
+ BLOBNBOX* bbox,
+ int* vertical_x, int* vertical_y);
+
+ private:
+ // Find a set of blobs that are aligned in the given vertical
+ // direction with the given blob. Returns a list of aligned
+ // blobs and the number in the list.
+ // The list is returned through good_points; the count is the return value.
+ // For other parameters see FindAlignedBlob below.
+ int AlignTabs(const AlignedBlobParams& params,
+ bool top_to_bottom, BLOBNBOX* bbox,
+ BLOBNBOX_CLIST* good_points, int* end_y);
+
+ // Search vertically for a blob that is aligned with the input bbox.
+ // The search parameters are determined by AlignedBlobParams.
+ // top_to_bottom tells whether to search down or up.
+ // x_start is the x coordinate of the edge being followed.
+ // The return value is nullptr if nothing was found in the search box
+ // or if a blob was found in the gutter. On a nullptr return, end_y
+ // is set to the edge of the search box or the leading edge of the
+ // gutter blob if one was found.
+ BLOBNBOX* FindAlignedBlob(const AlignedBlobParams& p,
+ bool top_to_bottom, BLOBNBOX* bbox,
+ int x_start, int* end_y);
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_ALIGNEDBLOB_H_
diff --git a/tesseract/src/textord/baselinedetect.cpp b/tesseract/src/textord/baselinedetect.cpp
new file mode 100644
index 00000000..ef3b91c8
--- /dev/null
+++ b/tesseract/src/textord/baselinedetect.cpp
@@ -0,0 +1,869 @@
+///////////////////////////////////////////////////////////////////////
+// File: baselinedetect.cpp
+// Description: Initial Baseline Determination.
+// Copyright 2012 Google Inc. All Rights Reserved.
+// Author: rays@google.com (Ray Smith)
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#define _USE_MATH_DEFINES // for M_PI
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "baselinedetect.h"
+
+#include <algorithm>
+#include <cfloat> // for FLT_MAX
+#include <cmath> // for M_PI
+#include "allheaders.h"
+#include "blobbox.h"
+#include "detlinefit.h"
+#include "drawtord.h"
+#include "helpers.h"
+#include "linlsq.h"
+#include "makerow.h"
+#include "textord.h"
+#include "tprintf.h"
+#include "underlin.h"
+
+// Maximum number of displacement modes kept in displacement_modes_.
+const int kMaxDisplacementsModes = 3;
+// Number of points to skip (at each end) when retrying the initial fit.
+const int kNumSkipPoints = 3;
+// Max angle deviation (in radians) allowed to keep the independent baseline.
+const double kMaxSkewDeviation = 1.0 / 64;
+// Fraction of line spacing estimate for quantization of blob displacements.
+const double kOffsetQuantizationFactor = 3.0 / 64;
+// Fraction of line spacing estimate for computing blob fit error.
+const double kFitHalfrangeFactor = 6.0 / 64;
+// Max fraction of line spacing allowed before a baseline counts as badly fitting.
+const double kMaxBaselineError = 3.0 / 64;
+// Multiple of linespacing that sets max_blob_size in TO_BLOCK.
+// Copied from textord_excess_blobsize.
+const double kMaxBlobSizeMultiple = 1.3;
+// Min fraction of linespacing gaps that should be close to the model before
+// we will force the linespacing model on all the lines.
+const double kMinFittingLinespacings = 0.25;
+// A y-coordinate within a textline that is to be debugged. Uncomment the
+//#define kDebugYCoord 1525
+
+namespace tesseract {
+
+BaselineRow::BaselineRow(double line_spacing, TO_ROW* to_row)
+ : blobs_(to_row->blob_list()),
+ baseline_pt1_(0.0f, 0.0f), baseline_pt2_(0.0f, 0.0f),
+ baseline_error_(0.0), good_baseline_(false) {
+ ComputeBoundingBox();  // Needs blobs_, which was set in the initializer list.
+ // Derive the per-row tolerances as fixed fractions of line_spacing.
+ disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing;  // Histogram bucket size.
+ fit_halfrange_ = kFitHalfrangeFactor * line_spacing;  // Half-width used by constrained fits.
+ max_baseline_error_ = kMaxBaselineError * line_spacing;  // Threshold for a bad fit.
+}
+
+// Sets the TO_ROW with the output straight line, as a gradient and y-intercept.
+void BaselineRow::SetupOldLineParameters(TO_ROW* row) const {
+ // TODO(rays) get rid of this when m and c are no longer used.
+ double gradient = tan(BaselineAngle());  // Slope from the folded baseline angle.
+ // para_c is the actual intercept of the baseline on the y-axis.
+ float para_c = StraightYAtX(0.0);
+ row->set_line(gradient, para_c, baseline_error_);
+ row->set_parallel_line(gradient, para_c, baseline_error_);  // Same values as set_line.
+}
+
+// Outputs diagnostic information: end points, angle, y-intercept, tolerances and box.
+void BaselineRow::Print() const {
+ tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n",
+ baseline_pt1_.x(), baseline_pt1_.y(),
+ baseline_pt2_.x(), baseline_pt2_.y(),
+ BaselineAngle(), StraightYAtX(0.0));
+ tprintf("Quant factor=%g, error=%g, good=%d, box:",
+ disp_quant_factor_, baseline_error_, good_baseline_);
+ bounding_box_.print();  // Follows the "box:" label printed above.
+}
+
+// Returns the skew angle (in radians) of the current baseline in [-pi/2, pi/2].
+double BaselineRow::BaselineAngle() const {
+ FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_);
+ double angle = baseline_dir.angle();
+ // Baseline directions are only unique in a range of pi so constrain to
+ // [-pi/2, pi/2].
+ return fmod(angle + M_PI * 1.5, M_PI) - M_PI * 0.5;  // Shift, wrap modulo pi, shift back.
+}
+
+// Computes and returns the linespacing at the middle of the overlap
+// between this and other.
+double BaselineRow::SpaceBetween(const BaselineRow& other) const {
+ // Find the x-centre of overlap of the lines (mean of max-left and min-right).
+ float x = (std::max(bounding_box_.left(), other.bounding_box_.left()) +
+ std::min(bounding_box_.right(), other.bounding_box_.right())) / 2.0f;
+ // Find the vertical centre between them.
+ float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f;
+ // Find the perpendicular distance of (x,y) from each line.
+ FCOORD pt(x, y);
+ return PerpDistanceFromBaseline(pt) + other.PerpDistanceFromBaseline(pt);  // Sum of both.
+}
+
+// Computes and returns the displacement of the center of the line
+// perpendicular to the given direction.
+double BaselineRow::PerpDisp(const FCOORD& direction) const {
+ float middle_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f;
+ FCOORD middle_pos(middle_x, StraightYAtX(middle_x));  // Point on the baseline at mid-x.
+ return direction * middle_pos / direction.length();  // NOTE(review): presumably FCOORD operator* is the cross product; a zero-length direction is not guarded - confirm.
+}
+
+// Computes the y coordinate at the given x using the straight baseline
+// defined by baseline_pt1_ and baseline_pt2_.
+double BaselineRow::StraightYAtX(double x) const {
+ double denominator = baseline_pt2_.x() - baseline_pt1_.x();
+ if (denominator == 0.0)  // Vertical (or degenerate) baseline: no unique y for x.
+ return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0;  // Fall back to the mean y.
+ return baseline_pt1_.y() +
+ (x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) /
+ denominator;
+}
+
+// Fits a straight baseline to the points. Returns true if it had enough
+// points to be reasonably sure of the fitted baseline (i.e. good_baseline_).
+// If use_box_bottoms is false, baseline positions are formed by
+// considering the outlines of the blobs.
+bool BaselineRow::FitBaseline(bool use_box_bottoms) {
+ // Deterministic fitting is used wherever possible.
+ fitter_.Clear();
+ // Linear least squares is a backup if the DetLineFit produces a bad line.
+ LLSQ llsq;
+ BLOBNBOX_IT blob_it(blobs_);
+
+ for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+ BLOBNBOX* blob = blob_it.data();
+ if (!use_box_bottoms) blob->EstimateBaselinePosition();
+ const TBOX& box = blob->bounding_box();
+ int x_middle = (box.left() + box.right()) / 2;
+#ifdef kDebugYCoord
+ if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) {
+ tprintf("Box bottom = %d, baseline pos=%d for box at:",
+ box.bottom(), blob->baseline_position());
+ box.print();
+ }
+#endif
+ fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2);
+ llsq.add(x_middle, blob->baseline_position());  // Same point feeds the backup fit.
+ }
+ // Fit the line.
+ ICOORD pt1, pt2;
+ baseline_error_ = fitter_.Fit(&pt1, &pt2);
+ baseline_pt1_ = pt1;
+ baseline_pt2_ = pt2;
+ if (baseline_error_ > max_baseline_error_ &&
+ fitter_.SufficientPointsForIndependentFit()) {
+ // The fit was bad but there were plenty of points, so try skipping
+ // the first and last few, and use the new line if it dramatically improves
+ // the error of fit.
+ double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2);
+ if (error < baseline_error_ / 2.0) {  // "Dramatically" == at least halves the error.
+ baseline_error_ = error;
+ baseline_pt1_ = pt1;
+ baseline_pt2_ = pt2;
+ }
+ }
+ int debug = 0;  // Only raised below when kDebugYCoord is defined.
+#ifdef kDebugYCoord
+ Print();
+ debug = bounding_box_.bottom() < kDebugYCoord &&
+ bounding_box_.top() > kDebugYCoord
+ ? 3 : 2;
+#endif
+ // Now we obtained a direction from that fit, see if we can improve the
+ // fit using the same direction and some other start point.
+ FCOORD direction(pt2 - pt1);  // NOTE(review): pt1/pt2 were also passed to the retry Fit above; if that retry was rejected, this may still use its points - confirm intended.
+ double target_offset = direction * pt1;
+ good_baseline_ = false;
+ FitConstrainedIfBetter(debug, direction, 0.0, target_offset);
+ // Wild lines can be produced because DetLineFit allows vertical lines, but
+ // vertical text has been rotated so angles over pi/4 should be disallowed.
+ // Near vertical lines can still be produced by vertically aligned components
+ // on very short lines.
+ double angle = BaselineAngle();
+ if (fabs(angle) > M_PI * 0.25) {
+ // Use the llsq fit as a backup.
+ baseline_pt1_ = llsq.mean_point();
+ baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m());  // One unit along x at slope m.
+ // TODO(rays) get rid of this when m and c are no longer used.
+ double m = llsq.m();
+ double c = llsq.c(m);
+ baseline_error_ = llsq.rms(m, c);
+ good_baseline_ = false;  // The backup line is never reported as good.
+ }
+ return good_baseline_;
+}
+
+// Modifies an existing result of FitBaseline to be parallel to the given
+// direction vector if that produces a better result.
+void BaselineRow::AdjustBaselineToParallel(int debug,
+ const FCOORD& direction) {
+ SetupBlobDisplacements(direction);  // Refreshes displacement_modes_ for this direction.
+ if (displacement_modes_.empty())
+ return;  // No modes found: leave the existing baseline untouched.
+#ifdef kDebugYCoord
+ if (bounding_box_.bottom() < kDebugYCoord &&
+ bounding_box_.top() > kDebugYCoord && debug < 3)
+ debug = 3;
+#endif
+ FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]);  // Target the first mode, no cheat allowance.
+}
+
+// Modifies the baseline to snap to the textline grid if the existing
+// result is not good enough. Returns the offset to pass on to the next row.
+double BaselineRow::AdjustBaselineToGrid(int debug,
+ const FCOORD& direction,
+ double line_spacing,
+ double line_offset) {
+ if (blobs_->empty()) {
+ if (debug > 1) {
+ tprintf("Row empty at:");
+ bounding_box_.print();
+ }
+ return line_offset;  // Empty row: pass the incoming offset through unchanged.
+ }
+ // Find the displacement_modes_ entry nearest to the grid.
+ double best_error = 0.0;
+ int best_index = -1;  // Stays -1 if displacement_modes_ is empty.
+ for (int i = 0; i < displacement_modes_.size(); ++i) {
+ double blob_y = displacement_modes_[i];
+ double error = BaselineBlock::SpacingModelError(blob_y, line_spacing,
+ line_offset);
+ if (debug > 1) {
+ tprintf("Mode at %g has error %g from model \n", blob_y, error);
+ }
+ if (best_index < 0 || error < best_error) {
+ best_error = error;
+ best_index = i;
+ }
+ }
+ // We will move the baseline only if the chosen mode is close enough to the
+ // model.
+ double model_margin = max_baseline_error_ - best_error;
+ if (best_index >= 0 && model_margin > 0.0) {
+ // But if the current baseline is already close to the mode there is no
+ // point, and only the potential to damage accuracy by changing its angle.
+ double perp_disp = PerpDisp(direction);
+ double shift = displacement_modes_[best_index] - perp_disp;
+ if (fabs(shift) > max_baseline_error_) {
+ if (debug > 1) {
+ tprintf("Attempting linespacing model fit with mode %g to row at:",
+ displacement_modes_[best_index]);
+ bounding_box_.print();
+ }
+ FitConstrainedIfBetter(debug, direction, model_margin,  // Margin is used as the cheat allowance.
+ displacement_modes_[best_index]);
+ } else if (debug > 1) {
+ tprintf("Linespacing model only moves current line by %g for row at:",
+ shift);
+ bounding_box_.print();
+ }
+ } else if (debug > 1) {
+ tprintf("Linespacing model not close enough to any mode for row at:");
+ bounding_box_.print();
+ }
+ return fmod(PerpDisp(direction), line_spacing);  // Offset of the (possibly refitted) row modulo the spacing.
+}
+
+// Sets up displacement_modes_ with the top few modes of the perpendicular
+// distance of each blob from the given direction vector, after rounding.
+void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) {
+ // Set of perpendicular displacements of the blob bottoms from the required
+ // baseline direction.
+ GenericVector<double> perp_blob_dists;
+ displacement_modes_.truncate(0);  // Any previous modes are discarded first.
+ // Gather the skew-corrected position of every blob.
+ double min_dist = FLT_MAX;
+ double max_dist = -FLT_MAX;
+ BLOBNBOX_IT blob_it(blobs_);
+#ifdef kDebugYCoord
+ bool debug = false;
+#endif
+ for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+ BLOBNBOX* blob = blob_it.data();
+ const TBOX& box = blob->bounding_box();
+#ifdef kDebugYCoord
+ if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) debug = true;
+#endif
+ FCOORD blob_pos((box.left() + box.right()) / 2.0f,
+ blob->baseline_position());
+ double offset = direction * blob_pos;
+ perp_blob_dists.push_back(offset);
+#ifdef kDebugYCoord
+ if (debug) {
+ tprintf("Displacement %g for blob at:", offset);
+ box.print();
+ }
+#endif
+ UpdateRange(offset, &min_dist, &max_dist);
+ }
+ // Set up a histogram using disp_quant_factor_ as the bucket size.
+ STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_),  // NOTE(review): with no blobs min/max stay at +/-FLT_MAX - confirm callers never pass an empty row.
+ IntCastRounded(max_dist / disp_quant_factor_) + 1);
+ for (int i = 0; i < perp_blob_dists.size(); ++i) {
+ dist_stats.add(IntCastRounded(perp_blob_dists[i] / disp_quant_factor_), 1);
+ }
+ GenericVector<KDPairInc<float, int> > scaled_modes;
+ dist_stats.top_n_modes(kMaxDisplacementsModes, &scaled_modes);
+#ifdef kDebugYCoord
+ if (debug) {
+ for (int i = 0; i < scaled_modes.size(); ++i) {
+ tprintf("Top mode = %g * %d\n",
+ scaled_modes[i].key() * disp_quant_factor_, scaled_modes[i].data());  // key() is an accessor, as used below.
+ }
+ }
+#endif
+ for (int i = 0; i < scaled_modes.size(); ++i)
+ displacement_modes_.push_back(disp_quant_factor_ * scaled_modes[i].key());  // Undo the quantization scaling.
+}
+
+// Fits a line in the given direction to blobs that are close to the given
+// target_offset perpendicular displacement from the direction. The fit
+// error is allowed to be cheat_allowance worse than the existing fit, and
+// will still be used.
+// If cheat_allowance > 0, the new fit will be good and replace the current
+// fit if it has better fit (with cheat) OR its error is below
+// max_baseline_error_ and the old fit is marked bad.
+// Otherwise the new fit will only replace the old if it is really better,
+// or the old fit is marked bad and the new fit has sufficient points, as
+// well as being within the max_baseline_error_.
+void BaselineRow::FitConstrainedIfBetter(int debug,
+ const FCOORD& direction,
+ double cheat_allowance,
+ double target_offset) {
+ double halfrange = fit_halfrange_ * direction.length();  // Scaled because offsets are not normalized by |direction|.
+ double min_dist = target_offset - halfrange;
+ double max_dist = target_offset + halfrange;
+ ICOORD line_pt;
+ double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist,
+ debug > 2, &line_pt);
+ // Allow cheat_allowance off the new error
+ new_error -= cheat_allowance;
+ double old_angle = BaselineAngle();
+ double new_angle = direction.angle();
+ if (debug > 1) {
+ tprintf("Constrained error = %g, original = %g",
+ new_error, baseline_error_);
+ tprintf(" angles = %g, %g, delta=%g vs threshold %g\n",
+ old_angle, new_angle,
+ new_angle - old_angle, kMaxSkewDeviation);
+ }
+ bool new_good_baseline = new_error <= max_baseline_error_ &&
+ (cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit());
+ // The new will replace the old if any are true:
+ // 1. the new error is better
+ // 2. the old is NOT good, but the new is
+ // 3. there is a wild angular difference between them (assuming that the new
+ // is a better guess at the angle.)
+ if (new_error <= baseline_error_ ||
+ (!good_baseline_ && new_good_baseline) ||
+ fabs(new_angle - old_angle) > kMaxSkewDeviation) {
+ baseline_error_ = new_error;  // Note: this stored error already has cheat_allowance subtracted.
+ baseline_pt1_ = line_pt;
+ baseline_pt2_ = baseline_pt1_ + direction;  // Second point is one direction vector along.
+ good_baseline_ = new_good_baseline;
+ if (debug > 1) {
+ tprintf("Replacing with constrained baseline, good = %d\n",
+ good_baseline_);
+ }
+ } else if (debug > 1) {
+ tprintf("Keeping old baseline\n");
+ }
+}
+
+// Returns the perpendicular distance of the point from the straight
+// baseline, or 0 (with a diagnostic) if the baseline has zero length.
+float BaselineRow::PerpDistanceFromBaseline(const FCOORD& pt) const {
+ FCOORD baseline_vector(baseline_pt2_ - baseline_pt1_);
+ FCOORD offset_vector(pt - baseline_pt1_);
+ float distance = baseline_vector * offset_vector;  // Not yet normalized by the baseline length.
+ float sqlength = baseline_vector.sqlength();
+ if (sqlength == 0.0f) {
+ tprintf("unexpected baseline vector (0,0)\n");
+ return 0.0f;
+ }
+ return std::sqrt(distance * distance / sqlength);  // Equals |distance| / |baseline_vector|, so never negative.
+}
+
+// Computes the bounding box of the row and stores it in bounding_box_.
+void BaselineRow::ComputeBoundingBox() {
+ BLOBNBOX_IT it(blobs_);
+ TBOX box;  // Default-constructed; accumulates every blob box below.
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ box += it.data()->bounding_box();
+ }
+ bounding_box_ = box;
+}
+
+
+BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block)
+ : block_(block), debug_level_(debug_level), non_text_block_(non_text),
+ good_skew_angle_(false), skew_angle_(0.0),
+ line_spacing_(block->line_spacing), line_offset_(0.0), model_error_(0.0) {
+ TO_ROW_IT row_it(block_->get_rows());
+ for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+ // Sort the blobs on the rows (this reorders the TO_ROW's own blob list).
+ row_it.data()->blob_list()->sort(blob_x_order);
+ rows_.push_back(new BaselineRow(block->line_spacing, row_it.data()));  // One BaselineRow per TO_ROW, in list order.
+ }
+}
+
+// Computes and returns the absolute error of the given perp_disp from the
+// given linespacing model.
+double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing,
+ double line_offset) {
+ // Round to the nearest multiple of line_spacing + line offset.
+ int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing);  // NOTE(review): line_spacing == 0 is not guarded here.
+ double model_y = line_spacing * multiple + line_offset;  // Nearest grid line to perp_disp.
+ return fabs(perp_disp - model_y);
+}
+
+// Fits straight line baselines and computes the skew angle from the
+// median angle. Returns true if a good angle is found.
+// If use_box_bottoms is false, baseline positions are formed by
+// considering the outlines of the blobs.
+bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) {
+ if (non_text_block_) return false;  // Non-text blocks are not fitted at all.
+ GenericVector<double> angles;
+ for (int r = 0; r < rows_.size(); ++r) {
+ BaselineRow* row = rows_[r];
+ if (row->FitBaseline(use_box_bottoms)) {  // Only rows with a good fit contribute an angle.
+ double angle = row->BaselineAngle();
+ angles.push_back(angle);
+ }
+ if (debug_level_ > 1)
+ row->Print();
+ }
+
+ if (!angles.empty()) {
+ skew_angle_ = MedianOfCircularValues(M_PI, &angles);
+ good_skew_angle_ = true;
+ } else {
+ skew_angle_ = 0.0f;  // No good rows: fall back to zero skew, flagged as not good.
+ good_skew_angle_ = false;
+ }
+ if (debug_level_ > 0) {
+ tprintf("Initial block skew angle = %g, good = %d\n",
+ skew_angle_, good_skew_angle_);
+ }
+ return good_skew_angle_;
+}
+
+// Refits the baseline to a constrained angle, using the stored block
+// skew if good enough, otherwise the supplied default skew.
+void BaselineBlock::ParallelizeBaselines(double default_block_skew) {
+ if (non_text_block_) return;
+ if (!good_skew_angle_) skew_angle_ = default_block_skew;  // Overwrites the stored skew.
+ if (debug_level_ > 0)
+ tprintf("Adjusting block to skew angle %g\n", skew_angle_);
+ FCOORD direction(cos(skew_angle_), sin(skew_angle_));  // Unit vector along the block skew.
+ for (int r = 0; r < rows_.size(); ++r) {
+ BaselineRow* row = rows_[r];
+ row->AdjustBaselineToParallel(debug_level_, direction);
+ if (debug_level_ > 1)
+ row->Print();
+ }
+ if (rows_.size() < 3 || !ComputeLineSpacing())  // Too few rows, or the spacing model does not fit.
+ return;
+ // Enforce the line spacing model on all lines that don't yet have a good
+ // baseline.
+ // Start by finding the row that is best fitted to the model.
+ int best_row = 0;
+ double best_error = SpacingModelError(rows_[0]->PerpDisp(direction),
+ line_spacing_, line_offset_);
+ for (int r = 1; r < rows_.size(); ++r) {
+ double error = SpacingModelError(rows_[r]->PerpDisp(direction),
+ line_spacing_, line_offset_);
+ if (error < best_error) {
+ best_error = error;
+ best_row = r;
+ }
+ }
+ // Starting at the best fitting row, work outwards, syncing the offset.
+ double offset = line_offset_;  // The best row itself is not adjusted by either loop.
+ for (int r = best_row + 1; r < rows_.size(); ++r) {
+ offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
+ line_spacing_, offset);
+ }
+ offset = line_offset_;  // Restart from the model offset for the other direction.
+ for (int r = best_row - 1; r >= 0; --r) {
+ offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction,
+ line_spacing_, offset);
+ }
+}
+
+// Sets the parameters in TO_BLOCK that are needed by subsequent processes.
+void BaselineBlock::SetupBlockParameters() const {  // const, but writes through the block_ pointer.
+ if (line_spacing_ > 0.0) {
+ // Where was block_line_spacing set before?
+ float min_spacing = std::min(block_->line_spacing, static_cast<float>(line_spacing_));
+ if (min_spacing < block_->line_size)  // line_size is only ever reduced here.
+ block_->line_size = min_spacing;
+ block_->line_spacing = line_spacing_;
+ block_->baseline_offset = line_offset_;
+ block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple;
+ }
+ // Setup the parameters on all the rows (runs even when line_spacing_ <= 0).
+ TO_ROW_IT row_it(block_->get_rows());
+ for (int r = 0; r < rows_.size(); ++r, row_it.forward()) {  // rows_ and the TO_ROW list are walked in lockstep.
+ BaselineRow* row = rows_[r];
+ TO_ROW* to_row = row_it.data();
+ row->SetupOldLineParameters(to_row);
+ }
+}
+
+// Processing that is required before fitting baseline splines, but requires
+// linear baselines in order to be successful:
+// Removes noise if required
+// Separates out underlines
+// Pre-associates blob fragments.
+// TODO(rays/joeliu) This entire section of code is inherited from the past
+// and could be improved/eliminated.
+// page_tr is used to size a debug window.
+void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) {
+ if (non_text_block_) return;  // Nothing is done for non-text blocks.
+ if (remove_noise) {
+ vigorous_noise_removal(block_);
+ }
+ FCOORD rotation(1.0f, 0.0f);  // (1, 0): presumably the identity rotation - confirm.
+ double gradient = tan(skew_angle_);  // Block skew expressed as a slope.
+ separate_underlines(block_, gradient, rotation, true);
+ pre_associate_blobs(page_tr, block_, rotation, true);
+}
+
+// Fits splines to the textlines, or creates fake QSPLINES from the straight
+// baselines that are already on the TO_ROWs.
+// As a side-effect, computes the xheights of the rows and the block.
+// Although x-height estimation is conceptually separate, it is part of
+// detecting perspective distortion and therefore baseline fitting.
+void BaselineBlock::FitBaselineSplines(bool enable_splines,
+ bool show_final_rows,
+ Textord* textord) {
+ double gradient = tan(skew_angle_);
+ FCOORD rotation(1.0f, 0.0f);  // NOTE(review): not referenced anywhere in this function.
+
+ if (enable_splines) {
+ textord->make_spline_rows(block_, gradient, show_final_rows);
+ } else {
+ // Make a fake spline from the existing line.
+ TBOX block_box= block_->block->pdblk.bounding_box();
+ TO_ROW_IT row_it = block_->get_rows();
+ for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+ TO_ROW* row = row_it.data();
+ int32_t xstarts[2] = { block_box.left(), block_box.right() };  // Single segment spanning the block.
+ double coeffs[3] = { 0.0, row->line_m(), row->line_c() };  // Zero leading coefficient, then the row's m and c.
+ row->baseline = QSPLINE(1, xstarts, coeffs);
+ textord->compute_row_xheight(row, block_->block->classify_rotation(),
+ row->line_m(), block_->line_size);
+ }
+ }
+ textord->compute_block_xheight(block_, gradient);
+ block_->block->set_xheight(block_->xheight);  // Copy the computed x-height onto the BLOCK.
+ if (textord_restore_underlines) // fix underlines
+ restore_underlined_blobs(block_);
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Draws the (straight) baselines and final blobs colored according to
+// what was discarded as noise and what is associated with each row.
+void BaselineBlock::DrawFinalRows(const ICOORD& page_tr) {
+ if (non_text_block_) return;
+ double gradient = tan(skew_angle_);
+ FCOORD rotation(1.0f, 0.0f);
+ int left_edge = block_->block->pdblk.bounding_box().left();
+ ScrollView* win = create_to_win(page_tr);
+ ScrollView::Color colour = ScrollView::RED;
+ TO_ROW_IT row_it = block_->get_rows();
+ for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+ plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation);
+ colour = static_cast<ScrollView::Color>(colour + 1);  // Next colour for the next row.
+ if (colour > ScrollView::MAGENTA)
+ colour = ScrollView::RED;  // Wrap back to RED once past MAGENTA.
+ }
+ plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE);
+ // Show discarded blobs.
+ plot_blob_list(win, &block_->underlines,
+ ScrollView::YELLOW, ScrollView::CORAL);
+ if (block_->blobs.length() > 0)  // Blobs still on the block list are reported as noise.
+ tprintf("%d blobs discarded as noise\n", block_->blobs.length());
+ draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation);
+}
+
+#endif // !GRAPHICS_DISABLED
+
+void BaselineBlock::DrawPixSpline(Pix* pix_in) {  // Plots every row's baseline spline onto pix_in.
+ if (non_text_block_) return;  // Non-text blocks are skipped.
+ TO_ROW_IT row_it = block_->get_rows();
+ for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+ row_it.data()->baseline.plot(pix_in);
+ }
+}
+
+// Top-level line-spacing calculation. Computes an estimate of the line-
+// spacing, using the current baselines in the TO_ROWS of the block, and
+// then refines it by fitting a regression line to the baseline positions
+// as a function of their integer index.
+// Returns true if it seems that the model is a reasonable fit to the
+// observations.
+bool BaselineBlock::ComputeLineSpacing() {
+ FCOORD direction(cos(skew_angle_), sin(skew_angle_));
+ GenericVector<double> row_positions;
+ ComputeBaselinePositions(direction, &row_positions);
+ if (row_positions.size() < 2) return false;  // Need at least one gap to model.
+ EstimateLineSpacing();
+ RefineLineSpacing(row_positions);
+ // Verify that the model is reasonable.
+ double max_baseline_error = kMaxBaselineError * line_spacing_;
+ int non_trivial_gaps = 0;
+ int fitting_gaps = 0;
+ for (int i = 1; i < row_positions.size(); ++i) {
+ double row_gap = fabs(row_positions[i - 1] - row_positions[i]);
+ if (row_gap > max_baseline_error) {  // Gaps within the error tolerance are treated as trivial.
+ ++non_trivial_gaps;
+ if (fabs(row_gap - line_spacing_) <= max_baseline_error)
+ ++fitting_gaps;
+ }
+ }
+ if (debug_level_ > 0) {
+ tprintf("Spacing %g, in %d rows, %d gaps fitted out of %d non-trivial\n",
+ line_spacing_, row_positions.size(), fitting_gaps,
+ non_trivial_gaps);
+ }
+ return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings;  // Strict '>': false when there are no non-trivial gaps.
+}
+
+// Computes the deskewed vertical position of each baseline in the block and
+// stores them in the given vector.
+// This is calculated as the perpendicular distance of the middle of each
+// baseline (in case it has a different skew angle) from the line passing
+// through the origin parallel to the block baseline angle.
+// NOTE that "distance" above is a signed quantity so we can tell which side
+// of the block baseline a line sits, hence the function and argument name
+// positions not distances.
+void BaselineBlock::ComputeBaselinePositions(const FCOORD& direction,
+ GenericVector<double>* positions) {
+ positions->clear();  // Output holds exactly one entry per row, in rows_ order.
+ for (int r = 0; r < rows_.size(); ++r) {
+ BaselineRow* row = rows_[r];
+ const TBOX& row_box = row->bounding_box();
+ float x_middle = (row_box.left() + row_box.right()) / 2.0f;
+ FCOORD row_pos(x_middle, static_cast<float>(row->StraightYAtX(x_middle)));
+ float offset = direction * row_pos;  // Not divided by direction.length(), unlike BaselineRow::PerpDisp.
+ positions->push_back(offset);
+ }
+}
+
+// Computes an estimate of the line spacing of the block from the median
+// of the spacings between adjacent overlapping textlines.
+void BaselineBlock::EstimateLineSpacing() {
+ GenericVector<float> spacings;
+ for (int r = 0; r < rows_.size(); ++r) {
+ BaselineRow* row = rows_[r];
+ // Exclude silly lines (baseline angle steeper than pi/4).
+ if (fabs(row->BaselineAngle()) > M_PI * 0.25) continue;
+ // Find the first row after row that overlaps it significantly.
+ const TBOX& row_box = row->bounding_box();
+ int r2;
+ for (r2 = r + 1; r2 < rows_.size() &&
+ !row_box.major_x_overlap(rows_[r2]->bounding_box());
+ ++r2);  // Empty body: the loop only advances r2.
+ if (r2 < rows_.size()) {
+ BaselineRow* row2 = rows_[r2];
+ // Exclude silly lines (no later row is tried for this r).
+ if (fabs(row2->BaselineAngle()) > M_PI * 0.25) continue;
+ float spacing = row->SpaceBetween(*row2);
+ spacings.push_back(spacing);
+ }
+ }
+ // If we have at least one value, use it, otherwise leave the previous
+ // value unchanged.
+ if (!spacings.empty()) {
+ line_spacing_ = spacings[spacings.choose_nth_item(spacings.size() / 2)];  // Presumably the index of the median element - confirm choose_nth_item.
+ if (debug_level_ > 1)
+ tprintf("Estimate of linespacing = %g\n", line_spacing_);
+ }
+}
+
+// Refines the line spacing of the block by fitting a regression
+// line to the deskewed y-position of each baseline as a function of its
+// estimated line index, allowing for a small error in the initial linespacing
+// and choosing the best available model.
+void BaselineBlock::RefineLineSpacing(const GenericVector<double>& positions) {
+ double spacings[3], offsets[3], errors[3];  // [0] = current estimate, [1]/[2] = alternative hypotheses.
+ int index_range;
+ errors[0] = FitLineSpacingModel(positions, line_spacing_,
+ &spacings[0], &offsets[0], &index_range);
+ if (index_range > 1) {
+ double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range);  // Spacing if there were index_range + 1 gaps.
+ // Try the hypotheses that there might be index_range +/- 1 line spaces.
+ errors[1] = FitLineSpacingModel(positions, spacing_plus,
+ &spacings[1], &offsets[1], nullptr);
+ double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range);  // Spacing if there were index_range - 1 gaps.
+ errors[2] = FitLineSpacingModel(positions, spacing_minus,
+ &spacings[2], &offsets[2], nullptr);
+ for (int i = 1; i <= 2; ++i) {
+ if (errors[i] < errors[0]) {  // Keep whichever hypothesis has the lowest error in slot 0.
+ spacings[0] = spacings[i];
+ offsets[0] = offsets[i];
+ errors[0] = errors[i];
+ }
+ }
+ }
+ if (spacings[0] > 0.0) {  // A non-positive spacing is never adopted.
+ line_spacing_ = spacings[0];
+ line_offset_ = offsets[0];
+ model_error_ = errors[0];
+ if (debug_level_ > 0) {
+ tprintf("Final linespacing model = %g + offset %g, error %g\n",
+ line_spacing_, line_offset_, model_error_);
+ }
+ }
+}
+
+// Given an initial estimate of line spacing (m_in) and the positions of each
+// baseline, computes the line spacing of the block more accurately in m_out,
+// and the corresponding intercept in c_out, and the number of spacings seen
+// in index_delta (may be nullptr). Returns the error of fit to the model.
+// Uses a simple linear regression, but optimizes the offset using the median.
+double BaselineBlock::FitLineSpacingModel(
+ const GenericVector<double>& positions, double m_in,
+ double* m_out, double* c_out, int* index_delta) {
+ if (m_in == 0.0f || positions.size() < 2) {  // Degenerate input: echo m_in with zero offset and error.
+ *m_out = m_in;
+ *c_out = 0.0;
+ if (index_delta != nullptr) *index_delta = 0;
+ return 0.0;
+ }
+ GenericVector<double> offsets;
+ // Get the offset (remainder) linespacing for each line and choose the median.
+ for (int i = 0; i < positions.size(); ++i)
+ offsets.push_back(fmod(positions[i], m_in));
+ // Get the median offset.
+ double median_offset = MedianOfCircularValues(m_in, &offsets);
+ // Now fit a line to quantized line number and offset.
+ LLSQ llsq;
+ int min_index = INT32_MAX;
+ int max_index = -INT32_MAX;
+ for (int i = 0; i < positions.size(); ++i) {
+ double y_pos = positions[i];
+ int row_index = IntCastRounded((y_pos - median_offset) / m_in);  // Hypothesized integer line number.
+ UpdateRange(row_index, &min_index, &max_index);
+ llsq.add(row_index, y_pos);
+ }
+ // Get the refined line spacing.
+ *m_out = llsq.m();
+ // Use the median offset rather than the mean.
+ offsets.truncate(0);
+ for (int i = 0; i < positions.size(); ++i)
+ offsets.push_back(fmod(positions[i], *m_out));  // NOTE(review): *m_out == 0 is not guarded before fmod.
+ // Dump the per-row offsets (debug only) before taking their median.
+ if (debug_level_ > 2) {
+ for (int i = 0; i < offsets.size(); ++i)
+ tprintf("%d: %g\n", i, offsets[i]);
+ }
+ *c_out = MedianOfCircularValues(*m_out, &offsets);
+ if (debug_level_ > 1) {
+ tprintf("Median offset = %g, compared to mean of %g.\n",
+ *c_out, llsq.c(*m_out));
+ }
+ // Index_delta is the number of hypothesized line gaps present.
+ if (index_delta != nullptr)
+ *index_delta = max_index - min_index;
+ // Use the regression model's intercept to compute the error, as it may be
+ // a full line-spacing in disagreement with the median.
+ double rms_error = llsq.rms(*m_out, llsq.c(*m_out));
+ if (debug_level_ > 1) {
+ tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n",
+ m_in, median_offset, *m_out, *c_out, rms_error);
+ }
+ return rms_error;
+}
+
+BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew,
+ TO_BLOCK_LIST* blocks)
+ : page_skew_(page_skew), debug_level_(debug_level) {
+ TO_BLOCK_IT it(blocks);
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ TO_BLOCK* to_block = it.data();
+ BLOCK* block = to_block->block;
+ POLY_BLOCK* pb = block->pdblk.poly_block();
+ // A note about non-text blocks.
+ // On output, non-text blocks are supposed to contain a single empty word
+ // in each incoming text line. These mark out the polygonal bounds of the
+ // block. Ideally no baselines should be required, but currently
+ // make_words crashes if a baseline and xheight are not provided, so we
+ // include non-text blocks here, but flag them for special treatment.
+ bool non_text = pb != nullptr && !pb->IsText();  // A block with no POLY_BLOCK is treated as text.
+ blocks_.push_back(new BaselineBlock(debug_level_, non_text, to_block));  // One BaselineBlock per TO_BLOCK, in list order.
+ }
+}
+
+// Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
+// block-wise and page-wise data to smooth small blocks/rows, and applies
+// smoothing based on block/page-level skew and block-level linespacing.
+void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) {
+ GenericVector<double> block_skew_angles;
+ for (int i = 0; i < blocks_.size(); ++i) {
+ BaselineBlock* bl_block = blocks_[i];
+ if (debug_level_ > 0)
+ tprintf("Fitting initial baselines...\n");  // Printed once per block.
+ if (bl_block->FitBaselinesAndFindSkew(use_box_bottoms)) {  // Only blocks with a good skew contribute.
+ block_skew_angles.push_back(bl_block->skew_angle());
+ }
+ }
+ // Compute a page-wide default skew for blocks with too little information.
+ double default_block_skew = page_skew_.angle();  // Used as-is when no block produced a good skew.
+ if (!block_skew_angles.empty()) {
+ default_block_skew = MedianOfCircularValues(M_PI, &block_skew_angles);
+ }
+ if (debug_level_ > 0) {
+ tprintf("Page skew angle = %g\n", default_block_skew);
+ }
+ // Set bad lines in each block to the default block skew and then force fit
+ // a linespacing model where it makes sense to do so.
+ for (int i = 0; i < blocks_.size(); ++i) {
+ BaselineBlock* bl_block = blocks_[i];
+ bl_block->ParallelizeBaselines(default_block_skew);
+ bl_block->SetupBlockParameters(); // This replaced compute_row_stats.
+ }
+}
+
+// Computes the baseline splines for each TO_ROW in each TO_BLOCK and
+// other associated side-effects, including pre-associating blobs, computing
+// x-heights and displaying debug information.
+// NOTE that ComputeStraightBaselines must have been called first as this
+// sets up data in the TO_ROWs upon which this function depends.
+void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD& page_tr,
+ bool enable_splines,
+ bool remove_noise,
+ bool show_final_rows,
+ Textord* textord) {
+ for (int i = 0; i < blocks_.size(); ++i) {
+ BaselineBlock* bl_block = blocks_[i];
+ if (enable_splines)  // Preparation (and so remove_noise) only applies when splines are enabled.
+ bl_block->PrepareForSplineFitting(page_tr, remove_noise);
+ bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord);
+#ifndef GRAPHICS_DISABLED
+ if (show_final_rows) {
+ bl_block->DrawFinalRows(page_tr);
+ }
+#endif
+ }
+}
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/baselinedetect.h b/tesseract/src/textord/baselinedetect.h
new file mode 100644
index 00000000..579558ed
--- /dev/null
+++ b/tesseract/src/textord/baselinedetect.h
@@ -0,0 +1,276 @@
+///////////////////////////////////////////////////////////////////////
+// File: baselinedetect.h
+// Description: Initial Baseline Determination.
+// Copyright 2012 Google Inc. All Rights Reserved.
+// Author: rays@google.com (Ray Smith)
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_BASELINEDETECT_H_
+#define TESSERACT_TEXTORD_BASELINEDETECT_H_
+
+#include "detlinefit.h"
+#include "points.h"
+#include "rect.h"
+
+#include "genericvector.h"
+
+struct Pix;
+
+namespace tesseract {
+
+class Textord;
+class BLOBNBOX_LIST;
+class TO_BLOCK;
+class TO_BLOCK_LIST;
+class TO_ROW;
+
+ // Class to compute and hold baseline data for a TO_ROW.
+ // The straight baseline is represented by two points (baseline_pt1_ and
+ // baseline_pt2_) together with its fit error and a good/bad flag.
+ class BaselineRow {
+ public:
+ // Constructs the baseline data for the given row.
+ // NOTE(review): the meaning/units of line_size are not visible in this
+ // header - confirm against the constructor in baselinedetect.cpp.
+ BaselineRow(double line_size, TO_ROW* to_row);
+
+ // Returns the bounding box of all the blobs of the row.
+ const TBOX& bounding_box() const {
+ return bounding_box_;
+ }
+ // Sets the TO_ROW with the output straight line.
+ void SetupOldLineParameters(TO_ROW* row) const;
+
+ // Outputs diagnostic information.
+ void Print() const;
+
+ // Returns the skew angle (in radians) of the current baseline in [-pi,pi].
+ double BaselineAngle() const;
+ // Computes and returns the linespacing at the middle of the overlap
+ // between this and other.
+ double SpaceBetween(const BaselineRow& other) const;
+ // Computes and returns the displacement of the center of the line
+ // perpendicular to the given direction.
+ double PerpDisp(const FCOORD& direction) const;
+ // Computes the y coordinate at the given x using the straight baseline
+ // defined by baseline_pt1_ and baseline_pt2_.
+ double StraightYAtX(double x) const;
+
+ // Fits a straight baseline to the points. Returns true if it had enough
+ // points to be reasonably sure of the fitted baseline.
+ // If use_box_bottoms is false, baselines positions are formed by
+ // considering the outlines of the blobs.
+ bool FitBaseline(bool use_box_bottoms);
+ // Modifies an existing result of FitBaseline to be parallel to the given
+ // vector if that produces a better result.
+ // debug controls the amount of diagnostic output.
+ void AdjustBaselineToParallel(int debug, const FCOORD& direction);
+ // Modifies the baseline to snap to the textline grid if the existing
+ // result is not good enough.
+ // line_spacing and line_offset describe the textline grid to snap to.
+ // NOTE(review): the meaning of the returned double is not documented in
+ // this header - confirm against baselinedetect.cpp.
+ double AdjustBaselineToGrid(int debug, const FCOORD& direction,
+ double line_spacing, double line_offset);
+
+ private:
+ // Sets up displacement_modes_ with the top few modes of the perpendicular
+ // distance of each blob from the given direction vector, after rounding.
+ void SetupBlobDisplacements(const FCOORD& direction);
+
+ // Fits a line in the given direction to blobs that are close to the given
+ // target_offset perpendicular displacement from the direction. The fit
+ // error is allowed to be cheat_allowance worse than the existing fit, and
+ // will still be used.
+ // If cheat_allowance > 0, the new fit will be good and replace the current
+ // fit if it has better fit (with cheat) OR its error is below
+ // max_baseline_error_ and the old fit is marked bad.
+ // Otherwise the new fit will only replace the old if it is really better,
+ // or the old fit is marked bad and the new fit has sufficient points, as
+ // well as being within the max_baseline_error_.
+ void FitConstrainedIfBetter(int debug, const FCOORD& direction,
+ double cheat_allowance,
+ double target_offset);
+ // Returns the perpendicular distance of the point from the straight
+ // baseline.
+ float PerpDistanceFromBaseline(const FCOORD& pt) const;
+ // Computes the bounding box of the row.
+ void ComputeBoundingBox();
+
+ // The blobs of the row to which this BaselineRow adds extra information
+ // during baseline fitting. Note that blobs_ could easily come from either
+ // a TO_ROW or a ColPartition.
+ BLOBNBOX_LIST* blobs_;
+ // Bounding box of all the blobs.
+ TBOX bounding_box_;
+ // Fitter used to fit lines to the blobs.
+ DetLineFit fitter_;
+ // 2 points on the straight baseline.
+ FCOORD baseline_pt1_;
+ FCOORD baseline_pt2_;
+ // Set of modes of displacements. They indicate preferable baseline positions.
+ GenericVector<double> displacement_modes_;
+ // Quantization factor used for displacement_modes_.
+ double disp_quant_factor_;
+ // Half the acceptance range of blob displacements for computing the
+ // error during a constrained fit.
+ double fit_halfrange_;
+ // Max baseline error before a line is regarded as fitting badly.
+ double max_baseline_error_;
+ // The error of fit of the baseline.
+ double baseline_error_;
+ // True if this row seems to have a good baseline.
+ bool good_baseline_;
+ };
+
+ // Class to compute and hold baseline data for a TO_BLOCK.
+ // Holds one BaselineRow per row it works with, plus the block-level skew
+ // angle and line-spacing model (line_spacing_, line_offset_, model_error_).
+ class BaselineBlock {
+ public:
+ // debug_level sets the amount of debugging output, non_text marks the block
+ // as non-text (graphic), and block is the TO_BLOCK being processed.
+ BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block);
+
+ // Returns the TO_BLOCK to which this class adds information.
+ TO_BLOCK* block() const {
+ return block_;
+ }
+ // Returns the skew angle in radians. Per the member comments below, the
+ // value is only valid when good_skew_angle_ is true.
+ double skew_angle() const {
+ return skew_angle_;
+ }
+
+ // Computes and returns the absolute error of the given perp_disp from the
+ // given linespacing model.
+ static double SpacingModelError(double perp_disp, double line_spacing,
+ double line_offset);
+
+ // Fits straight line baselines and computes the skew angle from the
+ // median angle. Returns true if a good angle is found.
+ // If use_box_bottoms is false, baseline positions are formed by
+ // considering the outlines of the blobs.
+ bool FitBaselinesAndFindSkew(bool use_box_bottoms);
+
+ // Refits the baseline to a constrained angle, using the stored block
+ // skew if good enough, otherwise the supplied default skew.
+ void ParallelizeBaselines(double default_block_skew);
+
+ // Sets the parameters in TO_BLOCK that are needed by subsequent processes.
+ void SetupBlockParameters() const;
+
+ // Processing that is required before fitting baseline splines, but requires
+ // linear baselines in order to be successful:
+ // Removes noise if required
+ // Separates out underlines
+ // Pre-associates blob fragments.
+ // TODO(rays/joeliu) This entire section of code is inherited from the past
+ // and could be improved/eliminated.
+ // page_tr is used to size a debug window.
+ void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise);
+
+ // Fits splines to the textlines, or creates fake QSPLINES from the straight
+ // baselines that are already on the TO_ROWs.
+ // As a side-effect, computes the xheights of the rows and the block.
+ // Although x-height estimation is conceptually separate, it is part of
+ // detecting perspective distortion and therefore baseline fitting.
+ void FitBaselineSplines(bool enable_splines, bool show_final_rows,
+ Textord* textord);
+
+ // Draws the (straight) baselines and final blobs colored according to
+ // what was discarded as noise and what is associated with each row.
+ // page_tr is presumably used to size the display window, as for
+ // PrepareForSplineFitting - confirm against the implementation.
+ void DrawFinalRows(const ICOORD& page_tr);
+
+ // Render the generated spline baselines for this block on pix_in.
+ void DrawPixSpline(Pix* pix_in);
+
+ private:
+ // Top-level line-spacing calculation. Computes an estimate of the line-
+ // spacing, using the current baselines in the TO_ROWS of the block, and
+ // then refines it by fitting a regression line to the baseline positions
+ // as a function of their integer index.
+ // Returns true if it seems that the model is a reasonable fit to the
+ // observations.
+ bool ComputeLineSpacing();
+
+ // Computes the deskewed vertical position of each baseline in the block and
+ // stores them in the given vector.
+ void ComputeBaselinePositions(const FCOORD& direction,
+ GenericVector<double>* positions);
+
+ // Computes an estimate of the line spacing of the block from the median
+ // of the spacings between adjacent overlapping textlines.
+ void EstimateLineSpacing();
+
+ // Refines the line spacing of the block by fitting a regression
+ // line to the deskewed y-position of each baseline as a function of its
+ // estimated line index, allowing for a small error in the initial linespacing
+ // and choosing the best available model.
+ void RefineLineSpacing(const GenericVector<double>& positions);
+
+ // Given an initial estimate of line spacing (m_in) and the positions of each
+ // baseline, computes the line spacing of the block more accurately in m_out,
+ // and the corresponding intercept in c_out, and the number of spacings seen
+ // in index_delta. Returns the error of fit to the line spacing model.
+ double FitLineSpacingModel(const GenericVector<double>& positions,
+ double m_in, double* m_out, double* c_out,
+ int* index_delta);
+
+
+ // The block to which this class adds extra information used during baseline
+ // calculation.
+ TO_BLOCK* block_;
+ // The rows in the block that we will be working with.
+ PointerVector<BaselineRow> rows_;
+ // Amount of debugging output to provide.
+ int debug_level_;
+ // True if the block is non-text (graphic).
+ bool non_text_block_;
+ // True if the block has at least one good enough baseline to compute the
+ // skew angle and therefore skew_angle_ is valid.
+ bool good_skew_angle_;
+ // Angle of skew in radians using the conventional anticlockwise from x-axis.
+ double skew_angle_;
+ // Current best estimate line spacing in pixels perpendicular to skew_angle_.
+ double line_spacing_;
+ // Offset for baseline positions, in pixels. Each baseline is at
+ // line_spacing_ * n + line_offset_ for integer n, which represents
+ // [textline] line number in a line numbering system that has line 0 on or
+ // at least near the x-axis. Not equal to the actual line number of a line
+ // within a block as most blocks are not near the x-axis.
+ double line_offset_;
+ // The error of the line spacing model.
+ double model_error_;
+ };
+
+ // Page-level driver for baseline detection. Holds one BaselineBlock per
+ // block it works with and runs the two stages in order:
+ // ComputeStraightBaselines, then ComputeBaselineSplinesAndXheights.
+ class BaselineDetect {
+ public:
+ // debug_level sets the amount of debug output, page_skew is the page skew
+ // stored in page_skew_, and blocks is the list of TO_BLOCKs to process.
+ // NOTE(review): ownership of blocks is not visible in this header - confirm
+ // against the constructor in baselinedetect.cpp.
+ BaselineDetect(int debug_level, const FCOORD& page_skew,
+ TO_BLOCK_LIST* blocks);
+
+ ~BaselineDetect() = default;
+
+ // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
+ // block-wise and page-wise data to smooth small blocks/rows, and applies
+ // smoothing based on block/page-level skew and block-level linespacing.
+ // If use_box_bottoms is false, baseline positions are formed by considering
+ // the outlines of the blobs (see BaselineBlock::FitBaselinesAndFindSkew).
+ void ComputeStraightBaselines(bool use_box_bottoms);
+
+ // Computes the baseline splines for each TO_ROW in each TO_BLOCK and
+ // other associated side-effects, including pre-associating blobs, computing
+ // x-heights and displaying debug information.
+ // NOTE that ComputeStraightBaselines must have been called first as this
+ // sets up data in the TO_ROWs upon which this function depends.
+ void ComputeBaselineSplinesAndXheights(const ICOORD& page_tr,
+ bool enable_splines,
+ bool remove_noise,
+ bool show_final_rows,
+ Textord* textord);
+
+ private:
+ // Average (median) skew of the blocks on the page among those that have
+ // a good angle of their own.
+ FCOORD page_skew_;
+ // Amount of debug output to produce.
+ int debug_level_;
+ // The blocks that we are working with.
+ PointerVector<BaselineBlock> blocks_;
+ };
+
+} // namespace tesseract
+
+#endif // TESSERACT_TEXTORD_BASELINEDETECT_H_
diff --git a/tesseract/src/textord/bbgrid.cpp b/tesseract/src/textord/bbgrid.cpp
new file mode 100644
index 00000000..6e3e3346
--- /dev/null
+++ b/tesseract/src/textord/bbgrid.cpp
@@ -0,0 +1,285 @@
+///////////////////////////////////////////////////////////////////////
+// File: bbgrid.cpp
+// Description: Class to hold BLOBNBOXs in a grid for fast access
+// to neighbours.
+// Author: Ray Smith
+//
+// (C) Copyright 2007, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "bbgrid.h"
+#include "helpers.h"
+#include "ocrblock.h"
+
+namespace tesseract {
+
+///////////////////////////////////////////////////////////////////////
+// BBGrid IMPLEMENTATION.
+///////////////////////////////////////////////////////////////////////
+ // Constructs a grid whose cells are gridsize pixels square and which covers
+ // the box from bleft to tright. All geometry setup is delegated to Init.
+ GridBase::GridBase(int gridsize, const ICOORD& bleft, const ICOORD& tright) {
+   GridBase::Init(gridsize, bleft, tright);
+ }
+
+ // Destructor. Defaulted: GridBase itself releases nothing.
+ // It is defined here (out of line), so the compiler can create a single
+ // vtable instead of weak vtables in every compilation unit.
+ GridBase::~GridBase() = default;
+
+ // (Re)Initializes the grid geometry. gridsize is the edge length of a cell
+ // in pixels, and bleft/tright give the bounding box of everything that is
+ // to go in the grid. A gridsize of zero is replaced by 1 so that the
+ // divisions below (and in GridCoords) are well defined. The cell counts
+ // are rounded up so that partial cells at the top/right edge are included.
+ void GridBase::Init(int gridsize, const ICOORD& bleft, const ICOORD& tright) {
+   gridsize_ = (gridsize == 0) ? 1 : gridsize;
+   bleft_ = bleft;
+   tright_ = tright;
+   const int pixel_width = tright.x() - bleft.x();
+   const int pixel_height = tright.y() - bleft.y();
+   const int round_up = gridsize_ - 1;
+   gridwidth_ = (pixel_width + round_up) / gridsize_;
+   gridheight_ = (pixel_height + round_up) / gridsize_;
+   gridbuckets_ = gridwidth_ * gridheight_;
+ }
+
+ // Converts the image (pixel) coords x, y to grid cell coords in *grid_x,
+ // *grid_y. The offset from bleft_ is divided by the cell size and the
+ // result is then clipped into the grid, so points outside the grid map to
+ // the nearest border cell.
+ void GridBase::GridCoords(int x, int y, int* grid_x, int* grid_y) const {
+   const int x_offset = x - bleft_.x();
+   const int y_offset = y - bleft_.y();
+   *grid_x = x_offset / gridsize_;
+   *grid_y = y_offset / gridsize_;
+   ClipGridCoords(grid_x, grid_y);
+ }
+
+ // Clips the grid coords in place so that *x lies in [0, gridwidth_ - 1]
+ // and *y lies in [0, gridheight_ - 1].
+ void GridBase::ClipGridCoords(int* x, int* y) const {
+   const int last_column = gridwidth_ - 1;
+   const int last_row = gridheight_ - 1;
+   *x = ClipToRange(*x, 0, last_column);
+   *y = ClipToRange(*y, 0, last_row);
+ }
+
+ // Constructs an IntGrid with no cell storage; Init allocates it.
+ IntGrid::IntGrid() : grid_(nullptr) {}
+
+ // Constructs an IntGrid of the given geometry with every cell set to zero.
+ // grid_ starts as nullptr so that the delete [] inside Init is harmless.
+ IntGrid::IntGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright)
+     : grid_(nullptr) {
+   IntGrid::Init(gridsize, bleft, tright);
+ }
+
+ // Releases the cell array owned by this grid.
+ IntGrid::~IntGrid() {
+   delete[] grid_;
+ }
+
+ // (Re)Initialize the grid. The gridsize is the size in pixels of each cell,
+ // and bleft, tright are the bounding box of everything to go in it.
+ // Any previous cell storage is released and a fresh array of gridbuckets_
+ // ints is allocated and cleared to zero.
+ void IntGrid::Init(int gridsize, const ICOORD& bleft, const ICOORD& tright) {
+   GridBase::Init(gridsize, bleft, tright);
+   delete [] grid_;
+   // Drop the stale pointer before allocating: if new[] throws (out of memory,
+   // or a negative gridbuckets_ from an inverted bounding box), grid_ must not
+   // be left dangling for the destructor to delete a second time.
+   grid_ = nullptr;
+   grid_ = new int[gridbuckets_];
+   Clear();
+ }
+
+ // Resets every one of the gridbuckets_ cells of the grid to zero.
+ void IntGrid::Clear() {
+   for (int remaining = gridbuckets_; remaining > 0; --remaining) {
+     grid_[remaining - 1] = 0;
+   }
+ }
+
+ // Rotate the grid by rotation, keeping cell contents.
+ // rotation must be a multiple of 90 degrees.
+ // NOTE: due to partial cells, cell coverage in the rotated grid will be
+ // inexact. This is why there is no Rotate for the generic BBGrid.
+ // TODO(rays) investigate fixing this inaccuracy by moving the origin after
+ // rotation.
+ void IntGrid::Rotate(const FCOORD& rotation) {
+ // Enforce the axis-aligned precondition: one component must be zero.
+ ASSERT_HOST(rotation.x() == 0.0f || rotation.y() == 0.0f);
+ // Save the old geometry, as Init below overwrites it.
+ ICOORD old_bleft(bleft());
+ //ICOORD old_tright(tright());
+ int old_width = gridwidth();
+ int old_height = gridheight();
+ TBOX box(bleft(), tright());
+ box.rotate(rotation);
+ // Detach the old cell array so that the delete [] in Init does not free
+ // it; it is still needed as the source of the copy and is freed at the end.
+ int* old_grid = grid_;
+ grid_ = nullptr;
+ Init(gridsize(), box.botleft(), box.topright());
+ // Iterate over the old grid, copying data to the rotated position in the new.
+ int oldi = 0;
+ // x_step is the rotation vector scaled by the cell size: the displacement
+ // in rotated coords that corresponds to moving one cell along the old x.
+ FCOORD x_step(rotation);
+ x_step *= gridsize();
+ for (int oldy = 0; oldy < old_height; ++oldy) {
+ // Start of the old row (its left-most cell origin), in rotated coords.
+ FCOORD line_pos(old_bleft.x(), old_bleft.y() + gridsize() * oldy);
+ line_pos.rotate(rotation);
+ for (int oldx = 0; oldx < old_width; ++oldx, line_pos += x_step, ++oldi) {
+ int grid_x, grid_y;
+ // Add 0.5 before truncating to int, then map to a (clipped) new cell.
+ GridCoords(static_cast<int>(line_pos.x() + 0.5),
+ static_cast<int>(line_pos.y() + 0.5),
+ &grid_x, &grid_y);
+ // If several old cells land on the same new cell, the last one wins.
+ grid_[grid_y * gridwidth() + grid_x] = old_grid[oldi];
+ }
+ }
+ delete [] old_grid;
+ }
+
+ // Builds and returns a new IntGrid of the same geometry in which a cell
+ // holds the sum of the 3x3 neighbourhood (the cell itself included) of the
+ // corresponding cell in this grid. Only cells whose own value exceeds 1
+ // receive a sum; every other cell keeps the zero it was constructed with.
+ // Neighbours that fall outside the grid are clipped back onto the border,
+ // so edge cells are double counted, giving them the same range as the
+ // non-edge cells. The returned grid must be deleted after use.
+ IntGrid* IntGrid::NeighbourhoodSum() const {
+   IntGrid* sums = new IntGrid(gridsize(), bleft(), tright());
+   for (int row = 0; row < gridheight(); ++row) {
+     for (int col = 0; col < gridwidth(); ++col) {
+       if (GridCellValue(col, row) <= 1) {
+         continue;
+       }
+       int neighbourhood_total = 0;
+       for (int dy = -1; dy <= 1; ++dy) {
+         for (int dx = -1; dx <= 1; ++dx) {
+           // GridCellValue clips its coordinates to the grid itself.
+           neighbourhood_total += GridCellValue(col + dx, row + dy);
+         }
+       }
+       sums->SetGridCell(col, row, neighbourhood_total);
+     }
+   }
+   return sums;
+ }
+
+ // Returns true if more than half the area of the rect is covered by grid
+ // cells that are over the threshold.
+ // rect is in image (pixel) coordinates; it is converted to a (clipped) range
+ // of grid cells, and the part of each over-threshold cell that overlaps rect
+ // is accumulated and compared against half of rect's area.
+ bool IntGrid::RectMostlyOverThreshold(const TBOX& rect, int threshold) const {
+   int min_x, min_y, max_x, max_y;
+   GridCoords(rect.left(), rect.bottom(), &min_x, &min_y);
+   GridCoords(rect.right(), rect.top(), &max_x, &max_y);
+   int total_area = 0;
+   for (int y = min_y; y <= max_y; ++y) {
+     for (int x = min_x; x <= max_x; ++x) {
+       int value = GridCellValue(x, y);
+       if (value > threshold) {
+         // GridCoords subtracts bleft_ when mapping pixels to cells, so the
+         // grid origin has to be added back to express the cell in the same
+         // image coordinates as rect. Without it the intersection is wrong
+         // for any grid whose bottom-left is not at (0, 0).
+         const int cell_left = bleft_.x() + x * gridsize_;
+         const int cell_bottom = bleft_.y() + y * gridsize_;
+         TBOX cell_box(cell_left, cell_bottom,
+                       cell_left + gridsize_, cell_bottom + gridsize_);
+         cell_box &= rect;  // This is in-place box intersection.
+         total_area += cell_box.area();
+       }
+     }
+   }
+   return total_area * 2 > rect.area();
+ }
+
+ // Reports whether at least one grid cell touched by rect holds the value
+ // zero. rect is given in image coords and is converted to a (clipped)
+ // inclusive range of grid cells before scanning.
+ bool IntGrid::AnyZeroInRect(const TBOX& rect) const {
+   int first_col, first_row, last_col, last_row;
+   GridCoords(rect.left(), rect.bottom(), &first_col, &first_row);
+   GridCoords(rect.right(), rect.top(), &last_col, &last_row);
+   bool found_zero = false;
+   for (int row = first_row; !found_zero && row <= last_row; ++row) {
+     for (int col = first_col; !found_zero && col <= last_col; ++col) {
+       found_zero = (GridCellValue(col, row) == 0);
+     }
+   }
+   return found_zero;
+ }
+
+ // Creates a 1 bit-per-pixel Pix the size of the grid's bounding box and
+ // sets a cellsize x cellsize square for every cell whose value is over the
+ // given threshold, provided all four of its 4-neighbours are greater than
+ // zero. Neighbour lookups are clipped to the grid by GridCellValue, so at
+ // the border the missing neighbour reads back as the border cell itself.
+ // The y coordinate is flipped because the Pix has its origin at the top.
+ // NOTE(review): the flip uses tright().y() rather than the Pix height
+ // (tright().y() - bleft().y()); these differ when bleft().y() is non-zero -
+ // confirm that callers only use grids whose bottom-left is at y == 0.
+ // pixDestroy the result after use.
+ Pix* IntGrid::ThresholdToPix(int threshold) const {
+   const int pix_width = tright().x() - bleft().x();
+   const int pix_height = tright().y() - bleft().y();
+   Pix* result = pixCreate(pix_width, pix_height, 1);
+   const int cellsize = gridsize();
+   for (int y = 0; y < gridheight(); ++y) {
+     for (int x = 0; x < gridwidth(); ++x) {
+       if (GridCellValue(x, y) <= threshold) {
+         continue;
+       }
+       const bool horizontal_ok =
+           GridCellValue(x - 1, y) > 0 && GridCellValue(x + 1, y) > 0;
+       const bool vertical_ok =
+           GridCellValue(x, y - 1) > 0 && GridCellValue(x, y + 1) > 0;
+       if (!horizontal_ok || !vertical_ok) {
+         continue;
+       }
+       const int pix_x = x * cellsize;
+       const int pix_y = tright().y() - ((y + 1) * cellsize);
+       pixRasterop(result, pix_x, pix_y, cellsize, cellsize, PIX_SET,
+                   nullptr, 0, 0);
+     }
+   }
+   return result;
+ }
+
+ // Creates the 1 bit-per-pixel Pix used by the Trace*OnReducedPix functions,
+ // with one pixel per grid cell. The Pix spans the grid cells covered by box
+ // (measured from bleft in cells of gridsize pixels) plus a border of one
+ // cell all round. The grid coords of the bottom-left cell of the Pix are
+ // returned in *left and *bottom.
+ static Pix* GridReducedPix(const TBOX& box, int gridsize,
+                            ICOORD bleft, int* left, int* bottom) {
+   const int first_col = (box.left() - bleft.x()) / gridsize - 1;
+   const int first_row = (box.bottom() - bleft.y()) / gridsize - 1;
+   const int last_col = (box.right() - bleft.x()) / gridsize + 1;
+   const int last_row = (box.top() - bleft.y()) / gridsize + 1;
+   const int width_in_cells = last_col - first_col + 1;
+   const int height_in_cells = last_row - first_row + 1;
+   *left = first_col;
+   *bottom = first_row;
+   return pixCreate(width_in_cells, height_in_cells, 1);
+ }
+
+ // Returns a scaled Pix with one pixel per grid cell, in which a pixel is
+ // set (black) if the given outline enters the corresponding grid cell and
+ // clear if the outline does not touch it. The outline is walked one step
+ // at a time from its start position for pathlength() steps.
+ // The grid coords of the bottom-left of the Pix, which corresponds to
+ // (0, 0) on the Pix, are returned in *left and *bottom.
+ // Note that the Pix is used upside-down, with (0, 0) being the bottom-left.
+ Pix* TraceOutlineOnReducedPix(C_OUTLINE* outline, int gridsize,
+                               ICOORD bleft, int* left, int* bottom) {
+   Pix* reduced = GridReducedPix(outline->bounding_box(), gridsize, bleft,
+                                 left, bottom);
+   const int words_per_line = pixGetWpl(reduced);
+   l_uint32* raster = pixGetData(reduced);
+   const int num_steps = outline->pathlength();
+   ICOORD cursor = outline->start_pos();
+   int step_index = 0;
+   while (step_index < num_steps) {
+     // Cell of the current outline point, relative to the Pix origin.
+     const int col = (cursor.x() - bleft.x()) / gridsize - *left;
+     const int row = (cursor.y() - bleft.y()) / gridsize - *bottom;
+     SET_DATA_BIT(raster + row * words_per_line, col);
+     cursor += outline->step(step_index);
+     ++step_index;
+   }
+   return reduced;
+ }
+#if 0 // Example code shows how to use TraceOutlineOnReducedPix.
+ C_OUTLINE_IT ol_it(blob->cblob()->out_list());
+ int grid_left, grid_bottom;
+ Pix* pix = TraceOutlineOnReducedPix(ol_it.data(), gridsize_, bleft_,
+ &grid_left, &grid_bottom);
+ grid->InsertPixPtBBox(grid_left, grid_bottom, pix, blob);
+ pixDestroy(&pix);
+#endif
+
+ // As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE.
+ // The sides of the block's polygon are rendered point by point, and the
+ // grid cell under each visited point is set in the returned Pix.
+ // *left and *bottom receive the grid coords of the bottom-left of the Pix.
+ // NOTE(review): poly_block() is dereferenced without a null check - confirm
+ // that callers only pass blocks that have a polygon.
+ Pix* TraceBlockOnReducedPix(BLOCK* block, int gridsize,
+ ICOORD bleft, int* left, int* bottom) {
+ const TBOX& box = block->pdblk.bounding_box();
+ Pix* pix = GridReducedPix(box, gridsize, bleft, left, bottom);
+ int wpl = pixGetWpl(pix);
+ l_uint32* data = pixGetData(pix);
+ ICOORDELT_IT it(block->pdblk.poly_block()->points());
+ // The iterator is advanced inside the body, so each pass handles the side
+ // from the current vertex to the following one (presumably wrapping back to
+ // the first vertex after the last - confirm against the list iterator).
+ for (it.mark_cycle_pt(); !it.cycled_list();) {
+ ICOORD pos = *it.data();
+ it.forward();
+ ICOORD next_pos = *it.data();
+ ICOORD line_vector = next_pos - pos;
+ int major, minor;
+ ICOORD major_step, minor_step;
+ // Presumably yields the unit steps along the dominant and secondary axes
+ // of the side and their lengths - confirm against ICOORD::setup_render.
+ line_vector.setup_render(&major_step, &minor_step, &major, &minor);
+ // Error accumulator: a minor step is taken whenever the accumulated minor
+ // distance reaches the major length. Starting at major / 2 centres it.
+ int accumulator = major / 2;
+ while (pos != next_pos) {
+ // Mark the grid cell containing the current point of the side.
+ int grid_x = (pos.x() - bleft.x()) / gridsize - *left;
+ int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom;
+ SET_DATA_BIT(data + grid_y * wpl, grid_x);
+ pos += major_step;
+ accumulator += minor;
+ if (accumulator >= major) {
+ accumulator -= major;
+ pos += minor_step;
+ }
+ }
+ }
+ return pix;
+ }
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/bbgrid.h b/tesseract/src/textord/bbgrid.h
new file mode 100644
index 00000000..5d75aa38
--- /dev/null
+++ b/tesseract/src/textord/bbgrid.h
@@ -0,0 +1,957 @@
+///////////////////////////////////////////////////////////////////////
+// File: bbgrid.h
+// Description: Class to hold BLOBNBOXs in a grid for fast access
+// to neighbours.
+// Author: Ray Smith
+//
+// (C) Copyright 2007, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_BBGRID_H_
+#define TESSERACT_TEXTORD_BBGRID_H_
+
+#include <unordered_set>
+
+#include "clst.h"
+#include "coutln.h"
+#include "rect.h"
+#include "scrollview.h"
+
+#include "allheaders.h"
+
+class BLOCK;
+
+namespace tesseract {
+
+ // Helper function to return a scaled Pix with one pixel per grid cell,
+ // set (black) where the given outline enters the corresponding grid cell,
+ // and clear where the outline does not touch the grid cell.
+ // gridsize is the size of a grid cell in pixels and bleft is the pixel
+ // position of the bottom-left of the grid.
+ // Also returns the grid coords of the bottom-left of the Pix, in *left
+ // and *bottom, which corresponds to (0, 0) on the Pix.
+ // Note that the Pix is used upside-down, with (0, 0) being the bottom-left.
+ // The returned Pix is newly created; the caller must pixDestroy it.
+ Pix* TraceOutlineOnReducedPix(C_OUTLINE* outline, int gridsize,
+ ICOORD bleft, int* left, int* bottom);
+ // As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE.
+ Pix* TraceBlockOnReducedPix(BLOCK* block, int gridsize,
+ ICOORD bleft, int* left, int* bottom);
+
+template<class BBC, class BBC_CLIST, class BBC_C_IT> class GridSearch;
+
+ // The GridBase class is the base class for BBGrid and IntGrid.
+ // It holds the geometry and scale of the grid: the cell size in pixels,
+ // the grid dimensions in cells and the pixel bounding box that it covers.
+ class TESS_API GridBase {
+ public:
+ // Default construction does not set up any geometry: the int members
+ // below have no initializers, so Init must be called before use.
+ GridBase() = default;
+ // Constructs and initializes the grid; see Init for the arguments.
+ GridBase(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+ virtual ~GridBase();
+
+ // (Re)Initialize the grid. The gridsize is the size in pixels of each cell,
+ // and bleft, tright are the bounding box of everything to go in it.
+ void Init(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+
+ // Simple accessors.
+ int gridsize() const {
+ return gridsize_;
+ }
+ int gridwidth() const {
+ return gridwidth_;
+ }
+ int gridheight() const {
+ return gridheight_;
+ }
+ const ICOORD& bleft() const {
+ return bleft_;
+ }
+ const ICOORD& tright() const {
+ return tright_;
+ }
+ // Compute the given grid coordinates from image coords.
+ // The results are clipped to fit within the grid.
+ void GridCoords(int x, int y, int* grid_x, int* grid_y) const;
+
+ // Clip the given grid coordinates to fit within the grid.
+ void ClipGridCoords(int* x, int* y) const;
+
+ protected:
+ // TODO(rays) Make these private and migrate to the accessors in subclasses.
+ int gridsize_; // Pixel size of each grid cell.
+ int gridwidth_; // Size of the grid in cells.
+ int gridheight_;
+ int gridbuckets_; // Total cells in grid.
+ ICOORD bleft_; // Pixel coords of bottom-left of grid.
+ ICOORD tright_; // Pixel coords of top-right of grid.
+
+ private:
+ };
+
+ // The IntGrid maintains a single int for each cell in a grid.
+ // NOTE(review): grid_ is an owning raw pointer, but copy construction and
+ // copy assignment are not disabled in this declaration, so copying an
+ // IntGrid would presumably lead to a double delete - confirm that no caller
+ // copies one.
+ class IntGrid : public GridBase {
+ public:
+ // Constructs an IntGrid without geometry or cell storage; call Init.
+ IntGrid();
+ // Constructs and initializes the grid; see Init for the arguments.
+ IntGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+ ~IntGrid() override;
+
+ // (Re)Initialize the grid. The gridsize is the size in pixels of each cell,
+ // and bleft, tright are the bounding box of everything to go in it.
+ // Note that this is not virtual: it hides GridBase::Init of the same
+ // signature.
+ void Init(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+
+ // Clear all the ints in the grid to zero.
+ void Clear();
+
+ // Rotate the grid by rotation, keeping cell contents.
+ // rotation must be a multiple of 90 degrees.
+ // NOTE: due to partial cells, cell coverage in the rotated grid will be
+ // inexact. This is why there is no Rotate for the generic BBGrid.
+ void Rotate(const FCOORD& rotation);
+
+ // Returns a new IntGrid containing values equal to the sum of all the
+ // neighbouring cells. The returned grid must be deleted after use.
+ IntGrid* NeighbourhoodSum() const;
+
+ // Returns the value of the cell at the given grid coords. The coords are
+ // clipped to the grid first, so out-of-range coords read a border cell.
+ int GridCellValue(int grid_x, int grid_y) const {
+ ClipGridCoords(&grid_x, &grid_y);
+ return grid_[grid_y * gridwidth_ + grid_x];
+ }
+ // Sets the value of the cell at the given grid coords. Unlike
+ // GridCellValue, the coords are not clipped: they must lie within the grid
+ // or ASSERT_HOST fails.
+ void SetGridCell(int grid_x, int grid_y, int value) {
+ ASSERT_HOST(grid_x >= 0 && grid_x < gridwidth());
+ ASSERT_HOST(grid_y >= 0 && grid_y < gridheight());
+ grid_[grid_y * gridwidth_ + grid_x] = value;
+ }
+ // Returns true if more than half the area of the rect is covered by grid
+ // cells that are over the threshold.
+ bool RectMostlyOverThreshold(const TBOX& rect, int threshold) const;
+
+ // Returns true if any cell value in the given rectangle is zero.
+ bool AnyZeroInRect(const TBOX& rect) const;
+
+ // Returns a full-resolution binary pix in which each cell over the given
+ // threshold is filled as a black square. pixDestroy after use.
+ Pix* ThresholdToPix(int threshold) const;
+
+ private:
+ // Stored row by row: cell (x, y) is at index y * gridwidth_ + x.
+ int* grid_; // 2-d array of ints.
+ };
+
+ // The BBGrid class holds C_LISTs of template classes BBC (bounding box class)
+ // in a grid for fast neighbour access.
+ // The BBC class must have a member const TBOX& bounding_box() const.
+ // The BBC class must have been CLISTIZEH'ed elsewhere to make the
+ // list class BBC_CLIST and the iterator BBC_C_IT.
+ // Use of C_LISTs enables BBCs to exist in multiple cells simultaneously.
+ // As a consequence, ownership of BBCs is assumed to be elsewhere and
+ // persistent for at least the life of the BBGrid, or at least until Clear is
+ // called which removes all references to inserted objects without actually
+ // deleting them.
+ // Most uses derive a class from a specific instantiation of BBGrid,
+ // thereby making most of the ugly template notation go away.
+ // The friend class GridSearch, with the same template arguments, is
+ // used to search a grid efficiently in one of several search patterns.
+ template<class BBC, class BBC_CLIST, class BBC_C_IT> class BBGrid
+ : public GridBase {
+ friend class GridSearch<BBC, BBC_CLIST, BBC_C_IT>;
+ public:
+ // The default constructor takes no geometry; presumably Init must be
+ // called before the grid is used - confirm against the definition.
+ BBGrid();
+ // Constructs the grid with the given geometry; see Init for the arguments.
+ BBGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+ ~BBGrid() override;
+
+ // (Re)Initialize the grid. The gridsize is the size in pixels of each cell,
+ // and bleft, tright are the bounding box of everything to go in it.
+ void Init(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+
+ // Empty all the lists but leave the grid itself intact.
+ void Clear();
+ // Deallocate the data in the lists but otherwise leave the lists and the grid
+ // intact.
+ // free_method is the function used to dispose of each BBC.
+ void ClearGridData(void (*free_method)(BBC*));
+
+ // Insert a bbox into the appropriate place in the grid.
+ // If h_spread, then all cells covered horizontally by the box are
+ // used, otherwise, just the bottom-left. Similarly for v_spread.
+ // WARNING: InsertBBox may invalidate an active GridSearch. Call
+ // RepositionIterator() on any GridSearches that are active on this grid.
+ void InsertBBox(bool h_spread, bool v_spread, BBC* bbox);
+
+ // Using a pix from TraceOutlineOnReducedPix or TraceBlockOnReducedPix, in
+ // which each pixel corresponds to a grid cell, insert a bbox into every
+ // place in the grid where the corresponding pixel is 1. The Pix is handled
+ // upside-down to match the Tesseract coordinate system. (As created by
+ // TraceOutlineOnReducedPix or TraceBlockOnReducedPix.)
+ // (0, 0) in the pix corresponds to (left, bottom) in the
+ // grid (in grid coords), and the pix works up the grid from there.
+ // WARNING: InsertPixPtBBox may invalidate an active GridSearch. Call
+ // RepositionIterator() on any GridSearches that are active on this grid.
+ void InsertPixPtBBox(int left, int bottom, Pix* pix, BBC* bbox);
+
+ // Remove the bbox from the grid.
+ // WARNING: Any GridSearch operating on this grid could be invalidated!
+ // If a GridSearch is operating, call GridSearch::RemoveBBox() instead.
+ void RemoveBBox(BBC* bbox);
+
+ // Returns true if the given rectangle has no overlapping elements.
+ bool RectangleEmpty(const TBOX& rect);
+
+ // Returns an IntGrid showing the number of elements in each cell.
+ // Returned IntGrid must be deleted after use.
+ IntGrid* CountCellElements();
+
+ // Make a window of an appropriate size to display things in the grid.
+ // NOTE(review): x and y are presumably the screen position of the window -
+ // confirm against the definition.
+ ScrollView* MakeWindow(int x, int y, const char* window_name);
+
+ // Display the bounding boxes of the BLOBNBOXes in this grid.
+ // Use of this function requires an additional member of the BBC class:
+ // ScrollView::Color BBC::BoxColor() const.
+ void DisplayBoxes(ScrollView* window);
+
+ // ASSERT_HOST that every cell contains no more than one copy of each entry.
+ void AssertNoDuplicates();
+
+ // Handle a click event in a display window.
+ // Virtual, so derived grids can supply their own handling.
+ virtual void HandleClick(int x, int y);
+
+ protected:
+ BBC_CLIST* grid_; // 2-d array of CLISTS of BBC elements.
+
+ private:
+ };
+
+ // Hash functor for generic pointers. The hash value is the pointer's
+ // address, taken as an integer, divided by the size of the pointed-to type.
+ template<typename T> struct PtrHash {
+   size_t operator()(const T* ptr) const {
+     const uintptr_t address = reinterpret_cast<uintptr_t>(ptr);
+     return address / sizeof(T);
+   }
+ };
+
+
+// The GridSearch class enables neighbourhood searching on a BBGrid.
+// A GridSearch holds a pointer to the grid it searches plus the state of
+// (at most) one search in progress.
+template<class BBC, class BBC_CLIST, class BBC_C_IT> class GridSearch {
+ public:
+  GridSearch(BBGrid<BBC, BBC_CLIST, BBC_C_IT>* grid)
+      : grid_(grid) {
+  }
+
+  // Get the grid x, y coords of the most recently returned BBC.
+  int GridX() const {
+    return x_;
+  }
+  int GridY() const {
+    return y_;
+  }
+
+  // Sets the search mode to return a box only once.
+  // Returned elements are remembered in a hash set (returns_) that is
+  // consulted by the radius, side, vertical and rectangle searches.
+  // The set is cleared whenever a search is started and whenever
+  // RepositionIterator() runs, so uniqueness only holds between those events.
+  void SetUniqueMode(bool mode) {
+    unique_mode_ = mode;
+  }
+  // TODO(rays) Replace calls to ReturnedSeedElement with SetUniqueMode.
+  // Returns true if the grid cell of the current search position (x_, y_)
+  // is the cell that contains the centre of the bounding box of the most
+  // recently returned element.
+  // Apart from full search, all other searches return a box several
+  // times if the box is inserted with h_spread or v_spread.
+  // This method will return true for only one occurrence of each box
+  // that was inserted with both h_spread and v_spread as true.
+  // It will usually return false for boxes that were not inserted with
+  // both h_spread=true and v_spread=true.
+  // Returns false if there is no previous return, i.e. before the first
+  // Next* call of a search or after the search has been exhausted.
+  bool ReturnedSeedElement() const {
+    // previous_return_ is reset to nullptr by CommonStart() and CommonEnd(),
+    // so it must be checked before it is dereferenced.
+    if (previous_return_ == nullptr)
+      return false;
+    TBOX box = previous_return_->bounding_box();
+    int x_center = (box.left() + box.right()) / 2;
+    int y_center = (box.top() + box.bottom()) / 2;
+    int grid_x, grid_y;
+    grid_->GridCoords(x_center, y_center, &grid_x, &grid_y);
+    return (x_ == grid_x) && (y_ == grid_y);
+  }
+
+  // Various searching iterations... Note that these iterations
+  // all share data members, so you can't run more than one iteration
+  // in parallel in a single GridSearch instance, but multiple instances
+  // can search the same BBGrid in parallel.
+  // Note that all the searches can return blobs that may not exactly
+  // match the search conditions, since they return everything in the
+  // covered grid cells. It is up to the caller to check for
+  // appropriateness.
+  // TODO(rays) NextRectSearch only returns valid elements. Make the other
+  // searches test before return also and remove the tests from code
+  // that uses GridSearch.
+
+  // Start a new full search. Will iterate all stored blobs, from the top.
+  // If the blobs have been inserted using InsertBBox, (not InsertPixPtBBox)
+  // then the full search guarantees to return each blob in the grid once.
+  // Other searches may return a blob more than once if they have been
+  // inserted using h_spread or v_spread.
+  void StartFullSearch();
+  // Return the next bbox in the search or nullptr if done.
+  BBC* NextFullSearch();
+
+  // Start a new radius search. Will search in a spiral up to a
+  // given maximum radius in grid cells from the given center in pixels.
+  void StartRadSearch(int x, int y, int max_radius);
+  // Return the next bbox in the radius search or nullptr if the
+  // maximum radius has been reached.
+  BBC* NextRadSearch();
+
+  // Start a new left or right-looking search. Will search to the side
+  // for a box that vertically overlaps the given vertical line segment.
+  // CAVEAT: This search returns all blobs from the cells to the side
+  // of the start, and somewhat below, since there is no guarantee
+  // that there may not be a taller object in a lower cell. The
+  // blobs returned will include all those that vertically overlap and
+  // are no more than twice as high, but may also include some that do
+  // not overlap and some that are more than twice as high.
+  void StartSideSearch(int x, int ymin, int ymax);
+  // Return the next bbox in the side search or nullptr if the
+  // edge has been reached. Searches left to right or right to left
+  // according to the flag.
+  BBC* NextSideSearch(bool right_to_left);
+
+  // Start a vertical-looking search. Will search up or down
+  // for a box that horizontally overlaps the given line segment.
+  void StartVerticalSearch(int xmin, int xmax, int y);
+  // Return the next bbox in the vertical search or nullptr if the
+  // edge has been reached. Searches top to bottom or bottom to top
+  // according to the flag.
+  BBC* NextVerticalSearch(bool top_to_bottom);
+
+  // Start a rectangular search. Will search for a box that overlaps the
+  // given rectangle.
+  void StartRectSearch(const TBOX& rect);
+  // Return the next bbox in the rectangular search or nullptr if complete.
+  BBC* NextRectSearch();
+
+  // Remove the last returned BBC. Will not invalidate this. May invalidate
+  // any other concurrent GridSearch on the same grid. If any others are
+  // in use, call RepositionIterator on those, to continue without harm.
+  void RemoveBBox();
+  // Re-synchronizes the internal iterator with the current cell's list after
+  // the list has been modified. Also clears the unique-mode record.
+  void RepositionIterator();
+
+ private:
+  // Factored out helper to start a search.
+  void CommonStart(int x, int y);
+  // Factored out helper to complete a next search.
+  BBC* CommonNext();
+  // Factored out final return when search is exhausted.
+  BBC* CommonEnd();
+  // Factored out function to set the iterator to the current x_, y_
+  // grid coords and mark the cycle pt.
+  void SetIterator();
+
+ private:
+  // The grid we are searching.
+  BBGrid<BBC, BBC_CLIST, BBC_C_IT>* grid_ = nullptr;
+  // For executing a search. The different search algorithms use these in
+  // different ways, but most use x_origin_ and y_origin_ as the start position.
+  int x_origin_ = 0;
+  int y_origin_ = 0;
+  int max_radius_ = 0;
+  int radius_ = 0;
+  int rad_index_ = 0;
+  int rad_dir_ = 0;
+  TBOX rect_;
+  int x_ = 0;  // The current location in grid coords, of the current search.
+  int y_ = 0;
+  bool unique_mode_ = false;
+  BBC* previous_return_ = nullptr;  // Previous return from Next*.
+  BBC* next_return_ = nullptr;  // Current value of it_.data() used for repositioning.
+  // An iterator over the list at (x_, y_) in the grid_.
+  BBC_C_IT it_;
+  // Set of unique returned elements used when unique_mode_ is true.
+  std::unordered_set<BBC*, PtrHash<BBC> > returns_;
+};
+
+// qsort-style comparator ordering BBCs by ascending bounding_box().left().
+// Ties are broken by right, then bottom, then top (all ascending).
+template<class BBC>
+int SortByBoxLeft(const void* void1, const void* void2) {
+  // Each argument points at a BBC*, so peel off one level of indirection.
+  const BBC* first = *static_cast<const BBC* const*>(void1);
+  const BBC* second = *static_cast<const BBC* const*>(void2);
+  const auto& box1 = first->bounding_box();
+  const auto& box2 = second->bounding_box();
+  int diff = box1.left() - box2.left();
+  if (diff == 0)
+    diff = box1.right() - box2.right();
+  if (diff == 0)
+    diff = box1.bottom() - box2.bottom();
+  if (diff == 0)
+    diff = box1.top() - box2.top();
+  return diff;
+}
+
+// qsort-style comparator ordering BBCs right-to-left: descending
+// bounding_box().right(), then descending left, then ascending bottom,
+// then ascending top.
+template<class BBC>
+int SortRightToLeft(const void* void1, const void* void2) {
+  // Each argument points at a BBC*, so peel off one level of indirection.
+  const BBC* first = *static_cast<const BBC* const*>(void1);
+  const BBC* second = *static_cast<const BBC* const*>(void2);
+  const auto& box1 = first->bounding_box();
+  const auto& box2 = second->bounding_box();
+  // The horizontal keys are compared in reverse (second minus first).
+  int diff = box2.right() - box1.right();
+  if (diff == 0)
+    diff = box2.left() - box1.left();
+  if (diff == 0)
+    diff = box1.bottom() - box2.bottom();
+  if (diff == 0)
+    diff = box1.top() - box2.top();
+  return diff;
+}
+
+// qsort-style comparator ordering BBCs by ascending bounding_box().bottom().
+// Ties are broken by top, then left, then right (all ascending).
+template<class BBC>
+int SortByBoxBottom(const void* void1, const void* void2) {
+  // Each argument points at a BBC*, so peel off one level of indirection.
+  const BBC* first = *static_cast<const BBC* const*>(void1);
+  const BBC* second = *static_cast<const BBC* const*>(void2);
+  const auto& box1 = first->bounding_box();
+  const auto& box2 = second->bounding_box();
+  int diff = box1.bottom() - box2.bottom();
+  if (diff == 0)
+    diff = box1.top() - box2.top();
+  if (diff == 0)
+    diff = box1.left() - box2.left();
+  if (diff == 0)
+    diff = box1.right() - box2.right();
+  return diff;
+}
+
+///////////////////////////////////////////////////////////////////////
+// BBGrid IMPLEMENTATION.
+///////////////////////////////////////////////////////////////////////
+// Default constructor: the cell array stays unallocated (nullptr) until
+// Init() is called.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBGrid<BBC, BBC_CLIST, BBC_C_IT>::BBGrid()
+    : grid_(nullptr) {}
+
+// Constructs the grid and immediately initializes it via Init() with the
+// given cell size and bounding corners.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBGrid<BBC, BBC_CLIST, BBC_C_IT>::BBGrid(int gridsize,
+                                         const ICOORD& bleft,
+                                         const ICOORD& tright)
+    : grid_(nullptr) {
+  // grid_ must be nullptr here because Init() deletes the old array first.
+  Init(gridsize, bleft, tright);
+}
+
+// Destructor: releases the array of cell lists.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBGrid<BBC, BBC_CLIST, BBC_C_IT>::~BBGrid() {
+  delete[] grid_;
+}
+
+// (Re)Initialize the grid. gridsize is the size in pixels of each cell;
+// bleft and tright are the corners of the bounding box of everything that
+// will go in it. Any previously allocated cell array is discarded.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::Init(int gridsize,
+                                            const ICOORD& bleft,
+                                            const ICOORD& tright) {
+  // The base-class Init runs first; gridbuckets_ is read only after it.
+  GridBase::Init(gridsize, bleft, tright);
+  delete[] grid_;
+  grid_ = new BBC_CLIST[gridbuckets_];
+}
+
+// Empties every cell list with shallow_clear(), keeping the array of
+// lists itself allocated.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::Clear() {
+  int cell = 0;
+  while (cell < gridbuckets_) {
+    grid_[cell].shallow_clear();
+    ++cell;
+  }
+}
+
+// Calls free_method on every element found by a full search of the grid.
+// The lists and the grid array themselves are left as they are.
+// Does nothing if the grid has not been allocated.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::ClearGridData(
+    void (*free_method)(BBC*)) {
+  if (grid_ == nullptr)
+    return;
+  GridSearch<BBC, BBC_CLIST, BBC_C_IT> full_search(this);
+  full_search.StartFullSearch();
+  // Pass 1: collect the elements in a temporary list, so that nothing is
+  // freed while the search is still walking the grid.
+  BBC_CLIST pending;
+  BBC_C_IT pending_it(&pending);
+  for (BBC* element = full_search.NextFullSearch(); element != nullptr;
+       element = full_search.NextFullSearch()) {
+    pending_it.add_after_then_move(element);
+  }
+  // Pass 2: hand each collected element to the caller's deleter.
+  for (pending_it.mark_cycle_pt(); !pending_it.cycled_list();
+       pending_it.forward()) {
+    free_method(pending_it.data());
+  }
+}
+
+// Insert a bbox into the grid, keeping each cell list sorted by
+// SortByBoxLeft. With h_spread, every cell column covered by the box is
+// used, otherwise only the column of its left edge; v_spread does the same
+// for rows, falling back to the row of the bottom edge.
+// WARNING: InsertBBox may invalidate an active GridSearch. Call
+// RepositionIterator() on any GridSearches that are active on this grid.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::InsertBBox(bool h_spread, bool v_spread,
+                                                  BBC* bbox) {
+  TBOX box = bbox->bounding_box();
+  int first_x, first_y, last_x, last_y;
+  GridCoords(box.left(), box.bottom(), &first_x, &first_y);
+  GridCoords(box.right(), box.top(), &last_x, &last_y);
+  // Without spreading, collapse the range to the bottom-left cell.
+  if (!h_spread)
+    last_x = first_x;
+  if (!v_spread)
+    last_y = first_y;
+  for (int y = first_y; y <= last_y; ++y) {
+    const int row_start = y * gridwidth_;
+    for (int x = first_x; x <= last_x; ++x) {
+      grid_[row_start + x].add_sorted(SortByBoxLeft<BBC>, true, bbox);
+    }
+  }
+}
+
+// Using a pix from TraceOutlineOnReducedPix or TraceBlockOnReducedPix, in
+// which each pixel corresponds to a grid cell, insert a bbox into every
+// place in the grid where the corresponding pixel is 1. The Pix is handled
+// upside-down to match the Tesseract coordinate system. (As created by
+// TraceOutlineOnReducedPix or TraceBlockOnReducedPix.)
+// (0, 0) in the pix corresponds to (left, bottom) in the
+// grid (in grid coords), and the pix works up the grid from there.
+// No bounds checking is done here: the caller must make sure that the pix,
+// offset by (left, bottom), lies inside the grid.
+// WARNING: InsertPixPtBBox may invalidate an active GridSearch. Call
+// RepositionIterator() on any GridSearches that are active on this grid.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::InsertPixPtBBox(int left, int bottom,
+                                                       Pix* pix, BBC* bbox) {
+  const int width = pixGetWidth(pix);
+  const int height = pixGetHeight(pix);
+  // The data pointer and words-per-line do not change between rows, so
+  // fetch them once instead of once per row.
+  l_uint32* pix_data = pixGetData(pix);
+  const int wpl = pixGetWpl(pix);
+  for (int y = 0; y < height; ++y) {
+    l_uint32* row_data = pix_data + y * wpl;
+    // Index of the grid cell that corresponds to pixel (0, y).
+    const int row_start = (bottom + y) * gridwidth_ + left;
+    for (int x = 0; x < width; ++x) {
+      if (GET_DATA_BIT(row_data, x)) {
+        grid_[row_start + x].add_sorted(SortByBoxLeft<BBC>, true, bbox);
+      }
+    }
+  }
+}
+
+// Remove every occurrence of bbox from all cells covered by its current bounding_box(). Only unlinks; bbox is not deleted here.
+// WARNING: Any GridSearch operating on this grid could be invalidated!
+// If a GridSearch is operating, call GridSearch::RemoveBBox() instead.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::RemoveBBox(BBC* bbox) {
+ TBOX box = bbox->bounding_box();
+ int start_x, start_y, end_x, end_y;
+ GridCoords(box.left(), box.bottom(), &start_x, &start_y);  // Cell of the bottom-left corner.
+ GridCoords(box.right(), box.top(), &end_x, &end_y);  // Cell of the top-right corner.
+ int grid_index = start_y * gridwidth_;  // Index of the first cell in row y; advanced by one row per outer iteration.
+ for (int y = start_y; y <= end_y; ++y, grid_index += gridwidth_) {
+ for (int x = start_x; x <= end_x; ++x) {  // All covered cells are scanned, whatever spread was used on insertion.
+ BBC_C_IT it(&grid_[grid_index + x]);
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ if (it.data() == bbox)  // Pointer identity, not box equality.
+ it.extract();
+ }
+ }
+ }
+}
+
+// Returns true if no element of the grid overlaps the given rectangle,
+// i.e. a rectangle search over rect yields nothing.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+bool BBGrid<BBC, BBC_CLIST, BBC_C_IT>::RectangleEmpty(const TBOX& rect) {
+  GridSearch<BBC, BBC_CLIST, BBC_C_IT> rect_search(this);
+  rect_search.StartRectSearch(rect);
+  // The first result is enough to decide.
+  const BBC* first_hit = rect_search.NextRectSearch();
+  return first_hit == nullptr;
+}
+
+// Builds a new IntGrid with the same geometry as this grid, in which each
+// cell holds the length of the corresponding cell list.
+// The returned IntGrid is heap-allocated and must be deleted by the caller.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+IntGrid* BBGrid<BBC, BBC_CLIST, BBC_C_IT>::CountCellElements() {
+  auto* counts = new IntGrid(gridsize(), bleft(), tright());
+  // cell_index tracks y * gridwidth() + x as the cells are walked row by row.
+  int cell_index = 0;
+  for (int y = 0; y < gridheight(); ++y) {
+    for (int x = 0; x < gridwidth(); ++x, ++cell_index) {
+      counts->SetGridCell(x, y, grid_[cell_index].length());
+    }
+  }
+  return counts;
+}
+
+
+// Event handler that forwards SVET_CLICK events to the HandleClick(x, y)
+// method of the grid (of type G) it was constructed with. All other event
+// types are ignored.
+template<class G> class TabEventHandler : public SVEventHandler {
+ public:
+  explicit TabEventHandler(G* grid) : grid_(grid) {}
+
+  void Notify(const SVEvent* sv_event) override {
+    if (sv_event->type != SVET_CLICK)
+      return;
+    grid_->HandleClick(sv_event->x, sv_event->y);
+  }
+
+ private:
+  G* grid_;  // Receiver of click notifications.
+};
+
+#ifndef GRAPHICS_DISABLED
+
+// Creates a ScrollView window sized to the pixel extent of the grid,
+// positioned at the given x,y, with a click handler bound to this grid and
+// a grey rectangle drawn around the grid area. Returns the new window.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+ScrollView* BBGrid<BBC, BBC_CLIST, BBC_C_IT>::MakeWindow(
+    int x, int y, const char* window_name) {
+  const int width = tright_.x() - bleft_.x();
+  const int height = tright_.y() - bleft_.y();
+  // The same extent is passed twice (presumably window size and canvas
+  // size - confirm against the ScrollView constructor).
+  ScrollView* window = new ScrollView(window_name, x, y,
+                                      width, height,
+                                      width, height,
+                                      true);
+  window->AddEventHandler(
+      new TabEventHandler<BBGrid<BBC, BBC_CLIST, BBC_C_IT> >(this));
+  window->Pen(ScrollView::GREY);
+  window->Rectangle(0, 0, width, height);
+  return window;
+}
+
+// Draws the bounding box of every element of the grid (as returned by a
+// full search) into the given window, each in the colour reported by the
+// element itself, then updates the window.
+// Use of this function requires an additional member of the BBC class:
+// ScrollView::Color BBC::BoxColor() const.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::DisplayBoxes(ScrollView* tab_win) {
+  tab_win->Pen(ScrollView::BLUE);
+  tab_win->Brush(ScrollView::NONE);
+
+  GridSearch<BBC, BBC_CLIST, BBC_C_IT> full_search(this);
+  full_search.StartFullSearch();
+  for (BBC* element = full_search.NextFullSearch(); element != nullptr;
+       element = full_search.NextFullSearch()) {
+    const TBOX& box = element->bounding_box();
+    const int left_x = box.left();
+    const int right_x = box.right();
+    const int top_y = box.top();
+    const int bottom_y = box.bottom();
+    // Each element picks its own pen colour.
+    tab_win->Pen(element->BoxColor());
+    tab_win->Rectangle(left_x, bottom_y, right_x, top_y);
+  }
+  tab_win->Update();
+}
+
+#endif // !GRAPHICS_DISABLED
+
+// ASSERT_HOST that every cell contains no more than one copy of each entry (entries are compared by pointer).
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::AssertNoDuplicates() {
+ // Process all grid cells.
+ for (int i = gridwidth_ * gridheight_ - 1; i >= 0; --i) {
+ // Iterate over all elements except the last.
+ for (BBC_C_IT it(&grid_[i]); !it.at_last(); it.forward()) {
+ BBC* ptr = it.data();
+ BBC_C_IT it2(it);  // Second cursor, starting from the position of it.
+ // None of the rest of the elements in the list should equal ptr.
+ for (it2.forward(); !it2.at_first(); it2.forward()) {  // Stops once it2 is back at the first element.
+ ASSERT_HOST(it2.data() != ptr);
+ }
+ }
+ }
+}
+
+// Default click handler for a display window: just logs the click
+// position. Declared virtual in the class, so subclasses may override it.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::HandleClick(int x, int y) {
+  tprintf("Click at (%d, %d)\n", x, y);
+}
+
+///////////////////////////////////////////////////////////////////////
+// GridSearch IMPLEMENTATION.
+///////////////////////////////////////////////////////////////////////
+
+// Start a new full search. Will iterate all stored blobs.
+// The search is started at pixel position (left edge, top edge) of the
+// grid; x_ and y_ then track the grid cell currently being searched.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::StartFullSearch() {
+  const int start_x = grid_->bleft_.x();
+  const int start_y = grid_->tright_.y();
+  CommonStart(start_x, start_y);
+}
+
+// Return the next bbox in the search or nullptr if done.
+// The other searches will return a box that overlaps the grid cell
+// thereby duplicating boxes, but NextFullSearch only returns each box once (from the cell holding its bottom-left corner).
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::NextFullSearch() {
+ int x;  // Grid coords of the candidate's bottom-left corner.
+ int y;
+ do {
+ while (it_.cycled_list()) {  // Current cell exhausted (or empty): move on to the next cell.
+ ++x_;  // Cells are visited left to right...
+ if (x_ >= grid_->gridwidth_) {
+ --y_;  // ...then one row down.
+ if (y_ < 0)  // Ran off the bottom row: the search is complete.
+ return CommonEnd();
+ x_ = 0;
+ }
+ SetIterator();
+ }
+ CommonNext();  // Sets previous_return_ to the candidate and advances it_.
+ TBOX box = previous_return_->bounding_box();
+ grid_->GridCoords(box.left(), box.bottom(), &x, &y);
+ } while (x != x_ || y != y_);  // Skip copies that live in any cell other than the bottom-left one.
+ return previous_return_;
+}
+
+// Start a new radius search centred on pixel position (x, y), limited to
+// max_radius grid cells.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::StartRadSearch(int x, int y,
+                                                          int max_radius) {
+  // x_origin_ and y_origin_ (set by CommonStart) are the centre of the
+  // circle. radius_ is the radius of the (diamond-shaped) circle, while
+  // rad_index_ and rad_dir_ together give the position around it.
+  // Starting with radius_ = 0 and rad_dir_ = 3 makes the first step of
+  // NextRadSearch roll over to radius 1, direction 0.
+  rad_dir_ = 3;
+  rad_index_ = 0;
+  radius_ = 0;
+  max_radius_ = max_radius;
+  CommonStart(x, y);
+}
+
+// Return the next bbox in the radius search or nullptr if the
+// maximum radius has been reached. In unique mode, elements already returned by this search are skipped.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::NextRadSearch() {
+ do {
+ while (it_.cycled_list()) {  // Current cell exhausted: step to the next cell on the diamond.
+ ++rad_index_;  // Position along the current side.
+ if (rad_index_ >= radius_) {  // End of this side: turn to the next direction.
+ ++rad_dir_;
+ rad_index_ = 0;
+ if (rad_dir_ >= 4) {  // All four sides done: grow the radius.
+ ++radius_;
+ if (radius_ > max_radius_)
+ return CommonEnd();
+ rad_dir_ = 0;
+ }
+ }
+ ICOORD offset = C_OUTLINE::chain_step(rad_dir_);  // Blend of the steps for rad_dir_ and rad_dir_ + 1, weighted by rad_index_.
+ offset *= radius_ - rad_index_;
+ offset += C_OUTLINE::chain_step(rad_dir_ + 1) * rad_index_;
+ x_ = x_origin_ + offset.x();
+ y_ = y_origin_ + offset.y();
+ if (x_ >= 0 && x_ < grid_->gridwidth_ &&  // Cells outside the grid are skipped: it_ stays cycled, so the loop steps again.
+ y_ >= 0 && y_ < grid_->gridheight_)
+ SetIterator();
+ }
+ CommonNext();
+ } while (unique_mode_ && returns_.find(previous_return_) != returns_.end());  // Retry while the candidate was already returned.
+ if (unique_mode_)
+ returns_.insert(previous_return_);  // Remember it so that it is not returned again.
+ return previous_return_;
+}
+
+// Start a new left or right-looking search. Will search to the side
+// for a box that vertically overlaps the given vertical line segment.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::StartSideSearch(int x,
+                                                           int ymin, int ymax) {
+  // Side search records the x in x_origin_ and the ymax in y_origin_ (both
+  // via CommonStart), and the height in grid cells of the vertical strip to
+  // search in radius_. To guarantee finding overlapping objects of up to
+  // twice the given size, the height is doubled before rounding up.
+  const int cell_size = grid_->gridsize_;
+  const int doubled_height = (ymax - ymin) * 2;
+  radius_ = (doubled_height + cell_size - 1) / cell_size;
+  rad_index_ = 0;
+  CommonStart(x, ymax);
+}
+
+// Return the next bbox in the side search or nullptr if the
+// edge has been reached. Searches left to right or right to left
+// according to the flag. In unique mode, elements already returned by this search are skipped.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::NextSideSearch(bool right_to_left) {
+ do {
+ while (it_.cycled_list()) {  // Current cell exhausted: move on to the next cell.
+ ++rad_index_;  // Number of rows below y_origin_ within the current column.
+ if (rad_index_ > radius_) {  // Column strip finished: step sideways to the next column.
+ if (right_to_left)
+ --x_;
+ else
+ ++x_;
+ rad_index_ = 0;
+ if (x_ < 0 || x_ >= grid_->gridwidth_)  // Walked off the side of the grid.
+ return CommonEnd();
+ }
+ y_ = y_origin_ - rad_index_;  // The strip runs downwards from y_origin_.
+ if (y_ >= 0 && y_ < grid_->gridheight_)  // Rows outside the grid are skipped: it_ stays cycled.
+ SetIterator();
+ }
+ CommonNext();
+ } while (unique_mode_ && returns_.find(previous_return_) != returns_.end());  // Retry while the candidate was already returned.
+ if (unique_mode_)
+ returns_.insert(previous_return_);
+ return previous_return_;
+}
+
+// Start a vertical-looking search. Will search up or down
+// for a box that horizontally overlaps the given line segment.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::StartVerticalSearch(int xmin,
+                                                               int xmax,
+                                                               int y) {
+  // Vertical search records the xmin in x_origin_ and the y in y_origin_
+  // (both via CommonStart), and the width in grid cells (rounded up) of the
+  // horizontal strip to search in radius_.
+  const int cell_size = grid_->gridsize_;
+  const int strip_width = xmax - xmin;
+  radius_ = (strip_width + cell_size - 1) / cell_size;
+  rad_index_ = 0;
+  CommonStart(xmin, y);
+}
+
+// Return the next bbox in the vertical search or nullptr if the
+// edge has been reached. Searches top to bottom or bottom to top
+// according to the flag. In unique mode, elements already returned by this search are skipped.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::NextVerticalSearch(
+ bool top_to_bottom) {
+ do {
+ while (it_.cycled_list()) {  // Current cell exhausted: move on to the next cell.
+ ++rad_index_;  // Number of columns right of x_origin_ within the current row.
+ if (rad_index_ > radius_) {  // Row strip finished: step to the next row.
+ if (top_to_bottom)
+ --y_;  // Decreasing y is downwards.
+ else
+ ++y_;
+ rad_index_ = 0;
+ if (y_ < 0 || y_ >= grid_->gridheight_)  // Walked off the top or bottom of the grid.
+ return CommonEnd();
+ }
+ x_ = x_origin_ + rad_index_;  // The strip runs rightwards from x_origin_.
+ if (x_ >= 0 && x_ < grid_->gridwidth_)  // Columns outside the grid are skipped: it_ stays cycled.
+ SetIterator();
+ }
+ CommonNext();
+ } while (unique_mode_ && returns_.find(previous_return_) != returns_.end());  // Retry while the candidate was already returned.
+ if (unique_mode_)
+ returns_.insert(previous_return_);
+ return previous_return_;
+}
+
+// Start a rectangular search. Will search for a box that overlaps the
+// given rectangle.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::StartRectSearch(const TBOX& rect) {
+  // The search proceeds left to right, top to bottom, so it starts at the
+  // top-left corner. CommonStart leaves the left column in x_origin_.
+  rect_ = rect;
+  CommonStart(rect.left(), rect.top());
+  // The bottom-right corner supplies the limits: the right column is kept
+  // in max_radius_, and y_origin_ is overwritten with the bottom row.
+  int right_column;
+  int bottom_row;
+  grid_->GridCoords(rect.right(), rect.bottom(), &right_column, &bottom_row);
+  max_radius_ = right_column;
+  y_origin_ = bottom_row;
+}
+
+// Return the next bbox in the rectangular search or nullptr if complete. Only elements whose box overlaps rect_ are returned.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::NextRectSearch() {
+ do {
+ while (it_.cycled_list()) {  // Current cell exhausted: move on to the next cell.
+ ++x_;
+ if (x_ > max_radius_) {  // max_radius_ holds the right-most column of the rectangle.
+ --y_;  // Next row down.
+ x_ = x_origin_;  // Back to the left-most column of the rectangle.
+ if (y_ < y_origin_)  // y_origin_ holds the bottom row of the rectangle.
+ return CommonEnd();
+ }
+ SetIterator();
+ }
+ CommonNext();
+ } while (!rect_.overlap(previous_return_->bounding_box()) ||  // Reject non-overlapping candidates and, in unique mode, repeats.
+ (unique_mode_ && returns_.find(previous_return_) != returns_.end()));
+ if (unique_mode_)
+ returns_.insert(previous_return_);
+ return previous_return_;
+}
+
+// Remove the last returned BBC. Will not invalidate this. May invalidate
+// any other concurrent GridSearch on the same grid. If any others are
+// in use, call RepositionIterator on those, to continue without harm. Does nothing if there is no previous return.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::RemoveBBox() {
+ if (previous_return_ != nullptr) {
+ // Remove all instances of previous_return_ from the list, so the iterator
+ // remains valid after removal from the rest of the grid cells.
+ // If previous_return_ is not on the list, then it has been removed already.
+ BBC* prev_data = nullptr;  // Last element seen that was kept.
+ BBC* new_previous_return = nullptr;  // Element that preceded the removed one, if any.
+ it_.move_to_first();
+ for (it_.mark_cycle_pt(); !it_.cycled_list();) {  // The iterator is advanced inside the body.
+ if (it_.data() == previous_return_) {
+ new_previous_return = prev_data;
+ it_.extract();
+ it_.forward();
+ next_return_ = it_.cycled_list() ? nullptr : it_.data();  // Element following the removed one, or nullptr at the end of the list.
+ } else {
+ prev_data = it_.data();
+ it_.forward();
+ }
+ }
+ grid_->RemoveBBox(previous_return_);  // Now remove it from every other cell that it covers.
+ previous_return_ = new_previous_return;
+ RepositionIterator();  // Put it_ back just past the new previous_return_.
+ }
+}
+
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::RepositionIterator() {  // Re-synchronizes it_ with the current cell's list after the list was modified.
+ // Something was deleted, so we have little choice but to clear the
+ // returns list.
+ returns_.clear();
+ // Reset the iterator back to one past the previous return.
+ // If the previous_return_ is no longer in the list, then
+ // next_return_ serves as a backup.
+ it_.move_to_first();
+ // Special case, the first element was removed and reposition
+ // iterator was called. In this case, the data is fine, but the
+ // cycle point is not. Detect it and return.
+ if (!it_.empty() && it_.data() == next_return_) {
+ it_.mark_cycle_pt();
+ return;
+ }
+ for (it_.mark_cycle_pt(); !it_.cycled_list(); it_.forward()) {
+ if (it_.data() == previous_return_ ||  // Found the previous return itself, or the element just before next_return_.
+ it_.data_relative(1) == next_return_) {
+ CommonNext();  // Step past it, refreshing previous_return_ and next_return_.
+ return;
+ }
+ }
+ // We ran off the end of the list. Move to a new cell next time.
+ previous_return_ = nullptr;
+ next_return_ = nullptr;
+}
+
+// Shared start-up code for all searches: converts the pixel position
+// (x, y) to grid coords, makes that cell both the origin and the current
+// cell, points the iterator at it and resets the per-search state.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::CommonStart(int x, int y) {
+  grid_->GridCoords(x, y, &x_origin_, &y_origin_);
+  x_ = x_origin_;
+  y_ = y_origin_;
+  SetIterator();
+  // A fresh search has returned nothing yet.
+  returns_.clear();
+  previous_return_ = nullptr;
+  if (it_.empty()) {
+    next_return_ = nullptr;
+  } else {
+    next_return_ = it_.data();
+  }
+}
+
+// Shared tail of the Next* functions: records the element under the
+// iterator as previous_return_, advances the iterator, records the element
+// now under it (nullptr if the list has cycled) as next_return_, and
+// returns the recorded previous_return_.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::CommonNext() {
+  BBC* current = it_.data();
+  previous_return_ = current;
+  it_.forward();
+  if (it_.cycled_list()) {
+    next_return_ = nullptr;
+  } else {
+    next_return_ = it_.data();
+  }
+  return current;
+}
+
+// Shared exit for an exhausted search: forgets both the previous and the
+// next return, and yields nullptr for the caller to return.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::CommonEnd() {
+  next_return_ = nullptr;
+  previous_return_ = nullptr;
+  return nullptr;
+}
+
+// Points the iterator at the list of the grid cell at the current
+// (x_, y_) grid coords and marks the cycle point there.
+template<class BBC, class BBC_CLIST, class BBC_C_IT>
+void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::SetIterator() {
+  // Cells are stored row by row.
+  const int cell_index = y_ * grid_->gridwidth_ + x_;
+  it_ = &(grid_->grid_[cell_index]);
+  it_.mark_cycle_pt();
+}
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_BBGRID_H_
diff --git a/tesseract/src/textord/blkocc.cpp b/tesseract/src/textord/blkocc.cpp
new file mode 100644
index 00000000..f63b8ef9
--- /dev/null
+++ b/tesseract/src/textord/blkocc.cpp
@@ -0,0 +1,165 @@
+/*****************************************************************************
+ *
+ * File: blkocc.cpp (Formerly blockocc.c)
+ * Description: Block Occupancy routines
+ * Author: Chris Newton
+ *
+ * (c) Copyright 1991, Hewlett-Packard Company.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ ******************************************************************************/
+
+#include "blkocc.h"
+
+#include "drawtord.h"
+#include "errcode.h"
+
+#include <cctype>
+#include <cmath>
+
+#include "helpers.h"
+
+namespace tesseract {
+
+double_VAR(textord_underline_threshold, 0.5, "Fraction of width occupied");  // Multiplied by the blob width in test_underline to get the minimum occupancy.
+
+// Forward declarations of the file-local (static) projection helpers defined below.
+static void horizontal_cblob_projection(C_BLOB *blob, // blob to project
+ STATS *stats); // output
+static void horizontal_coutline_projection(C_OUTLINE *outline, // outline to project
+ STATS *stats); // output
+
+/**
+ * test_underline
+ *
+ * Check to see if the blob is an underline (or an overline).
+ *
+ * The blob is projected horizontally to get an occupancy per row. The
+ * largest occupancy is then found in three bands of rows: below the
+ * baseline (descender band), from baseline to baseline + xheight (x band)
+ * and above that (ascender band). Rows outside the bounding box of the
+ * blob are never examined.
+ *
+ * Return true if the largest occupancy in the descender band (underline)
+ * or in the ascender band (overline) is more than twice that of the x
+ * band and also more than textord_underline_threshold times the width of
+ * the blob.
+ */
+
+bool test_underline(     // look for underlines
+    bool testing_on,     ///< drawing blob
+    C_BLOB* blob,        ///< blob to test
+    int16_t baseline,    ///< coords of baseline
+    int16_t xheight      ///< height of line
+) {
+  STATS projection;
+  const TBOX blob_box = blob->bounding_box();
+  const int16_t blob_width = blob_box.width();
+  // Rows are handled as int: an int16_t counter compared against the
+  // int-promoted bound baseline + xheight cannot terminate when that bound
+  // reaches INT16_MAX.
+  const int bottom = blob_box.bottom();
+  const int top = blob_box.top();
+
+  projection.set_range(bottom, top + 1);
+  if (testing_on) {
+    tprintf
+      ("Testing underline on blob at (%d,%d)->(%d,%d), base=%d\nOccs:",
+      blob_box.left(), blob_box.bottom(),
+      blob_box.right(), blob_box.top(),
+      baseline);
+  }
+  horizontal_cblob_projection(blob, &projection);
+
+  // Largest pile count over the rows [first, last], clipped to the rows of
+  // the blob. Yields 0 if the clipped range is empty.
+  auto max_occupancy = [&projection, bottom, top](int first,
+                                                  int last) -> int32_t {
+    if (first < bottom)
+      first = bottom;
+    if (last > top)
+      last = top;
+    int32_t best = 0;
+    for (int row = first; row <= last; ++row) {
+      const int32_t count = projection.pile_count(row);
+      if (count > best)
+        best = count;
+    }
+    return best;
+  };
+
+  const int x_band_top = baseline + xheight;
+  const int32_t desc_occ = max_occupancy(bottom, baseline - 1);
+  const int32_t x_occ = max_occupancy(baseline, x_band_top);
+  const int32_t asc_occ = max_occupancy(x_band_top + 1, top);
+
+  if (testing_on) {
+    tprintf ("%d %d %d\n", desc_occ, x_occ, asc_occ);
+  }
+  // An all-zero projection is unexpected, so it is always reported.
+  if (desc_occ == 0 && x_occ == 0 && asc_occ == 0) {
+    tprintf ("Bottom=%d, top=%d, base=%d, x=%d\n",
+      blob_box.bottom (), blob_box.top (), baseline, xheight);
+    projection.print();
+  }
+
+  const double min_occ = blob_width * textord_underline_threshold;
+  if (desc_occ > x_occ + x_occ && desc_occ > min_occ)
+    return true;                                        // real underline
+  return asc_occ > x_occ + x_occ && asc_occ > min_occ;  // overline, else neither
+}
+
+
+/**
+ * horizontal_cblob_projection
+ *
+ * Add the horizontal projection of every outline on the outline list of
+ * the blob to the given STATS, by calling horizontal_coutline_projection
+ * on each of them.
+ */
+
+static void horizontal_cblob_projection(  // project outlines
+    C_BLOB *blob,                         ///< blob to project
+    STATS *stats                          ///< output
+) {
+  // Iterator over the outlines of the blob.
+  C_OUTLINE_IT outline_it(blob->out_list());
+
+  outline_it.mark_cycle_pt();
+  while (!outline_it.cycled_list()) {
+    horizontal_coutline_projection(outline_it.data(), stats);
+    outline_it.forward();
+  }
+}
+
+
+/**
+ * horizontal_coutline_projection
+ *
+ * Compute the horizontal projection of an outline from its steps, and
+ * recursively from its child outlines, and add it to the given STATS.
+ *
+ * Walking the outline from its start position, each step with positive y
+ * adds +x to the row at the current y, and each step with negative y adds
+ * -x to the row at the current y - 1. Steps with no y component add
+ * nothing.
+ */
+
+static void horizontal_coutline_projection(  // project outlines
+    C_OUTLINE *outline,                      ///< outline to project
+    STATS *stats                             ///< output
+) {
+  ICOORD pos;                                // current point
+  ICOORD step;                               // edge step
+  C_OUTLINE_IT out_it = outline->child ();
+
+  pos = outline->start_pos ();
+  const int32_t length = outline->pathlength ();
+  // The step index must be as wide as length: an int16_t counter would wrap
+  // before reaching the end of an outline of more than 32767 steps.
+  for (int32_t stepindex = 0; stepindex < length; stepindex++) {
+    step = outline->step (stepindex);
+    if (step.y () > 0) {
+      stats->add (pos.y (), pos.x ());
+    }
+    else if (step.y () < 0) {
+      stats->add (pos.y () - 1, -pos.x ());
+    }
+    pos += step;
+  }
+
+  // Child outlines are projected into the same STATS.
+  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
+    horizontal_coutline_projection (out_it.data (), stats);
+  }
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/blkocc.h b/tesseract/src/textord/blkocc.h
new file mode 100644
index 00000000..4dee5cd8
--- /dev/null
+++ b/tesseract/src/textord/blkocc.h
@@ -0,0 +1,253 @@
+/******************************************************************************
+ *
+ * File: blkocc.h (Formerly blockocc.h)
+ * Description: Block Occupancy routines
+ * Author: Chris Newton
+ *
+ * (c) Copyright 1991, Hewlett-Packard Company.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ ******************************************************************************/
+
+#ifndef BLKOCC_H
+#define BLKOCC_H
+
+#include "params.h"
+#include "elst.h"
+
+namespace tesseract {
+
+class C_BLOB;
+
+/***************************************************************************
+CLASS REGION_OCC
+
+ The class REGION_OCC defines a section of outline which exists entirely
+ within a single region. The only data held is the min and max x limits of
+ the outline within the region.
+
+ REGION_OCCs are held on lists, one list for each region. The lists are
+ built in sorted order of min x. Overlapping REGION_OCCs are not permitted on
+ a single list. An overlapping region to be added causes the existing region
+ to be extended. This extension may result in the following REGION_OCC on the
+ list overlapping the amended one. In this case the amended REGION_OCC is
+ further extended to include the range of the following one, so that the
+ following one can be deleted.
+
+****************************************************************************/
+
+class REGION_OCC : public ELIST_LINK
+{
+ public:
+  float min_x;          // Lowest x in region
+  float max_x;          // Highest x in region
+  int16_t region_type;  // Type of crossing
+
+  // Default constructor; it does not assign the members.
+  // Used only in COPIER etc.
+  REGION_OCC() = default;
+
+  // Builds a REGION_OCC with the given x limits and region type.
+  REGION_OCC(float min, float max, int16_t region)
+      : min_x(min), max_x(max), region_type(region) {}
+};
+
+// List declarations for REGION_OCC (the macro is defined outside this file;
+// presumably it comes from elst.h, included above).
+ELISTIZEH (REGION_OCC)
+// True when the range [range_min, range_max] lies wholly inside the band:
+// range_min is at or above band_min and range_max is strictly below band_max.
+// Each argument is parenthesised in the expansion and evaluated once.
+#define RANGE_IN_BAND(band_max, band_min, range_max, range_min) \
+(((range_min) >= (band_min)) && ((range_max) < (band_max)))
+/************************************************************************
+Adapted from the following procedure so that it can be used in the bands
+class in an include file...
+
+bool range_in_band[
+ range within band?
+int16_t band_max,
+int16_t band_min,
+int16_t range_max,
+int16_t range_min]
+{
+ if ((range_min >= band_min) && (range_max < band_max))
+ return true;
+ else
+ return false;
+}
+***********************************************************************/
+// True when the range [range_min, range_max] shares any part of the band:
+// range_max is at or above band_min and range_min is strictly below band_max.
+// Each argument is parenthesised in the expansion and evaluated once.
+#define RANGE_OVERLAPS_BAND(band_max, band_min, range_max, range_min) \
+(((range_max) >= (band_min)) && ((range_min) < (band_max)))
+/************************************************************************
+Adapted from the following procedure so that it can be used in the bands
+class in an include file...
+
+bool range_overlaps_band[
+ range crosses band?
+int16_t band_max,
+int16_t band_min,
+int16_t range_max,
+int16_t range_min]
+{
+ if ((range_max >= band_min) && (range_min < band_max))
+ return true;
+ else
+ return false;
+}
+***********************************************************************/
+/**********************************************************************
+ Bands
+ -----
+
+ BAND 4
+--------------------------------
+ BAND 3
+--------------------------------
+
+ BAND 2
+
+--------------------------------
+
+ BAND 1
+
+Band 0 is the dot band
+
+Each band has an error margin above and below. An outline is not considered to
+have significantly changed bands until it has moved out of the error margin.
+*************************************************************************/
+class BAND
+{
+ public:
+  int16_t max_max;  // upper max
+  int16_t max;      // nominal max
+  int16_t min_max;  // lower max
+  int16_t max_min;  // upper min
+  int16_t min;      // nominal min
+  int16_t min_min;  // lower min
+
+  BAND() = default;  // constructor
+
+  // Initialise a band by setting all six limits at once.
+  void set(int16_t new_max_max,    // upper max
+           int16_t new_max,        // new nominal max
+           int16_t new_min_max,    // new lower max
+           int16_t new_max_min,    // new upper min
+           int16_t new_min,        // new nominal min
+           int16_t new_min_min) {  // new lower min
+    // lower edge limits
+    min_min = new_min_min;
+    min = new_min;
+    max_min = new_max_min;
+    // upper edge limits
+    min_max = new_min_max;
+    max = new_max;
+    max_max = new_max_max;
+  }
+
+  // Is y inside the minimal limits [max_min, min_max)?
+  bool in_minimal(float y) {
+    const bool at_or_above_lower = y >= max_min;
+    const bool below_upper = y < min_max;
+    return at_or_above_lower && below_upper;
+  }
+
+  // Is y inside the nominal limits [min, max)?
+  bool in_nominal(float y) {
+    const bool at_or_above_lower = y >= min;
+    const bool below_upper = y < max;
+    return at_or_above_lower && below_upper;
+  }
+
+  // Is y inside the maximal limits [min_min, max_max)?
+  bool in_maximal(float y) {
+    const bool at_or_above_lower = y >= min_min;
+    const bool below_upper = y < max_max;
+    return at_or_above_lower && below_upper;
+  }
+
+  // In the range tests below y1 and y2 may be passed in either order:
+  // the larger is used as the top of the range and the other as the bottom.
+
+  // Does the range overlap the minimal limits?
+  bool range_overlaps_minimal(float y1,    // one range limit
+                              float y2) {  // other range limit
+    const float range_top = (y1 > y2) ? y1 : y2;
+    const float range_bottom = (y1 > y2) ? y2 : y1;
+    return RANGE_OVERLAPS_BAND(min_max, max_min, range_top, range_bottom);
+  }
+
+  // Does the range overlap the nominal limits?
+  bool range_overlaps_nominal(float y1,    // one range limit
+                              float y2) {  // other range limit
+    const float range_top = (y1 > y2) ? y1 : y2;
+    const float range_bottom = (y1 > y2) ? y2 : y1;
+    return RANGE_OVERLAPS_BAND(max, min, range_top, range_bottom);
+  }
+
+  // Does the range overlap the maximal limits?
+  bool range_overlaps_maximal(float y1,    // one range limit
+                              float y2) {  // other range limit
+    const float range_top = (y1 > y2) ? y1 : y2;
+    const float range_bottom = (y1 > y2) ? y2 : y1;
+    return RANGE_OVERLAPS_BAND(max_max, min_min, range_top, range_bottom);
+  }
+
+  // Is the range wholly within the minimal limits?
+  bool range_in_minimal(float y1,    // one range limit
+                        float y2) {  // other range limit
+    const float range_top = (y1 > y2) ? y1 : y2;
+    const float range_bottom = (y1 > y2) ? y2 : y1;
+    return RANGE_IN_BAND(min_max, max_min, range_top, range_bottom);
+  }
+
+  // Is the range wholly within the nominal limits?
+  bool range_in_nominal(float y1,    // one range limit
+                        float y2) {  // other range limit
+    const float range_top = (y1 > y2) ? y1 : y2;
+    const float range_bottom = (y1 > y2) ? y2 : y1;
+    return RANGE_IN_BAND(max, min, range_top, range_bottom);
+  }
+
+  // Is the range wholly within the maximal limits?
+  bool range_in_maximal(float y1,    // one range limit
+                        float y2) {  // other range limit
+    const float range_top = (y1 > y2) ? y1 : y2;
+    const float range_bottom = (y1 > y2) ? y2 : y1;
+    return RANGE_IN_BAND(max_max, min_min, range_top, range_bottom);
+  }
+};
+
+/* Standard positions */
+
+/* The band diagram above shows bands 1 to 4 plus the dot band 0. */
+#define MAX_NUM_BANDS 5
+/* NOTE(review): presumably a marker for "no band assigned"; the users of
+   this value are not in this header - confirm before relying on it. */
+#define UNDEFINED_BAND 99
+/* NOTE(review): presumably stand-ins for an absent lower/upper band limit;
+   both fit in the int16_t limits held by BAND - confirm against callers. */
+#define NO_LOWER_LIMIT -9999
+#define NO_UPPER_LIMIT 9999
+
+/* Band 0 is the dot band. */
+#define DOT_BAND 0
+
+/* Special occupancy code emitted for the 0 region at the end of a word */
+
+#define END_OF_WERD_CODE 255
+
+// Declarations of the block-occupancy parameters. The *_VAR_H macros are
+// defined outside this file (presumably in params.h, included above); each
+// line gives the parameter name, a default value and a description.
+extern BOOL_VAR_H (blockocc_show_result, false, "Show intermediate results");
+extern INT_VAR_H (blockocc_desc_height, 0,
+"Descender height after normalisation");
+extern INT_VAR_H (blockocc_asc_height, 255,
+"Ascender height after normalisation");
+extern INT_VAR_H (blockocc_band_count, 4, "Number of bands used");
+extern double_VAR_H (textord_underline_threshold, 0.9,
+"Fraction of width occupied");
+
+// Tests whether the blob is an underline or an overline relative to the
+// given baseline and xheight. The definition lives in blkocc.cpp.
+bool test_underline( //look for underlines
+ bool testing_on, //drawing blob
+ C_BLOB* blob, //blob to test
+ int16_t baseline, //coords of baseline
+ int16_t xheight //height of line
+);
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/blobgrid.cpp b/tesseract/src/textord/blobgrid.cpp
new file mode 100644
index 00000000..53a1d7b3
--- /dev/null
+++ b/tesseract/src/textord/blobgrid.cpp
@@ -0,0 +1,45 @@
+///////////////////////////////////////////////////////////////////////
+// File: blobgrid.cpp
+// Description: BBGrid of BLOBNBOX with useful BLOBNBOX-specific methods.
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: rays@google.com (Ray Smith)
+// Created: Sat Jun 11 10:30:01 PST 2011
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "blobgrid.h"
+
+namespace tesseract {
+
+// Constructor. Passes the grid size and the bottom-left / top-right corners
+// straight through to the BBGrid base class.
+BlobGrid::BlobGrid(int gridsize,
+                   const ICOORD& bleft,
+                   const ICOORD& tright)
+    : BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>(gridsize, bleft, tright) {}
+
+// Destructor.
+// It is defaulted, but defined here rather than in the header, so the
+// compiler can create a single vtable instead of weak vtables in every
+// compilation unit.
+BlobGrid::~BlobGrid() = default;
+
+// Adds the blobs of the given list to the grid, with x and y spreading,
+// skipping any blob for which joined_to_prev() is true.
+// The blobs are not removed from the source list, so ownership remains
+// with the source list.
+void BlobGrid::InsertBlobList(BLOBNBOX_LIST* blobs) {
+  BLOBNBOX_IT it(blobs);
+  it.mark_cycle_pt();
+  while (!it.cycled_list()) {
+    BLOBNBOX* candidate = it.data();
+    if (!candidate->joined_to_prev()) {
+      InsertBBox(true, true, candidate);
+    }
+    it.forward();
+  }
+}
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/blobgrid.h b/tesseract/src/textord/blobgrid.h
new file mode 100644
index 00000000..54b19aeb
--- /dev/null
+++ b/tesseract/src/textord/blobgrid.h
@@ -0,0 +1,46 @@
+///////////////////////////////////////////////////////////////////////
+// File: blobgrid.h
+// Description: BBGrid of BLOBNBOX with useful BLOBNBOX-specific methods.
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: rays@google.com (Ray Smith)
+// Created: Sat Jun 11 10:26:01 PST 2011
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+
+#ifndef TESSERACT_TEXTORD_BLOBGRID_H_
+#define TESSERACT_TEXTORD_BLOBGRID_H_
+
+#include "bbgrid.h"
+#include "blobbox.h"
+
+namespace tesseract {
+
+// NOTE(review): presumably declares the BLOBNBOX_CLIST / BLOBNBOX_C_IT types
+// used below; the macro is defined outside this file - confirm.
+CLISTIZEH(BLOBNBOX)
+
+// Grid search specialised for the same element, list and iterator types as
+// BlobGrid.
+using BlobGridSearch = GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>;
+
+// BBGrid of BLOBNBOX with useful BLOBNBOX-specific methods.
+class TESS_API BlobGrid : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
+ public:
+ // The three arguments are forwarded to the BBGrid base class.
+ BlobGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+ // Declared here and defined in blobgrid.cpp.
+ ~BlobGrid() override;
+
+ // Inserts all the blobs from the given list, with x and y spreading,
+ // without removing from the source list, so ownership remains with the
+ // source list.
+ void InsertBlobList(BLOBNBOX_LIST* blobs);
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_BLOBGRID_H_
diff --git a/tesseract/src/textord/ccnontextdetect.cpp b/tesseract/src/textord/ccnontextdetect.cpp
new file mode 100644
index 00000000..cfbbb95a
--- /dev/null
+++ b/tesseract/src/textord/ccnontextdetect.cpp
@@ -0,0 +1,323 @@
+///////////////////////////////////////////////////////////////////////
+// File: ccnontextdetect.cpp
+// Description: Connected-Component-based photo (non-text) detection.
+// Author: rays@google.com (Ray Smith)
+//
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "ccnontextdetect.h"
+#include "imagefind.h"
+#include "strokewidth.h"
+
+namespace tesseract {
+
+// Max number of neighbour small objects per squared gridsize before a grid
+// cell becomes image.
+const double kMaxSmallNeighboursPerPix = 1.0 / 32;
+// Max number of small blobs a large blob may overlap before it is rejected
+// and determined to be image.
+const int kMaxLargeOverlapsWithSmall = 3;
+// Max number of small blobs a medium blob may overlap before it is rejected
+// and determined to be image. Larger than for large blobs as medium blobs
+// may be complex Chinese characters. Very large Chinese characters are going
+// to overlap more medium blobs than small.
+const int kMaxMediumOverlapsWithSmall = 12;
+// Max number of normal blobs a large blob may overlap before it is rejected
+// and determined to be image. This is set higher to allow for drop caps, which
+// may overlap a lot of good text blobs.
+const int kMaxLargeOverlapsWithMedium = 12;
+// Multiplier of original noise_count used to test for the case of spreading
+// noise beyond where it should really be.
+const int kOriginalNoiseMultiple = 8;
+// Pixel padding for noise blobs when rendering on the image
+// mask to encourage them to join together. Make it too big and images
+// will fatten out too much and have to be clipped to text.
+const int kNoisePadding = 4;
+// Fraction of max_noise_count_ to be added to the noise count if there is
+// photo mask in the background.
+const double kPhotoOffsetFraction = 0.375;
+// Min ratio of perimeter^2/16area for a "good" blob in estimating noise
+// density. Good blobs are supposed to be highly likely real text.
+// We consider a square to have unit ratio, where A=(p/4)^2, hence the factor
+// of 16. Digital circles are weird and have a minimum ratio of pi/64, not
+// the 1/(4pi) that you would expect.
+const double kMinGoodTextPARatio = 1.5;
+
+// Constructor. Sets up the underlying BlobGrid and derives the noise
+// threshold from the area of a grid cell. No noise density map exists
+// until ComputeNonTextMask creates one.
+CCNonTextDetect::CCNonTextDetect(int gridsize,
+                                 const ICOORD& bleft, const ICOORD& tright)
+    : BlobGrid(gridsize, bleft, tright),
+      max_noise_count_(0),
+      noise_density_(nullptr) {
+  // kMaxSmallNeighboursPerPix is a count per squared gridsize, so scale it
+  // by the cell area and truncate to an integer count.
+  const double noise_limit = kMaxSmallNeighboursPerPix * gridsize * gridsize;
+  max_noise_count_ = static_cast<int>(noise_limit);
+  // TODO(rays) break max_noise_count_ out into an area-proportional
+  // value, as now plus an additive constant for the number of text blobs
+  // in the 3x3 neighbourhood - maybe 9.
+}
+
+// Destructor. Releases the noise density map held in noise_density_
+// (the constructor initialises the pointer to nullptr).
+CCNonTextDetect::~CCNonTextDetect() {
+ delete noise_density_;
+}
+
+// Creates and returns a Pix with the same resolution as the original
+// in which 1 (black) pixels represent likely non text (photo, line drawing)
+// areas of the page, deleting from the blob_block the blobs that were
+// determined to be non-text.
+// The photo_map is used to bias the decision towards non-text, rather than
+// supplying a definite decision.
+// The blob_block is the usual result of connected component analysis,
+// holding the detected blobs.
+// The returned Pix should be PixDestroyed after use.
+// The noise density map computed here is kept in noise_density_; a map left
+// over from an earlier call is released first so that it is not leaked.
+// If debug is true, intermediate masks are written to junknoisemask.png and
+// junkccphotomask.png, and (unless GRAPHICS_DISABLED) a window shows the
+// kept and deleted blobs and waits to be destroyed before returning.
+Pix* CCNonTextDetect::ComputeNonTextMask(bool debug, Pix* photo_map,
+                                         TO_BLOCK* blob_block) {
+  // Insert the smallest blobs into the grid.
+  InsertBlobList(&blob_block->small_blobs);
+  InsertBlobList(&blob_block->noise_blobs);
+  // Add the medium blobs that don't have a good strokewidth neighbour.
+  // Those that do go into good_grid as an antidote to spreading beyond the
+  // real reaches of a noise region.
+  BlobGrid good_grid(gridsize(), bleft(), tright());
+  BLOBNBOX_IT blob_it(&blob_block->blobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    // perimeter^2 / (16 * area), which is 1 for a square.
+    double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0;
+    perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area();
+    if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio)
+      InsertBBox(true, true, blob);
+    else
+      good_grid.InsertBBox(true, true, blob);
+  }
+  // Release any map from a previous call before it is overwritten.
+  delete noise_density_;
+  noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid);
+  good_grid.Clear();  // Not needed any more.
+  Pix* pix = noise_density_->ThresholdToPix(max_noise_count_);
+  if (debug) {
+    pixWrite("junknoisemask.png", pix, IFF_PNG);
+  }
+  ScrollView* win = nullptr;
+  #ifndef GRAPHICS_DISABLED
+  if (debug) {
+    win = MakeWindow(0, 400, "Photo Mask Blobs");
+  }
+  #endif  // !GRAPHICS_DISABLED
+  // Large and medium blobs are not text if they overlap with "a lot" of small
+  // blobs.
+  MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
+                            kMaxLargeOverlapsWithSmall,
+                            win, ScrollView::DARK_GREEN, pix);
+  MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall,
+                            win, ScrollView::WHITE, pix);
+  // Clear the grid of small blobs and insert the medium blobs.
+  Clear();
+  InsertBlobList(&blob_block->blobs);
+  MarkAndDeleteNonTextBlobs(&blob_block->large_blobs,
+                            kMaxLargeOverlapsWithMedium,
+                            win, ScrollView::DARK_GREEN, pix);
+  // Clear again before we start deleting the blobs in the grid.
+  Clear();
+  // With overlap testing disabled (-1) only the noise density decides.
+  MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1,
+                            win, ScrollView::CORAL, pix);
+  MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1,
+                            win, ScrollView::GOLDENROD, pix);
+  MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1,
+                            win, ScrollView::WHITE, pix);
+  if (debug) {
+    #ifndef GRAPHICS_DISABLED
+    win->Update();
+    #endif  // !GRAPHICS_DISABLED
+    pixWrite("junkccphotomask.png", pix, IFF_PNG);
+    #ifndef GRAPHICS_DISABLED
+    delete win->AwaitEvent(SVET_DESTROY);
+    delete win;
+    #endif  // !GRAPHICS_DISABLED
+  }
+  return pix;
+}
+
+// Computes and returns the noise_density IntGrid, at the same gridsize as
+// this by summing the number of small elements in a 3x3 neighbourhood of
+// each grid cell. good_grid is filled with blobs that are considered most
+// likely good text, and this is filled with small and medium blobs that are
+// more likely non-text.
+// The photo_map is used to bias the decision towards non-text, rather than
+// supplying a definite decision.
+// If debug is true, cells that are over the threshold yet contain good blobs
+// are reported with tprintf.
+// The returned grid is handed to the caller; the two temporary count grids
+// are deleted before returning.
+IntGrid* CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix* photo_map,
+ BlobGrid* good_grid) {
+ IntGrid* noise_counts = CountCellElements();
+ IntGrid* noise_density = noise_counts->NeighbourhoodSum();
+ IntGrid* good_counts = good_grid->CountCellElements();
+ // Now increase noise density in photo areas, to bias the decision and
+ // minimize hallucinated text on image, but trim the noise_density where
+ // there are good blobs and the original count is low in non-photo areas,
+ // indicating that most of the result came from neighbouring cells.
+ int height = pixGetHeight(photo_map);
+ int photo_offset = IntCastRounded(max_noise_count_ * kPhotoOffsetFraction);
+ for (int y = 0; y < gridheight(); ++y) {
+ for (int x = 0; x < gridwidth(); ++x) {
+ // Value before any bias is applied; every threshold test below uses it.
+ int noise = noise_density->GridCellValue(x, y);
+ // Only cells at or under the threshold that photo_offset would push
+ // over it are worth testing against the photo map.
+ if (max_noise_count_ < noise + photo_offset &&
+ noise <= max_noise_count_) {
+ // Test for photo.
+ // The cell rectangle is measured down from the top of photo_map, using
+ // the pix height to flip y.
+ // NOTE(review): this assumes grid cell (0, 0) sits at the bottom-left
+ // corner of photo_map - confirm against the callers' bleft().
+ int left = x * gridsize();
+ int right = left + gridsize();
+ int bottom = height - y * gridsize();
+ int top = bottom - gridsize();
+ if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right,
+ &bottom)) {
+ noise_density->SetGridCell(x, y, noise + photo_offset);
+ }
+ }
+ // The two tests below need noise > max_noise_count_, so they can never
+ // fire for a cell that entered the photo branch above. The debug output
+ // re-reads the grid rather than printing the local noise value.
+ if (debug && noise > max_noise_count_ &&
+ good_counts->GridCellValue(x, y) > 0) {
+ tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n",
+ x * gridsize(), y * gridsize(),
+ noise_density->GridCellValue(x, y),
+ good_counts->GridCellValue(x, y),
+ noise_counts->GridCellValue(x, y), max_noise_count_);
+ }
+ // Over threshold, but the cell holds good blobs and its own count is
+ // small relative to the threshold: zero the density for this cell.
+ if (noise > max_noise_count_ &&
+ good_counts->GridCellValue(x, y) > 0 &&
+ noise_counts->GridCellValue(x, y) * kOriginalNoiseMultiple <=
+ max_noise_count_) {
+ noise_density->SetGridCell(x, y, 0);
+ }
+ }
+ }
+ delete noise_counts;
+ delete good_counts;
+ return noise_density;
+}
+
+// Helper to expand a box in one of the 4 directions by the given pad,
+// provided it does not expand into any cell with a zero noise density.
+// The directions are tried in the order right, left, up, down, and the
+// first expansion that touches no zero cell is returned.
+// If that is not possible, try expanding all round by kNoisePadding.
+// If that also touches a zero cell, the original box is returned unchanged.
+static TBOX AttemptBoxExpansion(const TBOX& box, const IntGrid& noise_density,
+                                int pad) {
+  // Right.
+  TBOX expanded_box(box);
+  expanded_box.set_right(box.right() + pad);
+  if (!noise_density.AnyZeroInRect(expanded_box))
+    return expanded_box;
+  // Left.
+  expanded_box = box;
+  expanded_box.set_left(box.left() - pad);
+  if (!noise_density.AnyZeroInRect(expanded_box))
+    return expanded_box;
+  // Up.
+  expanded_box = box;
+  expanded_box.set_top(box.top() + pad);
+  if (!noise_density.AnyZeroInRect(expanded_box))
+    return expanded_box;
+  // Down. The top edge grows by adding pad, so the bottom edge must move
+  // the other way; adding pad here would shrink (or invert) the box.
+  expanded_box = box;
+  expanded_box.set_bottom(box.bottom() - pad);
+  if (!noise_density.AnyZeroInRect(expanded_box))
+    return expanded_box;
+  // All round, by a small constant.
+  expanded_box = box;
+  expanded_box.pad(kNoisePadding, kNoisePadding);
+  if (!noise_density.AnyZeroInRect(expanded_box))
+    return expanded_box;
+  return box;
+}
+
+// Tests each blob in the list to see if it is certain non-text using 2
+// conditions:
+// 1. blob overlaps a cell with high value in noise_density_ (previously set
+// by ComputeNoiseDensity).
+// OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
+// condition is disabled with max_blob_overlaps == -1.
+// If it does, the blob is declared non-text, and is used to mark up the
+// nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
+// neighbours reset, as they may now point to deleted data.
+// WARNING: The blobs list blobs may be in the *this grid, but they are
+// not removed. If any deleted blobs might be in *this, then this must be
+// Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
+// If the win is not nullptr, deleted blobs are drawn on it in red, and kept
+// blobs are drawn on it in ok_color.
+void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
+                                                int max_blob_overlaps,
+                                                ScrollView* win,
+                                                ScrollView::Color ok_color,
+                                                Pix* nontext_mask) {
+  // Vertical extent of the grid, used to flip box tops into the
+  // top-down coordinates of the mask. Both terms must be y coordinates.
+  int imageheight = tright().y() - bleft().y();
+  BLOBNBOX_IT blob_it(blobs);
+  // Removed blobs are collected here; see the comment at the delete below.
+  BLOBNBOX_LIST dead_blobs;
+  BLOBNBOX_IT dead_it(&dead_blobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    TBOX box = blob->bounding_box();
+    if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) &&
+        (max_blob_overlaps < 0 ||
+         !BlobOverlapsTooMuch(blob, max_blob_overlaps))) {
+      // Kept blob.
+      blob->ClearNeighbours();
+      #ifndef GRAPHICS_DISABLED
+      if (win != nullptr)
+        blob->plot(win, ok_color, ok_color);
+      #endif  // !GRAPHICS_DISABLED
+    } else {
+      if (noise_density_->AnyZeroInRect(box)) {
+        // There is a danger that the bounding box may overlap real text, so
+        // we need to render the outline.
+        Pix* blob_pix = blob->cblob()->render_outline();
+        pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
+                    box.width(), box.height(), PIX_SRC | PIX_DST,
+                    blob_pix, 0, 0);
+        pixDestroy(&blob_pix);
+      } else {
+        if (box.area() < gridsize() * gridsize()) {
+          // It is a really bad idea to make lots of small components in the
+          // photo mask, so try to join it to a bigger area by expanding the
+          // box in a way that does not touch any zero noise density cell.
+          box = AttemptBoxExpansion(box, *noise_density_, gridsize());
+        }
+        // All overlapped cells are non-zero, so just mark the rectangle.
+        pixRasterop(nontext_mask, box.left(), imageheight - box.top(),
+                    box.width(), box.height(), PIX_SET, nullptr, 0, 0);
+      }
+      #ifndef GRAPHICS_DISABLED
+      if (win != nullptr)
+        blob->plot(win, ScrollView::RED, ScrollView::RED);
+      #endif  // !GRAPHICS_DISABLED
+      // It is safe to delete the cblob now, as it isn't used by the grid
+      // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the
+      // dead_blobs list.
+      // TODO(rays) delete the delete when the BLOBNBOX destructor deletes
+      // the cblob.
+      delete blob->cblob();
+      dead_it.add_to_end(blob_it.extract());
+    }
+  }
+}
+
+// Returns true if the given blob has a major overlap with more than
+// max_overlaps blobs in the current grid. The search stops as soon as
+// the limit is exceeded.
+bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) {
+  // Rectangle search of the grid over the bounding box of the blob.
+  BlobGridSearch rsearch(this);
+  const TBOX& box = blob->bounding_box();
+  rsearch.StartRectSearch(box);
+  rsearch.SetUniqueMode(true);
+  // Number of further overlaps that can still be tolerated.
+  int remaining = max_overlaps;
+  while (remaining >= 0) {
+    BLOBNBOX* neighbour = rsearch.NextRectSearch();
+    if (neighbour == nullptr)
+      break;
+    if (!box.major_overlap(neighbour->bounding_box()))
+      continue;
+    --remaining;
+    if (remaining < 0)
+      return true;
+  }
+  return false;
+}
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/ccnontextdetect.h b/tesseract/src/textord/ccnontextdetect.h
new file mode 100644
index 00000000..e1f1ca2d
--- /dev/null
+++ b/tesseract/src/textord/ccnontextdetect.h
@@ -0,0 +1,87 @@
+///////////////////////////////////////////////////////////////////////
+// File: ccnontextdetect.h
+// Description: Connected-Component-based non-text detection.
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: rays@google.com (Ray Smith)
+// Created: Sat Jun 11 09:52:01 PST 2011
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_CCPHOTODETECT_H_
+#define TESSERACT_TEXTORD_CCPHOTODETECT_H_
+
+#include "blobgrid.h"
+#include "scrollview.h"
+
+namespace tesseract {
+
+// The CCNonTextDetect class contains grid-based operations on blobs to create
+// a full-resolution image mask analogous yet complementary to
+// pixGenHalftoneMask as it is better at line-drawings, graphs and charts.
+class CCNonTextDetect : public BlobGrid {
+ public:
+ // The arguments are passed on to the BlobGrid base class; gridsize is also
+ // used to derive max_noise_count_.
+ CCNonTextDetect(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+ // Deletes noise_density_.
+ ~CCNonTextDetect() override;
+
+ // Creates and returns a Pix with the same resolution as the original
+ // in which 1 (black) pixels represent likely non text (photo, line drawing)
+ // areas of the page, deleting from the blob_block the blobs that were
+ // determined to be non-text.
+ // The photo_map (binary image mask) is used to bias the decision towards
+ // non-text, rather than supplying a definite decision.
+ // The blob_block is the usual result of connected component analysis,
+ // holding the detected blobs.
+ // The returned Pix should be PixDestroyed after use.
+ Pix* ComputeNonTextMask(bool debug, Pix* photo_map, TO_BLOCK* blob_block);
+
+ private:
+ // Computes and returns the noise_density IntGrid, at the same gridsize as
+ // this by summing the number of small elements in a 3x3 neighbourhood of
+ // each grid cell. good_grid is filled with blobs that are considered most
+ // likely good text, and this is filled with small and medium blobs that are
+ // more likely non-text.
+ // The photo_map is used to bias the decision towards non-text, rather than
+ // supplying a definite decision.
+ IntGrid* ComputeNoiseDensity(bool debug, Pix* photo_map, BlobGrid* good_grid);
+
+ // Tests each blob in the list to see if it is certain non-text using 2
+ // conditions:
+ // 1. blob overlaps a cell with high value in noise_density_ (previously set
+ // by ComputeNoiseDensity).
+ // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
+ // condition is disabled with max_blob_overlaps == -1.
+ // If it does, the blob is declared non-text, and is used to mark up the
+ // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
+ // neighbours reset, as they may now point to deleted data.
+ // WARNING: The blobs list blobs may be in the *this grid, but they are
+ // not removed. If any deleted blobs might be in *this, then this must be
+ // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
+ // If the win is not nullptr, deleted blobs are drawn on it in red, and kept
+ // blobs are drawn on it in ok_color.
+ void MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
+ int max_blob_overlaps,
+ ScrollView* win, ScrollView::Color ok_color,
+ Pix* nontext_mask);
+ // Returns true if the given blob overlaps more than max_overlaps blobs
+ // in the current grid.
+ bool BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps);
+
+ // Max entry in noise_density_ before the cell is declared noisy.
+ int max_noise_count_;
+ // Completed noise density map, which we keep around to use for secondary
+ // noise detection.
+ IntGrid* noise_density_;
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_CCPHOTODETECT_H_
diff --git a/tesseract/src/textord/cjkpitch.cpp b/tesseract/src/textord/cjkpitch.cpp
new file mode 100644
index 00000000..3d547396
--- /dev/null
+++ b/tesseract/src/textord/cjkpitch.cpp
@@ -0,0 +1,1070 @@
+///////////////////////////////////////////////////////////////////////
+// File: cjkpitch.cpp
+// Description: Code to determine fixed pitchness and the pitch if fixed,
+// for CJK text.
+// Author: takenaka@google.com (Hiroshi Takenaka)
+//
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "cjkpitch.h"
+#include "genericvector.h"
+#include "topitch.h"
+#include "tovars.h"
+
+#include <algorithm> // for std::sort
+#include <vector> // for std::vector
+
+namespace tesseract {
+
+// Boolean parameter (default false). It is read by FPRow::is_good_pitch() to
+// accept a pitch wider than the estimate when the extra width can be explained
+// by an inter-word space.
+static BOOL_VAR(textord_space_size_is_variable, false,
+ "If true, word delimiter spaces are assumed to have "
+ "variable width, even though characters have fixed pitch.");
+
+// Allow +/-10% error for character pitch / body size.
+static const float kFPTolerance = 0.1f;
+
+// Minimum ratio of "good" character pitch for a row to be considered
+// to be fixed-pitch. FPRow::OutputEstimations() compares the number of good
+// pitches against this fraction of all collected pitches.
+static const float kFixedPitchThreshold = 0.35f;
+
+// Order statistics (minimum, median, arbitrary fractile) for a small
+// collection of float samples. The samples are sorted lazily: any query made
+// after an insertion triggers a sort first.
+class SimpleStats {
+ public:
+  SimpleStats() = default;
+  ~SimpleStats() = default;
+
+  // Discards every sample.
+  void Clear() {
+    finalized_ = false;
+    values_.clear();
+  }
+
+  // Appends one sample and invalidates the sorted state.
+  void Add(float value) {
+    finalized_ = false;
+    values_.push_back(value);
+  }
+
+  // Sorts the samples in ascending order so that they can be queried.
+  void Finish() {
+    std::sort(values_.begin(), values_.end());
+    finalized_ = true;
+  }
+
+  // Returns the frac-th fractile (frac in [0, 1]) using linear interpolation
+  // between the two neighbouring samples. Returns 0 for an empty collection;
+  // frac is clamped to the smallest/largest sample outside (0, 1).
+  float ile(double frac) {
+    EnsureSorted();
+    if (values_.empty()) {
+      return 0.0f;
+    }
+    if (frac >= 1.0) {
+      return values_.back();
+    }
+    if (frac <= 0.0 || values_.size() == 1) {
+      return values_[0];
+    }
+    // Fractional position of the requested fractile in the sorted samples.
+    const double position = (values_.size() - 1) * frac;
+    const int lower = static_cast<int>(position);
+    const float remainder = position - lower;
+
+    return values_[lower] * (1.0f - remainder) +
+           values_[lower + 1] * remainder;
+  }
+
+  // Returns the 0.5 fractile.
+  float median() {
+    return ile(0.5);
+  }
+
+  // Returns the smallest sample, or 0 for an empty collection.
+  float minimum() {
+    EnsureSorted();
+    return values_.empty() ? 0.0f : values_[0];
+  }
+
+  // Returns the number of samples.
+  int size() const {
+    return values_.size();
+  }
+
+ private:
+  // Sorts the samples unless they are already known to be sorted.
+  void EnsureSorted() {
+    if (!finalized_) {
+      Finish();
+    }
+  }
+
+  bool finalized_ = false;
+  std::vector<float> values_;
+};
+
+// Statistics for a small collection of float pairs (x, y), each carrying an
+// integer vote that is used as its weight.
+// EstimateYFor(x, r) returns the estimated y at x, based on the existing
+// samples whose x lies between x*(1-r) and x*(1+r).
+class LocalCorrelation {
+ public:
+  struct float_pair {
+    float x, y;
+    int vote;
+  };
+
+  LocalCorrelation() : finalized_(false) {}
+  ~LocalCorrelation() {}
+
+  // Sorts the samples by x. Must be called before EstimateYFor().
+  void Finish() {
+    values_.sort(float_pair_compare);
+    finalized_ = true;
+  }
+
+  // Discards all samples and returns to the not-finalized state.
+  // (Previously only the flag was reset, so stale samples survived a Clear().)
+  void Clear() {
+    values_.clear();
+    finalized_ = false;
+  }
+
+  // Adds the sample (x, y) with weight v and invalidates the sorted state.
+  void Add(float x, float y, int v) {
+    struct float_pair value;
+    value.x = x;
+    value.y = y;
+    value.vote = v;
+    values_.push_back(value);
+    finalized_ = false;
+  }
+
+  // Returns the vote-weighted average of x * (y_i / x_i) over the samples
+  // whose x_i lies within [x*(1-r), x*(1+r)]. If no sample lies in that range,
+  // all samples are used. Returns 0 if the total vote is zero (for example
+  // when there are no samples at all) instead of dividing by zero.
+  // Finish() must have been called since the last Add().
+  float EstimateYFor(float x, float r) {
+    ASSERT_HOST(finalized_);
+    int start = 0, end = values_.size();
+    // Because the number of samples is assumed to be small, just use linear
+    // search to find the values within the range.
+    while (start < values_.size() && values_[start].x < x * (1.0 - r)) start++;
+    while (end - 1 >= 0 && values_[end - 1].x > x * (1.0 + r)) end--;
+
+    // Fall back to the global average if there are no data within r of x.
+    if (start >= end) {
+      start = 0;
+      end = values_.size();
+    }
+
+    // Compute the weighted average of the values.
+    float rc = 0;
+    int vote = 0;
+    for (int i = start; i < end; i++) {
+      rc += values_[i].vote * x * values_[i].y / values_[i].x;
+      vote += values_[i].vote;
+    }
+
+    // Without any vote the average is undefined; report "no estimate" as 0
+    // rather than producing NaN from 0 / 0.
+    if (vote == 0) {
+      return 0.0f;
+    }
+    return rc / vote;
+  }
+
+ private:
+  // qsort-style comparator that orders float_pair's by ascending x.
+  static int float_pair_compare(const void* a, const void* b) {
+    const auto* f_a = static_cast<const float_pair*>(a);
+    const auto* f_b = static_cast<const float_pair*>(b);
+    return (f_a->x > f_b->x) ? 1 : ((f_a->x < f_b->x) ? -1 : 0);
+  }
+
+  bool finalized_;
+  GenericVector<struct float_pair> values_;
+};
+
+// Class to represent a character on a fixed pitch row. A FPChar may
+// consist of multiple blobs (BLOBNBOX's).
+class FPChar {
+ public:
+  enum Alignment {
+    ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD
+  };
+
+  FPChar()
+      : box_(), real_body_(),
+        from_(nullptr), to_(nullptr), num_blobs_(0), max_gap_(0),
+        final_(false), alignment_(ALIGN_UNKNOWN),
+        merge_to_prev_(false), delete_flag_(false) {
+  }
+
+  // Initialize from blob: the character consists of just this blob, and both
+  // boxes are set to the blob's bounding box.
+  void Init(BLOBNBOX *blob) {
+    box_ = blob->bounding_box();
+    real_body_ = box_;
+    from_ = to_ = blob;
+    num_blobs_ = 1;
+  }
+
+  // Merge this character with "next". The "next" character should
+  // consist of succeeding blobs on the same row. The x gap between the two
+  // real bodies is folded into max_gap_ before the boxes are united.
+  void Merge(const FPChar &next) {
+    int gap = real_body_.x_gap(next.real_body_);
+    if (gap > max_gap_) max_gap_ = gap;
+
+    box_ += next.box_;
+    real_body_ += next.real_body_;
+    to_ = next.to_;
+    num_blobs_ += next.num_blobs_;
+  }
+
+  // Accessors.
+  const TBOX &box() const { return box_; }
+  void set_box(const TBOX &box) {
+    box_ = box;
+  }
+  const TBOX &real_body() const { return real_body_; }
+
+  bool is_final() const { return final_; }
+  void set_final(bool flag) {
+    final_ = flag;
+  }
+
+  const Alignment& alignment() const {
+    return alignment_;
+  }
+  void set_alignment(Alignment alignment) {
+    alignment_ = alignment;
+  }
+
+  bool merge_to_prev() const {
+    return merge_to_prev_;
+  }
+  void set_merge_to_prev(bool flag) {
+    merge_to_prev_ = flag;
+  }
+
+  bool delete_flag() const {
+    return delete_flag_;
+  }
+  void set_delete_flag(bool flag) {
+    delete_flag_ = flag;
+  }
+
+  int max_gap() const {
+    return max_gap_;
+  }
+
+  int num_blobs() const {
+    return num_blobs_;
+  }
+
+ private:
+  TBOX box_;             // Rectangle region considered to be occupied by this
+                         // character. It could be bigger than the bounding
+                         // box.
+  TBOX real_body_;       // Real bounding box of this character.
+  BLOBNBOX *from_;       // The first blob of this character.
+  BLOBNBOX *to_;         // The last blob of this character.
+  int num_blobs_;        // Number of blobs that belong to this character.
+  int max_gap_;          // Maximum x gap between the blobs.
+
+  bool final_;           // True if alignment/fragmentation decision for this
+                         // character is finalized.
+
+  Alignment alignment_;  // Alignment status.
+  bool merge_to_prev_;   // True if this is a fragmented blob that
+                         // needs to be merged to the previous
+                         // character.
+
+  bool delete_flag_;     // True if this character is merged to another
+                         // one and needs to be deleted. (Was declared int
+                         // although it is only ever used as a boolean.)
+};
+
+// A fixed pitch row: a linear collection of FPChar's together with the
+// pitch, gap and height statistics collected from them.
+class FPRow {
+ public:
+  FPRow() = default;
+  ~FPRow() = default;
+
+  // Initialize from TO_ROW.
+  void Init(TO_ROW *row);
+
+  // Estimate the character pitch of this row from the current alignment
+  // status of the underlying FPChar's. Pass pass1 == true when calling right
+  // after Pass1Analyze(), which allows some redundant checks to be skipped.
+  void EstimatePitch(bool pass1);
+
+  // Set the alignment status of each character according to whether the
+  // pitches to its predecessor and to its successor are good. The estimated
+  // pitch of the row is used if it is already known; otherwise a character
+  // is good when the two pitches are almost equal.
+  void Pass1Analyze();
+
+  // Find characters that fit nicely into one imaginary body next to an
+  // already finalized character, and mark them as character fragments.
+  bool Pass2Analyze();
+
+  // Merge the FPChars marked as character fragments into one.
+  void MergeFragments();
+
+  // Finalize characters that are already large enough and cannot be merged
+  // with others any more.
+  void FinalizeLargeChars();
+
+  // Output the pitch estimation results to the attributes of the TO_ROW.
+  void OutputEstimations();
+
+  void DebugOutputResult(int row_index);
+
+  // Number of pitch samples that are consistent with the fixed pitch model.
+  int good_pitches() {
+    return good_pitches_.size();
+  }
+
+  float pitch() {
+    return pitch_;
+  }
+
+  float estimated_pitch() {
+    return estimated_pitch_;
+  }
+
+  void set_estimated_pitch(float v) {
+    estimated_pitch_ = v;
+  }
+
+  float height() {
+    return height_;
+  }
+
+  // Ratio of the character height to the median good pitch, or -1 when fewer
+  // than two good pitches are available.
+  float height_pitch_ratio() {
+    if (good_pitches_.size() >= 2) {
+      return height_ / good_pitches_.median();
+    }
+    return -1.0;
+  }
+
+  float gap() {
+    return gap_;
+  }
+
+  size_t num_chars() {
+    return characters_.size();
+  }
+
+  FPChar *character(int i) {
+    return &characters_[i];
+  }
+
+  const TBOX &box(int i) {
+    return characters_[i].box();
+  }
+
+  const TBOX &real_body(int i) {
+    return characters_[i].real_body();
+  }
+
+  // True if the (possibly enlarged) box differs from the real bounding box.
+  bool is_box_modified(int i) {
+    const FPChar &ch = characters_[i];
+    return !(ch.box() == ch.real_body());
+  }
+
+  // Horizontal center of the box of character i.
+  float center_x(int i) {
+    const TBOX &b = characters_[i].box();
+    return (b.left() + b.right()) / 2.0;
+  }
+
+  bool is_final(int i) {
+    return characters_[i].is_final();
+  }
+
+  void finalize(int i) {
+    characters_[i].set_final(true);
+  }
+
+  bool is_good(int i) {
+    return characters_[i].alignment() == FPChar::ALIGN_GOOD;
+  }
+
+  void mark_good(int i) {
+    characters_[i].set_alignment(FPChar::ALIGN_GOOD);
+  }
+
+  void mark_bad(int i) {
+    characters_[i].set_alignment(FPChar::ALIGN_BAD);
+  }
+
+  void clear_alignment(int i) {
+    characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN);
+  }
+
+ private:
+  // Horizontal overlap of the two boxes as a fraction of the narrower box's
+  // width (negative when the boxes are apart); 0 if either width is 0.
+  static float x_overlap_fraction(const TBOX& box1, const TBOX& box2) {
+    const int narrower = std::min(box1.width(), box2.width());
+    if (narrower == 0) {
+      return 0.0;
+    }
+    return -box1.x_gap(box2) / static_cast<float>(narrower);
+  }
+
+  // True if more than 90% of the narrower box is covered horizontally.
+  static bool mostly_overlap(const TBOX& box1, const TBOX& box2) {
+    return x_overlap_fraction(box1, box2) > 0.9;
+  }
+
+  // True if the boxes overlap horizontally by more than one unit or by more
+  // than 10% of the narrower box. Never true for a zero-width box.
+  static bool significant_overlap(const TBOX& box1, const TBOX& box2) {
+    if (std::min(box1.width(), box2.width()) == 0) {
+      return false;
+    }
+    const int overlap = -box1.x_gap(box2);
+    if (overlap > 1) {
+      return true;
+    }
+    return x_overlap_fraction(box1, box2) > 0.1;
+  }
+
+  // Distance between the horizontal centers of the two boxes.
+  static float box_pitch(const TBOX& ref, const TBOX& box) {
+    return abs(ref.left() + ref.right() - box.left() - box.right()) / 2.0;
+  }
+
+  // Check if two neighboring characters satisfy the fixed pitch model.
+  static bool is_good_pitch(float pitch, const TBOX& box1, const TBOX& box2) {
+    // Neither dimension of either character box may exceed the pitch
+    // (plus tolerance).
+    const double size_limit = pitch * (1.0 + kFPTolerance);
+    if (box1.width() >= size_limit || box2.width() >= size_limit ||
+        box1.height() >= size_limit || box2.height() >= size_limit) {
+      return false;
+    }
+
+    const float real_pitch = box_pitch(box1, box2);
+    if (fabs(real_pitch - pitch) < pitch * kFPTolerance) {
+      return true;
+    }
+
+    if (!textord_space_size_is_variable) {
+      return false;
+    }
+    // Hangul characters usually have fixed pitch, but words are
+    // delimited by space which can be narrower than characters.
+    return real_pitch > pitch && real_pitch < pitch * 2.0 &&
+           real_pitch - box1.x_gap(box2) < pitch;
+  }
+
+  // Blobs joined to their predecessor and leader blobs are not analyzed.
+  static bool is_interesting_blob(const BLOBNBOX *blob) {
+    return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER;
+  }
+
+  // Remove the chars that are already merged into others, compacting the
+  // surviving ones towards the front while keeping their order.
+  void DeleteChars() {
+    int kept = 0;
+    for (int i = 0; i < characters_.size(); ++i) {
+      if (characters_[i].delete_flag()) {
+        continue;
+      }
+      if (kept != i) {
+        characters_[kept] = characters_[i];
+      }
+      kept++;
+    }
+    characters_.truncate(kept);
+  }
+
+  float pitch_ = 0.0f;            // Character pitch.
+  float estimated_pitch_ = 0.0f;  // equal to pitch_ if pitch_ is considered
+                                  // to be good enough.
+  float height_ = 0.0f;           // Character height.
+  float gap_ = 0.0f;              // Minimum gap between characters.
+
+  // Pitches between any two successive characters.
+  SimpleStats all_pitches_;
+  // Gaps between any two successive characters.
+  SimpleStats all_gaps_;
+  // Pitches between any two successive characters that are consistent
+  // with the fixed pitch model.
+  SimpleStats good_pitches_;
+  // Gaps between any two successive characters that are consistent
+  // with the fixed pitch model.
+  SimpleStats good_gaps_;
+
+  // Heights of the characters (or of the individual blobs during Init()).
+  SimpleStats heights_;
+
+  GenericVector<FPChar> characters_;
+  TO_ROW *real_row_ = nullptr;  // Underlying TO_ROW for this row.
+};
+
+// Builds characters_ from the blobs of |row| and computes the initial
+// estimation of the character height. |row| must be non-null and must have a
+// positive xheight.
+void FPRow::Init(TO_ROW *row) {
+  ASSERT_HOST(row != nullptr);
+  ASSERT_HOST(row->xheight > 0);
+  real_row_ = row;
+  // Default decision.
+  real_row_->pitch_decision = PITCH_CORR_PROP;
+
+  BLOBNBOX_IT blob_it = row->blob_list();
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX *blob = blob_it.data();
+    if (!is_interesting_blob(blob)) {
+      continue;
+    }
+
+    FPChar candidate;
+    candidate.Init(blob);
+    // Merge unconditionally if the blob overlaps the last character.
+    const bool overlaps_last =
+        !characters_.empty() &&
+        significant_overlap(candidate.box(), characters_.back().box());
+    if (overlaps_last) {
+      characters_.back().Merge(candidate);
+    } else {
+      characters_.push_back(candidate);
+    }
+
+    // Only blobs whose height is more than a third of their width
+    // contribute to the height statistics.
+    const TBOX bound = blob->bounding_box();
+    if (bound.height() * 3.0 > bound.width()) {
+      heights_.Add(bound.height());
+    }
+  }
+
+  heights_.Finish();
+  height_ = heights_.ile(0.875);
+}
+
+// Writes the pitch estimation results of this row into the underlying TO_ROW
+// (real_row_): the pitch decision, fixed pitch, kern/body sizes, the space
+// thresholds and the char_cells list.
+void FPRow::OutputEstimations() {
+ if (good_pitches_.size() == 0) {
+ // No pitch fits the fixed pitch model: report the row as proportional and
+ // leave the other attributes of real_row_ untouched.
+ pitch_ = 0.0f;
+ real_row_->pitch_decision = PITCH_CORR_PROP;
+ return;
+ }
+
+ pitch_ = good_pitches_.median();
+ real_row_->fixed_pitch = pitch_;
+ // good_gaps_.ile(0.125) can be large if most characters on the row
+ // are skinny. Use pitch_ - height_ instead if it's smaller, but
+ // positive.
+ real_row_->kern_size = real_row_->pr_nonsp =
+ std::min(good_gaps_.ile(0.125), std::max(pitch_ - height_, 0.0f));
+ real_row_->body_size = pitch_ - real_row_->kern_size;
+
+ if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) {
+ // If fewer than kFixedPitchThreshold of the pitches on this line fit the
+ // fixed pitch model, consider the line to be proportional.
+ // NOTE(review): this comment used to describe a 50% threshold, which does
+ // not match the kFixedPitchThreshold constant used by the code.
+ // Anyway we store estimated values (fixed_pitch, kern_size, etc.) in
+ // real_row_ as a partial estimation result and try to use them in the
+ // normalization process.
+ real_row_->pitch_decision = PITCH_CORR_PROP;
+ return;
+ } else if (good_pitches_.size() > all_pitches_.size() * 0.75) {
+ real_row_->pitch_decision = PITCH_DEF_FIXED;
+ } else {
+ real_row_->pitch_decision = PITCH_CORR_FIXED;
+ }
+
+ real_row_->space_size = real_row_->pr_space = pitch_;
+ // Set min_space to 50% of character pitch so that we can break CJK
+ // text at a half-width space after punctuation.
+ real_row_->min_space = (pitch_ + good_gaps_.minimum()) * 0.5;
+
+ // Don't consider a quarter space as a real space, because it's used
+ // for line justification in traditional Japanese books.
+ real_row_->max_nonspace = std::max(pitch_ * 0.25 + good_gaps_.minimum(),
+ static_cast<double>(good_gaps_.ile(0.875)));
+
+ // Threshold used below to decide where to put word breaks in char_cells.
+ // NOTE(review): it is computed before max_nonspace is raised by the loop
+ // below, so it can differ from real_row_->space_threshold; confirm that this
+ // is intended.
+ int space_threshold =
+ std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
+ static_cast<int>(real_row_->xheight));
+
+ // Make max_nonspace larger than any intra-character gap so that
+ // make_prop_words() won't break a row at the middle of a character.
+ for (size_t i = 0; i < num_chars(); ++i) {
+ if (characters_[i].max_gap() > real_row_->max_nonspace) {
+ real_row_->max_nonspace = characters_[i].max_gap();
+ }
+ }
+ real_row_->space_threshold =
+ std::min((real_row_->max_nonspace + real_row_->min_space) / 2,
+ static_cast<int>(real_row_->xheight));
+ real_row_->used_dm_model = false;
+
+ // Setup char_cells. The first cell boundary is the left edge of the first
+ // character; "right" tracks the right edge of the last processed character.
+ ICOORDELT_IT cell_it = &real_row_->char_cells;
+ auto *cell = new ICOORDELT(real_body(0).left(), 0);
+ cell_it.add_after_then_move(cell);
+
+ int right = real_body(0).right();
+ for (size_t i = 1; i < num_chars(); ++i) {
+ // Put a word break if gap between two characters is bigger than
+ // space_threshold. Don't break if none of two characters
+ // couldn't be "finalized", because maybe they need to be merged
+ // to one character.
+ if ((is_final(i - 1) || is_final(i)) &&
+ real_body(i - 1).x_gap(real_body(i)) > space_threshold) {
+ cell = new ICOORDELT(right + 1, 0);
+ cell_it.add_after_then_move(cell);
+ // Add one cell boundary per pitch_ step across the gap.
+ while (right + pitch_ < box(i).left()) {
+ right += pitch_;
+ cell = new ICOORDELT(right + 1, 0);
+ cell_it.add_after_then_move(cell);
+ }
+ right = box(i).left();
+ }
+ // Boundary halfway between the previous right edge and this character.
+ cell = new ICOORDELT((right + real_body(i).left()) / 2, 0);
+ cell_it.add_after_then_move(cell);
+ right = real_body(i).right();
+ }
+
+ // Closing boundary just after the last character.
+ cell = new ICOORDELT(right + 1, 0);
+ cell_it.add_after_then_move(cell);
+
+ // TODO(takenaka): add code to store alignment/fragmentation
+ // information to blobs so that it can be reused later, e.g. in
+ // recognition phase.
+}
+
+// Recomputes the pitch, gap and height statistics of this row from the
+// current characters and their alignment status, then derives pitch_, gap_
+// and height_ from them. See the class declaration for the meaning of pass1.
+void FPRow::EstimatePitch(bool pass1) {
+  good_pitches_.Clear();
+  all_pitches_.Clear();
+  good_gaps_.Clear();
+  all_gaps_.Clear();
+  heights_.Clear();
+  if (num_chars() == 0) {
+    return;
+  }
+
+  bool prev_was_good = is_good(0);
+  int32_t prev_center = center_x(0);
+  heights_.Add(box(0).height());
+
+  for (size_t i = 1; i < num_chars(); i++) {
+    const int32_t cur_center = center_x(i);
+    const int32_t pitch = cur_center - prev_center;
+    prev_center = cur_center;
+    const int32_t gap = std::max(0, real_body(i - 1).x_gap(real_body(i)));
+    heights_.Add(box(i).height());
+
+    // Ignore if the pitch is too close. But don't ignore wide pitch
+    // may be the result of large tracking.
+    if (!(pitch > height_ * 0.5)) {
+      continue;
+    }
+    all_pitches_.Add(pitch);
+    all_gaps_.Add(gap);
+
+    if (!is_good(i)) {
+      prev_was_good = false;
+      continue;
+    }
+
+    // In pass1 (after Pass1Analyze()), all characters marked as "good" have
+    // a good consistent pitch with their previous characters. That does not
+    // hold in pass2, where a good character may have a good pitch only
+    // towards its successor. So in pass2 only pitches between two good
+    // characters that are also within tolerance are collected.
+    const bool consistent =
+        pass1 || (prev_was_good &&
+                  fabs(estimated_pitch_ - pitch) <
+                      kFPTolerance * estimated_pitch_);
+    if (consistent) {
+      good_pitches_.Add(pitch);
+      if (!is_box_modified(i - 1) && !is_box_modified(i)) {
+        good_gaps_.Add(gap);
+      }
+    }
+    prev_was_good = true;
+  }
+
+  good_pitches_.Finish();
+  all_pitches_.Finish();
+  good_gaps_.Finish();
+  all_gaps_.Finish();
+  heights_.Finish();
+
+  height_ = heights_.ile(0.875);
+  if (all_pitches_.size() == 0) {
+    pitch_ = 0.0f;
+    gap_ = 0.0f;
+    return;
+  }
+
+  // With fewer than two good pitches there is not enough data to estimate
+  // the pitch of this row yet; use all pitches as the initial guess.
+  const bool enough_good = good_pitches_.size() >= 2;
+  SimpleStats &pitch_source = enough_good ? good_pitches_ : all_pitches_;
+  SimpleStats &gap_source = enough_good ? good_gaps_ : all_gaps_;
+  pitch_ = pitch_source.median();
+  ASSERT_HOST(pitch_ > 0.0f);
+  gap_ = gap_source.ile(0.125);
+}
+
+// Prints the estimation results stored in the underlying TO_ROW, followed by
+// one line per character. Rows without characters print nothing.
+void FPRow::DebugOutputResult(int row_index) {
+  if (num_chars() == 0) {
+    return;
+  }
+
+  tprintf("Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, "
+          "space_size=%f, space_threshold=%d, xheight=%f\n",
+          row_index, static_cast<int>(real_row_->pitch_decision),
+          real_row_->fixed_pitch, real_row_->max_nonspace,
+          real_row_->space_size, real_row_->space_threshold,
+          real_row_->xheight);
+
+  for (unsigned idx = 0; idx < num_chars(); idx++) {
+    tprintf("Char %u: is_final=%d is_good=%d num_blobs=%d: ",
+            idx, is_final(idx), is_good(idx), character(idx)->num_blobs());
+    box(idx).print();
+  }
+}
+
+// Marks every inner character as good when the pitches to both of its
+// neighbours are good. If an estimated pitch is known, both pitches are
+// tested against it; otherwise the pitch to the successor is tested against
+// the pitch to the predecessor. The first and last characters inherit the
+// alignment of their only neighbour.
+void FPRow::Pass1Analyze() {
+  if (num_chars() < 2) {
+    return;
+  }
+
+  const bool have_estimate = estimated_pitch_ > 0.0f;
+  for (size_t i = 2; i < num_chars(); i++) {
+    const TBOX &left = box(i - 2);
+    const TBOX &middle = box(i - 1);
+    const TBOX &right = box(i);
+
+    bool good;
+    if (have_estimate) {
+      good = is_good_pitch(estimated_pitch_, left, middle) &&
+             is_good_pitch(estimated_pitch_, middle, right);
+    } else {
+      good = is_good_pitch(box_pitch(left, middle), middle, right);
+    }
+    if (good) {
+      mark_good(i - 1);
+    }
+  }
+
+  const size_t last = num_chars() - 1;
+  character(0)->set_alignment(character(1)->alignment());
+  character(last)->set_alignment(character(last - 1)->alignment());
+}
+
+// For every character that is not finalized yet and has a finalized
+// neighbour, builds an "imaginary body" (a box one estimated pitch wide,
+// positioned relative to that neighbour) and collects the non-final characters
+// that mostly overlap it. A single fitting character is finalized; several
+// fitting characters are marked as fragments (merge_to_prev) so that
+// MergeFragments() can join them. Characters that only straddle a body
+// boundary are marked bad.
+// Returns true if the alignment of any character changed or any character is
+// flagged for merging; returns false immediately when the row has at most one
+// character or no estimated pitch.
+bool FPRow::Pass2Analyze() {
+ bool changed = false;
+ if (num_chars() <= 1 || estimated_pitch_ == 0.0f) {
+ return false;
+ }
+ for (size_t i = 0; i < num_chars(); i++) {
+ if (is_final(i)) continue;
+
+ // Remember the alignment so that a change can be detected at the end.
+ FPChar::Alignment alignment = character(i)->alignment();
+ bool intersecting = false;
+ bool not_intersecting = false;
+
+ if (i < num_chars() - 1 && is_final(i + 1)) {
+ // Next character is already finalized. Estimate the imaginary
+ // body including this character based on the character. Skip
+ // whitespace if necessary.
+ bool skipped_whitespaces = false;
+ // c1 is the left edge of the body: 1.5 pitches left of the center of the
+ // finalized neighbour, moved further left one pitch at a time until it is
+ // no longer to the right of this character.
+ float c1 = center_x(i + 1) - 1.5 * estimated_pitch_;
+ while (c1 > box(i).right()) {
+ skipped_whitespaces = true;
+ c1 -= estimated_pitch_;
+ }
+ TBOX ibody(c1, box(i).bottom(), c1 + estimated_pitch_, box(i).top());
+
+ // Collect all characters that mostly fit in the region.
+ // Also, their union height shouldn't be too big.
+ // j is signed so that the backward scan can step below zero.
+ int j = i;
+ TBOX merged;
+ while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) &&
+ merged.bounding_union(box(j)).height() <
+ estimated_pitch_ * (1 + kFPTolerance)) {
+ merged += box(j);
+ j--;
+ }
+
+ if (j >= 0 && significant_overlap(ibody, box(j))) {
+ // character(j) lies on the character boundary and doesn't fit
+ // well into the imaginary body.
+ if (!is_final(j)) intersecting = true;
+ } else {
+ not_intersecting = true;
+ if (i - j > 0) {
+ // Merge character(j+1) ... character(i) because they fit
+ // into the body nicely.
+ if (i - j == 1) {
+ // Only one char in the imaginary body.
+ if (!skipped_whitespaces) mark_good(i);
+ // set ibody as bounding box of this character to get
+ // better pitch analysis result for halfwidth glyphs
+ // followed by a halfwidth space.
+ if (box(i).width() <= estimated_pitch_ * 0.5) {
+ ibody += box(i);
+ character(i)->set_box(ibody);
+ }
+ character(i)->set_merge_to_prev(false);
+ finalize(i);
+ } else {
+ // Flag every collected character except the leftmost one.
+ for (int k = i; k > j + 1; k--) {
+ character(k)->set_merge_to_prev(true);
+ }
+ }
+ }
+ }
+ }
+ if (i > 0 && is_final(i - 1)) {
+ // Now we repeat everything from the opposite side. Previous
+ // character is already finalized. Estimate the imaginary body
+ // including this character based on the character.
+ bool skipped_whitespaces = false;
+ // Here c1 is the right edge of the body.
+ float c1 = center_x(i - 1) + 1.5 * estimated_pitch_;
+ while (c1 < box(i).left()) {
+ skipped_whitespaces = true;
+ c1 += estimated_pitch_;
+ }
+ TBOX ibody(c1 - estimated_pitch_, box(i).bottom(), c1, box(i).top());
+
+ // Forward scan over the characters that mostly fit in the body.
+ size_t j = i;
+ TBOX merged;
+ while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) &&
+ merged.bounding_union(box(j)).height() <
+ estimated_pitch_ * (1 + kFPTolerance)) {
+ merged += box(j);
+ j++;
+ }
+
+ if (j < num_chars() && significant_overlap(ibody, box(j))) {
+ if (!is_final(j)) intersecting = true;
+ } else {
+ not_intersecting = true;
+ if (j - i > 0) {
+ if (j - i == 1) {
+ if (!skipped_whitespaces) mark_good(i);
+ if (box(i).width() <= estimated_pitch_ * 0.5) {
+ ibody += box(i);
+ character(i)->set_box(ibody);
+ }
+ character(i)->set_merge_to_prev(false);
+ finalize(i);
+ } else {
+ // Flag every collected character except the leftmost one (i).
+ for (size_t k = i + 1; k < j; k++) {
+ character(k)->set_merge_to_prev(true);
+ }
+ }
+ }
+ }
+ }
+
+ // This character doesn't fit well into the estimated imaginary
+ // bodies. Mark it as bad.
+ if (intersecting && !not_intersecting) mark_bad(i);
+ if (character(i)->alignment() != alignment ||
+ character(i)->merge_to_prev()) {
+ changed = true;
+ }
+ }
+
+ return changed;
+}
+
+// Merges every character flagged merge_to_prev into the closest preceding
+// character that is not flagged, resets the alignment of the merge target,
+// and finally removes the merged characters from the row.
+void FPRow::MergeFragments() {
+  int merge_target = 0;
+
+  for (size_t idx = 0; idx < num_chars(); ++idx) {
+    FPChar *current = character(idx);
+    if (!current->merge_to_prev()) {
+      // A regular character: later fragments are merged into this one.
+      merge_target = idx;
+      continue;
+    }
+    character(merge_target)->Merge(*current);
+    current->set_delete_flag(true);
+    clear_alignment(merge_target);
+    character(idx - 1)->set_merge_to_prev(false);
+  }
+  DeleteChars();
+}
+
+// Finalizes the characters that cannot reasonably be merged with a neighbour
+// any more, then re-evaluates the alignment (good/bad) of every finalized
+// character against its finalized neighbours using the estimated pitch.
+void FPRow::FinalizeLargeChars() {
+ float row_pitch = estimated_pitch();
+ for (size_t i = 0; i < num_chars(); i++) {
+ if (is_final(i)) continue;
+
+ // Finalize if both neighbors are finalized. We have no other choice.
+ if (i > 0 && is_final(i - 1) && i < num_chars() - 1 && is_final(i + 1)) {
+ finalize(i);
+ continue;
+ }
+
+ // Imaginary body: one pitch wide, centered on this character. Only its
+ // horizontal extent is used by the overlap tests below.
+ float cx = center_x(i);
+ TBOX ibody(cx - 0.5 * row_pitch, 0, cx + 0.5 * row_pitch, 1);
+ if (i > 0) {
+ // The preceding character significantly intersects with the
+ // imaginary body of this character. Let Pass2Analyze() handle
+ // this case.
+ if (x_overlap_fraction(ibody, box(i - 1)) > 0.1) continue;
+ if (!is_final(i - 1)) {
+ TBOX merged = box(i);
+ merged += box(i - 1);
+ // This character cannot be finalized yet because it can be
+ // merged with the previous one. Again, let Pass2Analyze()
+ // handle this case.
+ if (merged.width() < row_pitch) continue;
+ }
+ }
+ if (i < num_chars() - 1) {
+ // Same two checks against the following character.
+ if (x_overlap_fraction(ibody, box(i + 1)) > 0.1) continue;
+ if (!is_final(i + 1)) {
+ TBOX merged = box(i);
+ merged += box(i + 1);
+ if (merged.width() < row_pitch) continue;
+ }
+ }
+ finalize(i);
+ }
+
+ // Update alignment decision. We only consider finalized characters
+ // in pass2. E.g. if a finalized character C has another finalized
+ // character L on its left and a not-finalized character R on its
+ // right, we mark C as good if the pitch between C and L is good,
+ // regardless of the pitch between C and R.
+ for (size_t i = 0; i < num_chars(); i++) {
+ if (!is_final(i)) continue;
+ bool good_pitch = false;
+ bool bad_pitch = false;
+ if (i > 0 && is_final(i - 1)) {
+ if (is_good_pitch(row_pitch, box(i - 1), box(i))) {
+ good_pitch = true;
+ } else {
+ bad_pitch = true;
+ }
+ }
+ if (i < num_chars() - 1 && is_final(i + 1)) {
+ if (is_good_pitch(row_pitch, box(i), box(i + 1))) {
+ good_pitch = true;
+ } else {
+ bad_pitch = true;
+ }
+ }
+ // The alignment is left unchanged when the two sides disagree or when
+ // neither neighbour is finalized.
+ if (good_pitch && !bad_pitch) mark_good(i);
+ else if (!good_pitch && bad_pitch) mark_bad(i);
+ }
+}
+
+// Page-level driver: owns one FPRow per text row and applies each analysis
+// step to all of them.
+class FPAnalyzer {
+ public:
+  FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks);
+  ~FPAnalyzer() = default;
+
+  void Pass1Analyze() {
+    for (auto &row : rows_) {
+      row.Pass1Analyze();
+    }
+  }
+
+  // Estimate character pitch for each row. The argument pass1 can be
+  // set to true if the function is called after Pass1Analyze(), to
+  // eliminate some redundant computation.
+  void EstimatePitch(bool pass1);
+
+  // True if more than one row is neither "bad" nor "tall" according to the
+  // counters maintained by EstimatePitch().
+  bool maybe_fixed_pitch() {
+    return !rows_.empty() &&
+           rows_.size() > num_bad_rows_ + num_tall_rows_ + 1;
+  }
+
+  void MergeFragments() {
+    for (auto &row : rows_) {
+      row.MergeFragments();
+    }
+  }
+
+  void FinalizeLargeChars() {
+    for (auto &row : rows_) {
+      row.FinalizeLargeChars();
+    }
+  }
+
+  // Runs pass 2 on every row; returns true if any row reported a change.
+  bool Pass2Analyze() {
+    bool any_changed = false;
+    for (auto &row : rows_) {
+      any_changed = row.Pass2Analyze() || any_changed;
+    }
+    return any_changed;
+  }
+
+  void OutputEstimations() {
+    for (auto &row : rows_) {
+      row.OutputEstimations();
+    }
+    // Don't we need page-level estimation of gaps/spaces?
+  }
+
+  void DebugOutputResult() {
+    tprintf("FPAnalyzer: final result\n");
+    for (size_t index = 0; index < rows_.size(); index++) {
+      rows_[index].DebugOutputResult(index);
+    }
+  }
+
+  size_t num_rows() {
+    return rows_.size();
+  }
+
+  // Returns the upper limit for pass2 loop iteration.
+  unsigned max_iteration() {
+    // We're fixing at least one character per iteration. So basically
+    // we shouldn't require more than max_chars_per_row_ iterations.
+    return max_chars_per_row_ + 100;
+  }
+
+ private:
+  ICOORD page_tr_;
+  std::vector<FPRow> rows_;
+  unsigned num_tall_rows_;
+  unsigned num_bad_rows_;
+  // TODO: num_empty_rows_ is incremented, but never used otherwise.
+  unsigned num_empty_rows_;
+  unsigned max_chars_per_row_;
+};
+
+// Runs find_repeated_chars() on every block that has rows, then creates one
+// FPRow per row of every block, counting the rows with at most one character
+// and tracking the largest character count of any row.
+FPAnalyzer::FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
+    : page_tr_(page_tr),
+      num_tall_rows_(0),
+      num_bad_rows_(0),
+      num_empty_rows_(0),
+      max_chars_per_row_(0) {
+  TO_BLOCK_IT block_it(port_blocks);
+
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+       block_it.forward()) {
+    TO_BLOCK *block = block_it.data();
+    if (block->get_rows()->empty()) {
+      continue;
+    }
+    ASSERT_HOST(block->xheight > 0);
+    find_repeated_chars(block, false);
+  }
+
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+       block_it.forward()) {
+    TO_ROW_IT row_it = block_it.data()->get_rows();
+    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+      FPRow row;
+      row.Init(row_it.data());
+      rows_.push_back(row);
+
+      const size_t char_count = rows_.back().num_chars();
+      if (char_count <= 1) {
+        num_empty_rows_++;
+      }
+      if (char_count > max_chars_per_row_) {
+        max_chars_per_row_ = char_count;
+      }
+    }
+  }
+}
+
+// Estimates the pitch of every row, collects page-level statistics that
+// relate (height + gap) to pitch, and uses them to choose the estimated pitch
+// of the rows that lack enough evidence of their own. Also recounts the
+// "bad" rows (no good pitch) and the "tall" rows (height/pitch ratio > 1.1).
+void FPAnalyzer::EstimatePitch(bool pass1) {
+  LocalCorrelation pitch_height_stats;
+
+  num_tall_rows_ = 0;
+  num_bad_rows_ = 0;
+  pitch_height_stats.Clear();
+  for (auto &row : rows_) {
+    row.EstimatePitch(pass1);
+    if (!row.good_pitches()) {
+      num_bad_rows_++;
+      continue;
+    }
+    pitch_height_stats.Add(row.height() + row.gap(),
+                           row.pitch(), row.good_pitches());
+    if (row.height_pitch_ratio() > 1.1) {
+      num_tall_rows_++;
+    }
+  }
+
+  pitch_height_stats.Finish();
+  for (auto &row : rows_) {
+    if (row.good_pitches() >= 5) {
+      // We have enough evidences. Just use the pitch estimation
+      // from this row.
+      row.set_estimated_pitch(row.pitch());
+      continue;
+    }
+    if (row.num_chars() <= 1) {
+      continue;
+    }
+
+    const float page_estimate =
+        pitch_height_stats.EstimateYFor(row.height() + row.gap(), 0.1f);
+    // CJK characters are more likely to be fragmented than poorly
+    // chopped. So trust the page-level estimation of character
+    // pitch only if it's larger than row-level estimation or
+    // row-level estimation is too large (2x bigger than row height).
+    const bool trust_page = page_estimate > row.pitch() ||
+                            row.pitch() > row.height() * 2.0;
+    if (trust_page) {
+      row.set_estimated_pitch(page_estimate);
+    } else {
+      row.set_estimated_pitch(row.pitch());
+    }
+  }
+}
+
+// Entry point of the CJK fixed pitch analysis: runs pass 1 twice, returns
+// early if the page does not look fixed pitch, then repeats the pass 2 steps
+// until nothing changes or the iteration limit is reached, and finally writes
+// the results into the rows.
+void compute_fixed_pitch_cjk(ICOORD page_tr,
+                             TO_BLOCK_LIST *port_blocks) {
+  FPAnalyzer analyzer(page_tr, port_blocks);
+  if (analyzer.num_rows() == 0) {
+    return;
+  }
+
+  // Pass 1 is performed twice: the second round uses the initial estimation
+  // of the row pitches from the first round, for better estimation.
+  for (int round = 0; round < 2; ++round) {
+    analyzer.Pass1Analyze();
+    analyzer.EstimatePitch(true);
+  }
+
+  // Early exit if the page doesn't seem to contain fixed pitch rows.
+  if (!analyzer.maybe_fixed_pitch()) {
+    if (textord_debug_pitch_test) {
+      tprintf("Page doesn't seem to contain fixed pitch rows\n");
+    }
+    return;
+  }
+
+  unsigned iteration = 0;
+  for (;;) {
+    analyzer.MergeFragments();
+    analyzer.FinalizeLargeChars();
+    analyzer.EstimatePitch(false);
+    iteration++;
+    if (!analyzer.Pass2Analyze() || iteration >= analyzer.max_iteration()) {
+      break;
+    }
+  }
+
+  if (textord_debug_pitch_test) {
+    tprintf("compute_fixed_pitch_cjk finished after %u iteration (limit=%u)\n",
+            iteration, analyzer.max_iteration());
+  }
+
+  analyzer.OutputEstimations();
+  if (textord_debug_pitch_test) {
+    analyzer.DebugOutputResult();
+  }
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/cjkpitch.h b/tesseract/src/textord/cjkpitch.h
new file mode 100644
index 00000000..d42ab79f
--- /dev/null
+++ b/tesseract/src/textord/cjkpitch.h
@@ -0,0 +1,75 @@
+///////////////////////////////////////////////////////////////////////
+// File: cjkpitch.h
+// Description: Code to determine fixed pitchness and the pitch if fixed,
+// for CJK text.
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: takenaka@google.com (Hiroshi Takenaka)
+// Created: Mon Jun 27 12:48:35 JST 2011
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+#ifndef CJKPITCH_H_
+#define CJKPITCH_H_
+
+#include "blobbox.h"
+
+namespace tesseract {
+
+// Function to test "fixed-pitchness" of the input text and estimating
+// character pitch parameters for it, based on CJK fixed-pitch layout
+// model.
+//
+// This function assumes that a fixed-pitch CJK text has the following
+// characteristics:
+//
+// - Most glyphs are designed to fit within the same sized square
+// (imaginary body). Also they are aligned to the center of their
+// imaginary bodies.
+// - The imaginary body is always a regular rectangle.
+// - There may be some extra space between character bodies
+// (tracking).
+// - There may be some extra space after punctuation.
+// - The text is *not* space-delimited. Thus spaces are rare.
+// - A character may consist of multiple unconnected blobs.
+//
+// And the function works in two passes. On pass 1, it looks for
+// "good" blobs that have the same pitch on both sides and look like
+// complete CJK characters. Then it estimates the character
+// pitch for every row, based on those good blobs. If we couldn't find
+// enough good blobs for a row, then the pitch is estimated from other
+// rows with similar character height instead.
+//
+// Pass 2 is an iterative process to fit the blobs into fixed-pitch
+// character cells. Once we have estimated the character pitch, blobs
+// that are almost as large as the pitch can be considered to be
+// complete characters. And once we know that some characters are
+// complete characters, we can estimate the region occupied by its
+// neighbors. And so on.
+//
+// We repeat the process until all ambiguities are resolved. Then make
+// the final decision about fixed-pitchness of each row and compute
+// pitch and spacing parameters.
+//
+// (If a row is considered to be proportional, pitch_decision for the
+// row is set to PITCH_CORR_PROP and the later phase
+// (i.e. Textord::to_spacing()) should determine its spacing
+// parameters)
+//
+// This function doesn't provide all information required by
+// fixed_pitch_words() and the rows need to be processed with
+// make_prop_words() even if they are fixed pitched.
+void compute_fixed_pitch_cjk(ICOORD page_tr, // top right corner of the page
+ TO_BLOCK_LIST *port_blocks); // input list of blocks to analyze
+
+} // namespace tesseract
+
+#endif // CJKPITCH_H_
diff --git a/tesseract/src/textord/colfind.cpp b/tesseract/src/textord/colfind.cpp
new file mode 100644
index 00000000..e305a2c3
--- /dev/null
+++ b/tesseract/src/textord/colfind.cpp
@@ -0,0 +1,1642 @@
+///////////////////////////////////////////////////////////////////////
+// File: colfind.cpp
+// Description: Class to hold BLOBNBOXs in a grid for fast access
+// to neighbours.
+// Author: Ray Smith
+//
+// (C) Copyright 2007, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "colfind.h"
+
+#include "ccnontextdetect.h"
+#include "colpartition.h"
+#include "colpartitionset.h"
+#ifndef DISABLED_LEGACY_ENGINE
+#include "equationdetectbase.h"
+#endif
+#include "linefind.h"
+#include "normalis.h"
+#include "strokewidth.h"
+#include "blobbox.h"
+#include "scrollview.h"
+#include "tablefind.h"
+#include "params.h"
+#include "workingpartset.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+// When assigning columns, the max number of misfit grid rows/ColPartitionSets
+// that can be ignored.
+const int kMaxIncompatibleColumnCount = 2;
+// Max fraction of mean_column_gap_ for the gap between two partitions within a
+// column to allow them to merge.
+const double kHorizontalGapMergeFraction = 0.5;
+// Minimum gutter width as a fraction of gridsize (product is cast to int).
+const double kMinGutterWidthGrid = 0.5; // Used to set min_gutter_width_.
+// Max multiple of a partition's median size as a distance threshold for
+// adding noise blobs.
+const double kMaxDistToPartSizeRatio = 1.5;
+
+#ifndef GRAPHICS_DISABLED
+static BOOL_VAR(textord_tabfind_show_initial_partitions,
+ false, "Show partition bounds"); // Tested in FindBlocks ("InitialPartitions" window).
+static BOOL_VAR(textord_tabfind_show_reject_blobs,
+ false, "Show blobs rejected as noise"); // Tested in FindBlocks ("Rejected blobs" window).
+static INT_VAR(textord_tabfind_show_partitions, 0,
+ "Show partition bounds, waiting if >1 (ScrollView)"); // Tested in FindBlocks ("Partitions" window).
+static BOOL_VAR(textord_tabfind_show_columns, false, "Show column bounds (ScrollView)"); // Tested in MakeColumns.
+static BOOL_VAR(textord_tabfind_show_blocks, false, "Show final block bounds (ScrollView)"); // Tested in SetupAndFilterNoise and DisplayBlocks.
+#endif
+static BOOL_VAR(textord_tabfind_find_tables, true, "run table detection"); // Tested in FindBlocks (non-sparse modes).
+
+#ifndef GRAPHICS_DISABLED
+ScrollView* ColumnFinder::blocks_win_ = nullptr; // "Blocks" window; created on first use in DisplayBlocks.
+#endif
+
+// Builds a ColumnFinder for the image (or rectangle) bounded by bleft and
+// tright.
+// gridsize is an estimate of the text size in the image; a suitable value
+// is TO_BLOCK::line_size after find_components has been used to make the
+// blobs.
+// vlines is a (possibly empty) list of TabVector, and vertical_x/vertical_y
+// are the sum logical vertical vector produced by
+// LineFinder::FindVerticalLines.
+// The hlines list is added to horizontal_lines_.
+ColumnFinder::ColumnFinder(int gridsize,
+                           const ICOORD& bleft, const ICOORD& tright,
+                           int resolution, bool cjk_script,
+                           double aligned_gap_fraction,
+                           TabVector_LIST* vlines, TabVector_LIST* hlines,
+                           int vertical_x, int vertical_y)
+    : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y,
+              resolution),
+      cjk_script_(cjk_script),
+      min_gutter_width_(static_cast<int>(kMinGutterWidthGrid * gridsize)),
+      // Starts out as the full width of the processed rectangle.
+      mean_column_gap_(tright.x() - bleft.x()),
+      tabfind_aligned_gap_fraction_(aligned_gap_fraction),
+      deskew_(0.0f, 0.0f),
+      reskew_(1.0f, 0.0f),
+      rotation_(1.0f, 0.0f),
+      rerotate_(1.0f, 0.0f),
+      text_rotation_(0.0f, 0.0f),
+      best_columns_(nullptr),
+      stroke_width_(nullptr),
+      part_grid_(gridsize, bleft, tright),
+      nontext_map_(nullptr),
+      projection_(resolution),
+      denorm_(nullptr),
+      input_blobs_win_(nullptr),
+      equation_detect_(nullptr) {
+  TabVector_IT hline_it(&horizontal_lines_);
+  hline_it.add_list_after(hlines);
+}
+
+// Frees the column sets, best_columns_, stroke_width_, the input blobs
+// window, the non-text map and the chain of DENORMs, then deletes the
+// blob data that is still held by the partition and image blob lists.
+ColumnFinder::~ColumnFinder() {
+  column_sets_.delete_data_pointers();
+  delete [] best_columns_;
+  delete stroke_width_;
+  delete input_blobs_win_;
+  pixDestroy(&nontext_map_);
+  // Unlink and delete the DENORMs one at a time, following predecessor().
+  while (denorm_ != nullptr) {
+    DENORM* const doomed = denorm_;
+    denorm_ = const_cast<DENORM*>(doomed->predecessor());
+    delete doomed;
+  }
+
+  // The ColPartitions are destroyed automatically, but any boxes in
+  // the noise_parts_ list are owned and need to be deleted explicitly.
+  ColPartition_IT part_it(&noise_parts_);
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    part_it.data()->DeleteBoxes();
+  }
+  // The same goes for the good_parts_ list. Only image parts still have
+  // boxes there: text parts have already given their boxes on to the
+  // TO_BLOCK, and have empty lists.
+  part_it.set_to_list(&good_parts_);
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    part_it.data()->DeleteBoxes();
+  }
+  // Blobs left on the image_bblobs_ list need their cblobs deleted. That
+  // only happens after an early return from FindColumns; on a normal
+  // return the blobs go into the grid and end up in noise_parts_,
+  // good_parts_ or the output blocks.
+  BLOBNBOX_IT blob_it(&image_bblobs_);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    delete blob_it.data()->cblob();
+  }
+}
+
+// Performs initial processing on the blobs in the input_block:
+// Setup the part_grid, stroke_width_, nontext_map.
+// Obvious noise blobs are filtered out and used to mark the nontext_map_.
+// Initial stroke-width analysis is used to get local text alignment
+// direction, so the textline projection_ map can be setup.
+// On return, IsVerticallyAlignedText may be called (now optionally) to
+// determine the gross textline alignment of the page.
+void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode,
+ Pix* photo_mask_pix,
+ TO_BLOCK* input_block) {
+ part_grid_.Init(gridsize(), bleft(), tright());
+ delete stroke_width_; // Drop any StrokeWidth left over from an earlier call.
+ stroke_width_ = new StrokeWidth(gridsize(), bleft(), tright());
+ min_gutter_width_ = static_cast<int>(kMinGutterWidthGrid * gridsize());
+ input_block->ReSetAndReFilterBlobs();
+ #ifndef GRAPHICS_DISABLED
+ if (textord_tabfind_show_blocks) {
+ input_blobs_win_ = MakeWindow(0, 0, "Filtered Input Blobs"); // NOTE(review): a window from an earlier call is not deleted first.
+ input_block->plot_graded_blobs(input_blobs_win_);
+ }
+ #endif // !GRAPHICS_DISABLED
+ SetBlockRuleEdges(input_block);
+ pixDestroy(&nontext_map_); // Discard any previous map; it is recomputed below.
+ // Run a preliminary strokewidth neighbour detection on the medium blobs.
+ stroke_width_->SetNeighboursOnMediumBlobs(input_block);
+ CCNonTextDetect nontext_detect(gridsize(), bleft(), tright());
+ // Remove obvious noise and make the initial non-text map.
+ nontext_map_ = nontext_detect.ComputeNonTextMask(textord_debug_tabfind,
+ photo_mask_pix, input_block);
+ stroke_width_->FindTextlineDirectionAndFixBrokenCJK(pageseg_mode, cjk_script_,
+ input_block);
+ // Clear the strokewidth grid ready for rotation or leader finding.
+ stroke_width_->Clear();
+}
+
+// Returns true if the text is vertically aligned, as decided by
+// StrokeWidth::TestVerticalTextDirection, which also fills osd_blobs with
+// blobs of moderate aspect ratio, in the most frequent writing direction,
+// so that orientation and script detection can test the character
+// orientation.
+// block is the single block for the whole page or rectangle to be OCRed.
+// Vertical alignment can mean text whose writing direction is vertical
+// (say Japanese), or horizontally written text that appears vertically
+// aligned because the image is not the right way up.
+bool ColumnFinder::IsVerticallyAlignedText(double find_vertical_text_ratio,
+                                           TO_BLOCK* block,
+                                           BLOBNBOX_CLIST* osd_blobs) {
+  const bool vertically_aligned = stroke_width_->TestVerticalTextDirection(
+      find_vertical_text_ratio, block, osd_blobs);
+  return vertically_aligned;
+}
+
+// Rotates the blobs and the TabVectors so that the gross writing direction
+// (the text lines) is horizontal and lines are read down the page. The
+// rotation that was applied is kept in rotation_, and its inverse in
+// rerotate_.
+// A further rotation, to be applied during recognition so that the rotated
+// blobs are upright, is kept in text_rotation_.
+//
+// Arguments:
+//   vertical_text_lines true if the text lines are vertical.
+//   recognition_rotation [0..3] is the number of anti-clockwise 90 degree
+//   rotations from osd required for the text to be upright and readable.
+void ColumnFinder::CorrectOrientation(TO_BLOCK* block,
+                                      bool vertical_text_lines,
+                                      int recognition_rotation) {
+  const FCOORD anticlockwise90(0.0f, 1.0f);
+  const FCOORD clockwise90(0.0f, -1.0f);
+  const FCOORD rotation180(-1.0f, 0.0f);
+  const FCOORD norotation(1.0f, 0.0f);
+
+  text_rotation_ = norotation;
+  // Page rotation that makes the text upright, as implied by
+  // recognition_rotation. Values other than 1, 2 and 3 mean no rotation.
+  switch (recognition_rotation) {
+    case 1:
+      rotation_ = anticlockwise90;
+      break;
+    case 2:
+      rotation_ = rotation180;
+      break;
+    case 3:
+      rotation_ = clockwise90;
+      break;
+    default:
+      rotation_ = norotation;
+      break;
+  }
+  // The writing direction is inferred to be vertical if several vertical
+  // text lines were detected, and horizontal if not. For a page orientation
+  // of 90 or 270 degrees the true writing direction is the opposite of
+  // that inference.
+  if ((recognition_rotation & 1) != 0) {
+    vertical_text_lines = !vertical_text_lines;
+  }
+  // For text that is still believed to be vertical, the convention is to
+  // rotate the page ccw 90 degrees to make the text lines horizontal, and
+  // to mark the blobs for rotation cw 90 degrees for classification so
+  // that the text order is correct after recognition.
+  if (vertical_text_lines) {
+    rotation_.rotate(anticlockwise90);
+    text_rotation_.rotate(clockwise90);
+  }
+  // rerotate_ is the inverse of rotation_.
+  rerotate_ = FCOORD(rotation_.x(), -rotation_.y());
+  const bool page_is_rotated =
+      rotation_.x() != 1.0f || rotation_.y() != 0.0f;
+  if (page_is_rotated) {
+    // Rotate all the blobs and tab vectors.
+    RotateBlobList(rotation_, &block->large_blobs);
+    RotateBlobList(rotation_, &block->blobs);
+    RotateBlobList(rotation_, &block->small_blobs);
+    RotateBlobList(rotation_, &block->noise_blobs);
+    TabFind::ResetForVerticalText(rotation_, rerotate_, &horizontal_lines_,
+                                  &min_gutter_width_);
+    part_grid_.Init(gridsize(), bleft(), tright());
+    // Reset all blobs to initial state and filter by size, because after
+    // rotation the list a blob belongs on could have changed.
+    block->ReSetAndReFilterBlobs();
+    SetBlockRuleEdges(block);
+    stroke_width_->CorrectForRotation(rerotate_, &part_grid_);
+  }
+  if (textord_debug_tabfind) {
+    tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n",
+            vertical_text_lines, recognition_rotation,
+            rotation_.x(), rotation_.y(),
+            text_rotation_.x(), text_rotation_.y());
+  }
+  // Set up the denormalization; there must not be one already.
+  ASSERT_HOST(denorm_ == nullptr);
+  denorm_ = new DENORM;
+  denorm_->SetupNormalization(nullptr, &rotation_, nullptr,
+                              0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
+}
+
+// Finds blocks of text, image, rule line, table etc, returning them in the
+// blocks and to_blocks
+// (Each TO_BLOCK points to the basic BLOCK and adds more information.)
+// Image blocks are generated by a combination of photo_mask_pix (which may
+// NOT be nullptr) and the rejected text found during preliminary textline
+// finding.
+// The input_block is the result of a call to find_components, and contains
+// the blobs found in the image or rectangle to be OCRed. These blobs will be
+// removed and placed in the output blocks, while unused ones will be deleted.
+// pageseg_mode selects the layout mode (there is no single_column argument);
+// the input is still divided into blocks of equal line spacing/text size.
+// scaled_color is scaled down by scaled_factor from the input color image,
+// and may be nullptr if the input was not color.
+// grey_pix is optional, but if present must match the photo_mask_pix in size,
+// and must be a *real* grey image instead of binary_pix * 255.
+// thresholds_pix is expected to be present iff grey_pix is present and
+// can be an integer factor reduction of the grey_pix. It represents the
+// thresholds that were used to create the binary_pix from the grey_pix.
+// If diacritic_blobs is non-null, then diacritics/noise blobs, that would
+// confuse layout analysis by causing textline overlap, are placed there,
+// with the expectation that they will be reassigned to words later and
+// noise/diacriticness determined via classification.
+// Returns -1 if the user hits the 'd' key in the blocks window while running
+// in debug mode, which requests a retry with more debug info.
+int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color,
+ int scaled_factor, TO_BLOCK* input_block,
+ Pix* photo_mask_pix, Pix* thresholds_pix,
+ Pix* grey_pix, DebugPixa* pixa_debug,
+ BLOCK_LIST* blocks, BLOBNBOX_LIST* diacritic_blobs,
+ TO_BLOCK_LIST* to_blocks) {
+ pixOr(photo_mask_pix, photo_mask_pix, nontext_map_); // OR nontext_map_ into photo_mask_pix in place.
+ stroke_width_->FindLeaderPartitions(input_block, &part_grid_);
+ stroke_width_->RemoveLineResidue(&big_parts_);
+ FindInitialTabVectors(nullptr, min_gutter_width_, tabfind_aligned_gap_fraction_,
+ input_block);
+ SetBlockRuleEdges(input_block);
+ stroke_width_->GradeBlobsIntoPartitions(
+ pageseg_mode, rerotate_, input_block, nontext_map_, denorm_, cjk_script_,
+ &projection_, diacritic_blobs, &part_grid_, &big_parts_);
+ if (!PSM_SPARSE(pageseg_mode)) {
+ ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
+ input_block, this, pixa_debug, &part_grid_,
+ &big_parts_);
+ ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_,
+ photo_mask_pix);
+ ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_,
+ input_block, this, pixa_debug, &part_grid_,
+ &big_parts_);
+ }
+ part_grid_.ReTypeBlobs(&image_bblobs_);
+ TidyBlobs(input_block);
+ Reset();
+ // TODO(rays) need to properly handle big_parts_.
+ ColPartition_IT p_it(&big_parts_);
+ for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward())
+ p_it.data()->DisownBoxesNoAssert();
+ big_parts_.clear();
+ delete stroke_width_; // stroke_width_ is not used again in this function.
+ stroke_width_ = nullptr;
+ // Compute the edge offsets whether or not there is a grey_pix. It is done
+ // here as the c_blobs haven't been touched by rotation or anything yet,
+ // so no denorm is required, yet the text has been separated from image, so
+ // no time is wasted running it on image blobs.
+ input_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
+
+ // A note about handling right-to-left scripts (Hebrew/Arabic):
+ // The columns must be reversed and come out in right-to-left instead of
+ // the normal left-to-right order. Because the left-to-right ordering
+ // is implicit in many data structures, it is simpler to fool the algorithms
+ // into thinking they are dealing with left-to-right text.
+ // To do this, we reflect the needed data in the y-axis and then reflect
+ // the blocks back after they have been created. This is a temporary
+ // arrangement that is confined to this function only, so the reflection
+ // is completely invisible in the output blocks.
+ // The only objects reflected are:
+ // The vertical separator lines that have already been found;
+ // The bounding boxes of all BLOBNBOXES on all lists on the input_block
+ // plus the image_bblobs. The outlines are not touched, since they are
+ // not looked at.
+ bool input_is_rtl = input_block->block->right_to_left();
+ if (input_is_rtl) {
+ // Reflect the vertical separator lines (member of TabFind).
+ ReflectInYAxis();
+ // Reflect the blob boxes.
+ ReflectForRtl(input_block, &image_bblobs_);
+ part_grid_.ReflectInYAxis();
+ }
+
+ if (!PSM_SPARSE(pageseg_mode)) {
+ if (!PSM_COL_FIND_ENABLED(pageseg_mode)) {
+ // No tab stops needed. Just the grid that FindTabVectors makes.
+ DontFindTabVectors(&image_bblobs_, input_block, &deskew_, &reskew_);
+ } else {
+ SetBlockRuleEdges(input_block);
+ // Find the tab stops, estimate skew, and deskew the tabs, blobs and
+ // part_grid_.
+ FindTabVectors(&horizontal_lines_, &image_bblobs_, input_block,
+ min_gutter_width_, tabfind_aligned_gap_fraction_,
+ &part_grid_, &deskew_, &reskew_);
+ // Add the deskew to the denorm_.
+ auto* new_denorm = new DENORM;
+ new_denorm->SetupNormalization(nullptr, &deskew_, denorm_,
+ 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f);
+ denorm_ = new_denorm; // The previous denorm_ was passed to SetupNormalization above.
+ }
+ SetBlockRuleEdges(input_block);
+ part_grid_.SetTabStops(this);
+
+ // Make the column_sets_.
+ if (!MakeColumns(false)) { // false: not single-column mode.
+ tprintf("Empty page!!\n");
+ part_grid_.DeleteParts();
+ return 0; // This is an empty page.
+ }
+
+ // Refill the grid using rectangular spreading, and get the benefit
+ // of the completed tab vectors marking the rule edges of each blob.
+ Clear();
+ #ifndef GRAPHICS_DISABLED
+ if (textord_tabfind_show_reject_blobs) {
+ ScrollView* rej_win = MakeWindow(500, 300, "Rejected blobs");
+ input_block->plot_graded_blobs(rej_win);
+ }
+ #endif // !GRAPHICS_DISABLED
+ InsertBlobsToGrid(false, false, &image_bblobs_, this);
+ InsertBlobsToGrid(true, true, &input_block->blobs, this);
+
+ part_grid_.GridFindMargins(best_columns_);
+ // Split and merge the partitions by looking at local neighbours.
+ GridSplitPartitions();
+ // Resolve unknown partitions by adding to an existing partition, fixing
+ // the type, or declaring them noise.
+ part_grid_.GridFindMargins(best_columns_);
+ GridMergePartitions();
+ // Insert any unused noise blobs that are close enough to an appropriate
+ // partition.
+ InsertRemainingNoise(input_block);
+ // Add horizontal line separators as partitions.
+ GridInsertHLinePartitions();
+ GridInsertVLinePartitions();
+ // Recompute margins based on a local neighbourhood search.
+ part_grid_.GridFindMargins(best_columns_);
+ SetPartitionTypes();
+ }
+#ifndef GRAPHICS_DISABLED
+ if (textord_tabfind_show_initial_partitions) {
+ ScrollView* part_win = MakeWindow(100, 300, "InitialPartitions");
+ part_grid_.DisplayBoxes(part_win);
+ DisplayTabVectors(part_win);
+ }
+#endif
+ if (!PSM_SPARSE(pageseg_mode)) {
+ #ifndef DISABLED_LEGACY_ENGINE
+ if (equation_detect_) {
+ equation_detect_->FindEquationParts(&part_grid_, best_columns_);
+ }
+ #endif
+ if (textord_tabfind_find_tables) {
+ TableFinder table_finder;
+ table_finder.Init(gridsize(), bleft(), tright());
+ table_finder.set_resolution(resolution_);
+ table_finder.set_left_to_right_language(
+ !input_block->block->right_to_left());
+ // Copy cleaned partitions from part_grid_ to clean_part_grid_ and
+ // insert dot-like noise into period_grid_
+ table_finder.InsertCleanPartitions(&part_grid_, input_block);
+ // Get Table Regions
+ table_finder.LocateTables(&part_grid_, best_columns_, WidthCB(), reskew_);
+ }
+ GridRemoveUnderlinePartitions();
+ part_grid_.DeleteUnknownParts(input_block);
+
+ // Build the partitions into chains that belong in the same block and
+ // refine into one-to-one links, then smooth the types within each chain.
+ part_grid_.FindPartitionPartners();
+ part_grid_.FindFigureCaptions();
+ part_grid_.RefinePartitionPartners(true);
+ SmoothPartnerRuns();
+
+ #ifndef GRAPHICS_DISABLED
+ if (textord_tabfind_show_partitions) {
+ ScrollView* window = MakeWindow(400, 300, "Partitions");
+ if (window != nullptr) {
+ part_grid_.DisplayBoxes(window);
+ if (!textord_debug_printable)
+ DisplayTabVectors(window);
+ if (window != nullptr && textord_tabfind_show_partitions > 1) { // NOTE(review): window != nullptr is already guaranteed by the enclosing if.
+ delete window->AwaitEvent(SVET_DESTROY);
+ }
+ }
+ }
+ #endif // !GRAPHICS_DISABLED
+ part_grid_.AssertNoDuplicates();
+ }
+ // Ownership of the ColPartitions moves from part_sets_ to part_grid_ here,
+ // and ownership of the BLOBNBOXes moves to the ColPartitions.
+ // (They were previously owned by the block or the image_bblobs list.)
+ ReleaseBlobsAndCleanupUnused(input_block);
+ // Ownership of the ColPartitions moves from part_grid_ to good_parts_ and
+ // noise_parts_ here. In text blocks, ownership of the BLOBNBOXes moves
+ // from the ColPartitions to the output TO_BLOCK. In non-text, the
+ // BLOBNBOXes stay with the ColPartitions and get deleted in the destructor.
+ if (PSM_SPARSE(pageseg_mode))
+ part_grid_.ExtractPartitionsAsBlocks(blocks, to_blocks);
+ else
+ TransformToBlocks(blocks, to_blocks);
+ if (textord_debug_tabfind) {
+ tprintf("Found %d blocks, %d to_blocks\n",
+ blocks->length(), to_blocks->length());
+ }
+
+#ifndef GRAPHICS_DISABLED
+ DisplayBlocks(blocks);
+#endif
+ RotateAndReskewBlocks(input_is_rtl, to_blocks);
+ int result = 0; // Stays 0 unless 'd' is typed in the blocks window below.
+ #ifndef GRAPHICS_DISABLED
+ if (blocks_win_ != nullptr) {
+ bool waiting = false;
+ do {
+ waiting = false;
+ SVEvent* event = blocks_win_->AwaitEvent(SVET_ANY);
+ if (event->type == SVET_INPUT && event->parameter != nullptr) {
+ if (*event->parameter == 'd')
+ result = -1; // Retry with more debug info was requested.
+ else
+ blocks->clear(); // Any other typed input empties the output block list.
+ } else if (event->type == SVET_DESTROY) {
+ blocks_win_ = nullptr;
+ } else {
+ waiting = true; // Any other event (or input without a parameter): keep waiting.
+ }
+ delete event;
+ } while (waiting);
+ }
+ #endif // !GRAPHICS_DISABLED
+ return result;
+}
+
+// Outputs the reskew rotation (reskew_) and the rotation required to
+// deskew, which is reskew_ with its y component negated.
+void ColumnFinder::GetDeskewVectors(FCOORD* deskew, FCOORD* reskew) {
+  *reskew = reskew_;
+  FCOORD inverse(reskew_);
+  inverse.set_y(-inverse.y());
+  *deskew = inverse;
+}
+
+#ifndef DISABLED_LEGACY_ENGINE
+// Stores the equation detector that FindBlocks will call (when non-null)
+// in non-sparse page segmentation modes. The destructor does not delete it.
+void ColumnFinder::SetEquationDetect(EquationDetectBase* detect) {
+  this->equation_detect_ = detect;
+}
+#endif
+
+//////////////// PRIVATE CODE /////////////////////////
+
+#ifndef GRAPHICS_DISABLED
+
+// When textord_tabfind_show_blocks is set, shows the blob and block
+// bounding boxes in a window called Blocks. The window is created on the
+// first call and cleared and reused on later calls.
+void ColumnFinder::DisplayBlocks(BLOCK_LIST* blocks) {
+  if (!textord_tabfind_show_blocks) {
+    return;
+  }
+  if (blocks_win_ != nullptr) {
+    blocks_win_->Clear();
+  } else {
+    blocks_win_ = MakeWindow(700, 300, "Blocks");
+  }
+  DisplayBoxes(blocks_win_);
+  // Blocks are plotted with serial numbers counting up from 1.
+  int serial = 1;
+  BLOCK_IT it(blocks);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    it.data()->pdblk.plot(blocks_win_, serial,
+                          textord_debug_printable ? ScrollView::BLUE
+                                                  : ScrollView::GREEN);
+    ++serial;
+  }
+  blocks_win_->Update();
+}
+
+// Opens a window called Columns and displays the column edges at each grid
+// y coordinate for which best_columns_ has an entry.
+// The sets argument is not used.
+void ColumnFinder::DisplayColumnBounds(PartSetVector* sets) {
+  ScrollView* columns_win = MakeWindow(50, 300, "Columns");
+  DisplayBoxes(columns_win);
+  columns_win->Pen(textord_debug_printable ? ScrollView::BLUE
+                                           : ScrollView::GREEN);
+  for (int row = 0; row < gridheight_; ++row) {
+    ColPartitionSet* row_columns = best_columns_[row];
+    if (row_columns == nullptr) {
+      continue;
+    }
+    row_columns->DisplayColumnEdges(row * gridsize_, (row + 1) * gridsize_,
+                                    columns_win);
+  }
+}
+
+#endif // !GRAPHICS_DISABLED
+
+// Sets up column_sets_ (the determined column layout at each horizontal
+// slice). Returns false if the page is empty.
+bool ColumnFinder::MakeColumns(bool single_column) {
+ // part_sets is a temporary structure used during column creation,
+ // and is a vector of ColPartitionSets, representing ColPartitions found
+ // at horizontal slices through the page.
+ PartSetVector part_sets;
+ if (!single_column) {
+ if (!part_grid_.MakeColPartSets(&part_sets))
+ return false; // Empty page.
+ ASSERT_HOST(part_grid_.gridheight() == gridheight_);
+ // Try using only the good parts first.
+ bool good_only = true;
+ do {
+ for (int i = 0; i < gridheight_; ++i) {
+ ColPartitionSet* line_set = part_sets.get(i);
+ if (line_set != nullptr && line_set->LegalColumnCandidate()) {
+ ColPartitionSet* column_candidate = line_set->Copy(good_only);
+ if (column_candidate != nullptr)
+ column_candidate->AddToColumnSetsIfUnique(&column_sets_, WidthCB());
+ }
+ }
+ good_only = !good_only; // false after the first pass, true after the second.
+ } while (column_sets_.empty() && !good_only); // At most two passes; the second runs only if the first found nothing.
+ if (textord_debug_tabfind)
+ PrintColumnCandidates("Column candidates");
+ // Improve the column candidates against themselves.
+ ImproveColumnCandidates(&column_sets_, &column_sets_);
+ if (textord_debug_tabfind)
+ PrintColumnCandidates("Improved columns");
+ // Improve the column candidates using part_sets.
+ ImproveColumnCandidates(&part_sets, &column_sets_);
+ }
+ ColPartitionSet* single_column_set =
+ part_grid_.MakeSingleColumnSet(WidthCB());
+ if (single_column_set != nullptr) {
+ // Always add the single column set as a backup even if not in
+ // single column mode.
+ single_column_set->AddToColumnSetsIfUnique(&column_sets_, WidthCB());
+ }
+ if (textord_debug_tabfind)
+ PrintColumnCandidates("Final Columns");
+ bool has_columns = !column_sets_.empty();
+ if (has_columns) {
+ // Divide the page into sections of uniform column layout.
+ bool any_multi_column = AssignColumns(part_sets);
+#ifndef GRAPHICS_DISABLED
+ if (textord_tabfind_show_columns) {
+ DisplayColumnBounds(&part_sets);
+ }
+#endif
+ ComputeMeanColumnGap(any_multi_column);
+ }
+ for (int i = 0; i < part_sets.size(); ++i) { // part_sets is empty here in single_column mode.
+ ColPartitionSet* line_set = part_sets.get(i);
+ if (line_set != nullptr) {
+ line_set->RelinquishParts(); // Presumably detaches the parts so the delete below does not free them - confirm.
+ delete line_set;
+ }
+ }
+ return has_columns;
+}
+
+// Tries to improve the candidates in column_sets by expanding the columns
+// and adding new partitions taken from the partition sets in src_sets.
+// src_sets may be the same vector as column_sets, in which case the
+// candidates serve as the source for improving themselves.
+// If no improved candidate can be made, the original candidates are put
+// back into column_sets; otherwise the originals are deleted.
+void ColumnFinder::ImproveColumnCandidates(PartSetVector* src_sets,
+                                           PartSetVector* column_sets) {
+  // Take the current candidates out of column_sets so it can be refilled.
+  PartSetVector originals;
+  originals.move(column_sets);
+  if (src_sets == column_sets) {
+    src_sets = &originals;
+  }
+  const int original_count = originals.size();
+  // The first pass copies only the good parts. The second pass copies with
+  // good_only == false and runs only if the first left column_sets empty.
+  for (int pass = 0; pass < 2; ++pass) {
+    const bool good_only = (pass == 0);
+    for (int index = 0; index < original_count; ++index) {
+      ColPartitionSet* candidate = originals.get(index);
+      ASSERT_HOST(candidate != nullptr);
+      ColPartitionSet* improved = candidate->Copy(good_only);
+      if (improved == nullptr) {
+        continue;
+      }
+      improved->ImproveColumnCandidate(WidthCB(), src_sets);
+      improved->AddToColumnSetsIfUnique(column_sets, WidthCB());
+    }
+    if (!column_sets->empty()) {
+      break;
+    }
+  }
+  if (column_sets->empty()) {
+    column_sets->move(&originals);
+  } else {
+    originals.delete_data_pointers();
+  }
+}
+
+// Prints the number of column candidates under the given title and, when
+// textord_debug_tabfind is at least 3, each candidate set as well.
+void ColumnFinder::PrintColumnCandidates(const char* title) {
+  const int num_sets = column_sets_.size();
+  tprintf("Found %d %s:\n", num_sets, title);
+  if (textord_debug_tabfind < 3) {
+    return;
+  }
+  for (int index = 0; index < num_sets; ++index) {
+    column_sets_.get(index)->Print();
+  }
+}
+
+// Assigns one of the candidate column_sets_ to every entry of part_sets (one
+// per grid row; part_sets.size() must equal gridheight()), covering the image
+// with as few changes in column partition as possible.
+// NOTE: this could be thought of as an optimization problem, but a simple
+// greedy algorithm is used instead. The algorithm repeatedly finds the modal
+// compatible column in an unassigned region and uses that with the extra
+// tweak of extending the modal region over small breaks in compatibility.
+// Where modal regions overlap, the boundary is chosen so as to minimize
+// the cost. Fills best_columns_; returns true if any set used is multi-column.
+bool ColumnFinder::AssignColumns(const PartSetVector& part_sets) {
+ int set_count = part_sets.size();
+ ASSERT_HOST(set_count == gridheight());
+ // Allocate and init the best_columns_.
+ best_columns_ = new ColPartitionSet*[set_count];
+ for (int y = 0; y < set_count; ++y)
+ best_columns_[y] = nullptr;
+ int column_count = column_sets_.size();
+ // column_set_costs[part_sets_ index][column_sets_ index] is
+ // < INT32_MAX if the partition set is compatible with the column set,
+ // in which case its value is the cost for that set used in deciding
+ // which competing set to assign.
+ // any_columns_possible[part_sets_ index] is true if any of
+ // column_set_costs[part_sets_ index][*] is < INT32_MAX.
+ // assigned_costs[part_sets_ index] is set to the column_set_costs
+ // of the assigned column_sets_ index or INT32_MAX if none is set.
+ // All three arrays are local scratch and are freed before returning.
+ bool* any_columns_possible = new bool[set_count];
+ int* assigned_costs = new int[set_count];
+ int** column_set_costs = new int*[set_count];
+ // Fill column_set_costs with the cost of each column set for each row,
+ // or INT32_MAX where the row is null or incompatible with the column set.
+ for (int part_i = 0; part_i < set_count; ++part_i) {
+ ColPartitionSet* line_set = part_sets.get(part_i);
+ bool debug = line_set != nullptr &&
+ WithinTestRegion(2, line_set->bounding_box().left(),
+ line_set->bounding_box().bottom());
+ column_set_costs[part_i] = new int[column_count];
+ any_columns_possible[part_i] = false;
+ assigned_costs[part_i] = INT32_MAX;
+ for (int col_i = 0; col_i < column_count; ++col_i) {
+ if (line_set != nullptr &&
+ column_sets_.get(col_i)->CompatibleColumns(debug, line_set,
+ WidthCB())) {
+ column_set_costs[part_i][col_i] =
+ column_sets_.get(col_i)->UnmatchedWidth(line_set);
+ any_columns_possible[part_i] = true;
+ } else {
+ column_set_costs[part_i][col_i] = INT32_MAX;
+ if (debug)
+ tprintf("Set id %d did not match at y=%d, lineset =%p\n",
+ col_i, part_i, line_set);
+ }
+ }
+ }
+ bool any_multi_column = false;
+ // Assign a column set to each vertical grid position.
+ // While there is an unassigned range, find its mode.
+ int start, end;  // Half-open row range [start, end).
+ while (BiggestUnassignedRange(set_count, any_columns_possible,
+ &start, &end)) {
+ if (textord_debug_tabfind >= 2)
+ tprintf("Biggest unassigned range = %d- %d\n", start, end);
+ // Find the modal column_set_id in the range.
+ int column_set_id = RangeModalColumnSet(column_set_costs,
+ assigned_costs, start, end);
+ if (textord_debug_tabfind >= 2) {
+ tprintf("Range modal column id = %d\n", column_set_id);
+ column_sets_.get(column_set_id)->Print();
+ }
+ // Now find the longest run of the column_set_id in the range.
+ ShrinkRangeToLongestRun(column_set_costs, assigned_costs,
+ any_columns_possible,
+ column_set_id, &start, &end);
+ if (textord_debug_tabfind >= 2)
+ tprintf("Shrunk range = %d- %d\n", start, end);
+ // Extend the start and end past the longest run, while there are
+ // only small gaps in compatibility that can be overcome by larger
+ // regions of compatibility beyond.
+ ExtendRangePastSmallGaps(column_set_costs, assigned_costs,
+ any_columns_possible,
+ column_set_id, -1, -1, &start);
+ --end;  // Switch to the last included row for the upward extension.
+ ExtendRangePastSmallGaps(column_set_costs, assigned_costs,
+ any_columns_possible,
+ column_set_id, 1, set_count, &end);
+ ++end;  // Back to an exclusive end.
+ if (textord_debug_tabfind)
+ tprintf("Column id %d applies to range = %d - %d\n",
+ column_set_id, start, end);
+ // Assign the column to the range, which now may overlap with other ranges.
+ AssignColumnToRange(column_set_id, start, end, column_set_costs,
+ assigned_costs);
+ if (column_sets_.get(column_set_id)->GoodColumnCount() > 1)
+ any_multi_column = true;
+ }
+ // If anything remains unassigned, the whole lot is unassigned, so
+ // arbitrarily assign id 0.
+ if (best_columns_[0] == nullptr) {
+ AssignColumnToRange(0, 0, gridheight_, column_set_costs, assigned_costs);
+ }
+ // Free the scratch arrays; best_columns_ is kept as the result.
+ for (int i = 0; i < set_count; ++i) {
+ delete [] column_set_costs[i];
+ }
+ delete [] assigned_costs;
+ delete [] any_columns_possible;
+ delete [] column_set_costs;
+ return any_multi_column;
+}
+
+// Scans the rows for the stretch of consecutive unassigned rows (those with
+// best_columns_[row] == nullptr) that contains the most rows for which some
+// column set is possible. A stretch always begins on a row that is both
+// unassigned and possible.
+// On return [*best_start, *best_end) is that stretch, or the empty range
+// [set_count, set_count) if there is none.
+// Returns true if a non-empty stretch was found.
+bool ColumnFinder::BiggestUnassignedRange(int set_count,
+                                          const bool* any_columns_possible,
+                                          int* best_start, int* best_end) {
+  *best_start = set_count;
+  *best_end = set_count;
+  int largest = 0;
+  int pos = 0;
+  while (pos < gridheight_) {
+    // Skip rows that are already assigned or can never be assigned.
+    int first = pos;
+    while (first < set_count &&
+           (best_columns_[first] != nullptr || !any_columns_possible[first])) {
+      ++first;
+    }
+    // Walk to the next assigned row, counting the assignable rows passed.
+    // The count starts at 1 for the row at first.
+    int candidates = 1;
+    int past = first + 1;
+    for (; past < set_count && best_columns_[past] == nullptr; ++past) {
+      if (any_columns_possible[past])
+        ++candidates;
+    }
+    if (first < set_count && candidates > largest) {
+      largest = candidates;
+      *best_start = first;
+      *best_end = past;
+    }
+    pos = past;
+  }
+  return *best_start < *best_end;
+}
+
+// Returns the index into column_sets_ that is compatible with the most rows
+// in [start, end). A column set counts for a row when its cost for that row
+// is lower than the cost already assigned to the row.
+int ColumnFinder::RangeModalColumnSet(int** column_set_costs,
+                                      const int* assigned_costs,
+                                      int start, int end) {
+  const int num_sets = column_sets_.size();
+  STATS votes(0, num_sets);
+  for (int row = start; row < end; ++row) {
+    const int* row_costs = column_set_costs[row];
+    const int current_cost = assigned_costs[row];
+    for (int set_id = 0; set_id < num_sets; ++set_id) {
+      if (row_costs[set_id] < current_cost)
+        votes.add(set_id, 1);
+    }
+  }
+  // At least one vote is required before asking for the mode.
+  ASSERT_HOST(votes.get_total() > 0);
+  return votes.mode();
+}
+
+// Given that there are many column_set_id compatible columns in the range,
+// shrinks the range to the longest contiguous run of compatibility, allowing
+// gaps where no columns are possible, but not where competing columns are
+// possible.
+// column_set_costs[row][set] is the cost of a column set for a row,
+// assigned_costs[row] is the cost currently assigned to the row, and
+// any_columns_possible[row] is false for rows that match no column set.
+// On entry [*best_start, *best_end) is the range to search. On return it is
+// the longest run found, or the empty range at the original end if the search
+// finds no run.
+void ColumnFinder::ShrinkRangeToLongestRun(int** column_set_costs,
+                                           const int* assigned_costs,
+                                           const bool* any_columns_possible,
+                                           int column_set_id,
+                                           int* best_start, int* best_end) {
+  // orig_start and orig_end are the maximum range we will look at.
+  const int orig_start = *best_start;
+  const int orig_end = *best_end;
+  int best_range_size = 0;
+  *best_start = orig_end;
+  *best_end = orig_end;
+  int end = orig_end;
+  for (int start = orig_start; start < orig_end; start = end) {
+    // Find the first row that can begin a run: either column_set_id beats
+    // the row's assigned cost, or nothing at all is possible on the row.
+    while (start < orig_end) {
+      if (column_set_costs[start][column_set_id] < assigned_costs[start] ||
+          !any_columns_possible[start])
+        break;
+      ++start;
+    }
+    // Find the first past the end: a row where some column set is possible
+    // but column_set_id does not beat that row's own assigned cost. Each row
+    // is compared with its own assigned cost, as in the run-start test above.
+    end = start + 1;
+    while (end < orig_end) {
+      if (column_set_costs[end][column_set_id] >= assigned_costs[end] &&
+          any_columns_possible[end])
+        break;
+      ++end;
+    }
+    if (start < orig_end && end - start > best_range_size) {
+      best_range_size = end - start;
+      *best_start = start;
+      *best_end = end;
+    }
+  }
+}
+
+// Moves *start in the direction of step, up to, but not including end, while
+// the only incompatible regions are no more than kMaxIncompatibleColumnCount
+// in size, and the compatible region beyond each is at least as big as it.
+void ColumnFinder::ExtendRangePastSmallGaps(int** column_set_costs,
+ const int* assigned_costs,
+ const bool* any_columns_possible,
+ int column_set_id,
+ int step, int end, int* start) {
+ if (textord_debug_tabfind > 2)
+ tprintf("Starting expansion at %d, step=%d, limit=%d\n",
+ *start, step, end);
+ if (*start == end)
+ return; // Cannot be expanded.
+
+ int barrier_size = 0;  // Incompatible rows (where something is possible).
+ int good_size = 0;  // Compatible rows found beyond the barrier.
+ do {
+ // Find the size of the incompatible barrier.
+ barrier_size = 0;
+ int i;
+ for (i = *start + step; i != end; i += step) {
+ if (column_set_costs[i][column_set_id] < assigned_costs[i])
+ break; // We are back on.
+ // Locations where none are possible don't count.
+ if (any_columns_possible[i])
+ ++barrier_size;
+ }
+ if (textord_debug_tabfind > 2)
+ tprintf("At %d, Barrier size=%d\n", i, barrier_size);
+ if (barrier_size > kMaxIncompatibleColumnCount)
+ return; // Barrier too big.
+ if (i == end) {
+ // We can't go any further, but the barrier was small, so go to the end.
+ *start = i - step;  // Last index before the limit.
+ return;
+ }
+ // Now find the size of the good region on the other side.
+ good_size = 1;  // Row i itself is compatible: the loop above broke on it.
+ for (i += step; i != end; i += step) {
+ if (column_set_costs[i][column_set_id] < assigned_costs[i])
+ ++good_size;
+ else if (any_columns_possible[i])
+ break;
+ }
+ if (textord_debug_tabfind > 2)
+ tprintf("At %d, good size = %d\n", i, good_size);
+ // If we had enough good ones we can extend the start and keep looking.
+ if (good_size >= barrier_size)
+ *start = i - step;
+ } while (good_size >= barrier_size);
+}
+
+// Records column_set_id as the chosen column set for every row in
+// [start, end): best_columns_ receives the set itself and assigned_costs
+// receives the cost of that set for the row.
+void ColumnFinder::AssignColumnToRange(int column_set_id, int start, int end,
+                                       int** column_set_costs,
+                                       int* assigned_costs) {
+  ColPartitionSet* chosen = column_sets_.get(column_set_id);
+  int row = start;
+  while (row < end) {
+    best_columns_[row] = chosen;
+    assigned_costs[row] = column_set_costs[row][column_set_id];
+    ++row;
+  }
+}
+
+// Sets mean_column_gap_ from the widths and gaps accumulated over the column
+// set of every grid row. The mean gap is used when any_multi_column is true
+// and there is at least one gap sample. Otherwise the mean column width is
+// used, or 0 if there are no width samples either.
+void ColumnFinder::ComputeMeanColumnGap(bool any_multi_column) {
+  int gap_sum = 0;
+  int gap_count = 0;
+  int width_sum = 0;
+  int width_count = 0;
+  for (int row = 0; row < gridheight_; ++row) {
+    ColPartitionSet* columns = best_columns_[row];
+    // Every row must have been given a column set by now.
+    ASSERT_HOST(columns != nullptr);
+    columns->AccumulateColumnWidthsAndGaps(&width_sum, &width_count,
+                                           &gap_sum, &gap_count);
+  }
+  if (any_multi_column && gap_count > 0) {
+    mean_column_gap_ = gap_sum / gap_count;
+  } else if (width_count > 0) {
+    mean_column_gap_ = width_sum / width_count;
+  } else {
+    mean_column_gap_ = 0;
+  }
+}
+
+//////// Functions that manipulate ColPartitions in the part_grid_ /////
+//////// to split, merge, find margins, and find types. //////////////
+
+// Helper that empties the list. Blobs with no owner() are deleted along with
+// their cblob(); blobs that have an owner are only unlinked from the list.
+static void ReleaseAllBlobsAndDeleteUnused(BLOBNBOX_LIST* blobs) {
+  BLOBNBOX_IT it(blobs);
+  while (!it.empty()) {
+    BLOBNBOX* extracted = it.extract();
+    if (extracted->owner() == nullptr) {
+      delete extracted->cblob();
+      delete extracted;
+    }
+    it.forward();
+  }
+}
+
+// Empties every blob list of the block, plus image_bblobs_, deleting the
+// blobs that have no owner. Owned blobs are just released from the lists so
+// the ColPartitions can pass ownership to the output blocks.
+void ColumnFinder::ReleaseBlobsAndCleanupUnused(TO_BLOCK* block) {
+  BLOBNBOX_LIST* const lists[] = {
+      &block->blobs,
+      &block->small_blobs,
+      &block->noise_blobs,
+      &block->large_blobs,
+      &image_bblobs_,
+  };
+  for (BLOBNBOX_LIST* list : lists) {
+    ReleaseAllBlobsAndDeleteUnused(list);
+  }
+}
+
+// Splits partitions that span exactly two columns where no blob lies in the column gap.
+void ColumnFinder::GridSplitPartitions() {
+ // Iterate the ColPartitions in the grid.
+ GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+ gsearch(&part_grid_);
+ gsearch.StartFullSearch();
+ ColPartition* dont_repeat = nullptr;  // Last part whose split had no effect.
+ ColPartition* part;
+ while ((part = gsearch.NextFullSearch()) != nullptr) {
+ if (part->blob_type() < BRT_UNKNOWN || part == dont_repeat)
+ continue; // Only applies to text partitions.
+ ColPartitionSet* column_set = best_columns_[gsearch.GridY()];
+ int first_col = -1;
+ int last_col = -1;
+ // Find which columns the partition spans.
+ part->ColumnRange(resolution_, column_set, &first_col, &last_col);
+ if (first_col > 0)
+ --first_col;
+ // Convert output column indices to physical column indices.
+ first_col /= 2;
+ last_col /= 2;
+ // We will only consider cases where a partition spans two columns,
+ // since a heading that spans more columns than that is most likely
+ // genuine.
+ if (last_col != first_col + 1)
+ continue;
+ // Set up a rectangle search x-bounded by the column gap and y by the part.
+ int y = part->MidY();
+ TBOX margin_box = part->bounding_box();
+ bool debug = AlignedBlob::WithinTestRegion(2, margin_box.left(),
+ margin_box.bottom());
+ if (debug) {
+ tprintf("Considering partition for GridSplit:");
+ part->Print();
+ }
+ ColPartition* column = column_set->GetColumnByIndex(first_col);
+ if (column == nullptr)
+ continue;
+ margin_box.set_left(column->RightAtY(y) + 2);  // Just right of the left column.
+ column = column_set->GetColumnByIndex(last_col);
+ if (column == nullptr)
+ continue;
+ margin_box.set_right(column->LeftAtY(y) - 2);  // Just left of the right column.
+ // TODO(rays) Decide whether to keep rectangular filling or not in the
+ // main grid and therefore whether we need a fancier search here.
+ // Now run the rect search on the main blob grid.
+ GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> rectsearch(this);
+ if (debug) {
+ tprintf("Searching box (%d,%d)->(%d,%d)\n",
+ margin_box.left(), margin_box.bottom(),
+ margin_box.right(), margin_box.top());
+ part->Print();
+ }
+ rectsearch.StartRectSearch(margin_box);
+ BLOBNBOX* bbox;
+ while ((bbox = rectsearch.NextRectSearch()) != nullptr) {
+ if (bbox->bounding_box().overlap(margin_box))
+ break;  // Found a blob in the gap; bbox stays non-null.
+ }
+ if (bbox == nullptr) {
+ // There seems to be nothing in the hole, so split the partition.
+ gsearch.RemoveBBox();
+ int x_middle = (margin_box.left() + margin_box.right()) / 2;
+ if (debug) {
+ tprintf("Splitting part at %d:", x_middle);
+ part->Print();
+ }
+ ColPartition* split_part = part->SplitAt(x_middle);
+ if (split_part != nullptr) {
+ if (debug) {
+ tprintf("Split result:");
+ part->Print();
+ split_part->Print();
+ }
+ part_grid_.InsertBBox(true, true, split_part);
+ } else {
+ // Split had no effect
+ if (debug)
+ tprintf("Split had no effect\n");
+ dont_repeat = part;  // Skip it if the full search returns it next.
+ }
+ part_grid_.InsertBBox(true, true, part);  // Re-insert after RemoveBBox above.
+ gsearch.RepositionIterator();
+ } else if (debug) {
+ tprintf("Part cannot be split: blob (%d,%d)->(%d,%d) in column gap\n",
+ bbox->bounding_box().left(), bbox->bounding_box().bottom(),
+ bbox->bounding_box().right(), bbox->bounding_box().top());
+ }
+ }
+}
+
+// Merges partitions where there is vertical overlap, within a single column,
+// and the horizontal gap is small enough (or either partition is narrow).
+void ColumnFinder::GridMergePartitions() {
+ // Iterate the ColPartitions in the grid.
+ GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+ gsearch(&part_grid_);
+ gsearch.StartFullSearch();
+ ColPartition* part;
+ while ((part = gsearch.NextFullSearch()) != nullptr) {
+ if (part->IsUnMergeableType())
+ continue;
+ // Set up a rectangle search x-bounded by the column and y by the part.
+ ColPartitionSet* columns = best_columns_[gsearch.GridY()];
+ TBOX box = part->bounding_box();
+ bool debug = AlignedBlob::WithinTestRegion(1, box.left(), box.bottom());
+ if (debug) {
+ tprintf("Considering part for merge at:");
+ part->Print();
+ }
+ int y = part->MidY();
+ ColPartition* left_column = columns->ColumnContaining(box.left(), y);
+ ColPartition* right_column = columns->ColumnContaining(box.right(), y);
+ if (left_column == nullptr || right_column != left_column) {
+ if (debug)
+ tprintf("In different columns\n");
+ continue;
+ }
+ box.set_left(left_column->LeftAtY(y));  // Widen the search to the whole column.
+ box.set_right(right_column->RightAtY(y));
+ // Now run the rect search.
+ bool modified_box = false;  // True once part has been removed from the grid.
+ GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+ rsearch(&part_grid_);
+ rsearch.SetUniqueMode(true);
+ rsearch.StartRectSearch(box);
+ ColPartition* neighbour;
+
+ while ((neighbour = rsearch.NextRectSearch()) != nullptr) {
+ if (neighbour == part || neighbour->IsUnMergeableType())
+ continue;
+ const TBOX& neighbour_box = neighbour->bounding_box();
+ if (debug) {
+ tprintf("Considering merge with neighbour at:");
+ neighbour->Print();
+ }
+ if (neighbour_box.right() < box.left() ||
+ neighbour_box.left() > box.right())
+ continue; // Not within the same column.
+ if (part->VSignificantCoreOverlap(*neighbour) &&
+ part->TypesMatch(*neighbour)) {
+ // There is vertical overlap and the gross types match, but only
+ // merge if the horizontal gap is small enough, as one of the
+ // partitions may be a figure caption within a column.
+ // If there is only one column, then the mean_column_gap_ is large
+ // enough to allow almost any merge, by being the mean column width.
+ const TBOX& part_box = part->bounding_box();
+ // Don't merge if there is something else in the way. Use the margin
+ // to decide, and check both to allow a bit of overlap.
+ if (neighbour_box.left() > part->right_margin() &&
+ part_box.right() < neighbour->left_margin())
+ continue; // Neighbour is too far to the right.
+ if (neighbour_box.right() < part->left_margin() &&
+ part_box.left() > neighbour->right_margin())
+ continue; // Neighbour is too far to the left.
+ int h_gap = std::max(part_box.left(), neighbour_box.left()) -
+ std::min(part_box.right(), neighbour_box.right());
+ if (h_gap < mean_column_gap_ * kHorizontalGapMergeFraction ||
+ part_box.width() < mean_column_gap_ ||
+ neighbour_box.width() < mean_column_gap_) {
+ if (debug) {
+ tprintf("Running grid-based merge between:\n");
+ part->Print();
+ neighbour->Print();
+ }
+ rsearch.RemoveBBox();  // Take neighbour out of the grid before absorbing it.
+ if (!modified_box) {
+ // We are going to modify part, so remove it and re-insert it after.
+ gsearch.RemoveBBox();
+ rsearch.RepositionIterator();
+ modified_box = true;
+ }
+ part->Absorb(neighbour, WidthCB());
+ } else if (debug) {
+ tprintf("Neighbour failed hgap test\n");
+ }
+ } else if (debug) {
+ tprintf("Neighbour failed overlap or typesmatch test\n");
+ }
+ }
+ if (modified_box) {
+ // We modified the box of part, so re-insert it into the grid.
+ // This does no harm in the current cell, as it already exists there,
+ // but it needs to exist in all the cells covered by its bounding box,
+ // or it will never be found by a full search.
+ // Because the box has changed, it has to be removed first, otherwise
+ // add_sorted may fail to keep a single copy of the pointer.
+ part_grid_.InsertBBox(true, true, part);
+ gsearch.RepositionIterator();
+ }
+ }
+}
+
+// Inserts each unowned blob of block->noise_blobs into the nearest mergeable partition, if close enough.
+// Blobs with no applicable partition are marked BRT_NOISE and then deleted via DeleteUnownedNoise().
+void ColumnFinder::InsertRemainingNoise(TO_BLOCK* block) {
+ BLOBNBOX_IT blob_it(&block->noise_blobs);
+ for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+ BLOBNBOX* blob = blob_it.data();
+ if (blob->owner() != nullptr) continue;  // Already belongs to a partition.
+ TBOX search_box(blob->bounding_box());
+ bool debug = WithinTestRegion(2, search_box.left(), search_box.bottom());
+ search_box.pad(gridsize(), gridsize());
+ // Setup a rectangle search to find the best partition to merge with.
+ ColPartitionGridSearch rsearch(&part_grid_);
+ rsearch.SetUniqueMode(true);
+ rsearch.StartRectSearch(search_box);
+ ColPartition* part;
+ ColPartition* best_part = nullptr;
+ int best_distance = 0;  // Only meaningful while best_part is non-null.
+ while ((part = rsearch.NextRectSearch()) != nullptr) {
+ if (part->IsUnMergeableType())
+ continue;
+ int distance = projection_.DistanceOfBoxFromPartition(
+ blob->bounding_box(), *part, denorm_, debug);
+ if (best_part == nullptr || distance < best_distance) {
+ best_part = part;
+ best_distance = distance;
+ }
+ }
+ if (best_part != nullptr &&
+ best_distance < kMaxDistToPartSizeRatio * best_part->median_height()) {
+ // Close enough to merge.
+ if (debug) {
+ tprintf("Adding noise blob with distance %d, thr=%g:box:",
+ best_distance,
+ kMaxDistToPartSizeRatio * best_part->median_height());
+ blob->bounding_box().print();
+ tprintf("To partition:");
+ best_part->Print();
+ }
+ part_grid_.RemoveBBox(best_part);  // Remove, grow, then re-insert the partition.
+ best_part->AddBox(blob);
+ part_grid_.InsertBBox(true, true, best_part);
+ blob->set_owner(best_part);
+ blob->set_flow(best_part->flow());
+ blob->set_region_type(best_part->blob_type());
+ } else {
+ // Mark the blob for deletion.
+ blob->set_region_type(BRT_NOISE);
+ }
+ }
+ // Delete the marked blobs, clearing neighbour references.
+ block->DeleteUnownedNoise();
+}
+
+// Helper builds a TBOX for a horizontal line. The box spans the x of the two
+// end points and the y extent of the end points, with the top raised by the
+// line's mean_width(). A zero-height result is grown by one unit: downwards
+// if bottom > 0, otherwise upwards.
+static TBOX BoxFromHLine(const TabVector* hline) {
+  const int start_y = hline->startpt().y();
+  const int end_y = hline->endpt().y();
+  int bottom = std::min(start_y, end_y);
+  int top = std::max(start_y, end_y) + hline->mean_width();
+  if (top == bottom) {
+    if (bottom > 0) {
+      --bottom;
+    } else {
+      ++top;
+    }
+  }
+  return TBOX(hline->startpt().x(), bottom, hline->endpt().x(), top);
+}
+
+// Remove partitions that come from horizontal lines that look like
+// underlines (they sit at the bottom of text), but do not touch a table.
+void ColumnFinder::GridRemoveUnderlinePartitions() {
+ TabVector_IT hline_it(&horizontal_lines_);
+ for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) {
+ TabVector* hline = hline_it.data();
+ if (hline->intersects_other_lines())
+ continue;
+ TBOX line_box = BoxFromHLine(hline);
+ TBOX search_box = line_box;
+ search_box.pad(0, line_box.height());  // Pad in y only, by the line box height.
+ ColPartitionGridSearch part_search(&part_grid_);
+ part_search.SetUniqueMode(true);
+ part_search.StartRectSearch(search_box);
+ ColPartition* covered;
+ bool touched_table = false;
+ bool touched_text = false;
+ ColPartition* line_part = nullptr;  // The HLINE partition to remove, if found.
+ while ((covered = part_search.NextRectSearch()) != nullptr) {
+ if (covered->type() == PT_TABLE) {
+ touched_table = true;
+ break;
+ } else if (covered->IsTextType()) {
+ // TODO(rays) Add a list of underline sections to ColPartition.
+ int text_bottom = covered->median_bottom();
+ if (line_box.bottom() <= text_bottom && text_bottom <= search_box.top())
+ touched_text = true;
+ } else if (covered->blob_type() == BRT_HLINE &&
+ line_box.contains(covered->bounding_box()) &&
+ // not if same instance (identical to hline)
+ !TBOX(covered->bounding_box()).contains(line_box)) {
+ line_part = covered;
+ }
+ }
+ if (line_part != nullptr && !touched_table && touched_text) {
+ part_grid_.RemoveBBox(line_part);
+ delete line_part;
+ }
+ }
+}
+
+// Makes a PT_HORZ_LINE partition for every entry of horizontal_lines_ and
+// inserts it into part_grid_, unless the line's box hits an image partition,
+// in which case the new partition is discarded.
+void ColumnFinder::GridInsertHLinePartitions() {
+  TabVector_IT line_it(&horizontal_lines_);
+  for (line_it.mark_cycle_pt(); !line_it.cycled_list(); line_it.forward()) {
+    TBOX hbox = BoxFromHLine(line_it.data());
+    ColPartition* line_part = ColPartition::MakeLinePartition(
+        BRT_HLINE, vertical_skew_,
+        hbox.left(), hbox.bottom(), hbox.right(), hbox.top());
+    line_part->set_type(PT_HORZ_LINE);
+    // Stop at the first image partition returned by the search, if any.
+    ColPartitionGridSearch overlap_search(&part_grid_);
+    overlap_search.SetUniqueMode(true);
+    overlap_search.StartRectSearch(hbox);
+    bool hits_image = false;
+    ColPartition* other;
+    while (!hits_image &&
+           (other = overlap_search.NextRectSearch()) != nullptr) {
+      hits_image = other->IsImageType();
+    }
+    if (hits_image) {
+      delete line_part;
+    } else {
+      part_grid_.InsertBBox(true, true, line_part);
+    }
+  }
+}
+
+// Add vertical line separators (from dead_vectors()) as partitions, unless they hit an image partition.
+void ColumnFinder::GridInsertVLinePartitions() {
+ TabVector_IT vline_it(dead_vectors());
+ for (vline_it.mark_cycle_pt(); !vline_it.cycled_list(); vline_it.forward()) {
+ TabVector* vline = vline_it.data();
+ if (!vline->IsSeparator())
+ continue;
+ int left = std::min(vline->startpt().x(), vline->endpt().x());
+ int right = std::max(vline->startpt().x(), vline->endpt().x());
+ right += vline->mean_width();
+ if (left == right) {  // Avoid a zero-width box.
+ if (left > 0)
+ --left;
+ else
+ ++right;
+ }
+ ColPartition* part = ColPartition::MakeLinePartition(
+ BRT_VLINE, vertical_skew_,
+ left, vline->startpt().y(), right, vline->endpt().y());
+ part->set_type(PT_VERT_LINE);
+ bool any_image = false;
+ ColPartitionGridSearch part_search(&part_grid_);
+ part_search.SetUniqueMode(true);
+ part_search.StartRectSearch(part->bounding_box());
+ ColPartition* covered;
+ while ((covered = part_search.NextRectSearch()) != nullptr) {
+ if (covered->IsImageType()) {
+ any_image = true;
+ break;
+ }
+ }
+ if (!any_image)
+ part_grid_.InsertBBox(true, true, part);
+ else
+ delete part;  // Not inserted, so it must be freed here.
+ }
+}
+
+// Visits every ColPartition in part_grid_ and asks it to set its own type
+// from the column set chosen for the grid row the search is currently on.
+void ColumnFinder::SetPartitionTypes() {
+  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+      gsearch(&part_grid_);
+  gsearch.StartFullSearch();
+  for (ColPartition* part = gsearch.NextFullSearch(); part != nullptr;
+       part = gsearch.NextFullSearch()) {
+    ColPartitionSet* row_columns = best_columns_[gsearch.GridY()];
+    part->SetPartitionType(resolution_, row_columns);
+  }
+}
+
+// Only images remain with multiple types in a run of partners.
+// Starting at each part with a lower but no upper singleton partner, smooths the types along the run.
+void ColumnFinder::SmoothPartnerRuns() {
+ // Iterate the ColPartitions in the grid.
+ GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+ gsearch(&part_grid_);
+ gsearch.StartFullSearch();
+ ColPartition* part;
+ while ((part = gsearch.NextFullSearch()) != nullptr) {
+ ColPartition* partner = part->SingletonPartner(true);
+ if (partner != nullptr) {
+ if (partner->SingletonPartner(false) != part) {  // Partner links are inconsistent.
+ tprintf("Ooops! Partition:(%d partners)",
+ part->upper_partners()->length());
+ part->Print();
+ tprintf("has singleton partner:(%d partners",  // NOTE(review): text lacks a closing ')'.
+ partner->lower_partners()->length());
+ partner->Print();
+ tprintf("but its singleton partner is:");
+ if (partner->SingletonPartner(false) == nullptr)
+ tprintf("NULL\n");
+ else
+ partner->SingletonPartner(false)->Print();
+ }
+ ASSERT_HOST(partner->SingletonPartner(false) == part);
+ } else if (part->SingletonPartner(false) != nullptr) {
+ ColPartitionSet* column_set = best_columns_[gsearch.GridY()];
+ int column_count = column_set->ColumnCount();
+ part->SmoothPartnerRun(column_count * 2 + 1);
+ }
+ }
+}
+
+// Helper functions for TransformToBlocks.
+// Add the part to the temp list before the first entry it should precede, else at the end.
+void ColumnFinder::AddToTempPartList(ColPartition* part,
+ ColPartition_CLIST* temp_list) {
+ int mid_y = part->MidY();
+ ColPartition_C_IT it(temp_list);
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ ColPartition* test_part = it.data();
+ if (part->type() == PT_NOISE || test_part->type() == PT_NOISE)
+ continue; // Noise stays in sequence.
+ if (test_part == part->SingletonPartner(false))
+ break; // Insert before its lower partner.
+ int neighbour_bottom = test_part->median_bottom();
+ int neighbour_top = test_part->median_top();
+ int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
+ if (neighbour_y < mid_y)
+ break; // part is above test_part so insert it.
+ if (!part->HOverlaps(*test_part) && !part->WithinSameMargins(*test_part))
+ continue; // Incompatibles stay in order. NOTE(review): last statement in the loop, so no effect.
+ }
+ if (it.cycled_list()) {  // No insertion point was found.
+ it.add_to_end(part);
+ } else {
+ it.add_before_stay_put(part);
+ }
+}
+
+// Drains temp_list in its current order, handing each extracted partition to
+// AddToWorkingSet so it ends up in work_set.
+void ColumnFinder::EmptyTempPartList(ColPartition_CLIST* temp_list,
+                                     WorkingPartSet_LIST* work_set) {
+  ColPartition_C_IT part_it(temp_list);
+  for (; !part_it.empty(); part_it.forward()) {
+    ColPartition* part = part_it.extract();
+    part->AddToWorkingSet(bleft_, tright_, resolution_,
+                          &good_parts_, work_set);
+  }
+}
+
+// Transform the grid of partitions to the output blocks and to_blocks; PT_NOISE parts go to noise_parts_.
+void ColumnFinder::TransformToBlocks(BLOCK_LIST* blocks,
+ TO_BLOCK_LIST* to_blocks) {
+ WorkingPartSet_LIST work_set;
+ ColPartitionSet* column_set = nullptr;  // Column set of the previous part seen.
+ ColPartition_IT noise_it(&noise_parts_);
+ // The temp_part_list holds a list of parts at the same grid y coord
+ // so they can be added in the correct order. This prevents thin objects
+ // like horizontal lines going before the text lines above them.
+ ColPartition_CLIST temp_part_list;
+ // Iterate the ColPartitions in the grid. It starts at the top
+ GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+ gsearch(&part_grid_);
+ gsearch.StartFullSearch();
+ int prev_grid_y = -1;  // No grid row seen yet.
+ ColPartition* part;
+ while ((part = gsearch.NextFullSearch()) != nullptr) {
+ int grid_y = gsearch.GridY();
+ if (grid_y != prev_grid_y) {  // New grid row: flush the previous row's parts.
+ EmptyTempPartList(&temp_part_list, &work_set);
+ prev_grid_y = grid_y;
+ }
+ if (best_columns_[grid_y] != column_set) {
+ column_set = best_columns_[grid_y];
+ // Every line should have a non-null best column.
+ ASSERT_HOST(column_set != nullptr);
+ column_set->ChangeWorkColumns(bleft_, tright_, resolution_,
+ &good_parts_, &work_set);
+ if (textord_debug_tabfind)
+ tprintf("Changed column groups at grid index %d, y=%d\n",
+ gsearch.GridY(), gsearch.GridY() * gridsize());
+ }
+ if (part->type() == PT_NOISE) {
+ noise_it.add_to_end(part);
+ } else {
+ AddToTempPartList(part, &temp_part_list);
+ }
+ }
+ EmptyTempPartList(&temp_part_list, &work_set);  // Flush the final row.
+ // Now finish all working sets and transfer ColPartitionSets to block_sets.
+ WorkingPartSet_IT work_it(&work_set);
+ while (!work_it.empty()) {
+ WorkingPartSet* working_set = work_it.extract();
+ working_set->ExtractCompletedBlocks(bleft_, tright_, resolution_,
+ &good_parts_, blocks, to_blocks);
+ delete working_set;
+ work_it.forward();
+ }
+}
+
+// Helper calls reflect_box_in_y_axis() on every BLOBNBOX in the list.
+// Only the BLOBNBOX bounding box is reflected, not the blobs or outlines
+// below it.
+static void ReflectBlobList(BLOBNBOX_LIST* bblobs) {
+  BLOBNBOX_IT blob_it(bblobs);
+  blob_it.mark_cycle_pt();
+  while (!blob_it.cycled_list()) {
+    BLOBNBOX* blob = blob_it.data();
+    blob->reflect_box_in_y_axis();
+    blob_it.forward();
+  }
+}
+
+// Reflect the blob boxes (but not the outlines) in the y-axis so that
+// the blocks get created in the correct RTL order. Reflects the blobs
+// in all four blob lists of the input_block and in the bblobs list.
+// The reflection is undone in RotateAndReskewBlocks by
+// reflecting the blocks themselves, and then recomputing the blob bounding
+// boxes. Also replaces denorm_ with a new DENORM chained onto the old one.
+void ColumnFinder::ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs) {
+ ReflectBlobList(bblobs);
+ ReflectBlobList(&input_block->blobs);
+ ReflectBlobList(&input_block->small_blobs);
+ ReflectBlobList(&input_block->noise_blobs);
+ ReflectBlobList(&input_block->large_blobs);
+ // Update the denorm with the reflection.
+ auto* new_denorm = new DENORM;
+ new_denorm->SetupNormalization(nullptr, nullptr, denorm_,
+ 0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f);
+ denorm_ = new_denorm;  // NOTE(review): who deletes new_denorm and the old denorm_ is not visible here.
+}
+
+// Helper fixes up blobs and cblobs to match the desired rotation,
+// exploding multi-outline blobs back to single blobs and accumulating
+// the bounding box widths and heights of single-outline blobs in the STATS.
+static void RotateAndExplodeBlobList(const FCOORD& blob_rotation,
+ BLOBNBOX_LIST* bblobs,
+ STATS* widths,
+ STATS* heights) {
+ BLOBNBOX_IT it(bblobs);
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ BLOBNBOX* blob = it.data();
+ C_BLOB* cblob = blob->cblob();
+ C_OUTLINE_LIST* outlines = cblob->out_list();
+ C_OUTLINE_IT ol_it(outlines);
+ if (!outlines->singleton()) {
+ // This blob has multiple outlines from CJK repair.
+ // Explode the blob back into individual outlines.
+ for (;!ol_it.empty(); ol_it.forward()) {
+ C_OUTLINE* outline = ol_it.extract();
+ BLOBNBOX* new_blob = BLOBNBOX::RealBlob(outline);
+ // This blob will be revisited later since we add_after_stay_put here.
+ // This means it will get rotated and have its width/height added to
+ // the stats below.
+ it.add_after_stay_put(new_blob);
+ }
+ it.extract();  // Unlink the original multi-outline blob from the list.
+ delete cblob;
+ delete blob;
+ } else {
+ if (blob_rotation.x() != 1.0f || blob_rotation.y() != 0.0f) {  // Skip the rotate call for (1, 0).
+ cblob->rotate(blob_rotation);
+ }
+ blob->compute_bounding_box();
+ widths->add(blob->bounding_box().width(), 1);
+ heights->add(blob->bounding_box().height(), 1);
+ }
+ }
+}
+
+// Undo the deskew that was done in FindTabVectors, as recognition is done
+// without correcting blobs or blob outlines for skew.
+// Reskew the completed blocks to put them back to the original rotated coords
+// that were created by CorrectOrientation.
+// If the input_is_rtl, then reflect the blocks in the y-axis to undo the
+// reflection that was done before FindTabVectors.
+// Blocks that were identified as vertical text (relative to the rotated
+// coordinates) are further rotated so the text lines are horizontal.
+// blob polygonal outlines are rotated to match the position of the blocks
+// that they are in, and their bounding boxes are recalculated to be accurate.
+// Record appropriate inverse transformations and required classifier
+// transformation in the blocks, plus each block's index and median blob size.
+void ColumnFinder::RotateAndReskewBlocks(bool input_is_rtl,
+ TO_BLOCK_LIST* blocks) {
+ if (input_is_rtl) {
+ // The skew is backwards because of the reflection.
+ FCOORD tmp = deskew_;
+ deskew_ = reskew_;
+ reskew_ = tmp;
+ }
+ TO_BLOCK_IT it(blocks);
+ int block_index = 1;  // Block indices are numbered from 1.
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ TO_BLOCK* to_block = it.data();
+ BLOCK* block = to_block->block;
+ // Blocks are created on the deskewed blob outlines in TransformToBlocks()
+ // so we need to reskew them back to page coordinates.
+ if (input_is_rtl) {
+ block->reflect_polygon_in_y_axis();
+ }
+ block->rotate(reskew_);
+ // Copy the right_to_left flag to the created block.
+ block->set_right_to_left(input_is_rtl);
+ // Save the skew angle in the block for baseline computations.
+ block->set_skew(reskew_);
+ block->pdblk.set_index(block_index++);
+ FCOORD blob_rotation = ComputeBlockAndClassifyRotation(block);
+ // Rotate all the blobs if needed and recompute the bounding boxes.
+ // Compute the block median blob width and height as we go.
+ STATS widths(0, block->pdblk.bounding_box().width());
+ STATS heights(0, block->pdblk.bounding_box().height());
+ RotateAndExplodeBlobList(blob_rotation, &to_block->blobs,
+ &widths, &heights);
+ TO_ROW_IT row_it(to_block->get_rows());
+ for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+ TO_ROW* row = row_it.data();
+ RotateAndExplodeBlobList(blob_rotation, row->blob_list(),
+ &widths, &heights);
+ }
+ block->set_median_size(static_cast<int>(widths.median() + 0.5),  // + 0.5 before truncating.
+ static_cast<int>(heights.median() + 0.5));
+ if (textord_debug_tabfind >= 2)
+ tprintf("Block median size = (%d, %d)\n",
+ block->median_size().x(), block->median_size().y());
+ }
+}
+
+// Computes the rotations for the block (to make textlines horizontal) and
+// for the blobs (for classification) and sets the re_rotation and
+// classify_rotation of the given block; a PT_VERTICAL_TEXT block is rotated.
+// Returns the rotation that needs to be applied to the blobs to make
+// them sit in the rotated block.
+FCOORD ColumnFinder::ComputeBlockAndClassifyRotation(BLOCK* block) {
+ // The text_rotation_ tells us the gross page text rotation that needs
+ // to be applied for classification
+ // TODO(rays) find block-level classify rotation by orientation detection.
+ // In the mean time, assume that "up" for text printed in the minority
+ // direction (PT_VERTICAL_TEXT) is perpendicular to the line of reading.
+ // Accomplish this by zero-ing out the text rotation. This covers the
+ // common cases of image credits in documents written in Latin scripts
+ // and page headings for predominantly vertically written CJK books.
+ FCOORD classify_rotation(text_rotation_);
+ FCOORD block_rotation(1.0f, 0.0f);  // Default used for non-vertical blocks.
+ if (block->pdblk.poly_block()->isA() == PT_VERTICAL_TEXT) {
+ // Vertical text needs to be 90 degrees rotated relative to the rest.
+ // If the rest has a 90 degree rotation already, use the inverse, making
+ // the vertical text the original way up. Otherwise use 90 degrees
+ // clockwise.
+ if (rerotate_.x() == 0.0f)
+ block_rotation = rerotate_;
+ else
+ block_rotation = FCOORD(0.0f, -1.0f);
+ block->rotate(block_rotation);
+ classify_rotation = FCOORD(1.0f, 0.0f);
+ }
+ block_rotation.rotate(rotation_);
+ // block_rotation is now what we have done to the blocks. Now do the same
+ // thing to the blobs, but save the inverse rotation in the block, as that
+ // is what we need to DENORM back to the image coordinates.
+ FCOORD blob_rotation(block_rotation);
+ block_rotation.set_y(-block_rotation.y());  // Negate y to form the inverse described above.
+ block->set_re_rotation(block_rotation);
+ block->set_classify_rotation(classify_rotation);
+ if (textord_debug_tabfind) {
+ tprintf("Blk %d, type %d rerotation(%.2f, %.2f), char(%.2f,%.2f), box:",
+ block->pdblk.index(), block->pdblk.poly_block()->isA(),
+ block->re_rotation().x(), block->re_rotation().y(),
+ classify_rotation.x(), classify_rotation.y());
+ block->pdblk.bounding_box().print();
+ }
+ return blob_rotation;
+}
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/colfind.h b/tesseract/src/textord/colfind.h
new file mode 100644
index 00000000..b7d5b672
--- /dev/null
+++ b/tesseract/src/textord/colfind.h
@@ -0,0 +1,366 @@
+///////////////////////////////////////////////////////////////////////
+// File: colfind.h
+// Description: Class to find columns in the grid of BLOBNBOXes.
+// Author: Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_COLFIND_H_
+#define TESSERACT_TEXTORD_COLFIND_H_
+
+#include "colpartitiongrid.h"
+#include "colpartitionset.h"
+#include "debugpixa.h"
+#include "imagefind.h"
+#include "ocrblock.h"
+#include "tabfind.h"
+#include "textlineprojection.h"
+
+class BLOCK_LIST;
+struct Boxa;
+struct Pixa;
+class DENORM;
+class ScrollView;
+class STATS;
+class TO_BLOCK;
+
+namespace tesseract {
+
+class ColPartitionSet;
+class ColPartitionSet_LIST;
+class ColSegment_LIST;
+class ColumnGroup_LIST;
+class LineSpacing;
+class StrokeWidth;
+class TempColumn_LIST;
+class EquationDetectBase;
+
+// The ColumnFinder class finds columns in the grid of BLOBNBOXes.
+class TESS_API ColumnFinder : public TabFind {
+ public:
+ // Gridsize is an estimate of the text size in the image. A suitable value
+ // is in TO_BLOCK::line_size after find_components has been used to make
+ // the blobs.
+ // bleft and tright are the bounds of the image (rectangle) being processed.
+ // vlines is a (possibly empty) list of TabVector and vertical_x and y are
+ // the sum logical vertical vector produced by LineFinder::FindVerticalLines.
+ // If cjk_script is true, then broken CJK characters are fixed during
+ // layout analysis to assist in detecting horizontal vs vertically written
+ // textlines.
+ ColumnFinder(int gridsize, const ICOORD& bleft, const ICOORD& tright,
+ int resolution, bool cjk_script, double aligned_gap_fraction,
+ TabVector_LIST* vlines, TabVector_LIST* hlines,
+ int vertical_x, int vertical_y);
+ ~ColumnFinder() override;
+
+ // Accessors for testing
+ const DENORM* denorm() const {
+ return denorm_;
+ }
+ const TextlineProjection* projection() const {
+ return &projection_;
+ }
+ void set_cjk_script(bool is_cjk) {
+ cjk_script_ = is_cjk;
+ }
+
+ // ======================================================================
+ // The main function of ColumnFinder is broken into pieces to facilitate
+ // optional insertion of orientation and script detection in an efficient
+ // way. The calling sequence IS MANDATORY however, whether or not
+ // OSD is being used:
+ // 1. Construction.
+ // 2. SetupAndFilterNoise.
+ // 3. IsVerticallyAlignedText.
+ // 4. CorrectOrientation.
+ // 5. FindBlocks.
+ // 6. Destruction. Use of a single column finder for multiple images does not
+ // make sense.
+ // Throughout these steps, the ColPartitions are owned by part_grid_, which
+ // means that it must be kept correct. Exception: big_parts_ owns its
+ // own ColPartitions.
+ // The BLOBNBOXes are owned by the input TO_BLOCK for the whole time, except
+ // for a phase in FindBlocks before TransformToBlocks, when they become
+ // owned by the ColPartitions. The owner() ColPartition of a BLOBNBOX
+ // indicates more of a betrothal for the majority of layout analysis, ie
+ // which ColPartition will take ownership when the blobs are released from
+ // the input TO_BLOCK. Exception: image_bblobs_ owns the fake blobs that
+ // are part of the image regions, as they are not on any TO_BLOCK list.
+ // TODO(rays) break up column finder further into smaller classes, as
+ // there is a lot more to it than column finding now.
+ // ======================================================================
+
+ // Performs initial processing on the blobs in the input_block:
+ // Setup the part_grid, stroke_width_, nontext_map_.
+ // Obvious noise blobs are filtered out and used to mark the nontext_map_.
+ // Initial stroke-width analysis is used to get local text alignment
+ // direction, so the textline projection_ map can be setup.
+ // On return, IsVerticallyAlignedText may be called (now optionally) to
+ // determine the gross textline alignment of the page.
+ void SetupAndFilterNoise(PageSegMode pageseg_mode, Pix* photo_mask_pix,
+ TO_BLOCK* input_block);
+
+ // Tests for vertical alignment of text (returning true if so), and generates
+ // a list of blobs (in osd_blobs) for orientation and script detection.
+ // block is the single block for the whole page or rectangle to be OCRed.
+ // Note that the vertical alignment may be due to text whose writing direction
+ // is vertical, like say Japanese, or due to text whose writing direction is
+ // horizontal but whose text appears vertically aligned because the image is
+ // not the right way up.
+ // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio.
+ bool IsVerticallyAlignedText(double find_vertical_text_ratio,
+ TO_BLOCK* block, BLOBNBOX_CLIST* osd_blobs);
+
+ // Rotates the blobs and the TabVectors so that the gross writing direction
+ // (text lines) is horizontal and lines are read down the page.
+ // Applied rotation stored in rotation_.
+ // A second rotation is calculated for application during recognition to
+ // make the rotated blobs upright for recognition.
+ // Subsequent rotation stored in text_rotation_.
+ //
+ // Arguments:
+ // vertical_text_lines is true if the text lines are vertical.
+ // recognition_rotation [0..3] is the number of anti-clockwise 90 degree
+ // rotations from osd required for the text to be upright and readable.
+ void CorrectOrientation(TO_BLOCK* block, bool vertical_text_lines,
+ int recognition_rotation);
+
+ // Finds blocks of text, image, rule line, table etc, returning them in the
+ // blocks and to_blocks
+ // (Each TO_BLOCK points to the basic BLOCK and adds more information.)
+ // Image blocks are generated by a combination of photo_mask_pix (which may
+ // NOT be nullptr) and the rejected text found during preliminary textline
+ // finding.
+ // The input_block is the result of a call to find_components, and contains
+ // the blobs found in the image or rectangle to be OCRed. These blobs will be
+ // removed and placed in the output blocks, while unused ones will be deleted.
+ // If single_column is true, the input is treated as single column, but
+ // it is still divided into blocks of equal line spacing/text size.
+ // scaled_color is scaled down by scaled_factor from the input color image,
+ // and may be nullptr if the input was not color.
+ // grey_pix is optional, but if present must match the photo_mask_pix in size,
+ // and must be a *real* grey image instead of binary_pix * 255.
+ // thresholds_pix is expected to be present iff grey_pix is present and
+ // can be an integer factor reduction of the grey_pix. It represents the
+ // thresholds that were used to create the binary_pix from the grey_pix.
+ // Small blobs that confuse the segmentation into lines are placed into
+ // diacritic_blobs, with the intention that they be put into the most
+ // appropriate word after the rest of layout analysis.
+ // Returns -1 if the user hits the 'd' key in the blocks window while running
+ // in debug mode, which requests a retry with more debug info.
+ int FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, int scaled_factor,
+ TO_BLOCK* block, Pix* photo_mask_pix, Pix* thresholds_pix,
+ Pix* grey_pix, DebugPixa* pixa_debug, BLOCK_LIST* blocks,
+ BLOBNBOX_LIST* diacritic_blobs, TO_BLOCK_LIST* to_blocks);
+
+ // Get the rotation required to deskew, and its inverse rotation.
+ void GetDeskewVectors(FCOORD* deskew, FCOORD* reskew);
+
+ // Set the equation detection pointer.
+ void SetEquationDetect(EquationDetectBase* detect);
+
+ private:
+ // Displays the blob and block bounding boxes in a window called Blocks.
+ void DisplayBlocks(BLOCK_LIST* blocks);
+ // Displays the column edges at each grid y coordinate defined by
+ // best_columns_.
+ void DisplayColumnBounds(PartSetVector* sets);
+
+ ////// Functions involved in determining the columns used on the page. /////
+
+ // Sets up column_sets_ (the determined column layout at each horizontal
+ // slice). Returns false if the page is empty.
+ bool MakeColumns(bool single_column);
+ // Attempt to improve the column_candidates by expanding the columns
+ // and adding new partitions from the partition sets in src_sets.
+ // Src_sets may be equal to column_candidates, in which case it will
+ // use them as a source to improve themselves.
+ void ImproveColumnCandidates(PartSetVector* src_sets,
+ PartSetVector* column_sets);
+ // Prints debug information on the column candidates.
+ void PrintColumnCandidates(const char* title);
+ // Finds the optimal set of columns that cover the entire image with as
+ // few changes in column partition as possible.
+ // Returns true if any part of the page is multi-column.
+ bool AssignColumns(const PartSetVector& part_sets);
+ // Finds the biggest range in part_sets_ that has no assigned column, but
+ // column assignment is possible.
+ bool BiggestUnassignedRange(int set_count, const bool* any_columns_possible,
+ int* start, int* end);
+ // Finds the modal compatible column_set_ index within the given range.
+ int RangeModalColumnSet(int** column_set_costs, const int* assigned_costs,
+ int start, int end);
+ // Given that there are many column_set_id compatible columns in the range,
+ // shrinks the range to the longest contiguous run of compatibility, allowing
+ // gaps where no columns are possible, but not where competing columns are
+ // possible.
+ void ShrinkRangeToLongestRun(int** column_set_costs,
+ const int* assigned_costs,
+ const bool* any_columns_possible,
+ int column_set_id,
+ int* best_start, int* best_end);
+ // Moves start in the direction of step, up to, but not including end while
+ // the only incompatible regions are no more than kMaxIncompatibleColumnCount
+ // in size, and the compatible regions beyond are bigger.
+ void ExtendRangePastSmallGaps(int** column_set_costs,
+ const int* assigned_costs,
+ const bool* any_columns_possible,
+ int column_set_id,
+ int step, int end, int* start);
+ // Assigns the given column_set_id to the part_sets_ in the given range.
+ void AssignColumnToRange(int column_set_id, int start, int end,
+ int** column_set_costs, int* assigned_costs);
+
+ // Computes the mean_column_gap_.
+ void ComputeMeanColumnGap(bool any_multi_column);
+
+ //////// Functions that manipulate ColPartitions in the part_grid_ /////
+ //////// to split, merge, find margins, and find types. //////////////
+
+ // Hoovers up all un-owned blobs and deletes them.
+ // The rest get released from the block so the ColPartitions can pass
+ // ownership to the output blocks.
+ void ReleaseBlobsAndCleanupUnused(TO_BLOCK* block);
+ // Splits partitions that cross columns where they have nothing in the gap.
+ void GridSplitPartitions();
+ // Merges partitions where there is vertical overlap, within a single column,
+ // and the horizontal gap is small enough.
+ void GridMergePartitions();
+ // Inserts remaining noise blobs into the most applicable partition if any.
+ // If there is no applicable partition, then the blobs are deleted.
+ void InsertRemainingNoise(TO_BLOCK* block);
+ // Remove partitions that come from horizontal lines that look like
+ // underlines, but are not part of a table.
+ void GridRemoveUnderlinePartitions();
+ // Add horizontal line separators as partitions.
+ void GridInsertHLinePartitions();
+ // Add vertical line separators as partitions.
+ void GridInsertVLinePartitions();
+ // For every ColPartition in the grid, sets its type based on position
+ // in the columns.
+ void SetPartitionTypes();
+ // Only images remain with multiple types in a run of partners.
+ // Sets the type of all in the group to the maximum of the group.
+ void SmoothPartnerRuns();
+
+ //////// Functions that make the final output blocks ///////
+
+ // Helper functions for TransformToBlocks.
+ // Add the part to the temp list in the correct order.
+ void AddToTempPartList(ColPartition* part, ColPartition_CLIST* temp_list);
+ // Add everything from the temp list to the work_set assuming correct order.
+ void EmptyTempPartList(ColPartition_CLIST* temp_list,
+ WorkingPartSet_LIST* work_set);
+
+ // Transform the grid of partitions to the output blocks.
+ void TransformToBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
+
+ // Reflect the blob boxes (but not the outlines) in the y-axis so that
+ // the blocks get created in the correct RTL order. Rotates the blobs
+ // in the input_block and the bblobs list.
+ // The reflection is undone in RotateAndReskewBlocks by
+ // reflecting the blocks themselves, and then recomputing the blob bounding
+ // boxes.
+ void ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs);
+
+ // Undo the deskew that was done in FindTabVectors, as recognition is done
+ // without correcting blobs or blob outlines for skew.
+ // Reskew the completed blocks to put them back to the original rotated coords
+ // that were created by CorrectOrientation.
+ // If the input_is_rtl, then reflect the blocks in the y-axis to undo the
+ // reflection that was done before FindTabVectors.
+ // Blocks that were identified as vertical text (relative to the rotated
+ // coordinates) are further rotated so the text lines are horizontal.
+ // blob polygonal outlines are rotated to match the position of the blocks
+ // that they are in, and their bounding boxes are recalculated to be accurate.
+ // Record appropriate inverse transformations and required
+ // classifier transformation in the blocks.
+ void RotateAndReskewBlocks(bool input_is_rtl, TO_BLOCK_LIST* to_blocks);
+
+ // Computes the rotations for the block (to make textlines horizontal) and
+ // for the blobs (for classification) and sets the appropriate members
+ // of the given block.
+ // Returns the rotation that needs to be applied to the blobs to make
+ // them sit in the rotated block.
+ FCOORD ComputeBlockAndClassifyRotation(BLOCK* block);
+
+ // If true then the page language is cjk, so it is safe to perform
+ // FixBrokenCJK.
+ bool cjk_script_;
+ // The minimum gutter width to apply for finding columns.
+ // Modified when vertical text is detected to prevent detection of
+ // vertical text lines as columns.
+ int min_gutter_width_;
+ // The mean gap between columns over the page.
+ int mean_column_gap_;
+ // Config param saved at construction time. Modifies min_gutter_width_ with
+ // vertical text to prevent detection of vertical text as columns.
+ double tabfind_aligned_gap_fraction_;
+ // The rotation vector needed to convert original coords to deskewed.
+ FCOORD deskew_;
+ // The rotation vector needed to convert deskewed back to original coords.
+ FCOORD reskew_;
+ // The rotation vector used to rotate vertically oriented pages.
+ FCOORD rotation_;
+ // The rotation vector needed to convert the rotated back to original coords.
+ FCOORD rerotate_;
+ // The additional rotation vector needed to rotate text for recognition.
+ FCOORD text_rotation_;
+ // The column_sets_ contain the ordered candidate ColPartitionSets that
+ // define the possible divisions of the page into columns.
+ PartSetVector column_sets_;
+ // A simple array of pointers to the best assigned column division at
+ // each grid y coordinate.
+ ColPartitionSet** best_columns_;
+ // The grid used for creating initial partitions with strokewidth.
+ StrokeWidth* stroke_width_;
+ // The grid used to hold ColPartitions after the columns have been determined.
+ ColPartitionGrid part_grid_;
+ // List of ColPartitions that are no longer needed after they have been
+ // turned into regions, but are kept around because they are referenced
+ // by the part_grid_.
+ ColPartition_LIST good_parts_;
+ // List of ColPartitions that are big and might be dropcap or vertically
+ // joined.
+ ColPartition_LIST big_parts_;
+ // List of ColPartitions that have been declared noise.
+ ColPartition_LIST noise_parts_;
+ // The fake blobs that are made from the images.
+ BLOBNBOX_LIST image_bblobs_;
+ // Horizontal line separators.
+ TabVector_LIST horizontal_lines_;
+ // Image map of photo/noise areas on the page.
+ Pix* nontext_map_;
+ // Textline projection map.
+ TextlineProjection projection_;
+ // Sequence of DENORMS that indicate how to get back to the original image
+ // coordinate space. The destructor must delete all the DENORMs in the chain.
+ DENORM* denorm_;
+
+ // Various debug windows that automatically go away on completion.
+ ScrollView* input_blobs_win_;
+
+ // The equation region detector pointer. Note: This pointer is passed in by
+ // member function SetEquationDetect, and is NOT owned by this class, which
+ // is therefore not responsible for releasing it.
+ EquationDetectBase* equation_detect_;
+
+ // Allow a subsequent instance to reuse the blocks window.
+ // Not thread-safe, but multiple threads shouldn't be using windows anyway.
+ static ScrollView* blocks_win_;
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_COLFIND_H_
diff --git a/tesseract/src/textord/colpartition.cpp b/tesseract/src/textord/colpartition.cpp
new file mode 100644
index 00000000..6dcdda74
--- /dev/null
+++ b/tesseract/src/textord/colpartition.cpp
@@ -0,0 +1,2597 @@
+///////////////////////////////////////////////////////////////////////
+// File: colpartition.cpp
+// Description: Class to hold partitions of the page that correspond
+// roughly to text lines.
+// Author: Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "colpartition.h"
+#include "colpartitiongrid.h"
+#include "colpartitionset.h"
+#include "detlinefit.h"
+#include "dppoint.h"
+#include "imagefind.h"
+#include "workingpartset.h"
+#include "host.h" // for NearlyEqual
+
+#include <algorithm>
+
+namespace tesseract {
+
+ELIST2IZE(ColPartition)
+CLISTIZE(ColPartition)
+
+//////////////// ColPartition Implementation ////////////////
+
+// Enum naming the entries in a neighbourhood of lines.
+// Used by SmoothSpacings to test for blips with OKSpacingBlip.
+enum SpacingNeighbourhood {
+ PN_ABOVE2,
+ PN_ABOVE1,
+ PN_UPPER,
+ PN_LOWER,
+ PN_BELOW1,
+ PN_BELOW2,
+ PN_COUNT // Always last: equals the number of entries listed above.
+};
+
+// Maximum change in spacing (in inches) to ignore.
+const double kMaxSpacingDrift = 1.0 / 72; // 1/72 is one point.
+// Maximum fraction of line height used as an additional allowance
+// for top spacing.
+const double kMaxTopSpacingFraction = 0.25;
+// What multiple of the largest line height should be used as an upper bound
+// for whether lines are in the same text block?
+const double kMaxSameBlockLineSpacing = 3;
+// Maximum ratio of sizes for lines to be considered the same size.
+const double kMaxSizeRatio = 1.5;
+// Fraction of max of leader width and gap for max IQR of gaps.
+const double kMaxLeaderGapFractionOfMax = 0.25;
+// Fraction of min of leader width and gap for max IQR of gaps.
+const double kMaxLeaderGapFractionOfMin = 0.5;
+// Minimum number of blobs to be considered a leader.
+const int kMinLeaderCount = 5;
+// Minimum score for a STRONG_CHAIN textline.
+const int kMinStrongTextValue = 6;
+// Minimum score for a CHAIN textline.
+const int kMinChainTextValue = 3;
+// Minimum number of blobs for strong horizontal text lines.
+const int kHorzStrongTextlineCount = 8;
+// Minimum height (in image pixels) for strong horizontal text lines.
+const int kHorzStrongTextlineHeight = 10;
+// Minimum aspect ratio for strong horizontal text lines.
+const int kHorzStrongTextlineAspect = 5;
+// Maximum upper quartile error allowed on a baseline fit as a fraction
+// of height.
+const double kMaxBaselineError = 0.4375;
+// Min coverage for a good baseline between vectors
+const double kMinBaselineCoverage = 0.5;
+// Max RMS color noise to compare colors.
+const int kMaxRMSColorNoise = 128;
+// Maximum color distance at which a partition may still be used in
+// smoothing neighbouring types. This is a squared distance.
+const int kMaxColorDistance = 900;
+
+// Constructs an empty partition. blob_type is the blob_region_type_ of its
+// blobs; vertical is logical vertical on the possibly skewed image.
+ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD& vertical)
+ : left_margin_(-INT32_MAX), right_margin_(INT32_MAX), // Unbounded margins.
+ median_bottom_(INT32_MAX), median_top_(-INT32_MAX), // Inverted extremes.
+ median_left_(INT32_MAX), median_right_(-INT32_MAX), // Inverted extremes.
+ blob_type_(blob_type),
+ vertical_(vertical) {
+ memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_)); // Zero every entry.
+}
+
+// Builds a stand-in ColPartition holding one fake BLOBNBOX, both derived
+// from the single TBOX supplied.
+// WARNING: Although they live on C_LISTs, the BLOBNBOX owns the C_BLOB and
+// the ColPartition owns the BLOBNBOX!!!
+// DeleteBoxes must be called before the ColPartition is deleted.
+ColPartition* ColPartition::FakePartition(const TBOX& box,
+                                          PolyBlockType block_type,
+                                          BlobRegionType blob_type,
+                                          BlobTextFlowType flow) {
+  auto* fake = new ColPartition(blob_type, ICOORD(0, 1));
+  fake->set_type(block_type);
+  fake->set_flow(flow);
+  // The partition's only content is a blob synthesized from the box.
+  auto* fake_blob = new BLOBNBOX(C_BLOB::FakeBlob(box));
+  fake->AddBox(fake_blob);
+  // The margins coincide with the horizontal extent of the box.
+  fake->set_left_margin(box.left());
+  fake->set_right_margin(box.right());
+  fake->SetBlobTypes();
+  fake->ComputeLimits();
+  fake->ClaimBoxes();
+  return fake;
+}
+
+// Wraps the given real BLOBNBOX in a new "big" partition and returns it.
+// A big partition is a single-blob partition bigger than the surrounding
+// text, which may be a dropcap, two or more vertically touching characters,
+// or some graphic element.
+// When big_part_list is not nullptr the new partition is appended to it.
+ColPartition* ColPartition::MakeBigPartition(BLOBNBOX* box,
+                                             ColPartition_LIST* big_part_list) {
+  // Clear any existing owner first; ClaimBoxes below asserts on a blob that
+  // is owned by a different partition.
+  box->set_owner(nullptr);
+  auto* big_part = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
+  big_part->set_flow(BTFT_NONE);
+  big_part->AddBox(box);
+  big_part->ComputeLimits();
+  big_part->ClaimBoxes();
+  big_part->SetBlobTypes();
+  big_part->set_block_owned(true);
+  if (big_part_list == nullptr) {
+    return big_part;
+  }
+  ColPartition_IT list_it(big_part_list);
+  list_it.add_to_end(big_part);
+  return big_part;
+}
+
+ColPartition::~ColPartition() {
+  // Unhook this partition from every partner so that none of them is left
+  // holding a pointer to a deleted object.
+  ColPartition_C_IT upper_it(&upper_partners_);
+  for (upper_it.mark_cycle_pt(); !upper_it.cycled_list();
+       upper_it.forward()) {
+    ColPartition* partner = upper_it.data();
+    partner->RemovePartner(false, this);
+  }
+  ColPartition_C_IT lower_it(&lower_partners_);
+  for (lower_it.mark_cycle_pt(); !lower_it.cycled_list();
+       lower_it.forward()) {
+    ColPartition* partner = lower_it.data();
+    partner->RemovePartner(true, this);
+  }
+}
+
+// Builds a blob-less ColPartition that stands for a horizontal or vertical
+// line, described only by its type and bounding box.
+ColPartition* ColPartition::MakeLinePartition(BlobRegionType blob_type,
+                                              const ICOORD& vertical,
+                                              int left, int bottom,
+                                              int right, int top) {
+  ColPartition* line = new ColPartition(blob_type, vertical);
+  line->bounding_box_ = TBOX(left, bottom, right, top);
+  // With no blobs to measure, the medians are taken straight from the box.
+  line->median_left_ = left;
+  line->median_right_ = right;
+  line->median_width_ = right - left;
+  line->median_bottom_ = bottom;
+  line->median_top_ = top;
+  line->median_height_ = top - bottom;
+  // The sort keys are derived from the bounding box set above.
+  line->left_key_ = line->BoxLeftKey();
+  line->right_key_ = line->BoxRightKey();
+  return line;
+}
+
+
+// Adds the given box to the partition and grows the partition bounds to
+// include it. The box list is kept sorted (with the bottom comparator for
+// vertical partitions, the left comparator otherwise) and add_sorted is
+// asked to keep entries unique, so no box is recorded twice.
+void ColPartition::AddBox(BLOBNBOX* bbox) {
+  TBOX box = bbox->bounding_box();
+  // Grow the partition limits (or start them from the first box).
+  if (boxes_.length() != 0) {
+    bounding_box_ += box;
+  } else {
+    bounding_box_ = box;
+  }
+
+  const bool vertical = IsVerticalType();
+  if (vertical != last_add_was_vertical_) {
+    // The orientation flipped since the last add, so the existing list is
+    // sorted with the wrong comparator: re-sort it before inserting.
+    if (vertical) {
+      boxes_.sort(SortByBoxBottom<BLOBNBOX>);
+    } else {
+      boxes_.sort(SortByBoxLeft<BLOBNBOX>);
+    }
+    last_add_was_vertical_ = vertical;
+  }
+  if (vertical) {
+    boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>, true, bbox);
+  } else {
+    boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>, true, bbox);
+  }
+
+  // Keys that are not pinned to a tab follow the bounding box.
+  if (!left_key_tab_) {
+    left_key_ = BoxLeftKey();
+  }
+  if (!right_key_tab_) {
+    right_key_ = BoxRightKey();
+  }
+  if (TabFind::WithinTestRegion(2, box.left(), box.bottom())) {
+    tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
+            box.left(), box.bottom(), box.right(), box.top(),
+            bounding_box_.left(), bounding_box_.right());
+  }
+}
+
+// Takes the given box out of the partition (if present) and recomputes the
+// partition limits.
+void ColPartition::RemoveBox(BLOBNBOX* box) {
+  BLOBNBOX_C_IT it(&boxes_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    if (it.data() != box) {
+      continue;
+    }
+    // Found it: unlink the first match, refresh the limits and stop.
+    it.extract();
+    ComputeLimits();
+    return;
+  }
+}
+
+// Returns the box with the greatest extent perpendicular to the presumed
+// flow of text: width for vertical partitions, height otherwise.
+// Returns nullptr when the partition holds no boxes; on ties the earliest
+// box in the list wins.
+BLOBNBOX* ColPartition::BiggestBox() {
+  BLOBNBOX* winner = nullptr;
+  BLOBNBOX_C_IT it(&boxes_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* candidate = it.data();
+    const bool use_width = IsVerticalType();
+    if (winner != nullptr) {
+      const bool larger =
+          use_width
+              ? candidate->bounding_box().width() >
+                    winner->bounding_box().width()
+              : candidate->bounding_box().height() >
+                    winner->bounding_box().height();
+      if (!larger) {
+        continue;
+      }
+    }
+    winner = candidate;
+  }
+  return winner;
+}
+
+// Returns the union of the bounding boxes of every box in the partition
+// other than the given one.
+TBOX ColPartition::BoundsWithoutBox(BLOBNBOX* box) {
+  TBOX bounds;
+  BLOBNBOX_C_IT it(&boxes_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    if (it.data() == box) {
+      continue;  // The excluded box contributes nothing.
+    }
+    bounds += it.data()->bounding_box();
+  }
+  return bounds;
+}
+
+// Claims every box in boxes_ by setting its owner pointer to this partition.
+// A box that already has an owner must already be owned by this partition.
+void ColPartition::ClaimBoxes() {
+  BLOBNBOX_C_IT it(&boxes_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* blob = it.data();
+    ColPartition* other = blob->owner();
+    if (other != nullptr) {
+      // Already owned: only legal if this partition is the owner.
+      ASSERT_HOST(other == this);
+      continue;
+    }
+    // Normal case: ownership is available.
+    blob->set_owner(this);
+  }
+}
+
+// Nulls the owner of the blobs in this partition, so they can be deleted
+// independently of the ColPartition.
+void ColPartition::DisownBoxes() {
+ BLOBNBOX_C_IT bb_it(&boxes_);
+ for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
+ BLOBNBOX* bblob = bb_it.data();
+ ASSERT_HOST(bblob->owner() == this || bblob->owner() == nullptr); // No foreign owner allowed.
+ bblob->set_owner(nullptr);
+ }
+}
+
+// Nulls the owner of those blobs in this partition that are owned by this
+// partition, so they can be deleted independently of the ColPartition.
+// Blobs owned by some other partition keep their owner, and no assert
+// fires for them.
+void ColPartition::DisownBoxesNoAssert() {
+  BLOBNBOX_C_IT it(&boxes_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* blob = it.data();
+    if (blob->owner() != this) {
+      continue;  // Not ours: leave its ownership untouched.
+    }
+    blob->set_owner(nullptr);
+  }
+}
+
+// Removes every non-leader blob from the boxes_ list, nulling its owner if
+// that owner is this partition (blobs owned elsewhere keep their owner), thus
+// turning this partition back to a leader partition if it contains a leader,
+// or otherwise leaving it empty. Returns true if any boxes remain.
+bool ColPartition::ReleaseNonLeaderBoxes() {
+ BLOBNBOX_C_IT bb_it(&boxes_);
+ for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
+ BLOBNBOX* bblob = bb_it.data();
+ if (bblob->flow() != BTFT_LEADER) {
+ if (bblob->owner() == this) bblob->set_owner(nullptr);
+ bb_it.extract(); // Dropped from boxes_ whoever the owner is.
+ }
+ }
+ if (bb_it.empty()) return false; // No leader blobs were left behind.
+ flow_ = BTFT_LEADER;
+ ComputeLimits(); // Limits now cover only the remaining blobs.
+ return true;
+}
+
+// Deletes every box on the boxes_ list, together with its underlying C_BLOB.
+void ColPartition::DeleteBoxes() {
+ // Although the boxes_ list is a C_LIST, in some cases it owns the
+ // BLOBNBOXes, as the ColPartition takes ownership from the grid,
+ // and the BLOBNBOXes own the underlying C_BLOBs.
+ for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) { // Runs until the list is empty.
+ BLOBNBOX* bblob = bb_it.extract();
+ delete bblob->cblob(); // Delete the blob before the box that refers to it.
+ delete bblob;
+ }
+}
+
+// Reflects the partition in the y-axis, assuming that its blobs have
+// already been reflected. Corrects only a limited part of the members, since
+// this function is assumed to be used shortly after initial creation, which
+// is before a lot of the members are used.
+void ColPartition::ReflectInYAxis() {
+ BLOBNBOX_CLIST reversed_boxes;
+ BLOBNBOX_C_IT reversed_it(&reversed_boxes);
+ // Reverse the order of the boxes_ by moving them onto a temporary list.
+ BLOBNBOX_C_IT bb_it(&boxes_);
+ for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
+ reversed_it.add_before_then_move(bb_it.extract());
+ }
+ bb_it.add_list_after(&reversed_boxes); // Move the reversed boxes back.
+ ASSERT_HOST(!left_key_tab_ && !right_key_tab_);
+ int tmp = left_margin_; // Swap and negate the two margins.
+ left_margin_ = -right_margin_;
+ right_margin_ = -tmp;
+ ComputeLimits();
+}
+
+// Checks that this partition is legal, i.e. that all of
+//   bounding box left <= bounding box right
+//   left_margin <= bounding box left, right_margin >= bounding box right
+//   left_key <= bounding box left key, right_key >= bounding box right key
+// hold. Returns false at the first violated condition, printing the
+// partition first if textord_debug_bugs is set.
+bool ColPartition::IsLegal() {
+  const bool box_inverted = bounding_box_.left() > bounding_box_.right();
+  if (box_inverted) {
+    if (textord_debug_bugs) {
+      tprintf("Bounding box invalid\n");
+      Print();
+    }
+    return false;  // Bounding box invalid.
+  }
+
+  const bool margins_inside_box = left_margin_ > bounding_box_.left() ||
+                                  right_margin_ < bounding_box_.right();
+  if (margins_inside_box) {
+    if (textord_debug_bugs) {
+      tprintf("Margins invalid\n");
+      Print();
+    }
+    return false;  // Margins invalid.
+  }
+
+  const bool keys_inside_box =
+      left_key_ > BoxLeftKey() || right_key_ < BoxRightKey();
+  if (keys_inside_box) {
+    if (textord_debug_bugs) {
+      tprintf("Key inside box: %d v %d or %d v %d\n",
+              left_key_, BoxLeftKey(), right_key_, BoxRightKey());
+      Print();
+    }
+    return false;  // Keys inside the box.
+  }
+  return true;
+}
+
+// Returns true if both the left and the right edges of this and other are
+// approximately equal, compared at the y midway between the two partitions
+// after scaling down by kColumnWidthFactor.
+bool ColPartition::MatchingColumns(const ColPartition& other) const {
+  const int y = (MidY() + other.MidY()) / 2;
+  const bool left_edges_match =
+      NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor,
+                  LeftAtY(y) / kColumnWidthFactor, 1);
+  if (!left_edges_match) {
+    return false;
+  }
+  // The left edges agree, so the right edges decide the result.
+  return NearlyEqual(other.RightAtY(y) / kColumnWidthFactor,
+                     RightAtY(y) / kColumnWidthFactor, 1);
+}
+
+// Returns true if the colors of two text partitions match: each partition's
+// two colors must lie close to the line through the other partition's colors.
+bool ColPartition::MatchingTextColor(const ColPartition& other) const {
+  const bool this_noisy = color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise;
+  const bool other_noisy = other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise;
+  if (this_noisy && other_noisy) {
+    return false;  // Too noisy.
+  }
+
+  // Colors must match for other to count. The first two entries measure this
+  // partition's colors against other's color line, the last two the reverse.
+  const double distances[] = {
+      ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color1_),
+      ImageFind::ColorDistanceFromLine(other.color1_, other.color2_, color2_),
+      ImageFind::ColorDistanceFromLine(color1_, color2_, other.color1_),
+      ImageFind::ColorDistanceFromLine(color1_, color2_, other.color2_)};
+  // All 4 distances must be small enough.
+  for (const double distance : distances) {
+    if (!(distance < kMaxColorDistance)) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// Returns true if the sizes match for two text partitions, taking
+// orientation into account: median widths are compared if either partition
+// has blob type BRT_VERT_TEXT, median heights otherwise.
+// See also SizesSimilar.
+bool ColPartition::MatchingSizes(const ColPartition& other) const {
+  const bool any_vertical =
+      blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT;
+  const bool sizes_differ =
+      any_vertical
+          ? TabFind::DifferentSizes(median_width_, other.median_width_)
+          : TabFind::DifferentSizes(median_height_, other.median_height_);
+  return !sizes_differ;
+}
+
+// Returns true if there is no tabstop violation in merging this and other.
+// A violation occurs when one partition's box lies entirely to one side of
+// the other's box and also beyond the rule line (LeftBlobRule/RightBlobRule)
+// that the other partition reports on that side.
+bool ColPartition::ConfirmNoTabViolation(const ColPartition& other) const {
+  const TBOX& mine = bounding_box_;
+  const TBOX& theirs = other.bounding_box_;
+  // The rule-line lookups are only evaluated when the corresponding box test
+  // has already succeeded, exactly as with a chain of early returns.
+  const bool violation =
+      (mine.right() < theirs.left() &&
+       mine.right() < other.LeftBlobRule()) ||
+      (theirs.right() < mine.left() &&
+       theirs.right() < LeftBlobRule()) ||
+      (mine.left() > theirs.right() &&
+       mine.left() > other.RightBlobRule()) ||
+      (theirs.left() > mine.right() &&
+       theirs.left() > RightBlobRule());
+  return !violation;
+}
+
+// Returns true if other has a similar stroke width to this.
+// The blobs of the two partitions are paired up in list order until either
+// list has been fully traversed. Each pair is tested with
+// BLOBNBOX::MatchingStrokeWidth using the given tolerances, and the result
+// is true only if matching pairs strictly outnumber non-matching pairs.
+bool ColPartition::MatchingStrokeWidth(const ColPartition& other,
+                                       double fractional_tolerance,
+                                       double constant_tolerance) const {
+  BLOBNBOX_C_IT mine_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
+  BLOBNBOX_C_IT theirs_it(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
+  // Net score: +1 for every matching pair, -1 for every mismatch.
+  int balance = 0;
+  for (mine_it.mark_cycle_pt(), theirs_it.mark_cycle_pt();
+       !mine_it.cycled_list() && !theirs_it.cycled_list();
+       mine_it.forward(), theirs_it.forward()) {
+    const bool matched = mine_it.data()->MatchingStrokeWidth(
+        *theirs_it.data(), fractional_tolerance, constant_tolerance);
+    balance += matched ? 1 : -1;
+  }
+  return balance > 0;
+}
+
+// Returns true if candidate is an acceptable diacritic base char merge
+// with this as the diacritic.
+// Returns true if:
+// (1) every blob in this ColPartition reports IsDiacritic(), and
+// (2) the base characters indicated on the diacritics all believably lie
+//     within the text line of the candidate ColPartition, i.e. the
+//     intersection of their vertical ranges overlaps candidate's
+//     median_bottom_..median_top_ range.
+// If debug is true, the reason for the decision is reported via tprintf.
+bool ColPartition::OKDiacriticMerge(const ColPartition& candidate,
+                                    bool debug) const {
+  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
+  // [max_bottom, min_top] is the intersection of the vertical ranges of
+  // all the base characters.
+  int min_top = INT32_MAX;
+  int max_bottom = -INT32_MAX;
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    BLOBNBOX* blob = it.data();
+    if (!blob->IsDiacritic()) {
+      if (debug) {
+        tprintf("Blob is not a diacritic:");
+        blob->bounding_box().print();
+      }
+      return false;  // All blobs must have diacritic bases.
+    }
+    if (blob->base_char_top() < min_top)
+      min_top = blob->base_char_top();
+    if (blob->base_char_bottom() > max_bottom)
+      max_bottom = blob->base_char_bottom();
+  }
+  // If the intersection of all vertical ranges of all base characters
+  // overlaps the median range of candidate, then it is OK.
+  bool result = min_top > candidate.median_bottom_ &&
+                max_bottom < candidate.median_top_;
+  if (debug) {
+    if (result) {
+      tprintf("OKDiacritic!\n");
+    } else {
+      // Report the range that was actually tested, which is candidate's
+      // median range and not the median range of this partition.
+      tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n",
+              max_bottom, min_top,
+              candidate.median_bottom_, candidate.median_top_);
+    }
+  }
+  return result;
+}
+
+// Sets the left sort key from the tab vector when one is given and its
+// sort key does not exceed the box left key. Otherwise (nullptr tab vector,
+// or a tab vector whose key lies inside the bounding box) the box left key
+// is used and left_key_tab_ is cleared.
+void ColPartition::SetLeftTab(const TabVector* tab_vector) {
+  bool key_from_tab = false;
+  if (tab_vector != nullptr) {
+    left_key_ = tab_vector->sort_key();
+    key_from_tab = left_key_ <= BoxLeftKey();
+  }
+  left_key_tab_ = key_from_tab;
+  if (!key_from_tab) {
+    // Fall back on the edge of the box.
+    left_key_ = BoxLeftKey();
+  }
+}
+
+// Sets the right sort key from the tab vector when one is given and its
+// sort key is not less than the box right key. Otherwise (nullptr tab
+// vector, or a tab vector whose key lies inside the bounding box) the box
+// right key is used and right_key_tab_ is cleared.
+void ColPartition::SetRightTab(const TabVector* tab_vector) {
+  bool key_from_tab = false;
+  if (tab_vector != nullptr) {
+    right_key_ = tab_vector->sort_key();
+    key_from_tab = right_key_ >= BoxRightKey();
+  }
+  right_key_tab_ = key_from_tab;
+  if (!key_from_tab) {
+    // Fall back on the edge of the box.
+    right_key_ = BoxRightKey();
+  }
+}
+
+// Copies the left tab from the src partition. If take_box is true, or src
+// has no left tab key, the left edge of the bounding box is instead moved to
+// the x position of src's box left key at this partition's MidY(), and the
+// resulting box left key is used as the key.
+// If the left margin then lies inside the box, src's left margin is taken.
+void ColPartition::CopyLeftTab(const ColPartition& src, bool take_box) {
+  left_key_tab_ = !take_box && src.left_key_tab_;
+  if (!left_key_tab_) {
+    bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY()));
+    left_key_ = BoxLeftKey();
+  } else {
+    left_key_ = src.left_key_;
+  }
+  const bool margin_inside_box = left_margin_ > bounding_box_.left();
+  if (margin_inside_box) {
+    left_margin_ = src.left_margin_;
+  }
+}
+
+// Copies the right tab from the src partition. If take_box is true, or src
+// has no right tab key, the right edge of the bounding box is instead moved
+// to the x position of src's box right key at this partition's MidY(), and
+// the resulting box right key is used as the key.
+// If the right margin then lies inside the box, src's right margin is taken.
+void ColPartition::CopyRightTab(const ColPartition& src, bool take_box) {
+  right_key_tab_ = !take_box && src.right_key_tab_;
+  if (!right_key_tab_) {
+    bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY()));
+    right_key_ = BoxRightKey();
+  } else {
+    right_key_ = src.right_key_;
+  }
+  const bool margin_inside_box = right_margin_ < bounding_box_.right();
+  if (margin_inside_box) {
+    right_margin_ = src.right_margin_;
+  }
+}
+
+// Returns the left rule line x coord of the first blob in the blob list
+// (the leftmost blob when the list is sorted by box left).
+// The list is accessed without an emptiness check.
+int ColPartition::LeftBlobRule() const {
+  BLOBNBOX_C_IT blob_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
+  BLOBNBOX* first_blob = blob_it.data();
+  return first_blob->left_rule();
+}
+// Returns the right rule line x coord of the last blob in the blob list
+// (the rightmost blob when the list is sorted by box left).
+// The list is accessed without an emptiness check.
+int ColPartition::RightBlobRule() const {
+  BLOBNBOX_C_IT blob_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
+  blob_it.move_to_last();
+  BLOBNBOX* last_blob = blob_it.data();
+  return last_blob->right_rule();
+}
+
+// Returns the stored density for the given special text type.
+// type must be less than BSTT_COUNT.
+float ColPartition::SpecialBlobsDensity(const BlobSpecialTextType type) const {
+  ASSERT_HOST(type < BSTT_COUNT);
+  const float density = special_blobs_densities_[type];
+  return density;
+}
+
+// Counts the blobs in this partition whose special_text_type() equals type.
+// type must be less than BSTT_COUNT.
+int ColPartition::SpecialBlobsCount(const BlobSpecialTextType type) {
+  ASSERT_HOST(type < BSTT_COUNT);
+  int num_matching = 0;
+  BLOBNBOX_C_IT it(&boxes_);
+  it.mark_cycle_pt();
+  while (!it.cycled_list()) {
+    if (it.data()->special_text_type() == type) {
+      ++num_matching;
+    }
+    it.forward();
+  }
+  return num_matching;
+}
+
+// Stores density as the density for the given special text type.
+// type must be less than BSTT_COUNT.
+void ColPartition::SetSpecialBlobsDensity(
+    const BlobSpecialTextType type, const float density) {
+  ASSERT_HOST(type < BSTT_COUNT);
+  float& slot = special_blobs_densities_[type];
+  slot = density;
+}
+
+// Recomputes special_blobs_densities_ from the blobs: each entry becomes the
+// fraction of blobs in this partition having that special text type.
+// All entries are zero when the partition has no blobs.
+void ColPartition::ComputeSpecialBlobsDensity() {
+  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
+  if (!boxes_.empty()) {
+    // First pass: per-type blob counts.
+    BLOBNBOX_C_IT it(&boxes_);
+    it.mark_cycle_pt();
+    while (!it.cycled_list()) {
+      special_blobs_densities_[it.data()->special_text_type()]++;
+      it.forward();
+    }
+    // Second pass: turn the counts into fractions of the blob total.
+    for (float& density : special_blobs_densities_) {
+      density /= boxes_.length();
+    }
+  }
+}
+
+// Adds partner as an upper partner of this if upper is true, otherwise as a
+// lower partner. The partnership is symmetric: this is also added to the
+// opposite list of partner. Both insertions use add_sorted with
+// SortByBoxLeft and the unique flag set, so entries stay sorted by box left
+// and are not duplicated.
+void ColPartition::AddPartner(bool upper, ColPartition* partner) {
+  ColPartition_CLIST* partners_list =
+      upper ? &partner->lower_partners_ : &partner->upper_partners_;
+  ColPartition_CLIST* own_list = upper ? &upper_partners_ : &lower_partners_;
+  partners_list->add_sorted(SortByBoxLeft<ColPartition>, true, this);
+  own_list->add_sorted(SortByBoxLeft<ColPartition>, true, partner);
+}
+
+// Removes the first occurrence of partner from the upper (if upper is true)
+// or lower partner list of this. It does NOT remove this from partner's
+// lists: the removal is deliberately asymmetric so as not to mess up an
+// iterator that is working on partner's partner list.
+void ColPartition::RemovePartner(bool upper, ColPartition* partner) {
+  ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
+  ColPartition_C_IT it(partners);
+  it.mark_cycle_pt();
+  while (!it.cycled_list()) {
+    if (it.data() == partner) {
+      it.extract();
+      return;
+    }
+    it.forward();
+  }
+}
+
+// Returns the only upper (if upper is true) or lower partner when that
+// partner list holds exactly one entry, otherwise nullptr.
+ColPartition* ColPartition::SingletonPartner(bool upper) {
+  ColPartition_CLIST& partners = upper ? upper_partners_ : lower_partners_;
+  if (partners.singleton()) {
+    ColPartition_C_IT it(&partners);
+    return it.data();
+  }
+  return nullptr;
+}
+
+// Merge with the other partition and delete it.
+// The blobs of other are moved onto this partition's list (except any that
+// are owned by a third partition, which are just dropped from other's list),
+// margins, keys, flow and blob type are combined, the limits are recomputed
+// and all of other's partners are re-pointed at this.
+// other is deleted before returning and must not be used afterwards.
+// If cb is not nullptr, SetColumnGoodness(cb) is run on the merged result.
+void ColPartition::Absorb(ColPartition* other, WidthCallback cb) {
+  // The result has to either own all of the blobs or none of them.
+  // Verify the flag is consistent.
+  ASSERT_HOST(owns_blobs() == other->owns_blobs());
+  // TODO(nbeato): check owns_blobs better. Right now owns_blobs
+  // should always be true when this is called. So there is no issues.
+  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
+                                bounding_box_.bottom()) ||
+      TabFind::WithinTestRegion(2, other->bounding_box_.left(),
+                                other->bounding_box_.bottom())) {
+    tprintf("Merging:");
+    Print();
+    other->Print();
+  }
+
+  // Update the special_blobs_densities_ to the blob-count-weighted average
+  // of the two partitions. This must be done before the blob lists are
+  // merged, and without clearing our own densities first, as both the old
+  // densities and the old blob counts are inputs to the average.
+  const unsigned w1 = boxes_.length();
+  const unsigned w2 = other->boxes_.length();
+  for (int type = 0; type < BSTT_COUNT; ++type) {
+    if (w1 + w2 > 0) {
+      const float weighted_sum =
+          special_blobs_densities_[type] * w1 +
+          other->special_blobs_densities_[type] * w2;
+      special_blobs_densities_[type] = weighted_sum / (w1 + w2);
+    } else {
+      // Neither partition has any blobs, so there is nothing to average.
+      special_blobs_densities_[type] = 0.0f;
+    }
+  }
+
+  // Merge the two sorted lists.
+  BLOBNBOX_C_IT it(&boxes_);
+  BLOBNBOX_C_IT it2(&other->boxes_);
+  for (; !it2.empty(); it2.forward()) {
+    BLOBNBOX* bbox2 = it2.extract();
+    ColPartition* prev_owner = bbox2->owner();
+    if (prev_owner != other && prev_owner != nullptr) {
+      // A blob on other's list is owned by someone else; let them have it.
+      continue;
+    }
+    ASSERT_HOST(prev_owner == other || prev_owner == nullptr);
+    // Un-owned blobs stay un-owned; only blobs owned by other change owner.
+    if (prev_owner == other)
+      bbox2->set_owner(this);
+    it.add_to_end(bbox2);
+  }
+  // Take the outermost margins and keys of the pair. A key's tab flag
+  // travels with the key that is kept.
+  left_margin_ = std::min(left_margin_, other->left_margin_);
+  right_margin_ = std::max(right_margin_, other->right_margin_);
+  if (other->left_key_ < left_key_) {
+    left_key_ = other->left_key_;
+    left_key_tab_ = other->left_key_tab_;
+  }
+  if (other->right_key_ > right_key_) {
+    right_key_ = other->right_key_;
+    right_key_tab_ = other->right_key_tab_;
+  }
+  // Combine the flow and blob_type in a sensible way.
+  // Dominant flows stay.
+  if (!DominatesInMerge(flow_, other->flow_)) {
+    flow_ = other->flow_;
+    blob_type_ = other->blob_type_;
+  }
+  SetBlobTypes();
+  // The appended blobs went on the end of the list, so restore the sort
+  // order appropriate to the (possibly changed) orientation.
+  if (IsVerticalType()) {
+    boxes_.sort(SortByBoxBottom<BLOBNBOX>);
+    last_add_was_vertical_ = true;
+  } else {
+    boxes_.sort(SortByBoxLeft<BLOBNBOX>);
+    last_add_was_vertical_ = false;
+  }
+  ComputeLimits();
+  // Fix partner lists. other is going away, so remove it as a
+  // partner of all its partners and add this in its place.
+  for (int upper = 0; upper < 2; ++upper) {
+    // Work on a temporary list that takes over other's partner list.
+    ColPartition_CLIST partners;
+    ColPartition_C_IT part_it(&partners);
+    part_it.add_list_after(upper ? &other->upper_partners_
+                                 : &other->lower_partners_);
+    for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
+      ColPartition* partner = part_it.extract();
+      partner->RemovePartner(!upper, other);
+      // Remove this before re-adding it so that it is reinserted at the
+      // position matching its new bounding box.
+      partner->RemovePartner(!upper, this);
+      partner->AddPartner(!upper, this);
+    }
+  }
+  delete other;
+  if (cb != nullptr) {
+    SetColumnGoodness(cb);
+  }
+}
+
+// Merge1 and merge2 are candidates to be merged, yet their combined box
+// overlaps this. Decides whether that is allowed.
+// Returns true if the overlap between this and the merged pair of
+// merge candidates is sufficiently trivial to be allowed:
+//  - none of the three partitions may be of vertical type,
+//  - merge1 and merge2 must have a significant vertical core overlap, and
+//  - the merged box may graze the edge of this by up to ok_box_overlap,
+//    or overlap the box of this without reaching its median top/bottom
+//    range.
+// ok_box_overlap should be set by the caller appropriate to the sizes of
+// the text involved, and is usually a fraction of the median size of merge1
+// and/or merge2, or this.
+// If debug is true, the reason for a rejection is reported via tprintf.
+// TODO(rays) Determine whether vertical text needs to be considered.
+bool ColPartition::OKMergeOverlap(const ColPartition& merge1,
+                                  const ColPartition& merge2,
+                                  int ok_box_overlap, bool debug) {
+  // Vertical partitions are not allowed to be involved.
+  const bool any_vertical = IsVerticalType() || merge1.IsVerticalType() ||
+                            merge2.IsVerticalType();
+  if (any_vertical) {
+    if (debug) {
+      tprintf("Vertical partition\n");
+    }
+    return false;
+  }
+  // The merging partitions must strongly overlap each other.
+  if (!merge1.VSignificantCoreOverlap(merge2)) {
+    if (debug) {
+      tprintf("Voverlap %d (%d)\n",
+              merge1.VCoreOverlap(merge2),
+              merge1.VSignificantCoreOverlap(merge2));
+    }
+    return false;
+  }
+  // The merged box must not overlap the median bounds of this by more than
+  // the permitted graze.
+  TBOX combined(merge1.bounding_box());
+  combined += merge2.bounding_box();
+  const bool crosses_median = combined.bottom() < median_top_ &&
+                              combined.top() > median_bottom_;
+  if (crosses_median &&
+      combined.bottom() < bounding_box_.top() - ok_box_overlap &&
+      combined.top() > bounding_box_.bottom() + ok_box_overlap) {
+    if (debug) {
+      tprintf("Excessive box overlap\n");
+    }
+    return false;
+  }
+  // Looks OK!
+  return true;
+}
+
+// Find the blob at which to split this to minimize the overlap with the
+// given box. Walking the blobs in list order from the second one, returns
+// the first blob whose addition makes the accumulated bounding box of the
+// blobs so far overlap box. That blob is the first to go in the second
+// partition. Returns nullptr if there are fewer than two blobs or if no
+// such blob exists.
+BLOBNBOX* ColPartition::OverlapSplitBlob(const TBOX& box) {
+  if (boxes_.empty() || boxes_.singleton()) {
+    return nullptr;
+  }
+  BLOBNBOX_C_IT blob_it(&boxes_);
+  // Start with the box of the first blob, which is never itself returned.
+  TBOX accumulated(blob_it.data()->bounding_box());
+  blob_it.forward();
+  while (!blob_it.at_first()) {
+    BLOBNBOX* candidate = blob_it.data();
+    accumulated += candidate->bounding_box();
+    if (accumulated.overlap(box)) {
+      return candidate;
+    }
+    blob_it.forward();
+  }
+  return nullptr;
+}
+
+// Split this partition keeping the first half in this and returning
+// the second half.
+// Splits by putting the split_blob and the blobs that follow
+// in the second half, and the rest in the first half.
+// Returns nullptr (leaving this unchanged apart from the iteration) if
+// split_blob is not found in the blob list.
+// NOTE(review): if split_blob is the first blob in the list, every blob is
+// moved out and the ASSERT_HOST(!it.empty()) below fires, so callers appear
+// to be required to pass a blob other than the first - confirm.
+ColPartition* ColPartition::SplitAtBlob(BLOBNBOX* split_blob) {
+ ColPartition* split_part = ShallowCopy();
+ split_part->set_owns_blobs(owns_blobs());
+ BLOBNBOX_C_IT it(&boxes_);
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ BLOBNBOX* bbox = it.data();
+ ColPartition* prev_owner = bbox->owner();
+ ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr);
+ // Once split_blob has been moved, split_part is non-empty, so every
+ // blob after it in the list is moved as well.
+ if (bbox == split_blob || !split_part->boxes_.empty()) {
+ split_part->AddBox(it.extract());
+ // Blobs that had no owner stay un-owned.
+ if (owns_blobs() && prev_owner != nullptr)
+ bbox->set_owner(split_part);
+ }
+ }
+ // This partition must be left with at least one blob.
+ ASSERT_HOST(!it.empty());
+ if (split_part->IsEmpty()) {
+ // Split part ended up with nothing. Possible if split_blob is not
+ // in the list of blobs.
+ delete split_part;
+ return nullptr;
+ }
+ // The edges created by the split are not tab stops.
+ right_key_tab_ = false;
+ split_part->left_key_tab_ = false;
+ ComputeLimits();
+ // TODO(nbeato) Merge Ray's CL like this:
+ // if (owns_blobs())
+ // SetBlobTextlineGoodness();
+ split_part->ComputeLimits();
+ // TODO(nbeato) Merge Ray's CL like this:
+ // if (split_part->owns_blobs())
+ // split_part->SetBlobTextlineGoodness();
+ return split_part;
+}
+
+// Split this partition at the given x coordinate, returning the right
+// half and keeping the left half in this.
+// A blob goes to the right half if the left edge of its bounding box is at
+// or beyond split_x. Returns nullptr without splitting if split_x is not
+// strictly inside the bounding box, or if either half would be empty.
+// On a successful split, split_x becomes the right margin of this and the
+// left margin of the returned partition.
+ColPartition* ColPartition::SplitAt(int split_x) {
+ if (split_x <= bounding_box_.left() || split_x >= bounding_box_.right())
+ return nullptr; // There will be no change.
+ ColPartition* split_part = ShallowCopy();
+ split_part->set_owns_blobs(owns_blobs());
+ BLOBNBOX_C_IT it(&boxes_);
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ BLOBNBOX* bbox = it.data();
+ ColPartition* prev_owner = bbox->owner();
+ ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr);
+ const TBOX& box = bbox->bounding_box();
+ if (box.left() >= split_x) {
+ split_part->AddBox(it.extract());
+ // Blobs that had no owner stay un-owned.
+ if (owns_blobs() && prev_owner != nullptr)
+ bbox->set_owner(split_part);
+ }
+ }
+ if (it.empty()) {
+ // Possible if split-x passes through the first blob.
+ // Everything was moved out, so take it all back. split_part is then
+ // empty and is discarded below.
+ it.add_list_after(&split_part->boxes_);
+ }
+ ASSERT_HOST(!it.empty());
+ if (split_part->IsEmpty()) {
+ // Split part ended up with nothing. Possible if split_x passes
+ // through the last blob.
+ delete split_part;
+ return nullptr;
+ }
+ // The edges created by the split are not tab stops.
+ right_key_tab_ = false;
+ split_part->left_key_tab_ = false;
+ right_margin_ = split_x;
+ split_part->left_margin_ = split_x;
+ ComputeLimits();
+ split_part->ComputeLimits();
+ return split_part;
+}
+
+// Recalculates all the coordinate limits of the partition.
+// Rebuilds bounding_box_ from the blobs (or from the margins, with zero
+// height, if there are no blobs), refreshes the left/right keys that are not
+// tab keys, recomputes the median_* members and finally re-sorts this in the
+// partner lists of all its partners.
+// With an empty blob list the function returns after updating the box and
+// keys, leaving the medians and the partner lists untouched.
+// Problems are only reported (when textord_debug_bugs is set), not fixed.
+void ColPartition::ComputeLimits() {
+ bounding_box_ = TBOX(); // Clear it
+ BLOBNBOX_C_IT it(&boxes_);
+ BLOBNBOX* bbox = nullptr;
+ int non_leader_count = 0;
+ if (it.empty()) {
+ bounding_box_.set_left(left_margin_);
+ bounding_box_.set_right(right_margin_);
+ bounding_box_.set_bottom(0);
+ bounding_box_.set_top(0);
+ } else {
+ // The box is the union of the blob boxes. Also count the blobs that are
+ // not leaders, which decides below which blobs feed the medians.
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ bbox = it.data();
+ bounding_box_ += bbox->bounding_box();
+ if (bbox->flow() != BTFT_LEADER)
+ ++non_leader_count;
+ }
+ }
+ // Keys that do not come from a tab follow the edges of the new box.
+ if (!left_key_tab_)
+ left_key_ = BoxLeftKey();
+ if (left_key_ > BoxLeftKey() && textord_debug_bugs) {
+ // TODO(rays) investigate the causes of these error messages, to find
+ // out if they are genuinely harmful, or just indicative of junk input.
+ tprintf("Computed left-illegal partition\n");
+ Print();
+ }
+ if (!right_key_tab_)
+ right_key_ = BoxRightKey();
+ if (right_key_ < BoxRightKey() && textord_debug_bugs) {
+ tprintf("Computed right-illegal partition\n");
+ Print();
+ }
+ if (it.empty())
+ return;
+ if (IsImageType() || blob_type() == BRT_RECTIMAGE ||
+ blob_type() == BRT_POLYIMAGE) {
+ // For images the medians are simply the limits of the bounding box.
+ median_top_ = bounding_box_.top();
+ median_bottom_ = bounding_box_.bottom();
+ median_height_ = bounding_box_.height();
+ median_left_ = bounding_box_.left();
+ median_right_ = bounding_box_.right();
+ median_width_ = bounding_box_.width();
+ } else {
+ STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
+ STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
+ STATS height_stats(0, bounding_box_.height() + 1);
+ STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
+ STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
+ STATS width_stats(0, bounding_box_.width() + 1);
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ bbox = it.data();
+ // Leader blobs are left out of the statistics unless the partition
+ // contains nothing but leaders.
+ if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
+ const TBOX& box = bbox->bounding_box();
+ // Each blob is weighted by the area of its bounding box.
+ int area = box.area();
+ top_stats.add(box.top(), area);
+ bottom_stats.add(box.bottom(), area);
+ height_stats.add(box.height(), area);
+ left_stats.add(box.left(), area);
+ right_stats.add(box.right(), area);
+ width_stats.add(box.width(), area);
+ }
+ }
+ // Adding 0.5 before the cast rounds the medians to the nearest integer.
+ median_top_ = static_cast<int>(top_stats.median() + 0.5);
+ median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
+ median_height_ = static_cast<int>(height_stats.median() + 0.5);
+ median_left_ = static_cast<int>(left_stats.median() + 0.5);
+ median_right_ = static_cast<int>(right_stats.median() + 0.5);
+ median_width_ = static_cast<int>(width_stats.median() + 0.5);
+ }
+
+ if (right_margin_ < bounding_box_.right() && textord_debug_bugs) {
+ tprintf("Made partition with bad right coords, %d < %d\n",
+ right_margin_, bounding_box_.right());
+ Print();
+ }
+ if (left_margin_ > bounding_box_.left() && textord_debug_bugs) {
+ tprintf("Made partition with bad left coords, %d > %d\n",
+ left_margin_, bounding_box_.left());
+ Print();
+ }
+ // Fix partner lists. The bounding box has changed and partners are stored
+ // in bounding box order, so remove and reinsert this as a partner
+ // of all its partners.
+ for (int upper = 0; upper < 2; ++upper) {
+ // Move our partner list to a temporary list. AddPartner below puts each
+ // partner back on our own list as well as re-inserting this on theirs.
+ ColPartition_CLIST partners;
+ ColPartition_C_IT part_it(&partners);
+ part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
+ for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
+ ColPartition* partner = part_it.extract();
+ partner->RemovePartner(!upper, this);
+ partner->AddPartner(!upper, this);
+ }
+ }
+ if (TabFind::WithinTestRegion(2, bounding_box_.left(),
+ bounding_box_.bottom())) {
+ tprintf("Recomputed box for partition %p\n", this);
+ Print();
+ }
+}
+
+// Returns the number of blobs in this partition whose bounding box overlaps
+// the given box.
+int ColPartition::CountOverlappingBoxes(const TBOX& box) {
+  int num_overlapping = 0;
+  BLOBNBOX_C_IT blob_it(&boxes_);
+  blob_it.mark_cycle_pt();
+  while (!blob_it.cycled_list()) {
+    num_overlapping += box.overlap(blob_it.data()->bounding_box()) ? 1 : 0;
+    blob_it.forward();
+  }
+  return num_overlapping;
+}
+
+// Computes and sets the type_ and first_column_, last_column_ and column_set_.
+// resolution refers to the ppi resolution of the image.
+// The column range and spanning type come from columns->SpanningType. A
+// non-line pullout that touches more than one column is then forced into a
+// single column.
+void ColPartition::SetPartitionType(int resolution, ColPartitionSet* columns) {
+  int spanned_col = -1;
+  const ColumnSpanningType span_type = columns->SpanningType(
+      resolution, bounding_box_.left(), bounding_box_.right(),
+      std::min(bounding_box_.height(), bounding_box_.width()),
+      MidY(), left_margin_, right_margin_,
+      &first_column_, &last_column_, &spanned_col);
+  column_set_ = columns;
+
+  const bool multi_column_pullout = first_column_ < last_column_ &&
+                                    span_type == CST_PULLOUT &&
+                                    !IsLineType();
+  if (multi_column_pullout) {
+    // Unequal columns may indicate that the pullout spans one of the columns
+    // it lies in, so force it to be allocated to just that column.
+    if (spanned_col >= 0) {
+      first_column_ = spanned_col;
+      last_column_ = spanned_col;
+    } else if ((first_column_ & 1) == 0) {
+      // Prefer an even-numbered end of the range.
+      last_column_ = first_column_;
+    } else if ((last_column_ & 1) == 0) {
+      first_column_ = last_column_;
+    } else {
+      // Both ends are odd, so take the middle of the range.
+      const int middle = (first_column_ + last_column_) / 2;
+      last_column_ = middle;
+      first_column_ = middle;
+    }
+  }
+  type_ = PartitionType(span_type);
+}
+
+// Returns the PartitionType from the current BlobRegionType and a column
+// flow spanning type ColumnSpanningType, generated by
+// ColPartitionSet::SpanningType, that indicates how the partition sits
+// in the columns.
+// A CST_NOISE flow gives PT_NOISE, except for blob types BRT_HLINE,
+// BRT_VLINE, BRT_RECTIMAGE and BRT_VERT_TEXT, for which the flow is treated
+// as CST_FLOWING. Lines, noise and vertical text map directly from the blob
+// type. Images and all remaining blob types (treated as text) map through
+// the flow, and any other flow value trips an ASSERT_HOST.
+PolyBlockType ColPartition::PartitionType(ColumnSpanningType flow) const {
+  if (flow == CST_NOISE) {
+    const bool survives_noise =
+        blob_type_ == BRT_HLINE || blob_type_ == BRT_VLINE ||
+        blob_type_ == BRT_RECTIMAGE || blob_type_ == BRT_VERT_TEXT;
+    if (!survives_noise) {
+      return PT_NOISE;
+    }
+    flow = CST_FLOWING;
+  }
+
+  // Blob types whose result does not depend on the flow.
+  if (blob_type_ == BRT_NOISE) {
+    return PT_NOISE;
+  }
+  if (blob_type_ == BRT_HLINE) {
+    return PT_HORZ_LINE;
+  }
+  if (blob_type_ == BRT_VLINE) {
+    return PT_VERT_LINE;
+  }
+  if (blob_type_ == BRT_VERT_TEXT) {
+    return PT_VERTICAL_TEXT;
+  }
+
+  // Everything that is left is either an image or is treated as text.
+  const bool is_image =
+      blob_type_ == BRT_RECTIMAGE || blob_type_ == BRT_POLYIMAGE;
+  if (flow == CST_FLOWING) {
+    return is_image ? PT_FLOWING_IMAGE : PT_FLOWING_TEXT;
+  }
+  if (flow == CST_HEADING) {
+    return is_image ? PT_HEADING_IMAGE : PT_HEADING_TEXT;
+  }
+  if (flow == CST_PULLOUT) {
+    return is_image ? PT_PULLOUT_IMAGE : PT_PULLOUT_TEXT;
+  }
+  if (is_image) {
+    ASSERT_HOST(!"Undefined flow type for image!");
+  } else {
+    ASSERT_HOST(!"Undefined flow type for text!");
+  }
+  ASSERT_HOST(!"Should never get here!");
+  return PT_NOISE;
+}
+
+// Returns, in *first_col and *last_col, the first and last column touched by
+// this partition, as computed by columns->SpanningType.
+// resolution refers to the ppi resolution of the image.
+// Note that, as a side effect, type_ is also updated from the spanning type.
+void ColPartition::ColumnRange(int resolution, ColPartitionSet* columns,
+                               int* first_col, int* last_col) {
+  // Required by SpanningType, but the value is not used here.
+  int spanned_col = -1;
+  const ColumnSpanningType span_type = columns->SpanningType(
+      resolution, bounding_box_.left(), bounding_box_.right(),
+      std::min(bounding_box_.height(), bounding_box_.width()),
+      MidY(), left_margin_, right_margin_,
+      first_col, last_col, &spanned_col);
+  type_ = PartitionType(span_type);
+}
+
+// Sets the internal flags good_width_ and good_column_.
+// good_width_ is the result of cb applied to the width of the partition at
+// MidY(). good_column_ is set if the blob type is BRT_TEXT and both the left
+// and right keys are tab keys.
+void ColPartition::SetColumnGoodness(WidthCallback cb) {
+  const int mid_y = MidY();
+  int width = RightAtY(mid_y) - LeftAtY(mid_y);
+  good_width_ = cb(width);
+  const bool is_text = blob_type_ == BRT_TEXT;
+  good_column_ = is_text && left_key_tab_ && right_key_tab_;
+}
+
+// Determines whether the blobs in this partition mostly represent
+// a leader (fixed pitch sequence) and sets the member blobs accordingly.
+// Note that height is assumed to have been tested elsewhere, and that this
+// function will find most fixed-pitch text as leader without a height filter.
+// Leader detection is limited to sequences of identical width objects,
+// such as .... or ----, so patterns, such as .-.-.-.-. will not be found.
+// Every blob has its flow set to BTFT_NEIGHBOURS first. If a leader is
+// found, the blobs that are kept are marked BRT_TEXT/BTFT_LEADER, as is the
+// partition, and a first or last blob that is spaced too far from its
+// neighbour is removed from the partition.
+// Returns true if the partition was marked as a leader. A partition without
+// any blobs is never a leader.
+bool ColPartition::MarkAsLeaderIfMonospaced() {
+  // There is nothing to measure, and no first blob to access, without blobs.
+  if (boxes_.empty())
+    return false;
+  bool result = false;
+  // Gather statistics on the gaps between blobs and the widths of the blobs.
+  int part_width = bounding_box_.width();
+  STATS gap_stats(0, part_width);
+  STATS width_stats(0, part_width);
+  BLOBNBOX_C_IT it(&boxes_);
+  BLOBNBOX* prev_blob = it.data();
+  prev_blob->set_flow(BTFT_NEIGHBOURS);
+  width_stats.add(prev_blob->bounding_box().width(), 1);
+  int blob_count = 1;
+  for (it.forward(); !it.at_first(); it.forward()) {
+    BLOBNBOX* blob = it.data();
+    int left = blob->bounding_box().left();
+    int right = blob->bounding_box().right();
+    gap_stats.add(left - prev_blob->bounding_box().right(), 1);
+    width_stats.add(right - left, 1);
+    blob->set_flow(BTFT_NEIGHBOURS);
+    prev_blob = blob;
+    ++blob_count;
+  }
+  double median_gap = gap_stats.median();
+  double median_width = width_stats.median();
+  double max_width = std::max(median_gap, median_width);
+  double min_width = std::min(median_gap, median_width);
+  // The inter-quartile range of the gaps measures how regular the pitch is.
+  double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
+  if (textord_debug_tabfind >= 4) {
+    tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n",
+            gap_iqr, blob_count, max_width * kMaxLeaderGapFractionOfMax,
+            min_width * kMaxLeaderGapFractionOfMin);
+  }
+  if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax &&
+      gap_iqr < min_width * kMaxLeaderGapFractionOfMin &&
+      blob_count >= kMinLeaderCount) {
+    // This is stable enough to be called a leader, so check the widths.
+    // Since leader dashes can join, run a dp cutting algorithm and go
+    // on the cost.
+    int offset = static_cast<int>(ceil(gap_iqr * 2));
+    int min_step = static_cast<int>(median_gap + median_width + 0.5);
+    int max_step = min_step + offset;
+    min_step -= offset;
+    // Pad the buffer with min_step/2 on each end.
+    int part_left = bounding_box_.left() - min_step / 2;
+    part_width += min_step;
+    auto* projection = new DPPoint[part_width];
+    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+      BLOBNBOX* blob = it.data();
+      int left = blob->bounding_box().left();
+      int right = blob->bounding_box().right();
+      int height = blob->bounding_box().height();
+      // Add the blob height to every column that the blob covers, so that
+      // projection is the horizontal projection profile of the blobs.
+      for (int x = left; x < right; ++x) {
+        projection[x - part_left].AddLocalCost(height);
+      }
+    }
+    DPPoint* best_end = DPPoint::Solve(min_step, max_step, false,
+                                       &DPPoint::CostWithVariance,
+                                       part_width, projection);
+    if (best_end != nullptr && best_end->total_cost() < blob_count) {
+      // Good enough. Call it a leader.
+      result = true;
+      bool modified_blob_list = false;
+      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+        BLOBNBOX* blob = it.data();
+        // If the first or last blob is spaced too much, don't mark it.
+        if (it.at_first()) {
+          int gap = it.data_relative(1)->bounding_box().left() -
+                    blob->bounding_box().right();
+          if (blob->bounding_box().width() + gap > max_step) {
+            it.extract();
+            modified_blob_list = true;
+            continue;
+          }
+        }
+        if (it.at_last()) {
+          int gap = blob->bounding_box().left() -
+                    it.data_relative(-1)->bounding_box().right();
+          if (blob->bounding_box().width() + gap > max_step) {
+            it.extract();
+            modified_blob_list = true;
+            break;
+          }
+        }
+        blob->set_region_type(BRT_TEXT);
+        blob->set_flow(BTFT_LEADER);
+      }
+      // Blobs were removed, so the box and medians are out of date.
+      if (modified_blob_list) ComputeLimits();
+      blob_type_ = BRT_TEXT;
+      flow_ = BTFT_LEADER;
+    } else if (textord_debug_tabfind) {
+      if (best_end == nullptr) {
+        tprintf("No path\n");
+      } else {
+        tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(),
+                blob_count);
+      }
+    }
+    delete [] projection;
+  }
+  return result;
+}
+
+// Given the result of TextlineProjection::EvaluateColPartition, (positive for
+// horizontal text, negative for vertical text, and near zero for non-text),
+// sets the blob_type_ and flow_ for this partition to indicate whether it
+// is strongly or weakly vertical or horizontal text, or non-text.
+// A majority of horizontal or vertical line blobs takes precedence over the
+// projection value. The chosen type and flow are finally pushed down to the
+// blobs with SetBlobTypes.
+// The function assumes that the blob neighbours are valid (from
+// StrokeWidth::SetNeighbours) and that those neighbours have their
+// region_type() set.
+void ColPartition::SetRegionAndFlowTypesFromProjectionValue(int value) {
+  int num_blobs = 0;        // Total # blobs.
+  int good_blob_score = 0;  // Total # good strokewidth neighbours.
+  int num_noisy = 0;        // Total # neighbours marked as noise.
+  int num_hlines = 0;
+  int num_vlines = 0;
+  BLOBNBOX_C_IT blob_it(&boxes_);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    ++num_blobs;
+    num_noisy += blob->NoisyNeighbours();
+    good_blob_score += blob->GoodTextBlob();
+    if (blob->region_type() == BRT_HLINE) {
+      ++num_hlines;
+    }
+    if (blob->region_type() == BRT_VLINE) {
+      ++num_vlines;
+    }
+  }
+
+  // Defaults, used when neither lines nor the projection value decide.
+  flow_ = BTFT_NEIGHBOURS;
+  blob_type_ = BRT_UNKNOWN;
+  if (num_hlines != num_vlines) {
+    // Whichever kind of line is in the majority wins.
+    flow_ = BTFT_NONE;
+    blob_type_ = num_hlines > num_vlines ? BRT_HLINE : BRT_VLINE;
+  } else if (value < -1 || 1 < value) {
+    const bool horizontal = value > 0;
+    const int long_side =
+        horizontal ? bounding_box_.width() : bounding_box_.height();
+    const int short_side =
+        horizontal ? bounding_box_.height() : bounding_box_.width();
+    blob_type_ = horizontal ? BRT_TEXT : BRT_VERT_TEXT;
+    // We will combine the old metrics using aspect ratio and blob counts
+    // with the input value by allowing a strong indication to flip the
+    // STRONG_CHAIN/CHAIN flow values.
+    int strong_score = 0;
+    if (num_blobs >= kHorzStrongTextlineCount) {
+      ++strong_score;
+    }
+    if (short_side > kHorzStrongTextlineHeight) {
+      ++strong_score;
+    }
+    if (short_side * kHorzStrongTextlineAspect < long_side) {
+      ++strong_score;
+    }
+    const int magnitude = abs(value);
+    if (magnitude >= kMinStrongTextValue) {
+      flow_ = BTFT_STRONG_CHAIN;
+    } else if (magnitude >= kMinChainTextValue) {
+      flow_ = BTFT_CHAIN;
+    } else {
+      flow_ = BTFT_NEIGHBOURS;
+    }
+    // Upgrade chain to strong chain if the other indicators are good
+    if (flow_ == BTFT_CHAIN && strong_score == 3) {
+      flow_ = BTFT_STRONG_CHAIN;
+    }
+    // Downgrade strong vertical text to chain if the indicators are bad.
+    if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2) {
+      flow_ = BTFT_CHAIN;
+    }
+  }
+  // Check for noisy neighbours.
+  if (flow_ == BTFT_NEIGHBOURS && num_noisy >= num_blobs) {
+    flow_ = BTFT_NONTEXT;
+    blob_type_ = BRT_NOISE;
+  }
+  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
+                                bounding_box_.bottom())) {
+    tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
+            num_blobs, num_noisy, good_blob_score);
+    tprintf(" Projection value=%d, flow=%d, blob_type=%d\n",
+            value, flow_, blob_type_);
+    Print();
+  }
+  SetBlobTypes();
+}
+
+// Copies the partition's blob type to all of its blobs, and its flow to all
+// blobs except leaders: leader blobs are never overwritten, as we need to be
+// able to identify them later.
+// Does nothing if this partition does not own its blobs.
+void ColPartition::SetBlobTypes() {
+  if (!owns_blobs()) {
+    return;
+  }
+  BLOBNBOX_C_IT blob_it(&boxes_);
+  blob_it.mark_cycle_pt();
+  while (!blob_it.cycled_list()) {
+    BLOBNBOX* blob = blob_it.data();
+    const bool is_leader = blob->flow() == BTFT_LEADER;
+    if (!is_leader) {
+      blob->set_flow(flow_);
+    }
+    blob->set_region_type(blob_type_);
+    ASSERT_HOST(blob->owner() == nullptr || blob->owner() == this);
+    blob_it.forward();
+  }
+}
+
+// Returns true if a decent baseline can be fitted through the blobs.
+// Works for both horizontal and vertical text. Horizontal text uses the
+// bottom of the blobs as the baseline, vertical text uses the right side.
+// The names below are written for a horizontal line and apply analogously,
+// rather than literally, to a vertical one.
+// The baseline is good if the fitting error is less than kMaxBaselineError
+// times the mean height of the interior blobs, and the interior blobs cover
+// at least kMinBaselineCoverage of the length of the line.
+// Returns false if there are no blobs strictly between the first and last.
+// The blob list is accessed without an emptiness check.
+bool ColPartition::HasGoodBaseline() {
+  // Approximation of the baseline.
+  DetLineFit baseline_points;
+  int height_sum = 0;      // Sum of the interior blob heights.
+  int covered_length = 0;  // Sum of the interior blob widths.
+  int interior_count = 0;  // Number of interior blobs.
+  BLOBNBOX_C_IT blob_it(&boxes_);
+  TBOX blob_box(blob_it.data()->bounding_box());
+  const bool vertical = IsVerticalType();
+
+  // Accumulate points representing the baseline at the middle of each blob,
+  // but add an additional point for each end of the line. This makes it
+  // harder to fit a severe skew angle, as it is most likely not right.
+  // The line starts at the bottom-right of the first (bottom) box for
+  // vertical text and at the bottom-left of the first box otherwise.
+  ICOORD first_pt(vertical ? blob_box.right() : blob_box.left(),
+                  blob_box.bottom());
+  baseline_points.Add(first_pt);
+  for (blob_it.forward(); !blob_it.at_last(); blob_it.forward()) {
+    blob_box = blob_it.data()->bounding_box();
+    if (vertical) {
+      // Middle-right of the box.
+      ICOORD mid_pt(blob_box.right(),
+                    (blob_box.top() + blob_box.bottom()) / 2);
+      baseline_points.Add(mid_pt);
+      height_sum += blob_box.width();
+      covered_length += blob_box.height();
+    } else {
+      // Middle of the bottom of the box.
+      ICOORD mid_pt((blob_box.left() + blob_box.right()) / 2,
+                    blob_box.bottom());
+      baseline_points.Add(mid_pt);
+      height_sum += blob_box.height();
+      covered_length += blob_box.width();
+    }
+    ++interior_count;
+  }
+  // The line ends at the top-right of the last box for vertical text and at
+  // the bottom-right of the last box otherwise.
+  blob_box = blob_it.data()->bounding_box();
+  ICOORD last_pt(blob_box.right(),
+                 vertical ? blob_box.top() : blob_box.bottom());
+  baseline_points.Add(last_pt);
+  const int line_length = vertical ? last_pt.y() - first_pt.y()
+                                   : last_pt.x() - first_pt.x();
+
+  if (interior_count == 0) {
+    return false;
+  }
+  // Maximum median error allowed to be a good text line.
+  const double max_error = kMaxBaselineError * height_sum / interior_count;
+  ICOORD fit_start, fit_end;
+  const double fit_error = baseline_points.Fit(&fit_start, &fit_end);
+  return fit_error < max_error &&
+         covered_length >= kMinBaselineCoverage * line_length;
+}
+
+ // Adds this ColPartition to a matching WorkingPartSet if one can be found,
+ // otherwise starts a new one in the appropriate column, ending the previous.
+ void ColPartition::AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright,
+ int resolution,  // bleft, tright, resolution, used_parts are only forwarded to ExtractCompletedBlocks.
+ ColPartition_LIST* used_parts,
+ WorkingPartSet_LIST* working_sets) {  // Indexed by column: entry first_column_ is the target.
+ if (block_owned_)
+ return; // Done it already.
+ block_owned_ = true;  // Set up front so a repeated call returns early.
+ WorkingPartSet_IT it(working_sets);
+ // If there is an upper partner use its working_set_ directly.
+ ColPartition* partner = SingletonPartner(true);
+ if (partner != nullptr && partner->working_set_ != nullptr) {
+ working_set_ = partner->working_set_;
+ working_set_->AddPartition(this);
+ return;
+ }
+ if (partner != nullptr && textord_debug_bugs) {  // Partner exists but has no working set yet.
+ tprintf("Partition with partner has no working set!:");
+ Print();
+ partner->Print();
+ }
+ // Search for the column that the left edge fits in.
+ WorkingPartSet* work_set = nullptr;
+ it.move_to_first();
+ int col_index = 0;
+ for (it.mark_cycle_pt(); !it.cycled_list() &&  // Empty-bodied loop: just steps the iterator
+ col_index != first_column_;  // until col_index reaches first_column_
+ it.forward(), ++col_index);  // or the list wraps around.
+ if (textord_debug_tabfind >= 2) {
+ tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between");  // Odd index prints "Real".
+ Print();
+ }
+ if (it.cycled_list() && textord_debug_bugs) {
+ tprintf("Target column=%d, only had %d\n", first_column_, col_index);
+ }
+ ASSERT_HOST(!it.cycled_list());  // Fails if the list ran out before reaching first_column_.
+ work_set = it.data();
+ // If last_column_ != first_column, then we need to scoop up all blocks
+ // between here and the last_column_ and put back in work_set.
+ if (!it.cycled_list() && last_column_ != first_column_ && !IsPulloutType()) {
+ // Find the column that the right edge falls in.
+ BLOCK_LIST completed_blocks;
+ TO_BLOCK_LIST to_blocks;
+ for (; !it.cycled_list() && col_index <= last_column_;  // Continues from the current set.
+ it.forward(), ++col_index) {
+ WorkingPartSet* end_set = it.data();
+ end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
+ &completed_blocks, &to_blocks);
+ }
+ work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);  // Collected blocks go to the first-column set.
+ }
+ working_set_ = work_set;
+ work_set->AddPartition(this);
+ }
+
+ // From the given block_parts list, builds one or more BLOCKs and
+ // corresponding TO_BLOCKs, such that the line spacing is uniform in each.
+ // Created blocks are appended to the end of completed_blocks and to_blocks.
+ // The used partitions are put onto used_parts, as they may still be referred
+ // to in the partition grid. bleft, tright and resolution are the bounds
+ // and resolution of the original image. block_parts is emptied by this call.
+ void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright,
+ int resolution,
+ ColPartition_LIST* block_parts,
+ ColPartition_LIST* used_parts,
+ BLOCK_LIST* completed_blocks,
+ TO_BLOCK_LIST* to_blocks) {
+ int page_height = tright.y() - bleft.y();  // Vertical extent of the given bounds.
+ // Compute the initial spacing stats.
+ ColPartition_IT it(block_parts);
+ int part_count = 0;
+ int max_line_height = 0;  // Tallest partition bounding box seen in the first pass.
+
+ // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type
+ // because their line spacing with their neighbors maybe smaller and their
+ // height may be slightly larger.
+
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ ColPartition* part = it.data();
+ ASSERT_HOST(!part->boxes()->empty());  // blob_it.data() below needs at least one blob.
+ STATS side_steps(0, part->bounding_box().height());
+ if (part->bounding_box().height() > max_line_height)
+ max_line_height = part->bounding_box().height();
+ BLOBNBOX_C_IT blob_it(part->boxes());
+ int prev_bottom = blob_it.data()->bounding_box().bottom();
+ for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {  // Second blob onward, until wrapping to the first.
+ BLOBNBOX* blob = blob_it.data();
+ int bottom = blob->bounding_box().bottom();
+ int step = bottom - prev_bottom;
+ if (step < 0)
+ step = -step;  // Absolute change in bottom between consecutive blobs.
+ side_steps.add(step, 1);
+ prev_bottom = bottom;
+ }
+ part->set_side_step(static_cast<int>(side_steps.median() + 0.5));  // Median rounded to nearest int.
+ if (!it.at_last()) {
+ ColPartition* next_part = it.data_relative(1);  // Spacings are measured against the next partition in the list.
+ part->set_bottom_spacing(part->median_bottom() -
+ next_part->median_bottom());
+ part->set_top_spacing(part->median_top() - next_part->median_top());
+ } else {
+ part->set_bottom_spacing(page_height);  // Last partition has no successor: use page_height for both.
+ part->set_top_spacing(page_height);
+ }
+ if (textord_debug_tabfind) {
+ part->Print();
+ tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n",
+ side_steps.median(), part->top_spacing(), part->bottom_spacing());
+ }
+ ++part_count;
+ }
+ if (part_count == 0)
+ return;  // Empty input list: nothing to build.
+
+ SmoothSpacings(resolution, page_height, block_parts);
+
+ // Move the partitions into individual block lists and make the blocks.
+ BLOCK_IT block_it(completed_blocks);
+ TO_BLOCK_IT to_block_it(to_blocks);
+ ColPartition_LIST spacing_parts;  // Partitions accumulated for the block currently being built.
+ ColPartition_IT sp_block_it(&spacing_parts);
+ int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing;
+ for (it.mark_cycle_pt(); !it.empty();) {  // Each pass extracts a partition, so the loop ends when block_parts is drained.
+ ColPartition* part = it.extract();
+ sp_block_it.add_to_end(part);
+ it.forward();
+ if (it.empty() || part->bottom_spacing() > same_block_threshold ||
+ !part->SpacingsEqual(*it.data(), resolution)) {  // it.data() is only reached when the list is non-empty.
+ // There is a spacing boundary. Check to see if it.data() belongs
+ // better in the current block or the next one.
+ if (!it.empty() && part->bottom_spacing() <= same_block_threshold) {
+ ColPartition* next_part = it.data();
+ // If there is a size match one-way, then the middle line goes with
+ // its matched size, otherwise it goes with the smallest spacing.
+ ColPartition* third_part = it.at_last() ? nullptr : it.data_relative(1);
+ if (textord_debug_tabfind) {
+ tprintf("Spacings unequal: upper:%d/%d, lower:%d/%d,"
+ " sizes %d %d %d\n",
+ part->top_spacing(), part->bottom_spacing(),
+ next_part->top_spacing(), next_part->bottom_spacing(),
+ part->median_height(), next_part->median_height(),
+ third_part != nullptr ? third_part->median_height() : 0);
+ }
+ // We can only consider adding the next line to the block if the sizes
+ // match and the lines are close enough for their size.
+ if (part->SizesSimilar(*next_part) &&
+ next_part->median_height() * kMaxSameBlockLineSpacing >
+ part->bottom_spacing() &&
+ part->median_height() * kMaxSameBlockLineSpacing >
+ part->top_spacing()) {
+ // Even now, we can only add it as long as the third line doesn't
+ // match in the same way and have a smaller bottom spacing.
+ if (third_part == nullptr ||
+ !next_part->SizesSimilar(*third_part) ||
+ third_part->median_height() * kMaxSameBlockLineSpacing <=
+ next_part->bottom_spacing() ||
+ next_part->median_height() * kMaxSameBlockLineSpacing <=
+ next_part->top_spacing() ||
+ next_part->bottom_spacing() > part->bottom_spacing()) {
+ // Add to the current block.
+ sp_block_it.add_to_end(it.extract());
+ it.forward();
+ if (textord_debug_tabfind) {
+ tprintf("Added line to current block.\n");
+ }
+ }
+ }
+ }
+ TO_BLOCK* to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts);
+ if (to_block != nullptr) {  // MakeBlock may return nullptr; then nothing is appended.
+ to_block_it.add_to_end(to_block);
+ block_it.add_to_end(to_block->block);
+ }
+ sp_block_it.set_to_list(&spacing_parts);  // Reset the iterator on spacing_parts before the next block.
+ } else {
+ if (textord_debug_tabfind && !it.empty()) {
+ ColPartition* next_part = it.data();
+ tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
+ part->top_spacing(), part->bottom_spacing(),
+ next_part->top_spacing(), next_part->bottom_spacing(),
+ part->median_height(), next_part->median_height());
+ }
+ }
+ }
+ }
+
+ // Helper that clamps *pos, in place, into the rectangle spanned by bleft
+ // (lower bounds) and tright (upper bounds). Each axis is handled on its own:
+ // the lower bound is applied first, then the upper bound.
+ static void ClipCoord(const ICOORD& bleft, const ICOORD& tright, ICOORD* pos) {
+   auto clipped_x = pos->x();
+   if (clipped_x < bleft.x()) {
+     clipped_x = bleft.x();
+   }
+   if (clipped_x > tright.x()) {
+     clipped_x = tright.x();
+   }
+   auto clipped_y = pos->y();
+   if (clipped_y < bleft.y()) {
+     clipped_y = bleft.y();
+   }
+   if (clipped_y > tright.y()) {
+     clipped_y = tright.y();
+   }
+   pos->set_x(clipped_x);
+   pos->set_y(clipped_y);
+ }
+
+ // Helper that drains block_parts: every partition is extracted, all of its
+ // blobs are extracted and appended to a newly allocated TO_BLOCK wrapping
+ // block, and the emptied partition is appended to used_parts (partitions are
+ // not deleted here, as they must outlive this call).
+ // vertical_text chooses the blob dimension (width if true, otherwise height)
+ // fed to the median that becomes line_size, and the block dimension that
+ // caps line_spacing and defines max_blob_size.
+ // Returns the new TO_BLOCK. If the block's poly_block is text and no blob was
+ // moved, both block and the TO_BLOCK are deleted and nullptr is returned.
+ static TO_BLOCK* MoveBlobsToBlock(bool vertical_text, int line_spacing,
+                                   BLOCK* block,
+                                   ColPartition_LIST* block_parts,
+                                   ColPartition_LIST* used_parts) {
+   // The median blob size is computed as we go, as the block needs to know.
+   TBOX bounds(block->pdblk.bounding_box());
+   STATS sizes(0, std::max(bounds.width(), bounds.height()));
+   bool is_text = block->pdblk.poly_block()->IsText();
+   ColPartition_IT part_it(block_parts);
+   auto* result = new TO_BLOCK(block);
+   BLOBNBOX_IT dest_it(&result->blobs);
+   ColPartition_IT done_it(used_parts);
+   part_it.move_to_first();
+   while (!part_it.empty()) {
+     ColPartition* part = part_it.extract();
+     // Blobs from all regions are transferred to the output block. Blobs for
+     // non-text regions will be used to define the polygonal bounds of the
+     // region.
+     BLOBNBOX_C_IT src_it(part->boxes());
+     while (!src_it.empty()) {
+       BLOBNBOX* bblob = src_it.extract();
+       if (bblob->owner() != part) {
+         // Dump diagnostics before the assert below fires.
+         tprintf("Ownership incorrect for blob:");
+         bblob->bounding_box().print();
+         tprintf("Part=");
+         part->Print();
+         if (bblob->owner() != nullptr) {
+           tprintf("Owner part:");
+           bblob->owner()->Print();
+         } else {
+           tprintf("Not owned\n");
+         }
+       }
+       ASSERT_HOST(bblob->owner() == part);
+       // Assert failure here is caused by arbitrarily changing the partition
+       // type without also changing the blob type, such as in
+       // InsertSmallBlobsAsUnknowns.
+       ASSERT_HOST(!is_text || bblob->region_type() >= BRT_UNKNOWN);
+       C_OUTLINE_LIST* outlines = bblob->cblob()->out_list();
+       C_OUTLINE_IT ol_it(outlines);
+       ASSERT_HOST(!is_text || ol_it.data()->pathlength() > 0);
+       if (vertical_text) {
+         sizes.add(bblob->bounding_box().width(), 1);
+       } else {
+         sizes.add(bblob->bounding_box().height(), 1);
+       }
+       dest_it.add_after_then_move(bblob);
+       src_it.forward();
+     }
+     done_it.add_to_end(part);
+     part_it.forward();
+   }
+   if (is_text && dest_it.empty()) {
+     delete block;
+     delete result;
+     return nullptr;
+   }
+   result->line_size = sizes.median();
+   // Width bounds the spacing for vertical text, height for horizontal text.
+   int block_extent;
+   if (vertical_text) {
+     block_extent = block->pdblk.bounding_box().width();
+   } else {
+     block_extent = block->pdblk.bounding_box().height();
+   }
+   if (block_extent < line_spacing) {
+     line_spacing = block_extent;
+   }
+   result->line_spacing = static_cast<float>(line_spacing);
+   result->max_blob_size = static_cast<float>(block_extent + 1);
+   return result;
+ }
+
+ // Constructs a block from the given list of partitions. Returns nullptr if block_parts is empty.
+ // Arguments are as LineSpacingBlocks above. PT_VERTICAL_TEXT input is delegated to MakeVerticalTextBlock.
+ TO_BLOCK* ColPartition::MakeBlock(const ICOORD& bleft, const ICOORD& tright,
+ ColPartition_LIST* block_parts,
+ ColPartition_LIST* used_parts) {
+ if (block_parts->empty())
+ return nullptr; // Nothing to do.
+ // If the block_parts are not in reading order, then it will make an invalid
+ // block polygon and bounding_box, so sort by bounding box now just to make
+ // sure.
+ block_parts->sort(&ColPartition::SortByBBox);
+ ColPartition_IT it(block_parts);
+ ColPartition* part = it.data();  // First partition after sorting; supplies the type and line spacing.
+ PolyBlockType type = part->type();
+ if (type == PT_VERTICAL_TEXT)
+ return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts);
+ // LineSpacingBlocks has handed us a collection of evenly spaced lines and
+ // put the average spacing in each partition, so we can just take the
+ // linespacing from the first partition.
+ int line_spacing = part->bottom_spacing();
+ if (line_spacing < part->median_height())
+ line_spacing = part->bounding_box().height();  // Spacing below the median height: use the box height instead.
+ ICOORDELT_LIST vertices;  // Polygon vertices, appended in the order the edge runs produce them.
+ ICOORDELT_IT vert_it(&vertices);
+ ICOORD start, end;
+ int min_x = INT32_MAX;
+ int max_x = -INT32_MAX;
+ int min_y = INT32_MAX;
+ int max_y = -INT32_MAX;
+ int iteration = 0;  // 0 while collecting left edge runs, 1 while collecting right edge runs.
+ do {
+ if (iteration == 0)
+ ColPartition::LeftEdgeRun(&it, &start, &end);
+ else
+ ColPartition::RightEdgeRun(&it, &start, &end);
+ ClipCoord(bleft, tright, &start);  // Keep both run end points inside the bleft/tright bounds.
+ ClipCoord(bleft, tright, &end);
+ vert_it.add_after_then_move(new ICOORDELT(start));
+ vert_it.add_after_then_move(new ICOORDELT(end));
+ UpdateRange(start.x(), &min_x, &max_x);
+ UpdateRange(end.x(), &min_x, &max_x);
+ UpdateRange(start.y(), &min_y, &max_y);
+ UpdateRange(end.y(), &min_y, &max_y);
+ if ((iteration == 0 && it.at_first()) ||  // Phase ends when the iterator is back at the first
+ (iteration == 1 && it.at_last())) {  // (left pass) or at the last (right pass) element.
+ ++iteration;
+ it.move_to_last();
+ }
+ } while (iteration < 2);
+ if (textord_debug_tabfind)
+ tprintf("Making block at (%d,%d)->(%d,%d)\n",
+ min_x, min_y, max_x, max_y);
+ auto* block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y);  // Box spans the clipped vertices.
+ block->pdblk.set_poly_block(new POLY_BLOCK(&vertices, type));
+ return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts);
+ }
+
+ // Builds one rectangular block from a list of vertical text partitions.
+ // The block's box is the union of all partition bounding boxes; the type and
+ // the line spacing (the bounding box width) come from the first partition.
+ // Returns nullptr when block_parts is empty. bleft and tright are not used.
+ TO_BLOCK* ColPartition::MakeVerticalTextBlock(const ICOORD& bleft,
+                                               const ICOORD& tright,
+                                               ColPartition_LIST* block_parts,
+                                               ColPartition_LIST* used_parts) {
+   if (block_parts->empty()) {
+     return nullptr;  // Nothing to do.
+   }
+   ColPartition_IT part_it(block_parts);
+   ColPartition* first_part = part_it.data();
+   TBOX union_box = first_part->bounding_box();
+   int line_spacing = union_box.width();
+   PolyBlockType type = part_it.data()->type();
+   // Grow the box to enclose every partition (the first one included again).
+   part_it.mark_cycle_pt();
+   while (!part_it.cycled_list()) {
+     union_box += part_it.data()->bounding_box();
+     part_it.forward();
+   }
+   if (textord_debug_tabfind) {
+     tprintf("Making block at:");
+     union_box.print();
+   }
+   auto* block = new BLOCK("", true, 0, 0, union_box.left(), union_box.bottom(),
+                           union_box.right(), union_box.top());
+   block->pdblk.set_poly_block(new POLY_BLOCK(union_box, type));
+   return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts);
+ }
+
+ // Makes a TO_ROW matching this and moves all the blobs to it, transferring
+ // ownership to the returned TO_ROW. boxes_ is left empty.
+ // Returns nullptr if there were no blobs to move.
+ TO_ROW* ColPartition::MakeToRow() {
+   // The row's line size is the median width for vertical partitions and the
+   // median height otherwise.
+   int line_size = median_height_;
+   if (IsVerticalType()) {
+     line_size = median_width_;
+   }
+   TO_ROW* result = nullptr;
+   BLOBNBOX_C_IT src_it(&boxes_);
+   // All the blobs go into a single TO_ROW, created from the first blob.
+   while (!src_it.empty()) {
+     BLOBNBOX* bblob = src_it.extract();
+     int top = bblob->bounding_box().top();
+     int bottom = bblob->bounding_box().bottom();
+     if (result != nullptr) {
+       result->add_blob(bblob, static_cast<float>(top),
+                        static_cast<float>(bottom),
+                        static_cast<float>(line_size));
+     } else {
+       result = new TO_ROW(bblob, static_cast<float>(top),
+                           static_cast<float>(bottom),
+                           static_cast<float>(line_size));
+     }
+     src_it.forward();
+   }
+   return result;
+ }
+
+ // Returns a newly allocated ColPartition carrying this partition's geometry,
+ // medians, keys, type and column information, but with no boxes copied and
+ // owns_blobs_ cleared. The result is only suitable for keeping in a column
+ // candidate list.
+ ColPartition* ColPartition::ShallowCopy() const {
+   auto* copy = new ColPartition(blob_type_, vertical_);
+   copy->owns_blobs_ = false;
+   // Classification.
+   copy->type_ = type_;
+   copy->flow_ = flow_;
+   // Box and margins.
+   copy->bounding_box_ = bounding_box_;
+   copy->left_margin_ = left_margin_;
+   copy->right_margin_ = right_margin_;
+   // Median statistics.
+   copy->median_left_ = median_left_;
+   copy->median_right_ = median_right_;
+   copy->median_bottom_ = median_bottom_;
+   copy->median_top_ = median_top_;
+   copy->median_width_ = median_width_;
+   copy->median_height_ = median_height_;
+   // Tab keys.
+   copy->left_key_ = left_key_;
+   copy->right_key_ = right_key_;
+   copy->left_key_tab_ = left_key_tab_;
+   copy->right_key_tab_ = right_key_tab_;
+   // Column fit.
+   copy->good_width_ = good_width_;
+   copy->good_column_ = good_column_;
+   copy->first_column_ = first_column_;
+   copy->last_column_ = last_column_;
+   // The densities are stored in an array, so copy them as raw bytes.
+   memcpy(copy->special_blobs_densities_, special_blobs_densities_,
+          sizeof(special_blobs_densities_));
+   return copy;
+ }
+
+ // Returns a ShallowCopy of this whose box list additionally points at the
+ // same blobs as this partition, in the same order. The copy is flagged as
+ // not owning those blobs.
+ ColPartition* ColPartition::CopyButDontOwnBlobs() {
+   ColPartition* duplicate = ShallowCopy();
+   duplicate->set_owns_blobs(false);
+   BLOBNBOX_C_IT dest_it(duplicate->boxes());
+   BLOBNBOX_C_IT src_it(boxes());
+   src_it.mark_cycle_pt();
+   while (!src_it.cycled_list()) {
+     dest_it.add_after_then_move(src_it.data());
+     src_it.forward();
+   }
+   return duplicate;
+ }
+
+#ifndef GRAPHICS_DISABLED
+ // Provides a color for BBGrid to draw the rectangle.
+ // Partitions of a known type get the color of their PolyBlockType; those
+ // still PT_UNKNOWN are colored from their blob type and flow instead.
+ // Must be kept in sync with PolyBlockType.
+ ScrollView::Color ColPartition::BoxColor() const {
+   if (type_ != PT_UNKNOWN) {
+     return POLY_BLOCK::ColorForPolyBlockType(type_);
+   }
+   return BLOBNBOX::TextlineColor(blob_type_, flow_);
+ }
+#endif // !GRAPHICS_DISABLED
+
+ // One display character per BlobRegionType value, indexed by the enum.
+ // Read-only lookup table, so it is declared const.
+ // Keep in sync with BlobRegionType.
+ static const char kBlobTypes[BRT_COUNT + 1] = "NHSRIUVT";
+
+ // Prints a one-line debug summary of this partition via tprintf: an 'E'
+ // marker if boxes_ is empty, the left and right margins, keys and box edges
+ // (keys evaluated at the vertical middle), the medians, width/column flags,
+ // types, column range, blob count and the four space_* values.
+ void ColPartition::Print() const {
+   const int mid_y = MidY();
+   const char empty_mark = boxes_.empty() ? 'E' : ' ';
+   const char left_kind = left_key_tab_ ? 'T' : 'B';
+   const char right_kind = right_key_tab_ ? 'T' : 'B';
+   tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
+           " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
+           " ts=%d bs=%d ls=%d rs=%d\n",
+           empty_mark,
+           left_margin_, left_kind, LeftAtY(mid_y),
+           bounding_box_.left(), median_left_,
+           bounding_box_.bottom(), median_bottom_,
+           bounding_box_.right(), RightAtY(mid_y), right_kind,
+           right_margin_, median_right_, bounding_box_.top(), median_top_,
+           good_width_, good_column_, type_,
+           kBlobTypes[blob_type_], flow_,
+           first_column_, last_column_, boxes_.length(),
+           space_above_, space_below_, space_to_left_, space_to_right_);
+ }
+
+ // Prints the two partition colors via tprintf: red, green, blue and the
+ // L_ALPHA_CHANNEL entry of color1_, then red, green, blue of color2_.
+ void ColPartition::PrintColors() {
+   const int red1 = color1_[COLOR_RED];
+   const int green1 = color1_[COLOR_GREEN];
+   const int blue1 = color1_[COLOR_BLUE];
+   const int alpha1 = color1_[L_ALPHA_CHANNEL];
+   const int red2 = color2_[COLOR_RED];
+   const int green2 = color2_[COLOR_GREEN];
+   const int blue2 = color2_[COLOR_BLUE];
+   tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
+           red1, green1, blue1, alpha1, red2, green2, blue2);
+ }
+
+ // Sets the types of all partitions in the run to be the max of the types.
+ // The run is this partition plus the chain reached by repeatedly following
+ // SingletonPartner(false). Column statistics are collected from partners
+ // that share column_set_ with this, but the code that would apply them is
+ // compiled out (see the TODO below).
+ void ColPartition::SmoothPartnerRun(int working_set_count) {
+   STATS left_stats(0, working_set_count);
+   STATS right_stats(0, working_set_count);
+   PolyBlockType run_type = type_;
+   // First pass: find the largest type along the chain.
+   ColPartition* link = SingletonPartner(false);
+   while (link != nullptr) {
+     if (run_type < link->type_) {
+       run_type = link->type_;
+     }
+     if (link->column_set_ == column_set_) {
+       left_stats.add(link->first_column_, 1);
+       right_stats.add(link->last_column_, 1);
+     }
+     link = link->SingletonPartner(false);
+   }
+   type_ = run_type;
+   // TODO(rays) Either establish that it isn't necessary to set the columns,
+   // or find a way to do it that does not cause an assert failure in
+   // AddToWorkingSet.
+ #if 0
+   first_column_ = left_stats.mode();
+   last_column_ = right_stats.mode();
+   if (last_column_ < first_column_)
+     last_column_ = first_column_;
+ #endif
+
+   // Second pass: stamp the chosen type on every partner in the chain.
+   link = SingletonPartner(false);
+   while (link != nullptr) {
+     link->type_ = run_type;
+ #if 0  // See TODO above
+     if (column_set_ == link->column_set_) {
+       link->first_column_ = first_column_;
+       link->last_column_ = last_column_;
+     }
+ #endif
+     link = link->SingletonPartner(false);
+   }
+ }
+
+// ======= Scenario common to all Refine*Partners* functions =======
+// ColPartitions are aiming to represent textlines, or horizontal slices
+// of images, and we are trying to form bi-directional (upper/lower) chains
+// of UNIQUE partner ColPartitions that can be made into blocks.
+// The ColPartitions have previously been typed (see SetPartitionType)
+// according to a combination of the content type and
+// how they lie on the columns. We want to chain text into
+// groups of a single type, but image ColPartitions may have been typed
+// differently in different parts of the image, due to being non-rectangular.
+//
+// We previously ran a search for upper and lower partners, but there may
+// be more than one, and they may be of mixed types, so now we wish to
+// refine the partners down to at most one.
+// A heading may have multiple partners:
+// ===============================
+// ======== ========== =========
+// ======== ========== =========
+// but it should be a different type.
+// A regular flowing text line may have multiple partners:
+// ================== ===================
+// ======= ================= ===========
+// This could be the start of a pull-out, or it might all be in a single
+// column and might be caused by tightly spaced text, bold words, bullets,
+// funny punctuation etc, all of which can cause textlines to be split into
+// multiple ColPartitions. Pullouts and figure captions should now be different
+// types so we can more aggressively merge groups of partners that all sit
+// in a single column.
+//
+ // Cleans up the partners of the given type so that there is at most
+ // one partner. This makes block creation simpler.
+ // When this partition's type is similar to type, both the upper and the
+ // lower partner lists are refined, and get_desperate enables the more
+ // desperate merge methods for flowing text before partnerships are broken.
+ // When type is PT_COUNT (the final pass), partners are filtered by type and
+ // then, if more than one remains on a side, by overlap.
+ // Any other combination leaves the partners untouched.
+ void ColPartition::RefinePartners(PolyBlockType type, bool get_desperate,
+                                   ColPartitionGrid* grid) {
+   if (TypesSimilar(type_, type)) {
+     RefinePartnersInternal(true, get_desperate, grid);
+     RefinePartnersInternal(false, get_desperate, grid);
+     return;
+   }
+   if (type != PT_COUNT) {
+     return;
+   }
+   // This is the final pass. Make sure only the correctly typed
+   // partners survive, however many there are.
+   RefinePartnersByType(true, &upper_partners_);
+   RefinePartnersByType(false, &lower_partners_);
+   // A merge may have given a partition multiple partners again, so the last
+   // resort is overlap, which is guaranteed to leave at most one partner.
+   const bool many_upper =
+       !upper_partners_.empty() && !upper_partners_.singleton();
+   if (many_upper) {
+     RefinePartnersByOverlap(true, &upper_partners_);
+   }
+   const bool many_lower =
+       !lower_partners_.empty() && !lower_partners_.singleton();
+   if (many_lower) {
+     RefinePartnersByOverlap(false, &lower_partners_);
+   }
+ }
+
+////////////////// PRIVATE CODE /////////////////////////////
+
+ // Cleans up the partners above if upper is true, else below.
+ // Applies progressively stronger methods, stopping as soon as at most one
+ // partner remains: filter by type, remove transitive shortcuts, then (for
+ // flowing text, and only if get_desperate is true) merge partners, and
+ // finally keep only the partner with the best overlap.
+ void ColPartition::RefinePartnersInternal(bool upper, bool get_desperate,
+                                           ColPartitionGrid* grid) {
+   ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
+   // True while the list still holds two or more partners.
+   auto has_multiple = [partners]() {
+     return !partners->empty() && !partners->singleton();
+   };
+   if (!has_multiple()) {
+     return;
+   }
+   RefinePartnersByType(upper, partners);
+   if (!has_multiple()) {
+     return;
+   }
+   // Check for transitive partnerships and break the cycle.
+   RefinePartnerShortcuts(upper, partners);
+   if (!has_multiple()) {
+     return;
+   }
+   // Types didn't fix it. Flowing text first tries merging the partners,
+   // without and then with the desperate option.
+   if (TypesSimilar(type_, PT_FLOWING_TEXT) && get_desperate) {
+     RefineTextPartnersByMerge(upper, false, partners, grid);
+     if (has_multiple()) {
+       RefineTextPartnersByMerge(upper, true, partners, grid);
+     }
+   }
+   // The last resort is to use overlap.
+   if (has_multiple()) {
+     RefinePartnersByOverlap(upper, partners);
+   }
+ }
+
+ // Cleans up the partners above if upper is true, else below.
+ // Restricts the partners to only desirable types:
+ //  - If this is not an image type, not a line type and not PT_TABLE, a
+ //    partner is kept only when TypesSimilar(type_, partner type) holds.
+ //  - Otherwise a partner is kept only when both this and the partner have
+ //    blob type BRT_POLYIMAGE.
+ // Each removed partner also has its reverse link to this removed.
+ void ColPartition::RefinePartnersByType(bool upper,
+                                         ColPartition_CLIST* partners) {
+   const bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
+                                                bounding_box_.bottom());
+   if (debug) {
+     tprintf("Refining %d %s partners by type for:\n",
+             partners->length(), upper ? "Upper" : "Lower");
+     Print();
+   }
+   // Decide once which of the two filtering rules applies to this partition.
+   const bool match_by_type =
+       !IsImageType() && !IsLineType() && type() != PT_TABLE;
+   ColPartition_C_IT partner_it(partners);
+   partner_it.mark_cycle_pt();
+   while (!partner_it.cycled_list()) {
+     ColPartition* other = partner_it.data();
+     bool drop;
+     if (match_by_type) {
+       // Purify text by type: keep only partners matching type_.
+       drop = !TypesSimilar(type_, other->type_);
+     } else {
+       // Only polyimages are allowed to have partners of any kind!
+       drop = other->blob_type() != BRT_POLYIMAGE ||
+              blob_type() != BRT_POLYIMAGE;
+     }
+     if (drop) {
+       if (debug) {
+         tprintf("Removing partner:");
+         other->Print();
+       }
+       other->RemovePartner(!upper, this);
+       partner_it.extract();
+     } else if (debug) {
+       tprintf("Keeping partner:");
+       other->Print();
+     }
+     partner_it.forward();
+   }
+ }
+
+ // Cleans up the partners above if upper is true, else below.
+ // Remove transitive partnerships: this<->a, and a<->b and this<->b.
+ // Gets rid of this<->b, leaving a clean chain.
+ // Also if we have this<->a and a<->this, then gets rid of this<->a, as
+ // this has multiple partners. Repeats until nothing changes or at most one partner is left.
+ void ColPartition::RefinePartnerShortcuts(bool upper,
+ ColPartition_CLIST* partners) {
+ bool done_any = false;  // Set when a link was removed during the current scan.
+ do {
+ done_any = false;
+ ColPartition_C_IT it(partners);  // Fresh iterator for every scan, since a removal ends the scan.
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ ColPartition* a = it.data();
+ // Check for a match between all of a's partners (it1/b1) and all
+ // of this's partners (it2/b2).
+ ColPartition_C_IT it1(upper ? &a->upper_partners_ : &a->lower_partners_);
+ for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
+ ColPartition* b1 = it1.data();
+ if (b1 == this) {  // a lists this on the same side that this lists a.
+ done_any = true;
+ it.extract();
+ a->RemovePartner(!upper, this);
+ break;
+ }
+ ColPartition_C_IT it2(partners);
+ for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) {
+ ColPartition* b2 = it2.data();
+ if (b1 == b2) {
+ // Jackpot! b2 should not be a partner of this.
+ it2.extract();
+ b2->RemovePartner(!upper, this);
+ done_any = true;
+ // That potentially invalidated all the iterators, so break out
+ // and start again.
+ break;
+ }
+ }
+ if (done_any)
+ break;  // Leave the it1 loop.
+ }
+ if (done_any)
+ break;  // Leave the outer loop so the do/while can rescan.
+ }
+ } while (done_any && !partners->empty() && !partners->singleton());
+ }
+
+ // Cleans up the partners above if upper is true, else below.
+ // If multiple text partners can be merged, (with each other, NOT with this),
+ // then do so.
+ // If desperate is true, then an increase in overlap with the merge is
+ // allowed. If the overlap increases, then the desperately_merged_ flag
+ // is set, indicating that the textlines probably need to be regenerated
+ // by aggressive line fitting/splitting, as there are probably vertically
+ // joined blobs that cross textlines.
+ void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate,
+ ColPartition_CLIST* partners,
+ ColPartitionGrid* grid) {
+ bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
+ bounding_box_.bottom());
+ if (debug) {
+ tprintf("Refining %d %s partners by merge for:\n",
+ partners->length(), upper ? "Upper" : "Lower");
+ Print();
+ }
+ while (!partners->empty() && !partners->singleton()) {
+ // Absorb will mess up the iterators, so we have to merge one partition
+ // at a time and rebuild the iterators each time.
+ ColPartition_C_IT it(partners);
+ ColPartition* part = it.data();  // The first partner is the one that absorbs a candidate.
+ // Gather a list of merge candidates, from the list of partners, that
+ // are all in the same single column. See general scenario comment above.
+ ColPartition_CLIST candidates;
+ ColPartition_C_IT cand_it(&candidates);
+ for (it.forward(); !it.at_first(); it.forward()) {  // All partners except the first.
+ ColPartition* candidate = it.data();
+ if (part->first_column_ == candidate->last_column_ &&  // NOTE(review): first is compared with last and last with first;
+ part->last_column_ == candidate->first_column_)  // presumably this requires one identical column for both - confirm.
+ cand_it.add_after_then_move(it.data());
+ }
+ int overlap_increase;  // Passed by address to BestMergeCandidate; read only when a candidate is returned.
+ ColPartition* candidate = grid->BestMergeCandidate(part, &candidates, debug,
+ nullptr, &overlap_increase);
+ if (candidate != nullptr && (overlap_increase <= 0 || desperate)) {
+ if (debug) {
+ tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
+ part->HCoreOverlap(*candidate), part->VCoreOverlap(*candidate),
+ overlap_increase);
+ }
+ // Remove before merge and re-insert to keep the integrity of the grid.
+ grid->RemoveBBox(candidate);
+ grid->RemoveBBox(part);
+ part->Absorb(candidate, nullptr);
+ // We modified the box of part, so re-insert it into the grid.
+ grid->InsertBBox(true, true, part);
+ if (overlap_increase > 0)
+ part->desperately_merged_ = true;
+ } else {
+ break; // Can't merge.
+ }
+ }
+ }
+
+ // Cleans up the partners above if upper is true, else below.
+ // Keeps only the partner whose bounding box has the largest horizontal
+ // overlap with this partition's bounding box. If no partner has a positive
+ // overlap, the first partner in the list is the one kept. Each removed
+ // partner also has its reverse link to this removed.
+ // The list is expected to be non-empty.
+ void ColPartition::RefinePartnersByOverlap(bool upper,
+                                            ColPartition_CLIST* partners) {
+   const bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
+                                                bounding_box_.bottom());
+   if (debug) {
+     tprintf("Refining %d %s partners by overlap for:\n",
+             partners->length(), upper ? "Upper" : "Lower");
+     Print();
+   }
+   ColPartition_C_IT partner_it(partners);
+   // Find the partner with the best overlap.
+   ColPartition* winner = partner_it.data();
+   int winning_overlap = 0;
+   partner_it.mark_cycle_pt();
+   while (!partner_it.cycled_list()) {
+     ColPartition* other = partner_it.data();
+     const int shared_right =
+         std::min(bounding_box_.right(), other->bounding_box_.right());
+     const int shared_left =
+         std::max(bounding_box_.left(), other->bounding_box_.left());
+     const int overlap = shared_right - shared_left;
+     if (overlap > winning_overlap) {
+       winning_overlap = overlap;
+       winner = other;
+     }
+     partner_it.forward();
+   }
+   // Keep only the best partner.
+   partner_it.mark_cycle_pt();
+   while (!partner_it.cycled_list()) {
+     ColPartition* other = partner_it.data();
+     if (other != winner) {
+       if (debug) {
+         tprintf("Removing partner:");
+         other->Print();
+       }
+       other->RemovePartner(!upper, this);
+       partner_it.extract();
+     }
+     partner_it.forward();
+   }
+ }
+
+ // Return true if bbox belongs better in this than other.
+ // Decision order:
+ //  1. Margins: a box sticking out of this partition's margins loses; one
+ //     inside them but outside other's margins wins.
+ //  2. Smaller "miss" (median-height span not covered by the box) wins.
+ //  3. Larger vertical overlap with the median span wins.
+ //  4. Ties go to this when its median_top_ is at least other's.
+ bool ColPartition::ThisPartitionBetter(BLOBNBOX* bbox,
+                                        const ColPartition& other) {
+   const TBOX& box = bbox->bounding_box();
+   // Margins take priority.
+   int left = box.left();
+   int right = box.right();
+   const bool outside_this = left < left_margin_ || right > right_margin_;
+   if (outside_this) {
+     return false;
+   }
+   const bool outside_other =
+       left < other.left_margin_ || right > other.right_margin_;
+   if (outside_other) {
+     return true;
+   }
+   int top = box.top();
+   int bottom = box.bottom();
+   // Vertical overlap of the box with each partition's median span.
+   int this_overlap =
+       std::min(top, median_top_) - std::max(bottom, median_bottom_);
+   int other_overlap = std::min(top, other.median_top_) -
+                       std::max(bottom, other.median_bottom_);
+   // Portion of each median span that the box fails to cover.
+   int this_miss = median_top_ - median_bottom_ - this_overlap;
+   int other_miss = other.median_top_ - other.median_bottom_ - other_overlap;
+   if (TabFind::WithinTestRegion(3, box.left(), box.bottom())) {
+     tprintf("Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n",
+             box.left(), box.bottom(), box.right(), box.top(),
+             this_overlap, other_overlap, this_miss, other_miss,
+             median_top_, other.median_top_);
+   }
+   if (this_miss != other_miss) {
+     return this_miss < other_miss;
+   }
+   if (this_overlap != other_overlap) {
+     return this_overlap > other_overlap;
+   }
+   return median_top_ >= other.median_top_;
+ }
+
+// Computes the median line-spacing over the partitions from the iterator's
+// current position until the list has cycled. Both the bottom spacing and
+// the top spacing of every visited partition are added to the statistics.
+// The iterator is taken by value, so the caller's iterator is not moved.
+static int MedianSpacing(int page_height, ColPartition_IT it) {
+  STATS spacing_stats(0, page_height);
+  for (; !it.cycled_list(); it.forward()) {
+    const ColPartition* current = it.data();
+    spacing_stats.add(current->bottom_spacing(), 1);
+    spacing_stats.add(current->top_spacing(), 1);
+  }
+  // Round the median to the nearest integer.
+  return static_cast<int>(spacing_stats.median() + 0.5);
+}
+
+// Returns true if the column range [first_column_, last_column_] of this
+// partition intersects the column range of part. This function will only
+// work after the SetPartitionType function has been called on both column
+// partitions. This is useful for doing a SideSearch when you want things in
+// the same page column.
+//
+// Currently called by the table detection code to identify if potential table
+// partitions exist in the same column.
+bool ColPartition::IsInSameColumnAs(const ColPartition& part) const {
+  // The ranges are disjoint exactly when one lies wholly to one side of the
+  // other; anything else counts as the same column.
+  const bool wholly_before = last_column_ < part.first_column_;
+  const bool wholly_after = first_column_ > part.last_column_;
+  return !(wholly_before || wholly_after);
+}
+
+// Smoothes the spacings in the list into groups of equal linespacing.
+// resolution is the resolution of the original image, used as a basis
+// for thresholds in change of spacing. page_height is in pixels.
+// parts is the list whose partitions have their top and bottom spacings
+// overwritten in place: every member of a group except its last one is set
+// to the rounded mean spacing of those members.
+void ColPartition::SmoothSpacings(int resolution, int page_height,
+ ColPartition_LIST* parts) {
+ // The task would be trivial if we didn't have to allow for blips -
+ // occasional offsets in spacing caused by anomalous text, such as all
+ // caps, groups of descenders, joined words, Arabic etc.
+ // The neighbourhood stores a consecutive group of partitions so that
+ // blips can be detected correctly, yet conservatively enough to not
+ // mistake genuine spacing changes for blips. See example below.
+ ColPartition* neighbourhood[PN_COUNT];
+ ColPartition_IT it(parts);
+ it.mark_cycle_pt();
+ // Although we know nothing about the spacings in this list, the median is
+ // used as an approximation to allow blips.
+ // If parts of this block aren't spaced to the median, then we can't
+ // accept blips in those parts, but we'll recalculate it each time we
+ // split the block, so the median becomes more likely to match all the text.
+ int median_space = MedianSpacing(page_height, it);
+ // start_it marks the first partition of the current group; end_it is kept
+ // in step with the partition held in neighbourhood[PN_LOWER].
+ ColPartition_IT start_it(it);
+ ColPartition_IT end_it(it);
+ // Prime the neighbourhood: the slots before PN_UPPER start out empty, and
+ // the remaining slots are filled from the list until it runs out.
+ for (int i = 0; i < PN_COUNT; ++i) {
+ if (i < PN_UPPER || it.cycled_list()) {
+ neighbourhood[i] = nullptr;
+ } else {
+ if (i == PN_LOWER)
+ end_it = it;
+ neighbourhood[i] = it.data();
+ it.forward();
+ }
+ }
+ while (neighbourhood[PN_UPPER] != nullptr) {
+ // Test for end of a group. Normally SpacingsEqual is true within a group,
+ // but in the case of a blip, it will be false. Here is an example:
+ // Line enum Spacing below (spacing between tops of lines)
+ // 1 ABOVE2 20
+ // 2 ABOVE1 20
+ // 3 UPPER 15
+ // 4 LOWER 25
+ // 5 BELOW1 20
+ // 6 BELOW2 20
+ // Line 4 is all in caps (regular caps), so the spacing between line 3
+ // and line 4 (looking at the tops) is smaller than normal, and the
+ // spacing between line 4 and line 5 is larger than normal, but the
+ // two of them add to twice the normal spacing.
+ // The following if has to accept unequal spacings 3 times to pass the
+ // blip (20/15, 15/25 and 25/20)
+ // When the blip is in the middle, OKSpacingBlip tests that one of
+ // ABOVE1 and BELOW1 matches the median.
+ // The first time, everything is shifted down 1, so we present
+ // OKSpacingBlip with neighbourhood+1 and check that PN_UPPER is median.
+ // The last time, everything is shifted up 1, so we present OKSpacingBlip
+ // with neighbourhood-1 and check that PN_LOWER matches the median.
+ if (neighbourhood[PN_LOWER] == nullptr ||
+ (!neighbourhood[PN_UPPER]->SpacingsEqual(*neighbourhood[PN_LOWER],
+ resolution) &&
+ // PN_UPPER and PN_LOWER are already known to be non-null at this point
+ // (loop condition and first test above), so the next two null checks
+ // are redundant but harmless.
+ (neighbourhood[PN_UPPER] == nullptr ||
+ neighbourhood[PN_LOWER] == nullptr ||
+ !OKSpacingBlip(resolution, median_space, neighbourhood, 0)) &&
+ (neighbourhood[PN_UPPER - 1] == nullptr ||
+ neighbourhood[PN_LOWER - 1] == nullptr ||
+ !OKSpacingBlip(resolution, median_space, neighbourhood, -1) ||
+ !neighbourhood[PN_LOWER]->SpacingEqual(median_space, resolution)) &&
+ (neighbourhood[PN_UPPER + 1] == nullptr ||
+ neighbourhood[PN_LOWER + 1] == nullptr ||
+ !OKSpacingBlip(resolution, median_space, neighbourhood, 1) ||
+ !neighbourhood[PN_UPPER]->SpacingEqual(median_space, resolution)))) {
+ // The group has ended. PN_UPPER is the last member.
+ // Compute the mean spacing over the group.
+ ColPartition_IT sum_it(start_it);
+ ColPartition* last_part = neighbourhood[PN_UPPER];
+ double total_bottom = 0.0;
+ double total_top = 0.0;
+ int total_count = 0;
+ ColPartition* upper = sum_it.data();
+ // We do not process last_part, as its spacing is different.
+ while (upper != last_part) {
+ total_bottom += upper->bottom_spacing();
+ total_top += upper->top_spacing();
+ ++total_count;
+ sum_it.forward();
+ upper = sum_it.data();
+ }
+ if (total_count > 0) {
+ // There were at least 2 lines, so set them all to the mean.
+ int top_spacing = static_cast<int>(total_top / total_count + 0.5);
+ int bottom_spacing = static_cast<int>(total_bottom / total_count + 0.5);
+ if (textord_debug_tabfind) {
+ tprintf("Spacing run ended. Cause:");
+ if (neighbourhood[PN_LOWER] == nullptr) {
+ tprintf("No more lines\n");
+ } else {
+ tprintf("Spacing change. Spacings:\n");
+ for (int i = 0; i < PN_COUNT; ++i) {
+ if (neighbourhood[i] == nullptr) {
+ tprintf("NULL");
+ if (i > 0 && neighbourhood[i - 1] != nullptr) {
+ if (neighbourhood[i - 1]->SingletonPartner(false) != nullptr) {
+ tprintf(" Lower partner:");
+ neighbourhood[i - 1]->SingletonPartner(false)->Print();
+ } else {
+ tprintf(" nullptr lower partner:\n");
+ }
+ } else {
+ tprintf("\n");
+ }
+ } else {
+ tprintf("Top = %d, bottom = %d\n",
+ neighbourhood[i]->top_spacing(),
+ neighbourhood[i]->bottom_spacing());
+ }
+ }
+ }
+ tprintf("Mean spacing = %d/%d\n", top_spacing, bottom_spacing);
+ }
+ // Second pass over the same members: write the mean back to each one.
+ sum_it = start_it;
+ upper = sum_it.data();
+ while (upper != last_part) {
+ upper->set_top_spacing(top_spacing);
+ upper->set_bottom_spacing(bottom_spacing);
+ if (textord_debug_tabfind) {
+ tprintf("Setting mean on:");
+ upper->Print();
+ }
+ sum_it.forward();
+ upper = sum_it.data();
+ }
+ }
+ // PN_LOWER starts the next group and end_it is the next start_it.
+ start_it = end_it;
+ // Recalculate the median spacing to maximize the chances of detecting
+ // spacing blips.
+ median_space = MedianSpacing(page_height, end_it);
+ }
+ // Shuffle pointers.
+ for (int j = 1; j < PN_COUNT; ++j) {
+ neighbourhood[j - 1] = neighbourhood[j];
+ }
+ // Refill the last slot from the list, or with nullptr once it is used up.
+ if (it.cycled_list()) {
+ neighbourhood[PN_COUNT - 1] = nullptr;
+ } else {
+ neighbourhood[PN_COUNT - 1] = it.data();
+ it.forward();
+ }
+ // Advance end_it in step with the shuffle above.
+ end_it.forward();
+ }
+}
+
+// Tests whether the neighbourhood in parts, shifted by offset, satisfies the
+// spacing-blip condition: the PN_UPPER and PN_LOWER entries must have an
+// acceptable summed spacing, and at least one of the PN_ABOVE1 / PN_BELOW1
+// entries must exist and match median_spacing. See SmoothSpacings for what
+// this means and how it is used.
+// The shifted PN_UPPER and PN_LOWER entries are dereferenced unconditionally,
+// so the caller must make sure they are not nullptr.
+bool ColPartition::OKSpacingBlip(int resolution, int median_spacing,
+                                 ColPartition** parts, int offset) {
+  ColPartition** shifted = parts + offset;
+  const ColPartition* upper = shifted[PN_UPPER];
+  const ColPartition* lower = shifted[PN_LOWER];
+  if (!upper->SummedSpacingOK(*lower, median_spacing, resolution))
+    return false;
+  // One regular neighbour on either side is enough to accept the blip.
+  const ColPartition* above = shifted[PN_ABOVE1];
+  if (above != nullptr && above->SpacingEqual(median_spacing, resolution))
+    return true;
+  const ColPartition* below = shifted[PN_BELOW1];
+  return below != nullptr && below->SpacingEqual(median_spacing, resolution);
+}
+
+// Returns true if bottom_spacing_ and top_spacing_ are both within their
+// respective resolution-dependent margins of the given spacing.
+bool ColPartition::SpacingEqual(int spacing, int resolution) const {
+  const int bottom_tolerance = BottomSpacingMargin(resolution);
+  if (!NearlyEqual(bottom_spacing_, spacing, bottom_tolerance))
+    return false;
+  const int top_tolerance = TopSpacingMargin(resolution);
+  return NearlyEqual(top_spacing_, spacing, top_tolerance);
+}
+
+// Returns true if the spacings of this and other agree. The bottom spacings
+// must match to within the larger of the two bottom margins. The top
+// spacings must either match to within the larger of the two top margins, or
+// their sum must match twice this bottom spacing to within the bottom margin.
+bool ColPartition::SpacingsEqual(const ColPartition& other,
+                                 int resolution) const {
+  const int bottom_tolerance = std::max(BottomSpacingMargin(resolution),
+                                        other.BottomSpacingMargin(resolution));
+  if (!NearlyEqual(bottom_spacing_, other.bottom_spacing_, bottom_tolerance))
+    return false;
+  const int top_tolerance = std::max(TopSpacingMargin(resolution),
+                                     other.TopSpacingMargin(resolution));
+  if (NearlyEqual(top_spacing_, other.top_spacing_, top_tolerance))
+    return true;
+  // Fallback: the two top spacings average out to the bottom spacing.
+  const int top_sum = top_spacing_ + other.top_spacing_;
+  return NearlyEqual(top_sum, bottom_spacing_ * 2, bottom_tolerance);
+}
+
+// Returns true if the summed bottom spacings and the summed top spacings of
+// this and other both match the given spacing, or both match twice the given
+// spacing, to within margins dictated by the image resolution.
+bool ColPartition::SummedSpacingOK(const ColPartition& other,
+                                   int spacing, int resolution) const {
+  const int bottom_tolerance = std::max(BottomSpacingMargin(resolution),
+                                        other.BottomSpacingMargin(resolution));
+  const int top_tolerance = std::max(TopSpacingMargin(resolution),
+                                     other.TopSpacingMargin(resolution));
+  const int bottom_sum = bottom_spacing_ + other.bottom_spacing_;
+  const int top_sum = top_spacing_ + other.top_spacing_;
+  // Try the single spacing first, then the doubled spacing.
+  for (int multiple = 1; multiple <= 2; ++multiple) {
+    const int target = spacing * multiple;
+    if (NearlyEqual(target, bottom_sum, bottom_tolerance) &&
+        NearlyEqual(target, top_sum, top_tolerance)) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Returns the tolerance to use when comparing bottom spacings of text lines:
+// kMaxSpacingDrift scaled by the resolution (rounded), plus the stored
+// side_step_.
+int ColPartition::BottomSpacingMargin(int resolution) const {
+  const int drift = static_cast<int>(kMaxSpacingDrift * resolution + 0.5);
+  return drift + side_step_;
+}
+
+// Returns the tolerance to use when comparing top spacings of text lines:
+// the bottom spacing margin widened by kMaxTopSpacingFraction of the median
+// height (rounded).
+int ColPartition::TopSpacingMargin(int resolution) const {
+  const int height_allowance =
+      static_cast<int>(kMaxTopSpacingFraction * median_height_ + 0.5);
+  return height_allowance + BottomSpacingMargin(resolution);
+}
+
+// Returns true if neither median height exceeds the other multiplied by
+// kMaxSizeRatio, i.e. the two text sizes agree to within that factor.
+bool ColPartition::SizesSimilar(const ColPartition& other) const {
+  if (!(median_height_ <= other.median_height_ * kMaxSizeRatio))
+    return false;
+  return other.median_height_ <= median_height_ * kMaxSizeRatio;
+}
+
+// Helper narrows [*margin_left, *margin_right], the sort-key bounds of the
+// left margin of part of a block, by intersecting them with the interval
+// between part's left margin and the left edge of its bounding box.
+// Returns false, leaving both bounds untouched, if that interval is disjoint
+// from the established bounds; returns true after narrowing otherwise.
+static bool UpdateLeftMargin(const ColPartition& part,
+                             int* margin_left, int* margin_right) {
+  const TBOX& box = part.bounding_box();
+  const int y_top = box.top();
+  const int y_bottom = box.bottom();
+  // Evaluate both edges at the top and bottom of the box and keep the
+  // tighter key of each pair.
+  const int low_key = std::max(part.SortKey(part.left_margin(), y_top),
+                               part.SortKey(part.left_margin(), y_bottom));
+  const int high_key = std::min(part.SortKey(box.left(), y_top),
+                                part.SortKey(box.left(), y_bottom));
+  if (low_key > *margin_right || high_key < *margin_left)
+    return false;
+  // This part is compatible - shrink the bounds to the intersection.
+  if (high_key < *margin_right)
+    *margin_right = high_key;
+  if (low_key > *margin_left)
+    *margin_left = low_key;
+  return true;
+}
+
+// Computes and returns in start, end a line segment formed from a
+// forwards-iterated group of left edges of partitions that satisfy the
+// condition that the intersection of the left margins is non-empty, ie the
+// rightmost left margin is to the left of the leftmost left bounding box edge.
+// On return the iterator is set to the start of the next run.
+// part_it is advanced by this call; start and end are outputs only.
+// The iterator is treated as circular: at_first() is used to detect that the
+// walk has wrapped round to the beginning of the list.
+void ColPartition::LeftEdgeRun(ColPartition_IT* part_it,
+ ICOORD* start, ICOORD* end) {
+ ColPartition* part = part_it->data();
+ ColPartition* start_part = part;
+ int start_y = part->bounding_box_.top();
+ // Adjust start_y using the bottom of the previous partition, if any.
+ if (!part_it->at_first()) {
+ int prev_bottom = part_it->data_relative(-1)->bounding_box_.bottom();
+ if (prev_bottom < start_y)
+ start_y = prev_bottom;
+ else if (prev_bottom > start_y)
+ start_y = (start_y + prev_bottom) / 2;
+ }
+ // NOTE: this initial value of end_y is never read; end_y is reassigned
+ // under "Now calculate the end_y" below before its first use.
+ int end_y = part->bounding_box_.bottom();
+ int margin_right = INT32_MAX;
+ int margin_left = -INT32_MAX;
+ UpdateLeftMargin(*part, &margin_left, &margin_right);
+ // Extend the run forwards while the margins remain compatible and the
+ // iterator has not wrapped to the first element.
+ do {
+ part_it->forward();
+ part = part_it->data();
+ } while (!part_it->at_first() &&
+ UpdateLeftMargin(*part, &margin_left, &margin_right));
+ // The run ended. If we were pushed inwards, compute the next run and
+ // extend it backwards into the run we just calculated to find the end of
+ // this run that provides a tight box.
+ int next_margin_right = INT32_MAX;
+ int next_margin_left = -INT32_MAX;
+ UpdateLeftMargin(*part, &next_margin_left, &next_margin_right);
+ if (next_margin_left > margin_right) {
+ // A copy of the iterator is used so that part_it itself is not advanced
+ // while the extent of the next run is measured.
+ ColPartition_IT next_it(*part_it);
+ do {
+ next_it.forward();
+ part = next_it.data();
+ } while (!next_it.at_first() &&
+ UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
+ // Now extend the next run backwards into the original run to get the
+ // tightest fit.
+ do {
+ part_it->backward();
+ part = part_it->data();
+ } while (part != start_part &&
+ UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
+ part_it->forward();
+ }
+ // Now calculate the end_y.
+ part = part_it->data_relative(-1);
+ end_y = part->bounding_box_.bottom();
+ if (!part_it->at_first() && part_it->data()->bounding_box_.top() < end_y)
+ end_y = (end_y + part_it->data()->bounding_box_.top()) / 2;
+ // Both output points use the run's margin_right key, converted back to x
+ // at their respective y.
+ start->set_y(start_y);
+ start->set_x(part->XAtY(margin_right, start_y));
+ end->set_y(end_y);
+ end->set_x(part->XAtY(margin_right, end_y));
+ if (textord_debug_tabfind && !part_it->at_first())
+ tprintf("Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
+ start_y, end_y, part->XAtY(margin_left, end_y),
+ end->x(), part->left_margin_, part->bounding_box_.left());
+}
+
+// Helper narrows [*margin_left, *margin_right], the sort-key bounds of the
+// right margin of part of a block, by intersecting them with the interval
+// between the right edge of part's bounding box and its right margin.
+// Returns false, leaving both bounds untouched, if that interval is disjoint
+// from the established bounds; returns true after narrowing otherwise.
+static bool UpdateRightMargin(const ColPartition& part,
+                              int* margin_left, int* margin_right) {
+  const TBOX& box = part.bounding_box();
+  const int y_top = box.top();
+  const int y_bottom = box.bottom();
+  // Evaluate both edges at the top and bottom of the box and keep the
+  // tighter key of each pair.
+  const int low_key = std::max(part.SortKey(box.right(), y_top),
+                               part.SortKey(box.right(), y_bottom));
+  const int high_key = std::min(part.SortKey(part.right_margin(), y_top),
+                                part.SortKey(part.right_margin(), y_bottom));
+  if (low_key > *margin_right || high_key < *margin_left)
+    return false;
+  // This part is compatible - shrink the bounds to the intersection.
+  if (high_key < *margin_right)
+    *margin_right = high_key;
+  if (low_key > *margin_left)
+    *margin_left = low_key;
+  return true;
+}
+
+// Computes and returns in start, end a line segment formed from a
+// backwards-iterated group of right edges of partitions that satisfy the
+// condition that the intersection of the right margins is non-empty, ie the
+// leftmost right margin is to the right of the rightmost right bounding box
+// edge.
+// On return the iterator is set to the start of the next run.
+// part_it is moved backwards by this call; start and end are outputs only.
+// The iterator is treated as circular: at_last() is used to detect that the
+// walk has wrapped round to the end of the list.
+void ColPartition::RightEdgeRun(ColPartition_IT* part_it,
+ ICOORD* start, ICOORD* end) {
+ ColPartition* part = part_it->data();
+ ColPartition* start_part = part;
+ int start_y = part->bounding_box_.bottom();
+ // Adjust start_y using the top of the following partition, if any.
+ if (!part_it->at_last()) {
+ int next_y = part_it->data_relative(1)->bounding_box_.top();
+ if (next_y > start_y)
+ start_y = next_y;
+ else if (next_y < start_y)
+ start_y = (start_y + next_y) / 2;
+ }
+ // NOTE: this initial value of end_y is never read; end_y is reassigned
+ // under "Now calculate the end_y" below before its first use.
+ int end_y = part->bounding_box_.top();
+ int margin_right = INT32_MAX;
+ int margin_left = -INT32_MAX;
+ UpdateRightMargin(*part, &margin_left, &margin_right);
+ // Extend the run backwards while the margins remain compatible and the
+ // iterator has not wrapped to the last element.
+ do {
+ part_it->backward();
+ part = part_it->data();
+ } while (!part_it->at_last() &&
+ UpdateRightMargin(*part, &margin_left, &margin_right));
+ // The run ended. If we were pushed inwards, compute the next run and
+ // extend it backwards to find the end of this run for a tight box.
+ int next_margin_right = INT32_MAX;
+ int next_margin_left = -INT32_MAX;
+ UpdateRightMargin(*part, &next_margin_left, &next_margin_right);
+ if (next_margin_right < margin_left) {
+ // A copy of the iterator is used so that part_it itself is not moved
+ // while the extent of the next run is measured.
+ ColPartition_IT next_it(*part_it);
+ do {
+ next_it.backward();
+ part = next_it.data();
+ } while (!next_it.at_last() &&
+ UpdateRightMargin(*part, &next_margin_left,
+ &next_margin_right));
+ // Now extend the next run forwards into the original run to get the
+ // tightest fit.
+ do {
+ part_it->forward();
+ part = part_it->data();
+ } while (part != start_part &&
+ UpdateRightMargin(*part, &next_margin_left,
+ &next_margin_right));
+ part_it->backward();
+ }
+ // Now calculate the end_y.
+ part = part_it->data_relative(1);
+ end_y = part->bounding_box().top();
+ if (!part_it->at_last() &&
+ part_it->data()->bounding_box_.bottom() > end_y)
+ end_y = (end_y + part_it->data()->bounding_box_.bottom()) / 2;
+ // Both output points use the run's margin_left key, converted back to x
+ // at their respective y.
+ start->set_y(start_y);
+ start->set_x(part->XAtY(margin_left, start_y));
+ end->set_y(end_y);
+ end->set_x(part->XAtY(margin_left, end_y));
+ if (textord_debug_tabfind && !part_it->at_last())
+ tprintf("Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
+ start_y, end_y, end->x(), part->XAtY(margin_right, end_y),
+ part->bounding_box_.right(), part->right_margin_);
+}
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/colpartition.h b/tesseract/src/textord/colpartition.h
new file mode 100644
index 00000000..5c299b3e
--- /dev/null
+++ b/tesseract/src/textord/colpartition.h
@@ -0,0 +1,927 @@
+///////////////////////////////////////////////////////////////////////
+// File: colpartition.h
+// Description: Class to hold partitions of the page that correspond
+// roughly to text lines.
+// Author: Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_COLPARTITION_H_
+#define TESSERACT_TEXTORD_COLPARTITION_H_
+
+#include "bbgrid.h"
+#include "blobbox.h" // For BlobRegionType.
+#include "ocrblock.h"
+#include "rect.h" // For TBOX.
+#include "scrollview.h"
+#include "tabfind.h" // For WidthCallback.
+#include "tabvector.h" // For BLOBNBOX_CLIST.
+
+#include <algorithm>
+
+namespace tesseract {
+
+// Number of colors in the color1, color2 arrays.
+const int kRGBRMSColors = 4;
+
+class ColPartition;
+class ColPartitionSet;
+class ColPartitionGrid;
+class WorkingPartSet;
+class WorkingPartSet_LIST;
+
+// An enum to indicate how a partition sits on the columns.
+// The order of flowing/heading/pullout must be kept consistent with
+// PolyBlockType.
+// CST_COUNT must stay the last enumerator so that its value equals the
+// number of real entries before it.
+enum ColumnSpanningType {
+ CST_NOISE, // Strictly between columns.
+ CST_FLOWING, // Strictly within a single column.
+ CST_HEADING, // Spans multiple columns.
+ CST_PULLOUT, // Touches multiple columns, but doesn't span them.
+ CST_COUNT // Number of entries.
+};
+
+ELIST2IZEH(ColPartition)
+CLISTIZEH(ColPartition)
+
+/**
+ * ColPartition is a partition of a horizontal slice of the page.
+ * It starts out as a collection of blobs at a particular y-coord in the grid,
+ * but ends up (after merging and uniquing) as an approximate text line.
+ * ColPartitions are also used to hold a partitioning of the page into
+ * columns, each representing one column. Although a ColPartition applies
+ * to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions
+ * emerges, which represents the columns over a wide y-coordinate range.
+ */
+class TESS_API ColPartition : public ELIST2_LINK {
+ public:
+ // This empty constructor is here only so that the class can be ELISTIZED.
+ // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
+ // and eliminate CLASSNAME##_copier.
+ ColPartition() = default;
+
+ /**
+ * @param blob_type is the blob_region_type_ of the blobs in this partition.
+ * @param vertical is the direction of logical vertical on the possibly skewed image.
+ */
+ ColPartition(BlobRegionType blob_type, const ICOORD& vertical);
+ /**
+ * Constructs a fake ColPartition with no BLOBNBOXes to represent a
+ * horizontal or vertical line, given a type and a bounding box.
+ */
+ static ColPartition* MakeLinePartition(BlobRegionType blob_type,
+ const ICOORD& vertical,
+ int left, int bottom,
+ int right, int top);
+
+ // Constructs and returns a fake ColPartition with a single fake BLOBNBOX,
+ // all made from a single TBOX.
+ // WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and
+ // the ColPartition owns the BLOBNBOX!!!
+ // Call DeleteBoxes before deleting the ColPartition.
+ static ColPartition* FakePartition(const TBOX& box,
+ PolyBlockType block_type,
+ BlobRegionType blob_type,
+ BlobTextFlowType flow);
+
+ // Constructs and returns a ColPartition with the given real BLOBNBOX,
+ // and sets it up to be a "big" partition (single-blob partition bigger
+ // than the surrounding text that may be a dropcap, two or more vertically
+ // touching characters, or some graphic element.
+ // If the given list is not nullptr, the partition is also added to the list.
+ static ColPartition* MakeBigPartition(BLOBNBOX* box,
+ ColPartition_LIST* big_part_list);
+
+ ~ColPartition();
+
+ // Simple accessors.
+ const TBOX& bounding_box() const {
+ return bounding_box_;
+ }
+ int left_margin() const {
+ return left_margin_;
+ }
+ void set_left_margin(int margin) {
+ left_margin_ = margin;
+ }
+ int right_margin() const {
+ return right_margin_;
+ }
+ void set_right_margin(int margin) {
+ right_margin_ = margin;
+ }
+ int median_top() const {
+ return median_top_;
+ }
+ int median_bottom() const {
+ return median_bottom_;
+ }
+ int median_left() const {
+ return median_left_;
+ }
+ int median_right() const {
+ return median_right_;
+ }
+ int median_height() const {
+ return median_height_;
+ }
+ void set_median_height(int height) {
+ median_height_ = height;
+ }
+ int median_width() const {
+ return median_width_;
+ }
+ void set_median_width(int width) {
+ median_width_ = width;
+ }
+ BlobRegionType blob_type() const {
+ return blob_type_;
+ }
+ void set_blob_type(BlobRegionType t) {
+ blob_type_ = t;
+ }
+ BlobTextFlowType flow() const {
+ return flow_;
+ }
+ void set_flow(BlobTextFlowType f) {
+ flow_ = f;
+ }
+ int good_blob_score() const {
+ return good_blob_score_;
+ }
+ bool good_width() const {
+ return good_width_;
+ }
+ bool good_column() const {
+ return good_column_;
+ }
+ bool left_key_tab() const {
+ return left_key_tab_;
+ }
+ int left_key() const {
+ return left_key_;
+ }
+ bool right_key_tab() const {
+ return right_key_tab_;
+ }
+ int right_key() const {
+ return right_key_;
+ }
+ PolyBlockType type() const {
+ return type_;
+ }
+ void set_type(PolyBlockType t) {
+ type_ = t;
+ }
+ BLOBNBOX_CLIST* boxes() {
+ return &boxes_;
+ }
+ int boxes_count() const {
+ return boxes_.length();
+ }
+ void set_vertical(const ICOORD& v) {
+ vertical_ = v;
+ }
+ ColPartition_CLIST* upper_partners() {
+ return &upper_partners_;
+ }
+ ColPartition_CLIST* lower_partners() {
+ return &lower_partners_;
+ }
+ void set_working_set(WorkingPartSet* working_set) {
+ working_set_ = working_set;
+ }
+ bool block_owned() const {
+ return block_owned_;
+ }
+ void set_block_owned(bool owned) {
+ block_owned_ = owned;
+ }
+ bool desperately_merged() const {
+ return desperately_merged_;
+ }
+ ColPartitionSet* column_set() const {
+ return column_set_;
+ }
+ void set_side_step(int step) {
+ side_step_ = step;
+ }
+ int bottom_spacing() const {
+ return bottom_spacing_;
+ }
+ void set_bottom_spacing(int spacing) {
+ bottom_spacing_ = spacing;
+ }
+ int top_spacing() const {
+ return top_spacing_;
+ }
+ void set_top_spacing(int spacing) {
+ top_spacing_ = spacing;
+ }
+
+  // Marks this partition as PT_TABLE, remembering the previous type in
+  // type_before_table_ so that clear_table_type() can restore it.
+  // Does nothing if the partition is already a table.
+  void set_table_type() {
+    if (type_ == PT_TABLE)
+      return;
+    type_before_table_ = type_;
+    type_ = PT_TABLE;
+  }
+ void clear_table_type() {
+ if (type_ == PT_TABLE)
+ type_ = type_before_table_;
+ }
+  // Returns the inside_table_column_ flag. Declared const, like the other
+  // simple getters of this class, so it can be called through a const
+  // ColPartition reference or pointer.
+  bool inside_table_column() const {
+    return inside_table_column_;
+  }
+ void set_inside_table_column(bool val) {
+ inside_table_column_ = val;
+ }
+ ColPartition* nearest_neighbor_above() const {
+ return nearest_neighbor_above_;
+ }
+ void set_nearest_neighbor_above(ColPartition* part) {
+ nearest_neighbor_above_ = part;
+ }
+ ColPartition* nearest_neighbor_below() const {
+ return nearest_neighbor_below_;
+ }
+ void set_nearest_neighbor_below(ColPartition* part) {
+ nearest_neighbor_below_ = part;
+ }
+ int space_above() const {
+ return space_above_;
+ }
+ void set_space_above(int space) {
+ space_above_ = space;
+ }
+ int space_below() const {
+ return space_below_;
+ }
+ void set_space_below(int space) {
+ space_below_ = space;
+ }
+ int space_to_left() const {
+ return space_to_left_;
+ }
+ void set_space_to_left(int space) {
+ space_to_left_ = space;
+ }
+ int space_to_right() const {
+ return space_to_right_;
+ }
+ void set_space_to_right(int space) {
+ space_to_right_ = space;
+ }
+ uint8_t* color1() {
+ return color1_;
+ }
+ uint8_t* color2() {
+ return color2_;
+ }
+ bool owns_blobs() const {
+ return owns_blobs_;
+ }
+ void set_owns_blobs(bool owns_blobs) {
+ // Do NOT change ownership flag when there are blobs in the list.
+ // Immediately set the ownership flag when creating copies.
+ ASSERT_HOST(boxes_.empty());
+ owns_blobs_ = owns_blobs;
+ }
+
+ // Inline quasi-accessors that require some computation.
+
+ // Returns the middle y-coord of the bounding box.
+ int MidY() const {
+ return (bounding_box_.top() + bounding_box_.bottom()) / 2;
+ }
+ // Returns the middle y-coord of the median top and bottom.
+ int MedianY() const {
+ return (median_top_ + median_bottom_) / 2;
+ }
+ // Returns the middle x-coord of the bounding box.
+ int MidX() const {
+ return (bounding_box_.left() + bounding_box_.right()) / 2;
+ }
+ // Returns the sort key at any given x,y.
+ int SortKey(int x, int y) const {
+ return TabVector::SortKey(vertical_, x, y);
+ }
+ // Returns the x corresponding to the sortkey, y pair.
+ int XAtY(int sort_key, int y) const {
+ return TabVector::XAtY(vertical_, sort_key, y);
+ }
+ // Returns the x difference between the two sort keys.
+ int KeyWidth(int left_key, int right_key) const {
+ return (right_key - left_key) / vertical_.y();
+ }
+ // Returns the column width between the left and right keys.
+ int ColumnWidth() const {
+ return KeyWidth(left_key_, right_key_);
+ }
+ // Returns the sort key of the box left edge.
+ int BoxLeftKey() const {
+ return SortKey(bounding_box_.left(), MidY());
+ }
+ // Returns the sort key of the box right edge.
+ int BoxRightKey() const {
+ return SortKey(bounding_box_.right(), MidY());
+ }
+ // Returns the left edge at the given y, using the sort key.
+ int LeftAtY(int y) const {
+ return XAtY(left_key_, y);
+ }
+ // Returns the right edge at the given y, using the sort key.
+ int RightAtY(int y) const {
+ return XAtY(right_key_, y);
+ }
+ // Returns true if the right edge of this is to the left of the right
+ // edge of other.
+ bool IsLeftOf(const ColPartition& other) const {
+ return bounding_box_.right() < other.bounding_box_.right();
+ }
+ // Returns true if the partition contains the given x coordinate at the y.
+ bool ColumnContains(int x, int y) const {
+ return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1;
+ }
+ // Returns true if there are no blobs in the list.
+ bool IsEmpty() const {
+ return boxes_.empty();
+ }
+ // Returns true if there is a single blob in the list.
+ bool IsSingleton() const {
+ return boxes_.singleton();
+ }
+ // Returns true if this and other overlap horizontally by bounding box.
+ bool HOverlaps(const ColPartition& other) const {
+ return bounding_box_.x_overlap(other.bounding_box_);
+ }
+ // Returns true if this and other's bounding boxes overlap vertically.
+ // TODO(rays) Make HOverlaps and VOverlaps truly symmetric.
+ bool VOverlaps(const ColPartition& other) const {
+ return bounding_box_.y_gap(other.bounding_box_) < 0;
+ }
+  // Returns the vertical overlap of the median bands of this and other, i.e.
+  // the lower of the two median tops minus the higher of the two median
+  // bottoms (negative when the bands are disjoint). Returns 0 if either
+  // median_bottom_ is INT32_MAX.
+  // WARNING! Only makes sense on horizontal partitions!
+  int VCoreOverlap(const ColPartition& other) const {
+    if (median_bottom_ != INT32_MAX && other.median_bottom_ != INT32_MAX) {
+      const int lower_top = std::min(median_top_, other.median_top_);
+      const int higher_bottom =
+          std::max(median_bottom_, other.median_bottom_);
+      return lower_top - higher_bottom;
+    }
+    return 0;
+  }
+ // Returns the horizontal overlap (by median) of this and other.
+ // WARNING! Only makes sense on vertical partitions!
+ int HCoreOverlap(const ColPartition& other) const {
+ return std::min(median_right_, other.median_right_) -
+ std::max(median_left_, other.median_left_);
+ }
+  // Returns true if the median bands of this and other overlap by more than
+  // a third of the shorter band's height. Returns false if either
+  // median_bottom_ is INT32_MAX.
+  // WARNING! Only makes sense on horizontal partitions!
+  bool VSignificantCoreOverlap(const ColPartition& other) const {
+    const bool bottom_is_max =
+        median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX;
+    if (bottom_is_max)
+      return false;
+    const int this_height = median_top_ - median_bottom_;
+    const int other_height = other.median_top_ - other.median_bottom_;
+    const int shorter_height = std::min(this_height, other_height);
+    return VCoreOverlap(other) * 3 > shorter_height;
+  }
+  // Returns true if each partition's bounding box lies within the other's
+  // left and right margins, so that the two can be combined without putting
+  // a horizontal step in either the left or right edge of the resulting
+  // block.
+  bool WithinSameMargins(const ColPartition& other) const {
+    const TBOX& mine = bounding_box_;
+    const TBOX& theirs = other.bounding_box_;
+    const bool left_edges_ok =
+        left_margin_ <= theirs.left() && mine.left() >= other.left_margin_;
+    const bool right_edges_ok =
+        mine.right() <= other.right_margin_ && right_margin_ >= theirs.right();
+    return left_edges_ok && right_edges_ok;
+  }
+ // Returns true if the blob region types (blob_type_) match.
+ // Lines never match anything, as they should never be merged or chained.
+ bool TypesMatch(const ColPartition& other) const {
+ return TypesMatch(blob_type_, other.blob_type_);
+ }
+ static bool TypesMatch(BlobRegionType type1, BlobRegionType type2) {
+ return (type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN) &&
+ !BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2);
+ }
+
+  // Returns true if the two block types are identical, or if one of them is
+  // PT_FLOWING_TEXT and the other is PT_INLINE_EQUATION.
+  static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2) {
+    if (type1 == type2)
+      return true;
+    const bool text_then_equation =
+        type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION;
+    const bool equation_then_text =
+        type1 == PT_INLINE_EQUATION && type2 == PT_FLOWING_TEXT;
+    return text_then_equation || equation_then_text;
+  }
+
+ // Returns true if the partition is of a line type. IsVerticalLine and
+ // IsHorizontalLine narrow this further by orientation.
+ bool IsLineType() const {
+ return PTIsLineType(type_);
+ }
+ // Returns true if the partition is of image type.
+ bool IsImageType() const {
+ return PTIsImageType(type_);
+ }
+ // Returns true if the partition is of text type.
+ bool IsTextType() const {
+ return PTIsTextType(type_);
+ }
+ // Returns true if the partition is of pullout (inter-column) type.
+ bool IsPulloutType() const {
+ return PTIsPulloutType(type_);
+ }
+ // Returns true if the partition is of an exclusively vertical type.
+ bool IsVerticalType() const {
+ return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE;
+ }
+ // Returns true if the partition's blob type is definitely horizontal:
+ // BRT_TEXT or BRT_HLINE.
+ bool IsHorizontalType() const {
+   switch (blob_type_) {
+     case BRT_TEXT:
+     case BRT_HLINE:
+       return true;
+     default:
+       return false;
+   }
+ }
+ // Returns true if the partition is of a type that cannot be merged: either
+ // BLOBNBOX::UnMergeableType accepts its blob_type_, or its type_ is PT_NOISE.
+ bool IsUnMergeableType() const {
+   if (BLOBNBOX::UnMergeableType(blob_type_)) return true;
+   return type_ == PT_NOISE;
+ }
+ // Returns true if this partition is a vertical line, i.e. it is both of an
+ // exclusively vertical type and of a line type.
+ // TODO(nbeato): Use PartitionType enum when Ray's code is submitted.
+ bool IsVerticalLine() const {
+   if (!IsVerticalType()) return false;
+   return IsLineType();
+ }
+ // Returns true if this partition is a horizontal line, i.e. it is both of
+ // a definite horizontal type and of a line type.
+ // TODO(nbeato): Use PartitionType enum when Ray's code is submitted.
+ bool IsHorizontalLine() const {
+   if (!IsHorizontalType()) return false;
+   return IsLineType();
+ }
+
+ // Adds the given box to the partition, updating the partition bounds.
+ // The list of boxes in the partition is updated, ensuring that no box is
+ // recorded twice, and the boxes are kept in increasing left position.
+ void AddBox(BLOBNBOX* box);
+
+ // Removes the given box from the partition, updating the bounds.
+ void RemoveBox(BLOBNBOX* box);
+
+ // Returns the tallest box in the partition, as measured perpendicular to the
+ // presumed flow of text.
+ BLOBNBOX* BiggestBox();
+
+ // Returns the bounding box excluding the given box.
+ TBOX BoundsWithoutBox(BLOBNBOX* box);
+
+ // Claims the boxes in the boxes_ list by marking them with this as their
+ // owner pointer.
+ void ClaimBoxes();
+
+ // Nulls the owner of the blobs in this partition, so they can be deleted
+ // independently of the ColPartition.
+ void DisownBoxes();
+ // Nulls the owner of the blobs in this partition that are owned by this
+ // partition, so they can be deleted independently of the ColPartition.
+ // Any blobs that are not owned by this partition get to keep their owner
+ // without an assert failure.
+ void DisownBoxesNoAssert();
+ // Nulls the owner of the blobs in this partition that are owned by this
+ // partition and not leader blobs, removing them from the boxes_ list, thus
+ // turning this partition back to a leader partition if it contains a leader,
+ // or otherwise leaving it empty. Returns true if any boxes remain.
+ bool ReleaseNonLeaderBoxes();
+
+ // Delete the boxes that this partition owns.
+ void DeleteBoxes();
+
+ // Reflects the partition in the y-axis, assuming that its blobs have
+ // already been done. Corrects only a limited part of the members, since
+ // this function is assumed to be used shortly after initial creation, which
+ // is before a lot of the members are used.
+ void ReflectInYAxis();
+
+ // Returns true if this is a legal partition - meaning that the conditions
+ // left_margin <= bounding_box left
+ // left_key <= bounding box left key
+ // bounding box left <= bounding box right
+ // and likewise for right margin and key
+ // are all met.
+ bool IsLegal();
+
+ // Returns true if the left and right edges are approximately equal.
+ bool MatchingColumns(const ColPartition& other) const;
+
+ // Returns true if the colors match for two text partitions.
+ bool MatchingTextColor(const ColPartition& other) const;
+
+ // Returns true if the sizes match for two text partitions,
+ // taking orientation into account
+ bool MatchingSizes(const ColPartition& other) const;
+
+ // Returns true if there is no tabstop violation in merging this and other.
+ bool ConfirmNoTabViolation(const ColPartition& other) const;
+
+ // Returns true if other has a similar stroke width to this.
+ bool MatchingStrokeWidth(const ColPartition& other,
+ double fractional_tolerance,
+ double constant_tolerance) const;
+ // Returns true if candidate is an acceptable diacritic base char merge
+ // with this as the diacritic.
+ bool OKDiacriticMerge(const ColPartition& candidate, bool debug) const;
+
+ // Sets the sort key using either the tab vector, or the bounding box if
+ // the tab vector is nullptr. If the tab_vector lies inside the bounding_box,
+ // the edge of the box is used as the key anyway.
+ void SetLeftTab(const TabVector* tab_vector);
+ void SetRightTab(const TabVector* tab_vector);
+
+ // Copies the left/right tab from the src partition, but if take_box is
+ // true, copies the box instead and uses that as a key.
+ void CopyLeftTab(const ColPartition& src, bool take_box);
+ void CopyRightTab(const ColPartition& src, bool take_box);
+
+ // Returns the left rule line x coord of the leftmost blob.
+ int LeftBlobRule() const;
+ // Returns the right rule line x coord of the rightmost blob.
+ int RightBlobRule() const;
+
+ // Returns the density value for a particular BlobSpecialTextType.
+ float SpecialBlobsDensity(const BlobSpecialTextType type) const;
+ // Returns the number of blobs for a particular BlobSpecialTextType.
+ int SpecialBlobsCount(const BlobSpecialTextType type);
+ // Set the density value for a particular BlobSpecialTextType, should ONLY be
+ // used for debugging or testing. In production code, use
+ // ComputeSpecialBlobsDensity instead.
+ void SetSpecialBlobsDensity(
+ const BlobSpecialTextType type, const float density);
+ // Compute the SpecialTextType density of blobs, where we assume
+ // that the SpecialTextType in the boxes_ has been set.
+ void ComputeSpecialBlobsDensity();
+
+ // Add a partner above if upper, otherwise below.
+ // Add them uniquely and keep the list sorted by box left.
+ // Partnerships are added symmetrically to partner and this.
+ void AddPartner(bool upper, ColPartition* partner);
+ // Removes the partner from this, but does not remove this from partner.
+ // This asymmetric removal is so as not to mess up the iterator that is
+ // working on partner's partner list.
+ void RemovePartner(bool upper, ColPartition* partner);
+ // Returns the partner if the given partner is a singleton, otherwise nullptr.
+ ColPartition* SingletonPartner(bool upper);
+
+ // Merge with the other partition and delete it.
+ void Absorb(ColPartition* other, WidthCallback cb);
+
+ // Returns true if the overlap between this and the merged pair of
+ // merge candidates is sufficiently trivial to be allowed.
+ // The merged box can graze the edge of this by the ok_box_overlap
+ // if that exceeds the margin to the median top and bottom.
+ bool OKMergeOverlap(const ColPartition& merge1, const ColPartition& merge2,
+ int ok_box_overlap, bool debug);
+
+ // Find the blob at which to split this to minimize the overlap with the
+ // given box. Returns the first blob to go in the second partition.
+ BLOBNBOX* OverlapSplitBlob(const TBOX& box);
+
+ // Split this partition keeping the first half in this and returning
+ // the second half.
+ // Splits by putting the split_blob and the blobs that follow
+ // in the second half, and the rest in the first half.
+ ColPartition* SplitAtBlob(BLOBNBOX* split_blob);
+
+ // Splits this partition at the given x coordinate, returning the right
+ // half and keeping the left half in this.
+ ColPartition* SplitAt(int split_x);
+
+ // Recalculates all the coordinate limits of the partition.
+ void ComputeLimits();
+
+ // Returns the number of boxes that overlap the given box.
+ int CountOverlappingBoxes(const TBOX& box);
+
+ // Computes and sets the type_, first_column_, last_column_ and column_set_.
+ // resolution refers to the ppi resolution of the image.
+ void SetPartitionType(int resolution, ColPartitionSet* columns);
+
+ // Returns the PartitionType from the current BlobRegionType and a column
+ // flow spanning type ColumnSpanningType, generated by
+ // ColPartitionSet::SpanningType, that indicates how the partition sits
+ // in the columns.
+ PolyBlockType PartitionType(ColumnSpanningType flow) const;
+
+ // Returns the first and last column touched by this partition.
+ // resolution refers to the ppi resolution of the image.
+ void ColumnRange(int resolution, ColPartitionSet* columns,
+ int* first_col, int* last_col);
+
+ // Sets the internal flags good_width_ and good_column_.
+ void SetColumnGoodness(WidthCallback cb);
+
+ // Determines whether the blobs in this partition mostly represent
+ // a leader (fixed pitch sequence) and sets the member blobs accordingly.
+ // Note that height is assumed to have been tested elsewhere, and that this
+ // function will find most fixed-pitch text as leader without a height filter.
+ // Leader detection is limited to sequences of identical width objects,
+ // such as .... or ----, so patterns, such as .-.-.-.-. will not be found.
+ bool MarkAsLeaderIfMonospaced();
+ // Given the result of TextlineProjection::EvaluateColPartition, (positive for
+ // horizontal text, negative for vertical text, and near zero for non-text),
+ // sets the blob_type_ and flow_ for this partition to indicate whether it
+ // is strongly or weakly vertical or horizontal text, or non-text.
+ void SetRegionAndFlowTypesFromProjectionValue(int value);
+
+ // Sets all blobs with the partition blob type and flow, but never overwrite
+ // leader blobs, as we need to be able to identify them later.
+ void SetBlobTypes();
+
+ // Returns true if a decent baseline can be fitted through the blobs.
+ // Works for both horizontal and vertical text.
+ bool HasGoodBaseline();
+
+ // Adds this ColPartition to a matching WorkingPartSet if one can be found,
+ // otherwise starts a new one in the appropriate column, ending the previous.
+ void AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright,
+ int resolution, ColPartition_LIST* used_parts,
+ WorkingPartSet_LIST* working_set);
+
+ // From the given block_parts list, builds one or more BLOCKs and
+ // corresponding TO_BLOCKs, such that the line spacing is uniform in each.
+ // Created blocks are appended to the end of completed_blocks and to_blocks.
+ // The used partitions are put onto used_parts, as they may still be referred
+ // to in the partition grid. bleft, tright and resolution are the bounds
+ // and resolution of the original image.
+ static void LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright,
+ int resolution,
+ ColPartition_LIST* block_parts,
+ ColPartition_LIST* used_parts,
+ BLOCK_LIST* completed_blocks,
+ TO_BLOCK_LIST* to_blocks);
+ // Constructs a block from the given list of partitions.
+ // Arguments are as LineSpacingBlocks above.
+ static TO_BLOCK* MakeBlock(const ICOORD& bleft, const ICOORD& tright,
+ ColPartition_LIST* block_parts,
+ ColPartition_LIST* used_parts);
+
+ // Constructs a block from the given list of vertical text partitions.
+ // Currently only creates rectangular blocks.
+ static TO_BLOCK* MakeVerticalTextBlock(const ICOORD& bleft,
+ const ICOORD& tright,
+ ColPartition_LIST* block_parts,
+ ColPartition_LIST* used_parts);
+
+ // Makes a TO_ROW matching this and moves all the blobs to it, transferring
+ // ownership to the returned TO_ROW.
+ TO_ROW* MakeToRow();
+
+
+ // Returns a copy of everything except the list of boxes. The resulting
+ // ColPartition is only suitable for keeping in a column candidate list.
+ ColPartition* ShallowCopy() const;
+ // Returns a copy of everything with a shallow copy of the blobs.
+ // The blobs are still owned by their original parent, so they are
+ // treated as read-only.
+ ColPartition* CopyButDontOwnBlobs();
+
+ #ifndef GRAPHICS_DISABLED
+ // Provides a color for BBGrid to draw the rectangle.
+ ScrollView::Color BoxColor() const;
+ #endif // !GRAPHICS_DISABLED
+
+ // Prints debug information on this.
+ void Print() const;
+ // Prints debug information on the colors.
+ void PrintColors();
+
+ // Sets the types of all partitions in the run to be the max of the types.
+ void SmoothPartnerRun(int working_set_count);
+
+ // Cleans up the partners of the given type so that there is at most
+ // one partner. This makes block creation simpler.
+ // If get_desperate is true, goes to more desperate merge methods
+ // to merge flowing text before breaking partnerships.
+ void RefinePartners(PolyBlockType type, bool get_desperate,
+ ColPartitionGrid* grid);
+
+ // Returns true if this column partition is in the same column as
+ // part. This function will only work after the SetPartitionType function
+ // has been called on both column partitions. This is useful for
+ // doing a SideSearch when you want things in the same page column.
+ bool IsInSameColumnAs(const ColPartition& part) const;
+
+ // Comparison function for sorting by bounding box. p1 and p2 each point to a
+ // ColPartition pointer. If the vertical middle of either box lies within
+ // the vertical extent of the other, the result orders by increasing x
+ // middle; otherwise it orders by decreasing y middle.
+ static int SortByBBox(const void* p1, const void* p2) {
+   const TBOX& box1 =
+       (*static_cast<const ColPartition* const*>(p1))->bounding_box_;
+   const TBOX& box2 =
+       (*static_cast<const ColPartition* const*>(p2))->bounding_box_;
+   const int y1 = box1.y_middle();
+   const int y2 = box2.y_middle();
+   const bool y1_inside_box2 = box2.bottom() <= y1 && y1 <= box2.top();
+   const bool y2_inside_box1 = box1.bottom() <= y2 && y2 <= box1.top();
+   if (y1_inside_box2 || y2_inside_box1) {
+     // Same row: sort by increasing x.
+     return box1.x_middle() - box2.x_middle();
+   }
+   // Different rows: sort by decreasing y.
+   return y2 - y1;
+ }
+
+ // Sets the first column bound. Primarily used in testing.
+ void set_first_column(int column) {
+   this->first_column_ = column;
+ }
+ // Sets the last column bound. Primarily used in testing.
+ void set_last_column(int column) {
+   this->last_column_ = column;
+ }
+
+ private:
+ // Cleans up the partners above if upper is true, else below.
+ // If get_desperate is true, goes to more desperate merge methods
+ // to merge flowing text before breaking partnerships.
+ void RefinePartnersInternal(bool upper, bool get_desperate,
+ ColPartitionGrid* grid);
+ // Restricts the partners to only desirable types. For text and BRT_HLINE this
+ // means the same type_ , and for image types it means any image type.
+ void RefinePartnersByType(bool upper, ColPartition_CLIST* partners);
+ // Remove transitive partnerships: this<->a, and a<->b and this<->b.
+ // Gets rid of this<->b, leaving a clean chain.
+ // Also if we have this<->a and a<->this, then gets rid of this<->a, as
+ // this has multiple partners.
+ void RefinePartnerShortcuts(bool upper, ColPartition_CLIST* partners);
+ // If multiple text partners can be merged, then do so.
+ // If desperate is true, then an increase in overlap with the merge is
+ // allowed. If the overlap increases, then the desperately_merged_ flag
+ // is set, indicating that the textlines probably need to be regenerated
+ // by aggressive line fitting/splitting, as there are probably vertically
+ // joined blobs that cross textlines.
+ void RefineTextPartnersByMerge(bool upper, bool desperate,
+ ColPartition_CLIST* partners,
+ ColPartitionGrid* grid);
+ // Keep the partner with the biggest overlap.
+ void RefinePartnersByOverlap(bool upper, ColPartition_CLIST* partners);
+
+ // Return true if bbox belongs better in this than other.
+ bool ThisPartitionBetter(BLOBNBOX* bbox, const ColPartition& other);
+
+ // Smoothes the spacings in the list into groups of equal linespacing.
+ // resolution is the resolution of the original image, used as a basis
+ // for thresholds in change of spacing. page_height is in pixels.
+ static void SmoothSpacings(int resolution, int page_height,
+ ColPartition_LIST* parts);
+
+ // Returns true if the parts array of pointers to partitions matches the
+ // condition for a spacing blip. See SmoothSpacings for what this means
+ // and how it is used.
+ static bool OKSpacingBlip(int resolution, int median_spacing,
+ ColPartition** parts, int offset);
+
+ // Returns true if both the top and bottom spacings of this match the given
+ // spacing to within suitable margins dictated by the image resolution.
+ bool SpacingEqual(int spacing, int resolution) const;
+
+ // Returns true if both the top and bottom spacings of this and other
+ // match to within suitable margins dictated by the image resolution.
+ bool SpacingsEqual(const ColPartition& other, int resolution) const;
+
+ // Returns true if the sum spacing of this and other match the given
+ // spacing (or twice the given spacing) to within a suitable margin dictated
+ // by the image resolution.
+ bool SummedSpacingOK(const ColPartition& other,
+ int spacing, int resolution) const;
+
+ // Returns a suitable spacing margin that can be applied to bottoms of
+ // text lines, based on the resolution and the stored side_step_.
+ int BottomSpacingMargin(int resolution) const;
+
+ // Returns a suitable spacing margin that can be applied to tops of
+ // text lines, based on the resolution and the stored side_step_.
+ int TopSpacingMargin(int resolution) const;
+
+ // Returns true if the median text sizes of this and other agree to within
+ // a reasonable multiplicative factor.
+ bool SizesSimilar(const ColPartition& other) const;
+
+ // Computes and returns in start, end a line segment formed from a
+ // forwards-iterated group of left edges of partitions that satisfy the
+ // condition that the rightmost left margin is to the left of the
+ // leftmost left bounding box edge.
+ // TODO(rays) Not good enough. Needs improving to tightly wrap text in both
+ // directions, and to loosely wrap images.
+ static void LeftEdgeRun(ColPartition_IT* part_it,
+ ICOORD* start, ICOORD* end);
+ // Computes and returns in start, end a line segment formed from a
+ // backwards-iterated group of right edges of partitions that satisfy the
+ // condition that the leftmost right margin is to the right of the
+ // rightmost right bounding box edge.
+ // TODO(rays) Not good enough. Needs improving to tightly wrap text in both
+ // directions, and to loosely wrap images.
+ static void RightEdgeRun(ColPartition_IT* part_it,
+ ICOORD* start, ICOORD* end);
+
+ // The margins are determined by the position of the nearest vertically
+ // overlapping neighbour to the side. They indicate the maximum extent
+ // that the block/column may be extended without touching something else.
+ // Leftmost coordinate that the region may occupy over the y limits.
+ int left_margin_ = 0;
+ // Rightmost coordinate that the region may occupy over the y limits.
+ int right_margin_ = 0;
+ // Bounding box of all blobs in the partition.
+ TBOX bounding_box_;
+ // Median top and bottom of blobs in this partition.
+ int median_bottom_ = 0;
+ int median_top_ = 0;
+ // Median height of blobs in this partition.
+ int median_height_ = 0;
+ // Median left and right of blobs in this partition.
+ int median_left_ = 0;
+ int median_right_ = 0;
+ // Median width of blobs in this partition.
+ int median_width_ = 0;
+ // blob_region_type_ for the blobs in this partition.
+ BlobRegionType blob_type_ = BRT_UNKNOWN;
+ BlobTextFlowType flow_ = BTFT_NONE; // Quality of text flow.
+ // Total of GoodTextBlob results for all blobs in the partition.
+ int good_blob_score_ = 0;
+ // True if this partition has a common width.
+ bool good_width_ = false;
+ // True if this is a good column candidate.
+ bool good_column_ = false;
+ // True if the left_key_ is from a tab vector.
+ bool left_key_tab_ = false;
+ // True if the right_key_ is from a tab vector.
+ bool right_key_tab_ = false;
+ // Left and right sort keys for the edges of the partition.
+ // If the respective *_key_tab_ is true then this key came from a tab vector.
+ // If not, then the class promises to keep the key equal to the sort key
+ // for the respective edge of the bounding box at the MidY, so that
+ // LeftAtY and RightAtY always returns an x coordinate on the line parallel
+ // to vertical_ through the bounding box edge at MidY.
+ int left_key_ = 0;
+ int right_key_ = 0;
+ // Type of this partition after looking at its relation to the columns.
+ PolyBlockType type_ = PT_UNKNOWN;
+ // The global vertical skew direction.
+ ICOORD vertical_;
+ // All boxes in the partition stored in increasing left edge coordinate.
+ BLOBNBOX_CLIST boxes_;
+ // The partitions above that matched this.
+ ColPartition_CLIST upper_partners_;
+ // The partitions below that matched this.
+ ColPartition_CLIST lower_partners_;
+ // The WorkingPartSet it lives in while blocks are being made.
+ WorkingPartSet* working_set_ = nullptr;
+ // Column_set_ is the column layout applicable to this ColPartition.
+ ColPartitionSet* column_set_ = nullptr;
+ // Flag is true when AddBox is sorting vertically, false otherwise.
+ bool last_add_was_vertical_ = false;
+ // True when the partition's ownership has been taken from the grid and
+ // placed in a working set, or, after that, in the good_parts_ list.
+ bool block_owned_ = false;
+ // Flag to indicate that this partition was subjected to a desperate merge,
+ // and therefore the textlines need rebuilding.
+ bool desperately_merged_ = false;
+ bool owns_blobs_ = true; // Does the partition own its blobs?
+ // The first and last column that this partition applies to.
+ // Flowing partitions (see type_) will have an equal first and last value
+ // of the form 2n + 1, where n is the zero-based index into the partitions
+ // in column_set_. (See ColPartitionSet::GetColumnByIndex).
+ // Heading partitions will have unequal values of the same form.
+ // Pullout partitions will have equal values, but may have even values,
+ // indicating placement between columns.
+ int first_column_ = -1;
+ int last_column_ = -1;
+ // Linespacing data.
+ int side_step_ = 0; // Median y-shift to next blob on same line.
+ int top_spacing_ = 0; // Line spacing from median_top_.
+ int bottom_spacing_ = 0; // Line spacing from median_bottom_.
+
+ // Nearest neighbor above with major x-overlap
+ ColPartition* nearest_neighbor_above_ = nullptr;
+ // Nearest neighbor below with major x-overlap
+ ColPartition* nearest_neighbor_below_ = nullptr;
+ int space_above_ = 0; // Distance from nearest_neighbor_above
+ int space_below_ = 0; // Distance from nearest_neighbor_below
+ int space_to_left_ = 0; // Distance from the left edge of the column
+ int space_to_right_ = 0; // Distance from the right edge of the column
+ // Color foreground/background data. Value-initialized to zero, like the
+ // other members' default initializers, so that a partition never exposes
+ // indeterminate color bytes.
+ uint8_t color1_[kRGBRMSColors] = {};
+ uint8_t color2_[kRGBRMSColors] = {};
+ // The density of special blobs; zero until set.
+ float special_blobs_densities_[BSTT_COUNT] = {};
+ // Type of this partition before considering it as a table cell. This is
+ // used to revert the type if a partition is first marked as a table cell but
+ // later filtering steps decide it does not belong to a table
+ PolyBlockType type_before_table_ = PT_UNKNOWN;
+ // Check whether the current partition has been assigned to a table column.
+ bool inside_table_column_ = false;
+};
+
+// Typedef it now in case it becomes a class later.
+using ColPartitionGridSearch = GridSearch<ColPartition,
+ ColPartition_CLIST,
+ ColPartition_C_IT> ;
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_COLPARTITION_H_
diff --git a/tesseract/src/textord/colpartitiongrid.cpp b/tesseract/src/textord/colpartitiongrid.cpp
new file mode 100644
index 00000000..fcf9b000
--- /dev/null
+++ b/tesseract/src/textord/colpartitiongrid.cpp
@@ -0,0 +1,1743 @@
+///////////////////////////////////////////////////////////////////////
+// File: colpartitiongrid.cpp
+// Description: Class collecting code that acts on a BBGrid of ColPartitions.
+// Author: Ray Smith
+//
+// (C) Copyright 2009, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "colpartitiongrid.h"
+#include "colpartitionset.h"
+#include "imagefind.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+// Max pad factor used to search the neighbourhood of a partition to smooth
+// partition types.
+const int kMaxPadFactor = 6;
+// Max multiple of size (min(height, width)) for the distance of the nearest
+// neighbour for the change of type to be used.
+const int kMaxNeighbourDistFactor = 4;
+// Maximum number of lines in a credible figure caption.
+const int kMaxCaptionLines = 7;
+// Min ratio between biggest and smallest gap to bound a caption.
+const double kMinCaptionGapRatio = 2.0;
+// Min ratio between biggest gap and mean line height to bound a caption.
+const double kMinCaptionGapHeightRatio = 0.5;
+// Min fraction of ColPartition height to be overlapping for margin purposes.
+const double kMarginOverlapFraction = 0.25;
+// Size ratio required to consider an unmerged overlapping partition to be big.
+const double kBigPartSizeRatio = 1.75;
+// Fraction of gridsize to allow arbitrary overlap between partitions.
+const double kTinyEnoughTextlineOverlapFraction = 0.25;
+// Max vertical distance of neighbouring ColPartition as a multiple of
+// partition height for it to be a partner.
+// TODO(rays) fix the problem that causes a larger number to not work well.
+// The value is subject to two conflicting requirements:
+// - It needs to be larger, as sparse text blocks in a page that gets
+//   marked as single column will not find adjacent lines as partners, and
+//   will merge horizontally distant, but aligned lines. See rep.4B3 p5.
+// - It needs to be small, because double-spaced legal docs written
+//   in a single column, but justified courier have widely spaced lines
+//   that need to get merged before they partner-up with the lines above
+//   and below. See legal.3B5 p13/17.
+// Neither of these should depend on the value of kMaxPartitionSpacing to be
+// successful, and ColPartition merging needs attention to fix this problem.
+const double kMaxPartitionSpacing = 1.75;
+// Margin by which text has to beat image or vice-versa to make a firm
+// decision in GridSmoothNeighbour.
+const int kSmoothDecisionMargin = 4;
+
+// Constructs the grid by forwarding gridsize, bleft and tright unchanged to
+// the BBGrid base-class constructor; the constructor body does no other work.
+ColPartitionGrid::ColPartitionGrid(int gridsize,
+ const ICOORD& bleft, const ICOORD& tright)
+ : BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>(gridsize,
+ bleft, tright) {
+}
+
+// Handles a click event in a display window. After the base-class handling,
+// runs a radial search from (x, y) and, for every partition returned whose
+// bounding box contains the click point, prints the box and the partition's
+// debug information.
+void ColPartitionGrid::HandleClick(int x, int y) {
+  BBGrid<ColPartition,
+         ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x, y);
+  ColPartitionGridSearch search(this);
+  search.SetUniqueMode(true);
+  search.StartRadSearch(x, y, 1);
+  FCOORD click_pt(x, y);
+  for (ColPartition* hit = search.NextRadSearch(); hit != nullptr;
+       hit = search.NextRadSearch()) {
+    // Only report partitions that actually lie under the click.
+    if (!hit->bounding_box().contains(click_pt))
+      continue;
+    tprintf("Block box:");
+    hit->bounding_box().print();
+    hit->Print();
+  }
+}
+
+// Merges ColPartitions in the grid that look like they belong in the same
+// textline.
+// Every partition returned by a full search of the grid is handed to
+// MergePart together with box_cb (computes the search box) and confirm_cb
+// (checks any further rules on a candidate); see MergePart for the details.
+// Both callbacks are std::function objects taken by value and are passed on
+// to MergePart for each partition visited.
+void ColPartitionGrid::Merges(
+    std::function<bool(ColPartition*, TBOX*)> box_cb,
+    std::function<bool(const ColPartition*, const ColPartition*)> confirm_cb) {
+  ColPartitionGridSearch full_search(this);
+  full_search.StartFullSearch();
+  for (ColPartition* current = full_search.NextFullSearch();
+       current != nullptr; current = full_search.NextFullSearch()) {
+    const bool merged = MergePart(box_cb, confirm_cb, current);
+    if (merged) {
+      // A merge removed and re-inserted grid entries, so resynchronize the
+      // search before moving on.
+      full_search.RepositionIterator();
+    }
+  }
+}
+
+// For the given partition, calls box_cb to compute the search box, searches
+// the box, and if a best candidate is found whose merge does not increase
+// overlap with other partitions, merges it into part. confirm_cb is passed
+// to BestMergeCandidate to check any more rules on the candidates.
+// The process repeats for as long as a merge succeeds.
+// Returns false immediately if part is of an unmergeable type; otherwise
+// returns true if part absorbed at least one other partition.
+bool ColPartitionGrid::MergePart(
+    std::function<bool(ColPartition*, TBOX*)> box_cb,
+    std::function<bool(const ColPartition*, const ColPartition*)> confirm_cb,
+    ColPartition* part) {
+  if (part->IsUnMergeableType())
+    return false;
+  bool any_done = false;
+  // Repeatedly merge part while we find a best merge candidate that works.
+  bool merge_done = false;
+  do {
+    merge_done = false;
+    TBOX box = part->bounding_box();
+    bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
+    if (debug) {
+      tprintf("Merge candidate:");
+      box.print();
+    }
+    // Set up a rectangle search bounded by the part. If box_cb declines,
+    // there is nothing more to do for this part. (This used to be written
+    // as `continue`, which in a do-while jumps to the condition and, with
+    // merge_done just reset to false, leaves the loop; `break` says so.)
+    if (!box_cb(part, &box))
+      break;
+    // Create a list of merge candidates.
+    ColPartition_CLIST merge_candidates;
+    FindMergeCandidates(part, box, debug, &merge_candidates);
+    // Find the best merge candidate based on minimal overlap increase.
+    // Initialized so that it is never read with an indeterminate value.
+    int overlap_increase = 0;
+    ColPartition* neighbour = BestMergeCandidate(part, &merge_candidates, debug,
+                                                 confirm_cb,
+                                                 &overlap_increase);
+    if (neighbour != nullptr && overlap_increase <= 0) {
+      if (debug) {
+        tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
+                part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour),
+                overlap_increase);
+      }
+      // Looks like a good candidate so merge it.
+      RemoveBBox(neighbour);
+      // We will modify the box of part, so remove it from the grid, merge
+      // it and then re-insert it into the grid.
+      RemoveBBox(part);
+      part->Absorb(neighbour, nullptr);
+      InsertBBox(true, true, part);
+      merge_done = true;
+      any_done = true;
+    } else if (neighbour != nullptr) {
+      if (debug) {
+        tprintf("Overlapped when merged with increase %d: ", overlap_increase);
+        neighbour->bounding_box().print();
+      }
+    } else if (debug) {
+      tprintf("No candidate neighbour returned\n");
+    }
+  } while (merge_done);
+  return any_done;
+}
+
+// Returns true if the given part and merge candidate might believably
+// be part of a single text line according to the default rules.
+// A candidate is rejected if it is part itself, if the types do not match,
+// or if the candidate is of an unmergeable type. When either partition is of
+// a vertical type only a horizontal distance filter is applied. Otherwise
+// the candidate must pass a coarse vertical distance filter and then either
+// overlap part significantly in median y, or one of the two must be an
+// acceptable diacritic for the other (to allow for diacritics and stray
+// punctuation).
+static bool OKMergeCandidate(const ColPartition* part,
+                             const ColPartition* candidate,
+                             bool debug) {
+  if (part == candidate)
+    return false;  // Ignore itself.
+  const bool types_ok =
+      part->TypesMatch(*candidate) && !candidate->IsUnMergeableType();
+  if (!types_ok)
+    return false;  // Don't mix inappropriate types.
+
+  const TBOX& part_box = part->bounding_box();
+  const TBOX& cand_box = candidate->bounding_box();
+  if (debug) {
+    tprintf("Examining merge candidate:");
+    cand_box.print();
+  }
+
+  // Candidates must be within a reasonable distance.
+  const bool vertical = candidate->IsVerticalType() || part->IsVerticalType();
+  if (vertical) {
+    const int h_gap = -part->HCoreOverlap(*candidate);
+    if (h_gap >= std::max(part_box.width(), cand_box.width()) / 2) {
+      if (debug)
+        tprintf("Too far away: h_dist = %d\n", h_gap);
+      return false;
+    }
+    return true;
+  }
+
+  // Coarse filter by vertical distance between partitions.
+  const int v_gap = -part->VCoreOverlap(*candidate);
+  if (v_gap >= std::max(part_box.height(), cand_box.height()) / 2) {
+    if (debug)
+      tprintf("Too far away: v_dist = %d\n", v_gap);
+    return false;
+  }
+  // Candidates must either overlap in median y,
+  // or part or candidate must be an acceptable diacritic.
+  const bool same_line = part->VSignificantCoreOverlap(*candidate) ||
+                         part->OKDiacriticMerge(*candidate, debug) ||
+                         candidate->OKDiacriticMerge(*part, debug);
+  if (!same_line) {
+    if (debug)
+      tprintf("Candidate fails overlap and diacritic tests!\n");
+    return false;
+  }
+  return true;
+}
+
+// Helper function to compute the increase in overlap of the parts list of
+// ColPartitions with the combination of merge1 and merge2, compared to
+// the overlap with them uncombined.
+// An overlap is not counted if it passes the OKMergeOverlap test with
+// ok_overlap as the pixel overlap limit. merge1 and merge2 must both be
+// non-nullptr.
+// For each counted partition the contribution is computed by
+// inclusion-exclusion on box areas: area with the merged box, minus the
+// area with each of merge1 and merge2, plus the area common to all three.
+static int IncreaseInOverlap(const ColPartition* merge1,
+                             const ColPartition* merge2,
+                             int ok_overlap,
+                             ColPartition_CLIST* parts) {
+  ASSERT_HOST(merge1 != nullptr && merge2 != nullptr);
+  const TBOX& box1 = merge1->bounding_box();
+  const TBOX& box2 = merge2->bounding_box();
+  TBOX union_box(box1);
+  union_box += box2;
+
+  int increase = 0;
+  ColPartition_C_IT part_it(parts);
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    ColPartition* other = part_it.data();
+    if (other == merge1 || other == merge2)
+      continue;
+    TBOX other_box = other->bounding_box();
+    // Overlap of the merged box with this partition.
+    const int merged_area = other_box.intersection(union_box).area();
+    if (merged_area <= 0 ||
+        other->OKMergeOverlap(*merge1, *merge2, ok_overlap, false)) {
+      continue;
+    }
+    increase += merged_area;
+    // Subtract the overlap of merge1 and merge2 individually.
+    const int area1 = other_box.intersection(box1).area();
+    if (area1 > 0)
+      increase -= area1;
+    TBOX shared_box = other_box.intersection(box2);
+    const int area2 = shared_box.area();
+    if (area2 <= 0)
+      continue;
+    increase -= area2;
+    // Add back the 3-way area.
+    shared_box &= box1;  // In-place intersection.
+    const int area_all = shared_box.area();
+    if (area_all > 0)
+      increase += area_all;
+  }
+  return increase;
+}
+
+// Helper function to test that each partition in candidates is either a
+// good diacritic merge with part or an OK merge candidate with all others
+// in the candidates list.
+// ASCII Art Scenario:
+// We sometimes get text such as "join-this" where the - is actually a long
+// dash culled from a standard set of extra characters that don't match the
+// font of the text. This makes its strokewidth not match and forms a broken
+// set of 3 partitions for "join", "-" and "this" and the dash may slightly
+// overlap BOTH words.
+// ------- -------
+// | ==== |
+// ------- -------
+// The standard merge rule: "you can merge 2 partitions as long as there is
+// no increase in overlap elsewhere" fails miserably here. Merge any pair
+// of partitions and the combined box overlaps more with the third than
+// before. To allow the merge, we need to consider whether it is safe to
+// merge everything, without merging separate text lines. For that we need
+// everything to be an OKMergeCandidate (which is supposed to prevent
+// separate text lines merging), but this is hard for diacritics to satisfy,
+// so an alternative to being OKMergeCandidate with everything is to be an
+// OKDiacriticMerge with part as the base character.
+ static bool TestCompatibleCandidates(const ColPartition& part, bool debug,
+                                      ColPartition_CLIST* candidates) {
+   ColPartition_C_IT outer_it(candidates);
+   for (outer_it.mark_cycle_pt(); !outer_it.cycled_list();
+        outer_it.forward()) {
+     ColPartition* candidate = outer_it.data();
+     // A candidate that is an OK diacritic merge with part is exempt from
+     // the pairwise test.
+     if (candidate->OKDiacriticMerge(part, false))
+       continue;
+     // Otherwise it has to be an OK merge candidate with every other entry.
+     // The copied iterator starts at candidate and cycles the whole list.
+     ColPartition_C_IT inner_it(outer_it);
+     for (inner_it.mark_cycle_pt(); !inner_it.cycled_list();
+          inner_it.forward()) {
+       ColPartition* other = inner_it.data();
+       if (other == candidate || OKMergeCandidate(candidate, other, false))
+         continue;
+       if (debug) {
+         tprintf("NC overlap failed:Candidate:");
+         other->bounding_box().print();
+         tprintf("fails to be a good merge with:");
+         candidate->bounding_box().print();
+       }
+       return false;
+     }
+   }
+   return true;
+ }
+
+// Computes and returns the total overlap of all partitions in the grid.
+// If overlap_grid is non-null, it is filled with a grid that holds empty
+// partitions representing the union of all overlapped partitions.
+ int ColPartitionGrid::ComputeTotalOverlap(ColPartitionGrid** overlap_grid) {
+   const bool want_grid = overlap_grid != nullptr;
+   int overlap_sum = 0;
+   // Visit every partition and accumulate its box overlap with each of the
+   // partitions returned by FindOverlappingPartitions for its bounding box.
+   ColPartitionGridSearch full_search(this);
+   full_search.StartFullSearch();
+   for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+        part = full_search.NextFullSearch()) {
+     const TBOX& part_box = part->bounding_box();
+     ColPartition_CLIST neighbors;
+     FindOverlappingPartitions(part_box, part, &neighbors);
+     // NOTE(review): this flag becomes true after the first neighbour is
+     // processed, whether or not that neighbour had a positive overlap.
+     bool seen_neighbor = false;
+     ColPartition_C_IT n_it(&neighbors);
+     for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
+       ColPartition* neighbor = n_it.data();
+       const int overlap =
+           neighbor->bounding_box().intersection(part_box).area();
+       if (want_grid && overlap > 0) {
+         // The output grid is created lazily, on the first real overlap.
+         if (*overlap_grid == nullptr) {
+           *overlap_grid = new ColPartitionGrid(gridsize(), bleft(), tright());
+         }
+         (*overlap_grid)->InsertBBox(true, true, neighbor->ShallowCopy());
+         if (!seen_neighbor) {
+           (*overlap_grid)->InsertBBox(true, true, part->ShallowCopy());
+         }
+       }
+       seen_neighbor = true;
+       overlap_sum += overlap;
+     }
+   }
+   return overlap_sum;
+ }
+
+// Finds all the ColPartitions in the grid that overlap with the given
+// box and returns them SortByBoxLeft(ed) and uniqued in the given list.
+// Any partition equal to not_this (may be nullptr) is excluded.
+ void ColPartitionGrid::FindOverlappingPartitions(const TBOX& box,
+                                                  const ColPartition* not_this,
+                                                  ColPartition_CLIST* parts) {
+   ColPartitionGridSearch rect_search(this);
+   rect_search.StartRectSearch(box);
+   for (ColPartition* found = rect_search.NextRectSearch(); found != nullptr;
+        found = rect_search.NextRectSearch()) {
+     if (found == not_this)
+       continue;  // The caller asked for this partition to be left out.
+     // Sorted insertion by box left; the 'true' flag requests uniqueness.
+     parts->add_sorted(SortByBoxLeft<ColPartition>, true, found);
+   }
+ }
+
+// Finds and returns the best candidate ColPartition to merge with part,
+// selected from the candidates list, based on the minimum increase in
+// pairwise overlap among all the partitions overlapped by the combined box.
+// If overlap_increase is not nullptr then it returns the increase in overlap
+// that would result from the merge.
+// confirm_cb is a permanent callback that (if non-null) will be used to
+// confirm the validity of a proposed merge candidate before selecting it.
+//
+// ======HOW MERGING WORKS======
+// The problem:
+// We want to merge all the parts of a textline together, but avoid merging
+// separate textlines. Diacritics, i dots, punctuation, and broken characters
+// are examples of small bits that need merging with the main textline.
+// Drop-caps and descenders in one line that touch ascenders in the one below
+// are examples of cases where we don't want to merge.
+//
+// The solution:
+// Merges that increase overlap among other partitions are generally bad.
+// Those that don't increase overlap (much) and minimize the total area
+// seem to be good.
+//
+// Ascii art example:
+// The text:
+// groggy descenders
+// minimum ascenders
+// The boxes: The === represents a small box near or overlapping the lower box.
+// -----------------
+// | |
+// -----------------
+// -===-------------
+// | |
+// -----------------
+// In considering what to do with the small === box, we find the 2 larger
+// boxes as neighbours and possible merge candidates, but merging with the
+// upper box increases overlap with the lower box, whereas merging with the
+// lower box does not increase overlap.
+// If the small === box didn't overlap either to start with, total area
+// would be minimized by merging with the nearer (lower) box.
+//
+// This is a simple example. In reality, we have to allow some increase
+// in overlap, or tightly spaced text would end up in bits.
+ ColPartition* ColPartitionGrid::BestMergeCandidate(
+     const ColPartition* part, ColPartition_CLIST* candidates, bool debug,
+     std::function<bool(const ColPartition*, const ColPartition*)> confirm_cb,
+     int* overlap_increase) {
+   if (overlap_increase != nullptr)
+     *overlap_increase = 0;
+   if (candidates->empty())
+     return nullptr;
+   // Pixel overlap limit handed to OKMergeOverlap via IncreaseInOverlap.
+   const int ok_overlap =
+       static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
+   const TBOX& part_box = part->bounding_box();
+   // Extra area that merging part into a candidate box adds to that box.
+   auto added_area = [&part_box](const TBOX& cand_box) {
+     return cand_box.bounding_union(part_box).area() - cand_box.area();
+   };
+   // Combined box of part and every candidate.
+   ColPartition_C_IT cand_it(candidates);
+   TBOX full_box(part_box);
+   for (cand_it.mark_cycle_pt(); !cand_it.cycled_list(); cand_it.forward()) {
+     full_box += cand_it.data()->bounding_box();
+   }
+   // Anything the merged box might overlap is found by a rect search of the
+   // combined box (part itself is excluded).
+   ColPartition_CLIST neighbours;
+   FindOverlappingPartitions(full_box, part, &neighbours);
+   if (debug) {
+     tprintf("Finding best merge candidate from %d, %d neighbours for box:",
+             candidates->length(), neighbours.length());
+     part_box.print();
+   }
+   // Neighbours that are not candidates. The worst increase in overlap
+   // against these is tracked in case all candidates can merge together.
+   ColPartition_CLIST non_candidate_neighbours;
+   non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true,
+                                         &neighbours, candidates);
+   ColPartition* best_candidate = nullptr;
+   int best_increase = INT32_MAX;
+   int best_area = 0;
+   int worst_nc_increase = 0;
+   for (cand_it.mark_cycle_pt(); !cand_it.cycled_list(); cand_it.forward()) {
+     ColPartition* candidate = cand_it.data();
+     if (confirm_cb != nullptr && !confirm_cb(part, candidate)) {
+       if (debug) {
+         tprintf("Candidate not confirmed:");
+         candidate->bounding_box().print();
+       }
+       continue;
+     }
+     const TBOX& cand_box = candidate->bounding_box();
+     const int increase =
+         IncreaseInOverlap(part, candidate, ok_overlap, &neighbours);
+     if (best_candidate == nullptr || increase < best_increase) {
+       // Strictly lower overlap increase (or the first confirmed candidate).
+       best_candidate = candidate;
+       best_increase = increase;
+       best_area = added_area(cand_box);
+       if (debug) {
+         tprintf("New best merge candidate has increase %d, area %d, over box:",
+                 increase, best_area);
+         full_box.print();
+         candidate->Print();
+       }
+     } else if (increase == best_increase) {
+       // Ties on overlap increase go to the smaller added area.
+       const int area = added_area(cand_box);
+       if (area < best_area) {
+         best_area = area;
+         best_candidate = candidate;
+       }
+     }
+     const int nc_increase = IncreaseInOverlap(part, candidate, ok_overlap,
+                                               &non_candidate_neighbours);
+     if (nc_increase > worst_nc_increase)
+       worst_nc_increase = nc_increase;
+   }
+   // When the best increase is positive but the worst increase against the
+   // non-candidates is smaller, all the candidates could merge together with
+   // less outside overlap, so report that figure instead - provided every
+   // candidate is either a good diacritic merge with part or an OK merge
+   // candidate with all the others (see TestCompatibleCandidates).
+   if (best_increase > 0 && worst_nc_increase < best_increase &&
+       TestCompatibleCandidates(*part, debug, candidates)) {
+     best_increase = worst_nc_increase;
+   }
+   if (overlap_increase != nullptr)
+     *overlap_increase = best_increase;
+   return best_candidate;
+ }
+
+ // Helper to remove the given box from the given partition (part), put it in
+ // its own partition via ColPartition::MakeBigPartition, and add to part_list.
+ static void RemoveBadBox(BLOBNBOX* box, ColPartition* part,
+ ColPartition_LIST* part_list) {
+ part->RemoveBox(box);  // Detach the blob from its current partition first.
+ ColPartition::MakeBigPartition(box, part_list);  // Receives box and part_list.
+ }
+
+
+ // Splits partitions wherever doing so reduces the overlap between their
+ // bounding boxes, since ColPartitions are supposed to partition both the
+ // blobs AND the space on the page. A much bigger blob whose removal clears an
+ // overlap is moved into its own partition on big_parts; otherwise one
+ // partition of the pair is split at the blob chosen by OverlapSplitBlob.
+ // Such blobs are most likely touching characters on 2 textlines, or a dropcap.
+ void ColPartitionGrid::SplitOverlappingPartitions(
+ ColPartition_LIST* big_parts) {
+ int ok_overlap =  // Overlap limit passed to OKMergeOverlap, rounded to int.
+ static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
+ // Iterate the ColPartitions in the grid (outer, full-grid search).
+ ColPartitionGridSearch gsearch(this);
+ gsearch.StartFullSearch();
+ ColPartition* part;
+ while ((part = gsearch.NextFullSearch()) != nullptr) {
+ // Set up a rectangle search bounded by the part (inner search, unique mode).
+ const TBOX& box = part->bounding_box();
+ ColPartitionGridSearch rsearch(this);
+ rsearch.SetUniqueMode(true);
+ rsearch.StartRectSearch(box);
+ int unresolved_overlaps = 0;  // Neighbours contained in a singleton part.
+
+ ColPartition* neighbour;
+ while ((neighbour = rsearch.NextRectSearch()) != nullptr) {
+ if (neighbour == part)
+ continue;  // The rect search also returns part itself.
+ const TBOX& neighbour_box = neighbour->bounding_box();
+ if (neighbour->OKMergeOverlap(*part, *part, ok_overlap, false) &&
+ part->OKMergeOverlap(*neighbour, *neighbour, ok_overlap, false))
+ continue; // The overlap is OK both ways.
+
+ // If removal of the biggest box from either partition eliminates the
+ // overlap, and it is much bigger than the box left behind, then
+ // it is either a drop-cap, an inter-line join, or some junk that
+ // we don't want anyway, so put it in the big_parts list.
+ if (!part->IsSingleton()) {
+ BLOBNBOX* excluded = part->BiggestBox();
+ TBOX shrunken = part->BoundsWithoutBox(excluded);  // part minus excluded.
+ if (!shrunken.overlap(neighbour_box) &&
+ excluded->bounding_box().height() >
+ kBigPartSizeRatio * shrunken.height()) {
+ // Removing the biggest box fixes the overlap, so do it!
+ gsearch.RemoveBBox();  // Take part out of the grid before it changes.
+ RemoveBadBox(excluded, part, big_parts);
+ InsertBBox(true, true, part);  // Reinsert part after the box removal.
+ gsearch.RepositionIterator();
+ break;  // Stop examining neighbours of this part.
+ }
+ } else if (box.contains(neighbour_box)) {
+ ++unresolved_overlaps;
+ continue; // No amount of splitting will fix it.
+ }
+ if (!neighbour->IsSingleton()) {
+ BLOBNBOX* excluded = neighbour->BiggestBox();
+ TBOX shrunken = neighbour->BoundsWithoutBox(excluded);
+ if (!shrunken.overlap(box) &&
+ excluded->bounding_box().height() >
+ kBigPartSizeRatio * shrunken.height()) {
+ // Removing the biggest box fixes the overlap, so do it!
+ rsearch.RemoveBBox();  // Here the neighbour is the one taken out.
+ RemoveBadBox(excluded, neighbour, big_parts);
+ InsertBBox(true, true, neighbour);
+ gsearch.RepositionIterator();
+ break;
+ }
+ }
+ int part_overlap_count = part->CountOverlappingBoxes(neighbour_box);
+ int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box);
+ ColPartition* right_part = nullptr;  // Set only if a split happens.
+ if (neighbour_overlap_count <= part_overlap_count ||
+ part->IsSingleton()) {
+ // Try to split the neighbour to reduce overlap.
+ BLOBNBOX* split_blob = neighbour->OverlapSplitBlob(box);
+ if (split_blob != nullptr) {
+ rsearch.RemoveBBox();
+ right_part = neighbour->SplitAtBlob(split_blob);
+ InsertBBox(true, true, neighbour);
+ ASSERT_HOST(right_part != nullptr);
+ }
+ } else {
+ // Try to split part to reduce overlap.
+ BLOBNBOX* split_blob = part->OverlapSplitBlob(neighbour_box);
+ if (split_blob != nullptr) {
+ gsearch.RemoveBBox();
+ right_part = part->SplitAtBlob(split_blob);
+ InsertBBox(true, true, part);
+ ASSERT_HOST(right_part != nullptr);
+ }
+ }
+ if (right_part != nullptr) {
+ InsertBBox(true, true, right_part);  // The new piece joins the grid too.
+ gsearch.RepositionIterator();
+ rsearch.RepositionIterator();
+ break;
+ }
+ }
+ if (unresolved_overlaps > 2 && part->IsSingleton()) {
+ // This part is no good so just add to big_parts (after leaving the grid).
+ RemoveBBox(part);
+ ColPartition_IT big_it(big_parts);
+ part->set_block_owned(true);
+ big_it.add_to_end(part);
+ gsearch.RepositionIterator();
+ }
+ }
+ }
+
+// Filters partitions of source_type by looking at local neighbours.
+// Where a majority of neighbours have a text type, the partitions are
+// changed to text, where the neighbours have image type, they are changed
+// to image, and partitions that have no definite neighbourhood type are
+// left unchanged.
+ // im_box and rotation are used to map blob coordinates onto the
+ // nontext_map, which is used to prevent the spread of text neighbourhoods
+ // into images.
+// Returns true if anything was changed.
+ bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type,
+                                             Pix* nontext_map,
+                                             const TBOX& im_box,
+                                             const FCOORD& rotation) {
+   bool any_changed = false;
+   ColPartitionGridSearch full_search(this);
+   full_search.StartFullSearch();
+   for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+        part = full_search.NextFullSearch()) {
+     // Only partitions of the requested flow that are not line types are
+     // candidates for smoothing.
+     if (part->flow() != source_type ||
+         BLOBNBOX::IsLineType(part->blob_type()))
+       continue;
+     const TBOX& box = part->bounding_box();
+     const bool debug =
+         AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
+     // SmoothRegionType is always called; its result is folded into the flag.
+     any_changed =
+         SmoothRegionType(nontext_map, im_box, rotation, debug, part) ||
+         any_changed;
+   }
+   return any_changed;
+ }
+
+// Reflects the grid and its colpartitions in the y-axis, assuming that
+// all blob boxes have already been done.
+ void ColPartitionGrid::ReflectInYAxis() {
+   ColPartition_LIST holding_list;
+   ColPartition_IT hold_it(&holding_list);
+   // Collect every partition in the grid onto the temporary list.
+   ColPartitionGridSearch full_search(this);
+   full_search.StartFullSearch();
+   for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+        part = full_search.NextFullSearch()) {
+     hold_it.add_after_then_move(part);
+   }
+   // Negating and swapping the x limits mirrors the grid bounds; y is kept.
+   const ICOORD reflected_bleft(-tright().x(), bleft().y());
+   const ICOORD reflected_tright(-bleft().x(), tright().y());
+   // Reinitializing the grid also clears all its pointers, so for a moment
+   // the temporary list is the owner of the ColPartitions.
+   Init(gridsize(), reflected_bleft, reflected_tright);
+   for (hold_it.move_to_first(); !hold_it.empty(); hold_it.forward()) {
+     ColPartition* part = hold_it.extract();
+     part->ReflectInYAxis();
+     InsertBBox(true, true, part);
+   }
+ }
+
+// Transforms the grid of partitions to the output blocks, putting each
+// partition into a separate block. We don't really care about the order,
+// as we just want to get as much text as possible without trying to organize
+// it into proper blocks or columns.
+// TODO(rays) some kind of sort function would be useful and probably better
+// than the default here, which is to sort by order of the grid search.
+ void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST* blocks,
+                                                  TO_BLOCK_LIST* to_blocks) {
+   TO_BLOCK_IT to_block_it(to_blocks);
+   BLOCK_IT block_it(blocks);
+   // Every partition goes onto this list, which deletes them on return.
+   ColPartition_LIST parts;
+   ColPartition_IT part_it(&parts);
+   ColPartitionGridSearch gsearch(this);
+   gsearch.StartFullSearch();
+   for (ColPartition* part = gsearch.NextFullSearch(); part != nullptr;
+        part = gsearch.NextFullSearch()) {
+     part_it.add_after_then_move(part);
+     // Keep only partitions that are at least vaguely like text: a text blob
+     // type, or an unknown type with more than one box.
+     const BlobRegionType blob_type = part->blob_type();
+     const bool text_like =
+         BLOBNBOX::IsTextType(blob_type) ||
+         (blob_type == BRT_UNKNOWN && part->boxes_count() > 1);
+     if (!text_like) {
+       // This partition is dead.
+       part->DeleteBoxes();
+       continue;
+     }
+     const bool vertical = blob_type == BRT_VERT_TEXT;
+     const PolyBlockType type = vertical ? PT_VERTICAL_TEXT : PT_FLOWING_TEXT;
+     // Capture the metrics needed for the block before making the row.
+     TBOX box = part->bounding_box();
+     const int median_width = part->median_width();
+     const int median_height = part->median_height();
+     // Turn the partition into a TO_ROW.
+     TO_ROW* row = part->MakeToRow();
+     if (row == nullptr) {
+       // This partition is dead.
+       part->DeleteBoxes();
+       continue;
+     }
+     auto* block = new BLOCK("", true, 0, 0, box.left(), box.bottom(),
+                             box.right(), box.top());
+     block->pdblk.set_poly_block(new POLY_BLOCK(box, type));
+     auto* to_block = new TO_BLOCK(block);
+     TO_ROW_IT row_it(to_block->get_rows());
+     row_it.add_after_then_move(row);
+     // Vertical and horizontal text have not been differentially rotated at
+     // this point, so pick width-based or height-based metrics to suit.
+     const int line_size = vertical ? median_width : median_height;
+     const int extent = vertical ? box.width() : box.height();
+     to_block->line_size = static_cast<float>(line_size);
+     to_block->line_spacing = static_cast<float>(extent);
+     to_block->max_blob_size = static_cast<float>(extent + 1);
+     if (to_block->line_size == 0)
+       to_block->line_size = 1;
+     block_it.add_to_end(block);
+     to_block_it.add_to_end(to_block);
+   }
+   Clear();
+   // Now it is safe to delete the ColPartitions as parts goes out of scope.
+ }
+
+// Rotates the grid and its colpartitions by the given angle, assuming that
+// all blob boxes have already been done.
+ void ColPartitionGrid::Deskew(const FCOORD& deskew) {
+   ColPartition_LIST holding_list;
+   ColPartition_IT hold_it(&holding_list);
+   // Collect every partition in the grid onto the temporary list.
+   ColPartitionGridSearch full_search(this);
+   full_search.StartFullSearch();
+   for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+        part = full_search.NextFullSearch()) {
+     hold_it.add_after_then_move(part);
+   }
+   // The new grid covers the rotated bounding box of the old grid.
+   TBOX rotated_bounds(bleft_, tright_);
+   rotated_bounds.rotate_large(deskew);
+   // Reinitializing the grid also clears all its pointers, so for a moment
+   // the temporary list is the owner of the ColPartitions.
+   Init(gridsize(), rotated_bounds.botleft(), rotated_bounds.topright());
+   for (hold_it.move_to_first(); !hold_it.empty(); hold_it.forward()) {
+     ColPartition* part = hold_it.extract();
+     part->ComputeLimits();
+     InsertBBox(true, true, part);
+   }
+ }
+
+// Sets the left and right tabs of the partitions in the grid.
+ void ColPartitionGrid::SetTabStops(TabFind* tabgrid) {
+   ColPartitionGridSearch full_search(this);
+   full_search.StartFullSearch();
+   for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+        part = full_search.NextFullSearch()) {
+     const TBOX& part_box = part->bounding_box();
+     // Left side: if the first lookup returns a vector that is not a left
+     // tab, repeat the lookup with the second argument false.
+     TabVector* left_vec = tabgrid->LeftTabForBox(part_box, true, false);
+     if (left_vec != nullptr && !left_vec->IsLeftTab()) {
+       left_vec = tabgrid->LeftTabForBox(part_box, false, false);
+     }
+     if (left_vec != nullptr && left_vec->IsLeftTab()) {
+       part->SetLeftTab(left_vec);
+     }
+     // Right side: the same two-step lookup, mirrored.
+     TabVector* right_vec = tabgrid->RightTabForBox(part_box, true, false);
+     if (right_vec != nullptr && !right_vec->IsRightTab()) {
+       right_vec = tabgrid->RightTabForBox(part_box, false, false);
+     }
+     if (right_vec != nullptr && right_vec->IsRightTab()) {
+       part->SetRightTab(right_vec);
+     }
+     part->SetColumnGoodness(tabgrid->WidthCB());
+   }
+ }
+
+// Makes the ColPartSets and puts them in the PartSetVector ready
+// for finding column bounds. Returns false if no partitions were found.
+ bool ColPartitionGrid::MakeColPartSets(PartSetVector* part_sets) {
+   const int num_rows = gridheight();
+   // One list of partitions per grid row, indexed by grid y coordinate.
+   auto* row_lists = new ColPartition_LIST[num_rows];
+   part_sets->reserve(num_rows);
+   bool any_parts_found = false;
+   ColPartitionGridSearch full_search(this);
+   full_search.StartFullSearch();
+   for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+        part = full_search.NextFullSearch()) {
+     const BlobRegionType blob_type = part->blob_type();
+     // Noise, and unknown-type partitions holding a single box, are ignored.
+     if (blob_type == BRT_NOISE ||
+         (blob_type == BRT_UNKNOWN && part->boxes()->singleton()))
+       continue;
+     // File the partition under the grid row of its bottom-left corner.
+     const TBOX& part_box = part->bounding_box();
+     int grid_x, grid_y;
+     GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
+     ColPartition_IT row_it(&row_lists[grid_y]);
+     row_it.add_to_end(part);
+     any_parts_found = true;
+   }
+   if (any_parts_found) {
+     // One entry per grid row; rows without partitions get a nullptr.
+     for (int y = 0; y < num_rows; ++y) {
+       ColPartitionSet* line_set =
+           row_lists[y].empty() ? nullptr : new ColPartitionSet(&row_lists[y]);
+       part_sets->push_back(line_set);
+     }
+   }
+   delete [] row_lists;
+   return any_parts_found;
+ }
+
+// Makes a single ColPartitionSet consisting of a single ColPartition that
+// represents the total horizontal extent of the significant content on the
+// page. Used for the single column setting in place of automatic detection.
+// Returns nullptr if the page is empty of significant content.
+ ColPartitionSet* ColPartitionGrid::MakeSingleColumnSet(WidthCallback cb) {
+   ColPartition* single_column_part = nullptr;
+   // Walk all partitions, widening one accumulated partition so that its
+   // tabs take in the leftmost and rightmost qualifying content.
+   ColPartitionGridSearch full_search(this);
+   full_search.StartFullSearch();
+   for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+        part = full_search.NextFullSearch()) {
+     const BlobRegionType blob_type = part->blob_type();
+     // Noise, and unknown-type partitions holding a single box, are ignored.
+     if (blob_type == BRT_NOISE ||
+         (blob_type == BRT_UNKNOWN && part->boxes()->singleton()))
+       continue;
+     // Consider for single column: text with one of the listed flows, or
+     // either kind of image.
+     const BlobTextFlowType flow = part->flow();
+     const bool column_text =
+         blob_type == BRT_TEXT &&
+         (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN ||
+          flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE);
+     const bool image =
+         blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE;
+     if (!column_text && !image)
+       continue;
+     if (single_column_part == nullptr) {
+       // The first qualifying partition seeds the result as a text copy.
+       single_column_part = part->ShallowCopy();
+       single_column_part->set_blob_type(BRT_TEXT);
+       // Copy the tabs from itself to properly setup the margins.
+       single_column_part->CopyLeftTab(*single_column_part, false);
+       single_column_part->CopyRightTab(*single_column_part, false);
+       continue;
+     }
+     // Later partitions can only push the tabs outwards.
+     if (part->left_key() < single_column_part->left_key())
+       single_column_part->CopyLeftTab(*part, false);
+     if (part->right_key() > single_column_part->right_key())
+       single_column_part->CopyRightTab(*part, false);
+   }
+   if (single_column_part == nullptr)
+     return nullptr;  // Nothing qualifying was found.
+   // Wrap the accumulated partition in a ColPartitionSet as the candidate
+   // for the single column case.
+   single_column_part->SetColumnGoodness(cb);
+   return new ColPartitionSet(single_column_part);
+ }
+
+// Mark the BLOBNBOXes in each partition as being owned by that partition.
+ void ColPartitionGrid::ClaimBoxes() {
+   ColPartitionGridSearch full_search(this);
+   full_search.StartFullSearch();
+   // Delegate to ColPartition::ClaimBoxes for every partition in the grid.
+   for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+        part = full_search.NextFullSearch()) {
+     part->ClaimBoxes();
+   }
+ }
+
+ // Retypes all the blobs referenced by the partitions in the grid, giving each
+ // its partition's type and flow. Image blobs are found and returned in the
+ // im_blobs list, as they are not owned by the block.
+ void ColPartitionGrid::ReTypeBlobs(BLOBNBOX_LIST* im_blobs) {
+ BLOBNBOX_IT im_blob_it(im_blobs);
+ ColPartition_LIST dead_parts;  // Collects partitions removed from the grid.
+ ColPartition_IT dead_part_it(&dead_parts);
+ // Iterate the ColPartitions in the grid.
+ ColPartitionGridSearch gsearch(this);
+ gsearch.StartFullSearch();
+ ColPartition* part;
+ while ((part = gsearch.NextFullSearch()) != nullptr) {
+ BlobRegionType blob_type = part->blob_type();
+ BlobTextFlowType flow = part->flow();
+ bool any_blobs_moved = false;  // True once a blob is extracted from part.
+ if (blob_type == BRT_POLYIMAGE || blob_type == BRT_RECTIMAGE) {
+ BLOBNBOX_C_IT blob_it(part->boxes());
+ for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+ BLOBNBOX* blob = blob_it.data();
+ im_blob_it.add_after_then_move(blob);  // Image blobs go to im_blobs.
+ }
+ } else if (blob_type != BRT_NOISE) {
+ // Make sure the blobs are marked with the correct type and flow.
+ BLOBNBOX_C_IT blob_it(part->boxes());
+ for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+ BLOBNBOX* blob = blob_it.data();
+ if (blob->region_type() == BRT_NOISE) {
+ // TODO(rays) Deprecated. Change this section to an assert to verify
+ // and then delete.
+ ASSERT_HOST(blob->cblob()->area() != 0);
+ blob->set_owner(nullptr);
+ blob_it.extract();  // Drop the noise blob from the partition's list.
+ any_blobs_moved = true;
+ } else {
+ blob->set_region_type(blob_type);
+ if (blob->flow() != BTFT_LEADER)  // Leader flow on a blob is preserved.
+ blob->set_flow(flow);
+ }
+ }
+ }
+ if (blob_type == BRT_NOISE || part->boxes()->empty()) {
+ BLOBNBOX_C_IT blob_it(part->boxes());
+ part->DisownBoxes();
+ dead_part_it.add_to_end(part);  // Noise or emptied partition leaves grid.
+ gsearch.RemoveBBox();
+ for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+ BLOBNBOX* blob = blob_it.data();
+ if (blob->cblob()->area() == 0) {
+ // Any blob with zero area is a fake image blob and should be deleted.
+ delete blob->cblob();
+ delete blob;
+ }
+ }
+ } else if (any_blobs_moved) {
+ gsearch.RemoveBBox();  // Contents changed: recompute limits and reinsert.
+ part->ComputeLimits();
+ InsertBBox(true, true, part);
+ gsearch.RepositionIterator();
+ }
+ }
+ }
+
+// The boxes within the partitions have changed (by deskew) so recompute
+// the bounds of all the partitions and reinsert them into the grid.
+ void ColPartitionGrid::RecomputeBounds(int gridsize,
+                                        const ICOORD& bleft,
+                                        const ICOORD& tright,
+                                        const ICOORD& vertical) {
+   ColPartition_LIST holding_list;
+   ColPartition_IT hold_it(&holding_list);
+   // Collect every partition in the grid onto the temporary list.
+   ColPartitionGridSearch full_search(this);
+   full_search.StartFullSearch();
+   for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+        part = full_search.NextFullSearch()) {
+     hold_it.add_to_end(part);
+   }
+   // Reinitialize the grid with the caller-supplied size and bounds.
+   Init(gridsize, bleft, tright);
+   // Give each partition the new vertical, recompute its limits, and put it
+   // back in the rebuilt grid.
+   for (hold_it.move_to_first(); !hold_it.empty(); hold_it.forward()) {
+     ColPartition* part = hold_it.extract();
+     part->set_vertical(vertical);
+     part->ComputeLimits();
+     InsertBBox(true, true, part);
+   }
+ }
+
+// Improves the margins of the ColPartitions in the grid by calling
+// FindPartitionMargins on each.
+// best_columns, which may be nullptr, is an array of pointers indicating the
+// column set at each y-coordinate in the grid.
+// best_columns is usually the best_columns_ member of ColumnFinder.
+ void ColPartitionGrid::GridFindMargins(ColPartitionSet** best_columns) {
+   ColPartitionGridSearch full_search(this);
+   full_search.StartFullSearch();
+   for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+        part = full_search.NextFullSearch()) {
+     // The column set, when available, is the one for the search's current
+     // grid row.
+     ColPartitionSet* columns = nullptr;
+     if (best_columns != nullptr) {
+       columns = best_columns[full_search.GridY()];
+     }
+     FindPartitionMargins(columns, part);
+     // Debug output for partitions inside the test region.
+     const TBOX& box = part->bounding_box();
+     if (!AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()))
+       continue;
+     tprintf("Computed margins for part:");
+     part->Print();
+   }
+ }
+
+// Improves the margins of the ColPartitions in the list by calling
+// FindPartitionMargins on each.
+// best_columns, which may be nullptr, is an array of pointers indicating the
+// column set at each y-coordinate in the grid.
+// best_columns is usually the best_columns_ member of ColumnFinder.
+ void ColPartitionGrid::ListFindMargins(ColPartitionSet** best_columns,
+                                        ColPartition_LIST* parts) {
+   const bool have_columns = best_columns != nullptr;
+   ColPartition_IT list_it(parts);
+   for (list_it.mark_cycle_pt(); !list_it.cycled_list(); list_it.forward()) {
+     ColPartition* part = list_it.data();
+     ColPartitionSet* columns = nullptr;
+     if (have_columns) {
+       // Look the column set up by the grid row of the bottom-left corner.
+       const TBOX& part_box = part->bounding_box();
+       int grid_x, grid_y;
+       GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y);
+       columns = best_columns[grid_y];
+     }
+     FindPartitionMargins(columns, part);
+   }
+ }
+
+// Deletes all the partitions in the grid after disowning all the blobs.
+ void ColPartitionGrid::DeleteParts() {
+   // Partitions moved onto this list are deleted when it goes out of scope.
+   ColPartition_LIST doomed_parts;
+   ColPartition_IT doomed_it(&doomed_parts);
+   ColPartitionGridSearch full_search(this);
+   full_search.StartFullSearch();
+   for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+        part = full_search.NextFullSearch()) {
+     part->DisownBoxes();
+     doomed_it.add_to_end(part);
+   }
+   Clear();
+ }
+
+// Deletes all the partitions in the grid that are of type BRT_UNKNOWN and
+// all the blobs in them.
+ void ColPartitionGrid::DeleteUnknownParts(TO_BLOCK* block) {
+   ColPartitionGridSearch full_search(this);
+   full_search.StartFullSearch();
+   for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+        part = full_search.NextFullSearch()) {
+     if (part->blob_type() != BRT_UNKNOWN)
+       continue;
+     full_search.RemoveBBox();
+     // Mark the partition and its blobs as non-text noise so that
+     // DeleteUnownedNoise sweeps the blobs up once they are disowned.
+     part->set_flow(BTFT_NONTEXT);
+     part->set_blob_type(BRT_NOISE);
+     part->SetBlobTypes();
+     part->DisownBoxes();
+     delete part;
+   }
+   block->DeleteUnownedNoise();
+ }
+
+// Deletes all the partitions in the grid that are NOT of flow type BTFT_LEADER.
+ void ColPartitionGrid::DeleteNonLeaderParts() {
+   ColPartitionGridSearch full_search(this);
+   full_search.StartFullSearch();
+   for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+        part = full_search.NextFullSearch()) {
+     if (part->flow() == BTFT_LEADER)
+       continue;  // Leader partitions stay in the grid untouched.
+     full_search.RemoveBBox();
+     if (!part->ReleaseNonLeaderBoxes()) {
+       // ReleaseNonLeaderBoxes returned false: the partition is discarded.
+       delete part;
+       continue;
+     }
+     // It returned true: the partition goes back into the grid.
+     InsertBBox(true, true, part);
+     full_search.RepositionIterator();
+   }
+ }
+
+// Finds text partitions that caption an image partition and marks them as PT_CAPTION_TEXT.
+void ColPartitionGrid::FindFigureCaptions() {
+ // For each image partition, pick the nearest suitable text partner (above or
+ // below), then walk its partner chain to decide how much of it is caption.
+ ColPartitionGridSearch gsearch(this);
+ gsearch.StartFullSearch();
+ ColPartition* part;
+ while ((part = gsearch.NextFullSearch()) != nullptr) {
+ if (part->IsImageType()) {
+ const TBOX& part_box = part->bounding_box();
+ bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(),
+ part_box.bottom());
+ ColPartition* best_caption = nullptr;
+ int best_dist = 0; // Distance to best_caption.
+ int best_upper = 0; // Direction of best_caption.
+ // Try both directions: upper == 0 uses lower_partners, 1 uses upper_partners.
+ for (int upper = 0; upper < 2; ++upper) {
+ ColPartition_C_IT partner_it(upper ? part->upper_partners()
+ : part->lower_partners());
+ // A direction is only usable if none of its partners is an image.
+ for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
+ partner_it.forward()) {
+ ColPartition* partner = partner_it.data();
+ if (partner->IsImageType()) {
+ break; // Early exit leaves cycled_list() false.
+ }
+ }
+ if (!partner_it.cycled_list()) continue; // Saw an image partner: skip this direction.
+ // Find the nearest text partner whose x-extent lies within the image's.
+ for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
+ partner_it.forward()) {
+ ColPartition* partner = partner_it.data();
+ if (!partner->IsTextType() || partner->type() == PT_TABLE) continue; // Skip non-text and tables.
+ const TBOX& partner_box = partner->bounding_box();
+ if (debug) {
+ tprintf("Finding figure captions for image part:");
+ part_box.print();
+ tprintf("Considering partner:");
+ partner_box.print();
+ }
+ if (partner_box.left() >= part_box.left() &&
+ partner_box.right() <= part_box.right()) {
+ int dist = partner_box.y_gap(part_box);
+ if (best_caption == nullptr || dist < best_dist) {
+ best_dist = dist;
+ best_caption = partner;
+ best_upper = upper;
+ }
+ }
+ }
+ }
+ if (best_caption != nullptr) {
+ if (debug) {
+ tprintf("Best caption candidate:");
+ best_caption->bounding_box().print();
+ }
+ // We have a candidate caption. Qualify it as being separable from
+ // any body text. We are looking for either a small number of lines
+ // or a big gap that indicates a separation from the body text.
+ int line_count = 0;
+ int biggest_gap = 0;
+ int smallest_gap = INT16_MAX; // Sentinel until a gap is recorded.
+ int total_height = 0;
+ int mean_height = 0;
+ ColPartition* end_partner = nullptr; // First partition that is NOT part of the caption.
+ ColPartition* next_partner = nullptr;
+ for (ColPartition* partner = best_caption; partner != nullptr &&
+ line_count <= kMaxCaptionLines;
+ partner = next_partner) {
+ if (!partner->IsTextType()) { // Chain left the text: caption ends here.
+ end_partner = partner;
+ break;
+ }
+ ++line_count;
+ total_height += partner->bounding_box().height();
+ next_partner = partner->SingletonPartner(best_upper); // Presumably nullptr unless exactly one partner - confirm.
+ if (next_partner != nullptr) {
+ int gap = partner->bounding_box().y_gap(
+ next_partner->bounding_box());
+ if (gap > biggest_gap) {
+ biggest_gap = gap;
+ end_partner = next_partner;
+ mean_height = total_height / line_count; // Mean height of the lines counted up to this gap.
+ } else if (gap < smallest_gap) { // Only updated when gap is not a new biggest.
+ smallest_gap = gap;
+ }
+ // If the gap looks big compared to the text size and the smallest
+ // gap seen so far, then we can stop.
+ if (biggest_gap > mean_height * kMinCaptionGapHeightRatio &&
+ biggest_gap > smallest_gap * kMinCaptionGapRatio)
+ break;
+ }
+ }
+ if (debug) { // NOTE(review): "smallest%d" below prints with no separator before the value.
+ tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n",
+ line_count, biggest_gap, smallest_gap, mean_height);
+ if (end_partner != nullptr) {
+ tprintf("End partner:");
+ end_partner->bounding_box().print();
+ }
+ }
+ if (next_partner == nullptr && line_count <= kMaxCaptionLines)
+ end_partner = nullptr; // No gap, but line count is small.
+ if (line_count <= kMaxCaptionLines) {
+ // This is a qualified caption. Mark the chain up to (not including) end_partner.
+ for (ColPartition* partner = best_caption; partner != nullptr &&
+ partner != end_partner;
+ partner = next_partner) {
+ partner->set_type(PT_CAPTION_TEXT);
+ partner->SetBlobTypes();
+ if (debug) {
+ tprintf("Set caption type for partition:");
+ partner->bounding_box().print();
+ }
+ next_partner = partner->SingletonPartner(best_upper); // Same chain walk as the qualification loop.
+ }
+ }
+ }
+ }
+ }
+}
+
+//////// Functions that manipulate ColPartitions in the part_grid_ /////
+//////// to find chains of partner partitions of the same type. ///////
+
+// Visits every ColPartition in the grid and looks for a partner on each side
+// of it. Vertical-type partitions use FindVPartitionPartners; all others use
+// the two-argument FindPartitionPartners.
+void ColPartitionGrid::FindPartitionPartners() {
+  ColPartitionGridSearch full_search(this);
+  full_search.StartFullSearch();
+  for (ColPartition* current = full_search.NextFullSearch();
+       current != nullptr; current = full_search.NextFullSearch()) {
+    if (!current->IsVerticalType()) {
+      FindPartitionPartners(true, current);
+      FindPartitionPartners(false, current);
+      continue;
+    }
+    FindVPartitionPartners(true, current);
+    FindVPartitionPartners(false, current);
+  }
+}
+
+// Chooses the best partner for part in one vertical direction (above when
+// upper is true, below otherwise) and records it with AddPartner.
+// Neighbours must lie on the requested side (by median mid-y) and either
+// overlap horizontally or sit within the same margins. Among type-matching
+// neighbours the closest one within kMaxPartitionSpacing * height wins. A
+// neighbour of a non-matching type is only used as a placeholder when it is
+// found before any matching neighbour, and any later matching neighbour in
+// range replaces it.
+void ColPartitionGrid::FindPartitionPartners(bool upper, ColPartition* part) {
+  if (part->type() == PT_NOISE) {
+    return;  // Noise is not allowed to partner anything.
+  }
+  const TBOX& part_box = part->bounding_box();
+  const int part_top = part->median_top();
+  const int part_bottom = part->median_bottom();
+  const int part_height = part_top - part_bottom;
+  const int part_mid_y = (part_bottom + part_top) / 2;
+  ColPartitionGridSearch vsearch(this);
+  // Scan vertically from the middle of part, over its horizontal extent, in
+  // the direction selected by upper.
+  vsearch.StartVerticalSearch(part_box.left(), part_box.right(), part->MidY());
+  ColPartition* winner = nullptr;
+  int winner_dist = INT32_MAX;
+  for (ColPartition* other = vsearch.NextVerticalSearch(!upper);
+       other != nullptr; other = vsearch.NextVerticalSearch(!upper)) {
+    if (other == part || other->type() == PT_NOISE) {
+      continue;  // Noise is not allowed to partner anything.
+    }
+    const int other_bottom = other->median_bottom();
+    const int other_top = other->median_top();
+    const int other_mid_y = (other_bottom + other_top) / 2;
+    const bool other_is_above = other_mid_y > part_mid_y;
+    if (other_is_above != upper) {
+      continue;  // Wrong side of part.
+    }
+    if (!(part->HOverlaps(*other) || part->WithinSameMargins(*other))) {
+      continue;  // Not horizontally related.
+    }
+    if (!part->TypesMatch(*other)) {
+      // Remember a mismatched neighbour only if nothing has been chosen yet.
+      if (winner == nullptr) {
+        winner = other;
+      }
+      continue;
+    }
+    const int gap = upper ? other_bottom - part_top : part_bottom - other_top;
+    if (gap > kMaxPartitionSpacing * part_height) {
+      break;  // Too far away: stop searching.
+    }
+    if (gap < winner_dist) {
+      winner_dist = gap;
+      winner = other;
+    }
+  }
+  if (winner != nullptr) {
+    part->AddPartner(upper, winner);
+  }
+}
+
+// Sideways counterpart of FindPartitionPartners for vertical partitions.
+// Chooses the best partner to the left (to_the_left true) or to the right of
+// part and records it with AddPartner. Only neighbours on the requested side
+// (by median mid-x) that overlap vertically and match in type are considered;
+// the closest one within kMaxPartitionSpacing * width wins.
+void ColPartitionGrid::FindVPartitionPartners(bool to_the_left,
+                                              ColPartition* part) {
+  if (part->type() == PT_NOISE) {
+    return;  // Noise is not allowed to partner anything.
+  }
+  const TBOX& part_box = part->bounding_box();
+  const int part_left = part->median_left();
+  const int part_right = part->median_right();
+  // An inverted median range yields -1 rather than a negative width.
+  const int part_width = part_right >= part_left ? part_right - part_left : -1;
+  const int part_mid_x = (part_left + part_right) / 2;
+  ColPartitionGridSearch hsearch(this);
+  // Scan sideways from the middle of part, over its vertical extent.
+  hsearch.StartSideSearch(part_mid_x, part_box.bottom(), part_box.top());
+  ColPartition* winner = nullptr;
+  int winner_dist = INT32_MAX;
+  for (ColPartition* other = hsearch.NextSideSearch(to_the_left);
+       other != nullptr; other = hsearch.NextSideSearch(to_the_left)) {
+    if (other == part || other->type() == PT_NOISE) {
+      continue;  // Noise is not allowed to partner anything.
+    }
+    const int other_left = other->median_left();
+    const int other_right = other->median_right();
+    const int other_mid_x = (other_left + other_right) / 2;
+    const bool other_is_left = other_mid_x < part_mid_x;
+    if (other_is_left != to_the_left) {
+      continue;  // Wrong side of part.
+    }
+    if (!part->VOverlaps(*other)) {
+      continue;
+    }
+    if (!part->TypesMatch(*other)) {
+      continue;  // Only match to other vertical text.
+    }
+    const int gap =
+        to_the_left ? part_left - other_right : other_left - part_right;
+    if (gap > kMaxPartitionSpacing * part_width) {
+      break;  // Too far away: stop searching.
+    }
+    if (gap < winner_dist || winner == nullptr) {
+      winner_dist = gap;
+      winner = other;
+    }
+  }
+  // For vertical partitions, the upper partner is to the left, and lower is
+  // to the right.
+  if (winner != nullptr) {
+    part->AddPartner(to_the_left, winner);
+  }
+}
+
+// Cuts the partner lists of every ColPartition in the grid down to at most
+// one partner by calling ColPartition::RefinePartners on each of them.
+// When get_desperate is true, more desperate merge methods are tried on
+// flowing text before partnerships are broken.
+void ColPartitionGrid::RefinePartitionPartners(bool get_desperate) {
+  ColPartitionGridSearch full_search(this);
+  // One full sweep of the grid per type, in type order, so that chasing
+  // multiple partners happens before type-mismatched partners are dropped.
+  // NOTE(review): the range deliberately includes PT_COUNT itself; presumably
+  // RefinePartners treats that value as a final pass - confirm.
+  for (int type_index = PT_UNKNOWN + 1; type_index <= PT_COUNT; ++type_index) {
+    const auto pass_type = static_cast<PolyBlockType>(type_index);
+    full_search.StartFullSearch();
+    for (ColPartition* current = full_search.NextFullSearch();
+         current != nullptr; current = full_search.NextFullSearch()) {
+      current->RefinePartners(pass_type, get_desperate, this);
+      // A merge may have disturbed the grid under the iterator.
+      full_search.RepositionIterator();
+    }
+  }
+}
+
+
+// ========================== PRIVATE CODE ========================
+
+// Collects into candidates (sorted by box left, without duplicates) the
+// ColPartitions that could be merged with part.
+// A candidate must be found by a rectangle search of search_box, pass
+// OKMergeCandidate, and, unless one box contains the other, the combined box
+// must not newly overlap any other partition in an unacceptable way.
+// If debug is true, the reasons for rejections and acceptances are printed.
+void ColPartitionGrid::FindMergeCandidates(const ColPartition* part,
+                                           const TBOX& search_box, bool debug,
+                                           ColPartition_CLIST* candidates) {
+  const int ok_overlap =
+      static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
+  const TBOX& part_box = part->bounding_box();
+  ColPartitionGridSearch rsearch(this);
+  rsearch.SetUniqueMode(true);
+  rsearch.StartRectSearch(search_box);
+  for (ColPartition* candidate = rsearch.NextRectSearch();
+       candidate != nullptr; candidate = rsearch.NextRectSearch()) {
+    if (!OKMergeCandidate(part, candidate, debug)) {
+      continue;
+    }
+    const TBOX& c_box = candidate->bounding_box();
+    // When one box contains the other the merge cannot overlap anything new.
+    const bool nested = part_box.contains(c_box) || c_box.contains(part_box);
+    if (!nested) {
+      // Cheap pre-filter: look through the combined rectangle for anything
+      // the merge would newly and unacceptably overlap, e.g. a vertical and
+      // a horizontal line-like object whose union swallows real text. This
+      // weeds out poor candidates before the squared-order overlap analysis.
+      TBOX merged_box(part_box);
+      merged_box += c_box;
+      ColPartitionGridSearch msearch(this);
+      msearch.SetUniqueMode(true);
+      msearch.StartRectSearch(merged_box);
+      ColPartition* blocker = nullptr;
+      for (ColPartition* other = msearch.NextRectSearch(); other != nullptr;
+           other = msearch.NextRectSearch()) {
+        if (other == part || other == candidate) {
+          continue;  // Ignore the pair being merged.
+        }
+        if (other->OKMergeOverlap(*part, *candidate, ok_overlap, false)) {
+          continue;  // This kind of merge overlap is OK.
+        }
+        TBOX other_box = other->bounding_box();
+        // Still acceptable if other already overlapped part or the candidate,
+        // or is itself a suitable merge with either of them.
+        const bool acceptable = other_box.overlap(part_box) ||
+                                other_box.overlap(c_box) ||
+                                OKMergeCandidate(part, other, false) ||
+                                OKMergeCandidate(candidate, other, false);
+        if (!acceptable) {
+          blocker = other;
+          break;
+        }
+      }
+      if (blocker != nullptr) {
+        if (debug) {
+          tprintf("Combined box overlaps another that is not OK despite"
+                  " allowance of %d:", ok_overlap);
+          blocker->bounding_box().print();
+          tprintf("Reason:");
+          OKMergeCandidate(part, blocker, true);
+          tprintf("...and:");
+          OKMergeCandidate(candidate, blocker, true);
+          tprintf("Overlap:");
+          blocker->OKMergeOverlap(*part, *candidate, ok_overlap, true);
+        }
+        continue;  // Reject this candidate.
+      }
+    }
+    if (debug) {
+      tprintf("Adding candidate:");
+      candidate->bounding_box().print();
+    }
+    // Keep the list sorted by left edge and free of duplicates.
+    candidates->add_sorted(SortByBoxLeft<ColPartition>, true, candidate);
+  }
+}
+
+// Smooths the region type/flow type of part using its local neighbours and
+// the given image mask. SmoothInOneDirection is run once for each direction,
+// each time with the search padding truncated on one side of the part's box,
+// and the result with the smallest distance is used. This biases in favor of
+// rectangular regions without completely enforcing them.
+// The part is left unchanged if no decision is close enough, or if it is a
+// BTFT_STRONG_CHAIN and not every direction voted image.
+// im_box and rerotation are used to map blob coordinates onto the
+// nontext_map, which is used to prevent the spread of text neighbourhoods
+// into images.
+// Returns true if the partition was changed.
+bool ColPartitionGrid::SmoothRegionType(Pix* nontext_map,
+                                        const TBOX& im_box,
+                                        const FCOORD& rerotation,
+                                        bool debug,
+                                        ColPartition* part) {
+  const TBOX& part_box = part->bounding_box();
+  if (debug) {
+    tprintf("Smooothing part at:");
+    part_box.print();
+  }
+  // Furthest neighbourhood distance that is still allowed to set the type.
+  const int short_side = std::min(part_box.width(), part_box.height());
+  const int max_dist =
+      std::max(short_side * kMaxNeighbourDistFactor, gridsize() * 2);
+  BlobRegionType best_type = BRT_UNKNOWN;
+  int best_dist = INT32_MAX;
+  bool any_image = false;
+  bool all_image = true;
+  // Search with the pad truncated on each side of the box in turn.
+  for (int d = 0; d < BND_COUNT; ++d) {
+    const auto dir = static_cast<BlobNeighbourDir>(d);
+    int dir_dist;
+    const BlobRegionType dir_type = SmoothInOneDirection(
+        dir, nontext_map, im_box, rerotation, debug, *part, &dir_dist);
+    if (debug) {
+      tprintf("Result in dir %d = %d at dist %d\n", dir, dir_type, dir_dist);
+    }
+    if (dir_type != BRT_UNKNOWN && dir_dist < best_dist) {
+      best_dist = dir_dist;
+      best_type = dir_type;
+    }
+    const bool voted_image = dir_type == BRT_POLYIMAGE;
+    any_image = any_image || voted_image;
+    all_image = all_image && voted_image;
+  }
+  if (best_dist > max_dist) {
+    return false;  // Too far away to set the type with it.
+  }
+  if (part->flow() == BTFT_STRONG_CHAIN && !all_image) {
+    return false;  // We are not modifying it.
+  }
+  BlobRegionType new_type = part->blob_type();
+  BlobTextFlowType new_flow = part->flow();
+  switch (best_type) {
+    case BRT_TEXT:
+    case BRT_VERT_TEXT:
+      // Text only wins when no direction saw image.
+      if (!any_image) {
+        new_flow = BTFT_STRONG_CHAIN;
+        new_type = best_type;
+      }
+      break;
+    case BRT_POLYIMAGE:
+      new_flow = BTFT_NONTEXT;
+      new_type = BRT_UNKNOWN;
+      break;
+    default:
+      break;  // No decision: keep the current type and flow.
+  }
+  const bool unchanged =
+      new_type == part->blob_type() && new_flow == part->flow();
+  if (unchanged) {
+    return false;
+  }
+  part->set_flow(new_flow);
+  part->set_blob_type(new_type);
+  part->SetBlobTypes();
+  if (debug) {
+    tprintf("Modified part:");
+    part->Print();
+  }
+  return true;
+}
+
+// Builds *search_box from part_box by padding it on every side and then
+// pulling the side named by direction back to part_box's own edge.
+// The pad is the smaller dimension of part_box, but at least min_padding,
+// multiplied by kMaxPadFactor.
+// *dist_scaling receives the x,y weights for distances: (2, 1) for
+// BND_LEFT/BND_RIGHT and (1, 2) for BND_BELOW/BND_ABOVE.
+// Any other direction value trips ASSERT_HOST.
+static void ComputeSearchBoxAndScaling(BlobNeighbourDir direction,
+                                       const TBOX& part_box,
+                                       int min_padding,
+                                       TBOX* search_box,
+                                       ICOORD* dist_scaling) {
+  const int short_side = std::min(part_box.height(), part_box.width());
+  const int padding = std::max(short_side, min_padding) * kMaxPadFactor;
+  *search_box = part_box;
+  search_box->pad(padding, padding);
+  // Truncate the box in the appropriate direction and make the distance
+  // metric slightly biased in the truncated direction.
+  if (direction == BND_LEFT) {
+    search_box->set_left(part_box.left());
+    *dist_scaling = ICOORD(2, 1);
+  } else if (direction == BND_BELOW) {
+    search_box->set_bottom(part_box.bottom());
+    *dist_scaling = ICOORD(1, 2);
+  } else if (direction == BND_RIGHT) {
+    search_box->set_right(part_box.right());
+    *dist_scaling = ICOORD(2, 1);
+  } else if (direction == BND_ABOVE) {
+    search_box->set_top(part_box.top());
+    *dist_scaling = ICOORD(1, 2);
+  } else {
+    ASSERT_HOST(false);
+  }
+}
+
+// Local enum used by SmoothInOneDirection and AccumulatePartDistances to classify
+// partition neighbours. The values index the per-type dists[] and counts[] arrays.
+enum NeighbourPartitionType {
+ NPT_HTEXT, // Definite horizontal text.
+ NPT_VTEXT, // Definite vertical text.
+ NPT_WEAK_HTEXT, // Weakly horizontal text. Counts as HTEXT for HTEXT, but
+ // as image when scoring image and VTEXT.
+ NPT_WEAK_VTEXT, // Weakly vertical text. Counts as VTEXT for VTEXT, but
+ // as image when scoring image and HTEXT.
+ NPT_IMAGE, // Definite non-text.
+ NPT_COUNT // Number of array elements.
+};
+
+// Executes the search for SmoothRegionType in a single direction.
+// Creates a bounding box that is padded in all directions except direction,
+// and collects the sorted distances of the partitions in it, per neighbour type.
+// Grows the neighbourhood outwards until one type wins by kSmoothDecisionMargin.
+// Returns BRT_POLYIMAGE, BRT_TEXT or BRT_VERT_TEXT with *best_distance set to the nearest
+// deciding neighbour, else BRT_UNKNOWN. Any pixels in nontext_map bias towards image.
+BlobRegionType ColPartitionGrid::SmoothInOneDirection(
+ BlobNeighbourDir direction, Pix* nontext_map,
+ const TBOX& im_box, const FCOORD& rerotation,
+ bool debug, const ColPartition& part, int* best_distance) {
+ // Set up a rectangle search bounded by the part on the side given by direction.
+ const TBOX& part_box = part.bounding_box();
+ TBOX search_box;
+ ICOORD dist_scaling;
+ ComputeSearchBoxAndScaling(direction, part_box, gridsize(),
+ &search_box, &dist_scaling);
+ bool image_region = ImageFind::CountPixelsInRotatedBox(search_box, im_box,
+ rerotation,
+ nontext_map) > 0;
+ GenericVector<int> dists[NPT_COUNT]; // One sorted distance list per neighbour type.
+ AccumulatePartDistances(part, dist_scaling, search_box,
+ nontext_map, im_box, rerotation, debug, dists);
+ // By iteratively including the next smallest distance across the vectors,
+ // (as in a merge sort) we can use the vector indices as counts of each type
+ // and find the nearest set of objects that give us a definite decision.
+ int counts[NPT_COUNT]; // counts[i] = entries of dists[i] included so far.
+ memset(counts, 0, sizeof(counts[0]) * NPT_COUNT);
+ // If there is image in the search box, tip the balance in image's favor.
+ int image_bias = image_region ? kSmoothDecisionMargin / 2 : 0;
+ BlobRegionType text_dir = part.blob_type();
+ BlobTextFlowType flow_type = part.flow();
+ int min_dist = 0;
+ do {
+ // Find the minimum not-yet-included entry across the vectors.
+ min_dist = INT32_MAX;
+ for (int i = 0; i < NPT_COUNT; ++i) {
+ if (counts[i] < dists[i].size() && dists[i][counts[i]] < min_dist)
+ min_dist = dists[i][counts[i]];
+ }
+ // Step all the indices/counts forward to include min_dist.
+ for (int i = 0; i < NPT_COUNT; ++i) {
+ while (counts[i] < dists[i].size() && dists[i][counts[i]] <= min_dist)
+ ++counts[i];
+ }
+ *best_distance = min_dist; // Provisional; overwritten below when a decision is made.
+ if (debug) {
+ tprintf("Totals: htext=%d+%d, vtext=%d+%d, image=%d+%d, at dist=%d\n",
+ counts[NPT_HTEXT], counts[NPT_WEAK_HTEXT],
+ counts[NPT_VTEXT], counts[NPT_WEAK_VTEXT],
+ counts[NPT_IMAGE], image_bias, min_dist);
+ }
+ // See if we have a decision yet. Each text score is net of image and opposing weak text.
+ int image_count = counts[NPT_IMAGE];
+ int htext_score = counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] -
+ (image_count + counts[NPT_WEAK_VTEXT]);
+ int vtext_score = counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] -
+ (image_count + counts[NPT_WEAK_HTEXT]);
+ if (image_count > 0 &&
+ image_bias - htext_score >= kSmoothDecisionMargin &&
+ image_bias - vtext_score >= kSmoothDecisionMargin) {
+ *best_distance = dists[NPT_IMAGE][0]; // Safe: image_count > 0 means the list is non-empty.
+ if (!dists[NPT_WEAK_VTEXT].empty() &&
+ *best_distance > dists[NPT_WEAK_VTEXT][0])
+ *best_distance = dists[NPT_WEAK_VTEXT][0];
+ if (!dists[NPT_WEAK_HTEXT].empty() &&
+ *best_distance > dists[NPT_WEAK_HTEXT][0])
+ *best_distance = dists[NPT_WEAK_HTEXT][0];
+ return BRT_POLYIMAGE; // Distance is the nearest of image and weak text neighbours.
+ }
+ if ((text_dir != BRT_VERT_TEXT || flow_type != BTFT_CHAIN) && // A vertical BTFT_CHAIN part never becomes horizontal.
+ counts[NPT_HTEXT] > 0 && htext_score >= kSmoothDecisionMargin) {
+ *best_distance = dists[NPT_HTEXT][0];
+ return BRT_TEXT;
+ } else if ((text_dir != BRT_TEXT || flow_type != BTFT_CHAIN) && // A horizontal BTFT_CHAIN part never becomes vertical.
+ counts[NPT_VTEXT] > 0 && vtext_score >= kSmoothDecisionMargin) {
+ *best_distance = dists[NPT_VTEXT][0];
+ return BRT_VERT_TEXT;
+ }
+ } while (min_dist < INT32_MAX); // INT32_MAX means every list has been exhausted.
+ return BRT_UNKNOWN; // Undecided; *best_distance is INT32_MAX here.
+}
+
+// Tallies the partitions found in search_box around base_part.
+// For each usable neighbour, its gap distance from base_part (x and y gaps
+// weighted by dist_scaling) is appended to the dists vector for its
+// NeighbourPartitionType, once per box in the neighbour, capped at
+// kSmoothDecisionMargin entries. On return every vector in dists is sorted
+// in increasing order.
+// The nontext_map (+im_box, rerotation) is used to make text invisible if
+// there is non-text in between.
+// dists must be an array of GenericVectors of size NPT_COUNT.
+void ColPartitionGrid::AccumulatePartDistances(const ColPartition& base_part,
+                                               const ICOORD& dist_scaling,
+                                               const TBOX& search_box,
+                                               Pix* nontext_map,
+                                               const TBOX& im_box,
+                                               const FCOORD& rerotation,
+                                               bool debug,
+                                               GenericVector<int>* dists) {
+  const TBOX& part_box = base_part.bounding_box();
+  ColPartitionGridSearch rsearch(this);
+  rsearch.SetUniqueMode(true);
+  rsearch.StartRectSearch(search_box);
+  for (ColPartition* neighbour = rsearch.NextRectSearch();
+       neighbour != nullptr; neighbour = rsearch.NextRectSearch()) {
+    // Skip unmergeable types, anything across a tab vector, and base_part.
+    if (neighbour->IsUnMergeableType() ||
+        !base_part.ConfirmNoTabViolation(*neighbour) ||
+        neighbour == &base_part) {
+      continue;
+    }
+    TBOX nbox = neighbour->bounding_box();
+    const BlobRegionType n_type = neighbour->blob_type();
+    const bool is_htext = n_type == BRT_TEXT;
+    const bool is_text = is_htext || n_type == BRT_VERT_TEXT;
+    if (is_text &&
+        !ImageFind::BlankImageInBetween(part_box, nbox, im_box, rerotation,
+                                        nontext_map)) {
+      continue;  // Text not visible the other side of image.
+    }
+    if (BLOBNBOX::IsLineType(n_type)) {
+      continue;  // Don't use lines as neighbours.
+    }
+    const int x_gap = std::max(part_box.x_gap(nbox), 0);
+    const int y_gap = std::max(part_box.y_gap(nbox), 0);
+    const int n_dist = x_gap * dist_scaling.x() + y_gap * dist_scaling.y();
+    if (debug) {
+      tprintf("Part has x-gap=%d, y=%d, dist=%d at:",
+              x_gap, y_gap, n_dist);
+      nbox.print();
+    }
+    // Truncate the number of boxes, so text doesn't get too much advantage.
+    const int n_boxes =
+        std::min(neighbour->boxes_count(), kSmoothDecisionMargin);
+    const BlobTextFlowType n_flow = neighbour->flow();
+    NeighbourPartitionType bucket;
+    if (n_flow == BTFT_STRONG_CHAIN) {
+      // Strong chains are definite text; anything not BRT_TEXT goes to VTEXT.
+      bucket = is_htext ? NPT_HTEXT : NPT_VTEXT;
+      if (debug) {
+        tprintf("%s %d\n", is_htext ? "Htext" : "Vtext", n_boxes);
+      }
+    } else if (is_text &&
+               (n_flow == BTFT_CHAIN || n_flow == BTFT_NEIGHBOURS)) {
+      // Medium text counts as weak, and all else counts as image.
+      bucket = is_htext ? NPT_WEAK_HTEXT : NPT_WEAK_VTEXT;
+      if (debug) tprintf("Weak %d\n", n_boxes);
+    } else {
+      bucket = NPT_IMAGE;
+      if (debug) tprintf("Image %d\n", n_boxes);
+    }
+    for (int i = 0; i < n_boxes; ++i) {
+      dists[bucket].push_back(n_dist);
+    }
+    if (debug) {
+      neighbour->Print();
+    }
+  }
+  for (int i = 0; i < NPT_COUNT; ++i) {
+    dists[i].sort();
+  }
+}
+
+// Computes and stores the left and right margins of part.
+// Each margin starts at the grid edge, or at the edge (at the part's mid-y)
+// of the column containing the corresponding side of the part when columns
+// is non-null and such a column exists. It is then widened by
+// kColumnWidthFactor and finally pulled in by FindMargin to the nearest
+// neighbour that vertically overlaps the part enough.
+// columns may be nullptr.
+void ColPartitionGrid::FindPartitionMargins(ColPartitionSet* columns,
+                                            ColPartition* part) {
+  TBOX part_box = part->bounding_box();
+  const int mid_y = part->MidY();
+  // Default limits are the edges of the grid.
+  int left_limit = bleft().x();
+  int right_limit = tright().x();
+  if (columns != nullptr) {
+    ColPartition* left_column = columns->ColumnContaining(part_box.left(), mid_y);
+    if (left_column != nullptr) {
+      left_limit = left_column->LeftAtY(mid_y);
+    }
+    ColPartition* right_column =
+        columns->ColumnContaining(part_box.right(), mid_y);
+    if (right_column != nullptr) {
+      right_limit = right_column->RightAtY(mid_y);
+    }
+  }
+  left_limit -= kColumnWidthFactor;
+  right_limit += kColumnWidthFactor;
+  // Each sideways search starts one box-height inside the part's own edge.
+  const int left_margin =
+      FindMargin(part_box.left() + part_box.height(), true, left_limit,
+                 part_box.bottom(), part_box.top(), part);
+  part->set_left_margin(left_margin);
+  const int right_margin =
+      FindMargin(part_box.right() - part_box.height(), false, right_limit,
+                 part_box.bottom(), part_box.top(), part);
+  part->set_right_margin(right_margin);
+}
+
+// Searches sideways from x (leftwards when right_to_left is true, otherwise
+// rightwards) over the y range [y_bottom, y_top] and returns the margin: the
+// facing edge of the last accepted neighbour before x_limit is passed, or
+// x_limit itself if no neighbour tightens it. not_this is ignored, as is any
+// neighbour whose vertical overlap with the y range is below
+// kMarginOverlapFraction of the smaller of the two heights.
+int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit,
+                                 int y_bottom, int y_top,
+                                 const ColPartition* not_this) {
+  const int span_height = y_top - y_bottom;
+  ColPartitionGridSearch side_search(this);
+  side_search.SetUniqueMode(true);
+  side_search.StartSideSearch(x, y_bottom, y_top);
+  for (ColPartition* blocker = side_search.NextSideSearch(right_to_left);
+       blocker != nullptr;
+       blocker = side_search.NextSideSearch(right_to_left)) {
+    if (blocker == not_this) {
+      continue;  // Ignore itself.
+    }
+    // The required overlap is based on the min of the heights, so large
+    // partitions can't smash through small ones.
+    TBOX blocker_box = blocker->bounding_box();
+    const int smaller_height =
+        std::min(span_height, static_cast<int>(blocker_box.height()));
+    const int min_overlap =
+        static_cast<int>(smaller_height * kMarginOverlapFraction + 0.5);
+    const int overlap_top = std::min(y_top, static_cast<int>(blocker_box.top()));
+    const int overlap_bottom =
+        std::max(y_bottom, static_cast<int>(blocker_box.bottom()));
+    const int y_overlap = overlap_top - overlap_bottom;
+    if (y_overlap < min_overlap) {
+      continue;
+    }
+    // The edge facing x: right edge when searching left, left edge otherwise.
+    const int x_edge = right_to_left ? blocker_box.right() : blocker_box.left();
+    const bool on_search_side = (x_edge < x) == right_to_left;
+    if (!on_search_side) {
+      continue;  // Must be going the right way.
+    }
+    const bool beyond_limit = (x_edge < x_limit) == right_to_left;
+    if (beyond_limit) {
+      break;  // Gone past x_limit, so x_limit will do.
+    }
+    // It reduces x limit, so save the new one.
+    x_limit = x_edge;
+  }
+  return x_limit;
+}
+
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/colpartitiongrid.h b/tesseract/src/textord/colpartitiongrid.h
new file mode 100644
index 00000000..85ab7f3d
--- /dev/null
+++ b/tesseract/src/textord/colpartitiongrid.h
@@ -0,0 +1,252 @@
+///////////////////////////////////////////////////////////////////////
+// File: colpartitiongrid.h
+// Description: Class collecting code that acts on a BBGrid of ColPartitions.
+// Author: Ray Smith
+//
+// (C) Copyright 2009, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_COLPARTITIONGRID_H_
+#define TESSERACT_TEXTORD_COLPARTITIONGRID_H_
+
+#include "bbgrid.h"
+#include "colpartition.h"
+#include "colpartitionset.h"
+
+namespace tesseract {
+
+class TabFind;
+
+// ColPartitionGrid is a BBGrid of ColPartition.
+// It collects functions that work on the grid.
+class TESS_API ColPartitionGrid : public BBGrid<ColPartition,
+ ColPartition_CLIST,
+ ColPartition_C_IT> {
+ public:
+ ColPartitionGrid() = default;
+ ColPartitionGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+
+ ~ColPartitionGrid() override = default;
+
+ // Handles a click event in a display window.
+ void HandleClick(int x, int y) override;
+
+ // Merges ColPartitions in the grid that look like they belong in the same
+ // textline.
+ // For all partitions in the grid, calls the box_cb permanent callback
+ // to compute the search box, searches the box, and if a candidate is found,
+ // calls the confirm_cb to check any more rules. If the confirm_cb returns
+ // true, then the partitions are merged.
+ // Both callbacks are deleted before returning.
+ void Merges(std::function<bool(ColPartition*, TBOX*)> box_cb,
+ std::function<bool(const ColPartition*,
+ const ColPartition*)> confirm_cb);
+
+ // For the given partition, calls the box_cb permanent callback
+ // to compute the search box, searches the box, and if a candidate is found,
+ // calls the confirm_cb to check any more rules. If the confirm_cb returns
+ // true, then the partitions are merged.
+ // Returns true if the partition is consumed by one or more merges.
+ bool MergePart(std::function<bool(ColPartition*, TBOX*)> box_cb,
+ std::function<bool(const ColPartition*,
+ const ColPartition*)> confirm_cb,
+ ColPartition* part);
+
+ // Computes and returns the total overlap of all partitions in the grid.
+ // If overlap_grid is non-null, it is filled with a grid that holds empty
+ // partitions representing the union of all overlapped partitions.
+ int ComputeTotalOverlap(ColPartitionGrid** overlap_grid);
+
+ // Finds all the ColPartitions in the grid that overlap with the given
+ // box and returns them SortByBoxLeft(ed) and uniqued in the given list.
+ // Any partition equal to not_this (may be nullptr) is excluded.
+ void FindOverlappingPartitions(const TBOX& box, const ColPartition* not_this,
+ ColPartition_CLIST* parts);
+
+ // Finds and returns the best candidate ColPartition to merge with part,
+ // selected from the candidates list, based on the minimum increase in
+ // pairwise overlap among all the partitions overlapped by the combined box.
+ // If overlap_increase is not nullptr then it returns the increase in overlap
+ // that would result from the merge.
+ // See colpartitiongrid.cpp for a diagram.
+ ColPartition* BestMergeCandidate(
+ const ColPartition* part, ColPartition_CLIST* candidates, bool debug,
+ std::function<bool(const ColPartition*,
+ const ColPartition*)> confirm_cb,
+ int* overlap_increase);
+
+ // Split partitions where it reduces overlap between their bounding boxes.
+ // ColPartitions are after all supposed to be a partitioning of the blobs
+ // AND of the space on the page!
+ // Blobs that cause overlaps get removed, put in individual partitions
+ // and added to the big_parts list. They are most likely characters on
+ // 2 textlines that touch, or something big like a dropcap.
+ void SplitOverlappingPartitions(ColPartition_LIST* big_parts);
+
+ // Filters partitions of source_type by looking at local neighbours.
+ // Where a majority of neighbours have a text type, the partitions are
+ // changed to text, where the neighbours have image type, they are changed
+ // to image, and partitions that have no definite neighbourhood type are
+ // left unchanged.
+ // im_box and rerotation are used to map blob coordinates onto the
+ // nontext_map, which is used to prevent the spread of text neighbourhoods
+ // into images.
+ // Returns true if anything was changed.
+ bool GridSmoothNeighbours(BlobTextFlowType source_type, Pix* nontext_map,
+ const TBOX& im_box, const FCOORD& rerotation);
+
+ // Reflects the grid and its colpartitions in the y-axis, assuming that
+ // all blob boxes have already been done.
+ void ReflectInYAxis();
+
+ // Rotates the grid and its colpartitions by the given angle, assuming that
+ // all blob boxes have already been done.
+ void Deskew(const FCOORD& deskew);
+
+ // Transforms the grid of partitions to the output blocks, putting each
+ // partition into a separate block. We don't really care about the order,
+ // as we just want to get as much text as possible without trying to organize
+ // it into proper blocks or columns.
+ void ExtractPartitionsAsBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
+
+ // Sets the left and right tabs of the partitions in the grid.
+ void SetTabStops(TabFind* tabgrid);
+
+ // Makes the ColPartSets and puts them in the PartSetVector ready
+ // for finding column bounds. Returns false if no partitions were found.
+ // Each ColPartition in the grid is placed in a single ColPartSet based
+ // on the bottom-left of its bounding box.
+ bool MakeColPartSets(PartSetVector* part_sets);
+
+ // Makes a single ColPartitionSet consisting of a single ColPartition that
+ // represents the total horizontal extent of the significant content on the
+ // page. Used for the single column setting in place of automatic detection.
+ // Returns nullptr if the page is empty of significant content.
+ ColPartitionSet* MakeSingleColumnSet(WidthCallback cb);
+
+ // Mark the BLOBNBOXes in each partition as being owned by that partition.
+ void ClaimBoxes();
+
+ // Retypes all the blobs referenced by the partitions in the grid.
+ // Image blobs are sliced on the grid boundaries to give the tab finder
+ // a better handle on the edges of the images, and the actual blobs are
+ // returned in the im_blobs list, as they are not owned by the block.
+ void ReTypeBlobs(BLOBNBOX_LIST* im_blobs);
+
+ // The boxes within the partitions have changed (by deskew) so recompute
+ // the bounds of all the partitions and reinsert them into the grid.
+ void RecomputeBounds(int gridsize, const ICOORD& bleft,
+ const ICOORD& tright, const ICOORD& vertical);
+
+ // Improves the margins of the ColPartitions in the grid by calling
+ // FindPartitionMargins on each.
+ void GridFindMargins(ColPartitionSet** best_columns);
+
+ // Improves the margins of the ColPartitions in the list by calling
+ // FindPartitionMargins on each.
+ void ListFindMargins(ColPartitionSet** best_columns,
+ ColPartition_LIST* parts);
+
+ // Deletes all the partitions in the grid after disowning all the blobs.
+ void DeleteParts();
+
+ // Deletes all the partitions in the grid that are of type BRT_UNKNOWN and
+ // all the blobs in them.
+ void DeleteUnknownParts(TO_BLOCK* block);
+
+ // Deletes all the partitions in the grid that are NOT of flow type
+ // BTFT_LEADER.
+ void DeleteNonLeaderParts();
+
+ // Finds and marks text partitions that represent figure captions.
+ void FindFigureCaptions();
+
+ //////// Functions that manipulate ColPartitions in the grid ///////
+ //////// to find chains of partner partitions of the same type. ///////
+ // For every ColPartition in the grid, finds its upper and lower neighbours.
+ void FindPartitionPartners();
+ // Finds the best partner in the given direction for the given partition.
+ // Stores the result with AddPartner.
+ void FindPartitionPartners(bool upper, ColPartition* part);
+ // Finds the best partner in the given direction for the given partition.
+ // Stores the result with AddPartner.
+ void FindVPartitionPartners(bool to_the_left, ColPartition* part);
+ // For every ColPartition with multiple partners in the grid, reduces the
+ // number of partners to 0 or 1. If get_desperate is true, goes to more
+ // desperate merge methods to merge flowing text before breaking partnerships.
+ void RefinePartitionPartners(bool get_desperate);
+
+ private:
+ // Finds and returns a list of candidate ColPartitions to merge with part.
+ // The candidates must overlap search_box, and when merged must not
+ // overlap any other partitions that are not overlapped by each individually.
+ void FindMergeCandidates(const ColPartition* part, const TBOX& search_box,
+ bool debug, ColPartition_CLIST* candidates);
+
+ // Smoothes the region type/flow type of the given part by looking at local
+ // neighbours and the given image mask. Searches a padded rectangle with the
+ // padding truncated on one side of the part's box in turn for each side,
+ // using the result (if any) that has the least distance to all neighbours
+ // that contribute to the decision. This biases in favor of rectangular
+ // regions without completely enforcing them.
+ // If a good decision cannot be reached, the part is left unchanged.
+ // im_box and rerotation are used to map blob coordinates onto the
+ // nontext_map, which is used to prevent the spread of text neighbourhoods
+ // into images.
+ // Returns true if the partition was changed.
+ bool SmoothRegionType(Pix* nontext_map,
+ const TBOX& im_box,
+ const FCOORD& rerotation,
+ bool debug,
+ ColPartition* part);
+ // Executes the search for SmoothRegionType in a single direction.
+ // Creates a bounding box that is padded in all directions except direction,
+ // and searches it for other partitions. Finds the nearest collection of
+ // partitions that makes a decisive result (if any) and returns the type
+ // and the distance of the collection. If there are any pixels in the
+ // nontext_map, then the decision is biased towards image.
+ BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction,
+ Pix* nontext_map,
+ const TBOX& im_box,
+ const FCOORD& rerotation,
+ bool debug,
+ const ColPartition& part,
+ int* best_distance);
+ // Counts the partitions in the given search_box by appending the gap
+ // distance (scaled by dist_scaling) of the part from the base_part to the
+ // vector of the appropriate type for the partition. Prior to return, the
+ // vectors in the dists array are sorted in increasing order.
+ // dists must be an array of GenericVectors of size NPT_COUNT.
+ void AccumulatePartDistances(const ColPartition& base_part,
+ const ICOORD& dist_scaling,
+ const TBOX& search_box,
+ Pix* nontext_map,
+ const TBOX& im_box,
+ const FCOORD& rerotation,
+ bool debug,
+ GenericVector<int>* dists);
+
+ // Improves the margins of the ColPartition by searching for
+ // neighbours that vertically overlap significantly.
+ void FindPartitionMargins(ColPartitionSet* columns, ColPartition* part);
+
+ // Starting at x, and going in the specified direction, up to x_limit, finds
+ // the margin for the given y range by searching sideways,
+ // and ignoring not_this.
+ int FindMargin(int x, bool right_to_left, int x_limit,
+ int y_bottom, int y_top, const ColPartition* not_this);
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_COLPARTITIONGRID_H_
diff --git a/tesseract/src/textord/colpartitionset.cpp b/tesseract/src/textord/colpartitionset.cpp
new file mode 100644
index 00000000..c53235e6
--- /dev/null
+++ b/tesseract/src/textord/colpartitionset.cpp
@@ -0,0 +1,667 @@
+///////////////////////////////////////////////////////////////////////
+// File: colpartitionset.cpp
+// Description: Class to hold a list of ColPartitions of the page that
+// correspond roughly to columns.
+// Author: Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "colpartitionset.h"
+#include "workingpartset.h"
+#include "tablefind.h"
+
+namespace tesseract {
+
+// Minimum width of a column to be interesting as a multiple of resolution.
+// SpanningType multiplies this by the image resolution to get the pixel
+// width below which a span whose first and last column indices coincide is
+// classed as CST_NOISE.
+const double kMinColumnWidth = 2.0 / 3;
+
+// NOTE(review): ELISTIZE is defined outside this file; presumably it emits
+// the list support code for ColPartitionSet - confirm against its header.
+ELISTIZE(ColPartitionSet)
+
+// Builds a set from a whole list of partitions. The elements of
+// |partitions| are spliced into parts_ and the coverage statistics are
+// then computed from the resulting list.
+ColPartitionSet::ColPartitionSet(ColPartition_LIST* partitions) {
+  ColPartition_IT dest_it(&parts_);
+  dest_it.add_list_after(partitions);
+  ComputeCoverage();
+}
+
+// Builds a set that holds exactly one partition, then computes the
+// coverage statistics for it.
+ColPartitionSet::ColPartitionSet(ColPartition* part) {
+  ColPartition_IT dest_it(&parts_);
+  dest_it.add_after_then_move(part);
+  ComputeCoverage();
+}
+
+// Counts the partitions in this set whose width is flagged as good.
+int ColPartitionSet::GoodColumnCount() const {
+  // The iterator needs a non-const list, but the walk below only reads it,
+  // so casting away const is harmless here.
+  ColPartition_IT part_it(const_cast<ColPartition_LIST*>(&parts_));
+  int good_total = 0;
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    const ColPartition* part = part_it.data();
+    if (part->good_width()) {
+      ++good_total;
+    }
+  }
+  return good_total;
+}
+
+// Returns the partition at position |index| (0-based) in parts_, or nullptr
+// if the walk wraps round the list before reaching that position.
+ColPartition* ColPartitionSet::GetColumnByIndex(int index) {
+  ColPartition_IT part_it(&parts_);
+  part_it.mark_cycle_pt();
+  int steps_taken = 0;
+  while (steps_taken < index && !part_it.cycled_list()) {
+    ++steps_taken;
+    part_it.forward();
+  }
+  return part_it.cycled_list() ? nullptr : part_it.data();
+}
+
+// Finds the first partition in list order whose column contains the point
+// (x, y). Returns nullptr when no partition claims the point.
+ColPartition* ColPartitionSet::ColumnContaining(int x, int y) {
+  ColPartition_IT part_it(&parts_);
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    ColPartition* candidate = part_it.data();
+    if (!candidate->ColumnContains(x, y)) {
+      continue;
+    }
+    return candidate;
+  }
+  return nullptr;
+}
+
+// Empties parts_ by extracting every element without deleting it, so that
+// this set no longer owns any of the partitions.
+void ColPartitionSet::RelinquishParts() {
+  ColPartition_IT part_it(&parts_);
+  for (; !part_it.empty(); part_it.forward()) {
+    part_it.extract();
+  }
+}
+
+// Attempt to improve this by adding partitions or expanding partitions.
+// Walks every non-null ColPartitionSet in src_sets. Each of its non-image
+// partitions is either copied into this set (when it overlaps nothing at
+// the current position) or used to widen the overlapping partition of this
+// set via its left/right tab or box edge.
+// cb is the width test: cb(width) returns true for a good column width.
+// parts_ must not be empty on entry (asserted). The coverage statistics
+// are recomputed before returning.
+void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb,
+ PartSetVector* src_sets) {
+ int set_size = src_sets->size();
+ // Iterate over the provided column sets, as each one may have something
+ // to improve this.
+ for (int i = 0; i < set_size; ++i) {
+ ColPartitionSet* column_set = src_sets->get(i);
+ if (column_set == nullptr)
+ continue;
+ // Iterate over the parts in this and column_set, adding bigger or
+ // new parts in column_set to this.
+ ColPartition_IT part_it(&parts_);
+ ASSERT_HOST(!part_it.empty());
+ // Right key of the partition of this set most recently stepped past;
+ // stays at INT32_MIN until part_it has advanced at least once.
+ int prev_right = INT32_MIN;
+ part_it.mark_cycle_pt();
+ ColPartition_IT col_it(&column_set->parts_);
+ for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
+ ColPartition* col_part = col_it.data();
+ if (col_part->blob_type() < BRT_UNKNOWN)
+ continue; // Ignore image partitions.
+ int col_left = col_part->left_key();
+ int col_right = col_part->right_key();
+ // Sync-up part_it (in this) so it matches the col_part in column_set.
+ // The at_last() test stops the advance on the final partition, so part
+ // always refers to a real element after the loop.
+ ColPartition* part = part_it.data();
+ while (!part_it.at_last() && part->right_key() < col_left) {
+ prev_right = part->right_key();
+ part_it.forward();
+ part = part_it.data();
+ }
+ int part_left = part->left_key();
+ int part_right = part->right_key();
+ if (part_right < col_left || col_right < part_left) {
+ // There is no overlap so this is a new partition.
+ AddPartition(col_part->ShallowCopy(), &part_it);
+ continue;
+ }
+ // Check the edges of col_part to see if they can improve part.
+ // part_width_ok records whether part had a good width before any change;
+ // it is not re-evaluated after the left edge is moved.
+ bool part_width_ok = cb(part->KeyWidth(part_left, part_right));
+ if (col_left < part_left && col_left > prev_right) {
+ // The left edge of the column is better and it doesn't overlap,
+ // so we can potentially expand it.
+ int col_box_left = col_part->BoxLeftKey();
+ bool tab_width_ok = cb(part->KeyWidth(col_left, part_right));
+ bool box_width_ok = cb(part->KeyWidth(col_box_left, part_right));
+ if (tab_width_ok || (!part_width_ok)) {
+ // The tab is leaving the good column metric at least as good as
+ // it was before, so use the tab.
+ part->CopyLeftTab(*col_part, false);
+ part->SetColumnGoodness(cb);
+ } else if (col_box_left < part_left &&
+ (box_width_ok || !part_width_ok)) {
+ // The box is leaving the good column metric at least as good as
+ // it was before, so use the box.
+ part->CopyLeftTab(*col_part, true);
+ part->SetColumnGoodness(cb);
+ }
+ // Re-read the left key, as the left tab may just have been replaced.
+ part_left = part->left_key();
+ }
+ // The right edge may only grow if it stays clear of the next partition
+ // in this set (or there is no next partition).
+ if (col_right > part_right &&
+ (part_it.at_last() ||
+ part_it.data_relative(1)->left_key() > col_right)) {
+ // The right edge is better, so we can possibly expand it.
+ int col_box_right = col_part->BoxRightKey();
+ bool tab_width_ok = cb(part->KeyWidth(part_left, col_right));
+ bool box_width_ok = cb(part->KeyWidth(part_left, col_box_right));
+ if (tab_width_ok || (!part_width_ok)) {
+ // The tab is leaving the good column metric at least as good as
+ // it was before, so use the tab.
+ part->CopyRightTab(*col_part, false);
+ part->SetColumnGoodness(cb);
+ } else if (col_box_right > part_right &&
+ (box_width_ok || !part_width_ok)) {
+ // The box is leaving the good column metric at least as good as
+ // it was before, so use the box.
+ part->CopyRightTab(*col_part, true);
+ part->SetColumnGoodness(cb);
+ }
+ }
+ }
+ }
+ // Tabs may have moved and partitions may have been added, so rebuild the
+ // coverage totals and bounding box from scratch.
+ ComputeCoverage();
+}
+
+// Decides the fate of this candidate: it is either stored in column_sets
+// or deleted, so the caller must not use it afterwards unless it is found
+// in column_sets.
+// An illegal candidate is deleted at once. Otherwise the existing sets are
+// scanned in order: this is inserted in front of the first set it beats,
+// deleted if a set it does not beat is compatible with it (cb is passed
+// through to CompatibleColumns), and appended if the scan runs out.
+void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector* column_sets,
+                                              WidthCallback cb) {
+  const bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
+                                               bounding_box_.bottom());
+  if (debug) {
+    tprintf("Considering new column candidate:\n");
+    Print();
+  }
+  if (!LegalColumnCandidate()) {
+    if (debug) {
+      tprintf("Not a legal column candidate:\n");
+      Print();
+    }
+    delete this;
+    return;
+  }
+  for (int index = 0; index < column_sets->size(); ++index) {
+    ColPartitionSet* existing = column_sets->get(index);
+    // Ranking keys, most significant first: good_coverage_, then
+    // good_column_count_, then bad_coverage_. A later key is consulted
+    // only when all earlier keys tie.
+    bool better;
+    if (good_coverage_ != existing->good_coverage_) {
+      better = good_coverage_ > existing->good_coverage_;
+    } else if (good_column_count_ != existing->good_column_count_) {
+      better = good_column_count_ > existing->good_column_count_;
+    } else {
+      better = bad_coverage_ > existing->bad_coverage_;
+    }
+    if (better) {
+      // This candidate outranks the existing entry, so it goes in front.
+      if (debug) {
+        tprintf("Good one\n");
+      }
+      column_sets->insert(this, index);
+      return;
+    }
+    if (existing->CompatibleColumns(false, this, cb)) {
+      // An entry at least as good already explains this layout.
+      if (debug) {
+        tprintf("Duplicate\n");
+      }
+      delete this;
+      return;
+    }
+  }
+  if (debug) {
+    tprintf("Added to end\n");
+  }
+  column_sets->push_back(this);
+}
+
+// Return true if the partitions in other are all compatible with the columns
+// in this.
+// Image partitions of other (blob_type() < BRT_UNKNOWN) are ignored. Every
+// remaining partition must pass three tests, each made at the partition's
+// mid-height:
+//  - both the left and right edge of its bounding box lie inside some
+//    column of this;
+//  - if its box width passes cb, both edges lie in the same column;
+//  - it and the next text partition in other are not both of good width
+//    while sharing a column of this.
+// An empty other is always compatible. debug enables tprintf tracing.
+bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other,
+ WidthCallback cb) {
+ if (debug) {
+ tprintf("CompatibleColumns testing compatibility\n");
+ Print();
+ other->Print();
+ }
+ if (other->parts_.empty()) {
+ if (debug)
+ tprintf("CompatibleColumns true due to empty other\n");
+ return true;
+ }
+ ColPartition_IT it(&other->parts_);
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ ColPartition* part = it.data();
+ if (part->blob_type() < BRT_UNKNOWN) {
+ if (debug) {
+ tprintf("CompatibleColumns ignoring image partition\n");
+ part->Print();
+ }
+ continue; // Image partitions are irrelevant to column compatibility.
+ }
+ // Locate the columns of this that hold each end of the partition's box.
+ int y = part->MidY();
+ int left = part->bounding_box().left();
+ int right = part->bounding_box().right();
+ ColPartition* left_col = ColumnContaining(left, y);
+ ColPartition* right_col = ColumnContaining(right, y);
+ if (right_col == nullptr || left_col == nullptr) {
+ if (debug) {
+ tprintf("CompatibleColumns false due to partition edge outside\n");
+ part->Print();
+ }
+ return false; // A partition edge lies outside of all columns
+ }
+ if (right_col != left_col && cb(right - left)) {
+ if (debug) {
+ tprintf("CompatibleColumns false due to good width in multiple cols\n");
+ part->Print();
+ }
+ return false; // Partition with a good width must be in a single column.
+ }
+
+ // Look ahead from part with a copy of the iterator. Non-text partitions
+ // are skipped; the unconditional break at the bottom of the loop means
+ // only the first text partition after part is examined.
+ ColPartition_IT it2= it;
+ while (!it2.at_last()) {
+ it2.forward();
+ ColPartition* next_part = it2.data();
+ if (!BLOBNBOX::IsTextType(next_part->blob_type()))
+ continue; // Non-text partitions are irrelevant.
+ int next_left = next_part->bounding_box().left();
+ if (next_left == right) {
+ break; // They share the same edge, so one must be a pull-out.
+ }
+ // Search to see if right and next_left fall within a single column.
+ // Note that next_left is tested at the mid-height of part, not of
+ // next_part.
+ ColPartition* next_left_col = ColumnContaining(next_left, y);
+ if (right_col == next_left_col) {
+ // There is a column break in this column.
+ // This can be due to a figure caption within a column, a pull-out
+ // block, or a simple broken textline that remains to be merged:
+ // all allowed, or a change in column layout: not allowed.
+ // If both partitions are of good width, then it is likely
+ // a change in column layout, otherwise probably an allowed situation.
+ if (part->good_width() && next_part->good_width()) {
+ if (debug) {
+ int next_right = next_part->bounding_box().right();
+ tprintf("CompatibleColumns false due to 2 parts of good width\n");
+ tprintf("part1 %d-%d, part2 %d-%d\n",
+ left, right, next_left, next_right);
+ right_col->Print();
+ }
+ return false;
+ }
+ }
+ break;
+ }
+ }
+ if (debug)
+ tprintf("CompatibleColumns true!\n");
+ return true;
+}
+
+// Returns the total width of all blobs in the part_set that do not lie
+// within an approved column. Used as a cost measure for using this
+// column set over another that might be compatible.
+// Only text partitions of part_set are examined. Each blob is tested at
+// the x-middle of its own bounding box and the mid-height of the partition
+// that holds it; a blob whose test point lies inside no column of this
+// contributes the full width of its box to the result.
+int ColPartitionSet::UnmatchedWidth(ColPartitionSet* part_set) {
+  int total_width = 0;
+  ColPartition_IT it(&part_set->parts_);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    ColPartition* part = it.data();
+    if (!BLOBNBOX::IsTextType(part->blob_type())) {
+      continue;  // Non-text partitions are irrelevant to column compatibility.
+    }
+    int y = part->MidY();
+    BLOBNBOX_C_IT box_it(part->boxes());
+    for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
+      // Take the box of the blob itself. Reading the partition's box here
+      // (through the outer iterator) would test every blob at the same
+      // point and add the partition width once per blob.
+      const TBOX& box = box_it.data()->bounding_box();
+      // Assume that the whole blob is outside any column iff its x-middle
+      // is outside.
+      int x = (box.left() + box.right()) / 2;
+      ColPartition* col = ColumnContaining(x, y);
+      if (col == nullptr)
+        total_width += box.width();
+    }
+  }
+  return total_width;
+}
+
+// Tests whether this set can serve as a column candidate. It qualifies
+// when it holds at least one text partition, every text partition is
+// legal, and no partition starts (left_key) before its predecessor in the
+// list ends (right_key). An empty set never qualifies.
+bool ColPartitionSet::LegalColumnCandidate() {
+  ColPartition_IT part_it(&parts_);
+  if (part_it.empty()) {
+    return false;
+  }
+  bool seen_text = false;
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    ColPartition* current = part_it.data();
+    const bool is_text = BLOBNBOX::IsTextType(current->blob_type());
+    if (is_text && !current->IsLegal()) {
+      return false;  // Individual partition is illegal.
+    }
+    seen_text = seen_text || is_text;
+    if (part_it.at_last()) {
+      continue;  // No right-hand neighbour to compare against.
+    }
+    ColPartition* neighbour = part_it.data_relative(1);
+    if (neighbour->left_key() < current->right_key()) {
+      return false;  // Adjacent partitions overlap.
+    }
+  }
+  return seen_text;
+}
+
+// Returns a new ColPartitionSet built from shallow copies of the text
+// partitions in this; non-text partitions are never copied. With good_only
+// set, a text partition is copied only if it has good width or is a good
+// column. Returns nullptr when nothing was copied.
+ColPartitionSet* ColPartitionSet::Copy(bool good_only) {
+  ColPartition_LIST copied;
+  ColPartition_IT from_it(&parts_);
+  ColPartition_IT to_it(&copied);
+  for (from_it.mark_cycle_pt(); !from_it.cycled_list(); from_it.forward()) {
+    ColPartition* original = from_it.data();
+    if (!BLOBNBOX::IsTextType(original->blob_type())) {
+      continue;
+    }
+    if (good_only && !original->good_width() && !original->good_column()) {
+      continue;
+    }
+    to_it.add_after_then_move(original->ShallowCopy());
+  }
+  if (to_it.empty()) {
+    return nullptr;
+  }
+  return new ColPartitionSet(&copied);
+}
+
+// Appends one new ColSegment per partition to the end of |segments|, each
+// holding a box that spans y_bottom..y_top vertically. The box's left edge
+// is the partition's left edge evaluated at y_top and its right edge is
+// the partition's right edge evaluated at y_bottom.
+void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top,
+                                     ColSegment_LIST *segments) {
+  ColPartition_IT part_it(&parts_);
+  ColSegment_IT seg_it(segments);
+  seg_it.move_to_last();
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    ColPartition* column = part_it.data();
+    const ICOORD lower_left(column->LeftAtY(y_top), y_bottom);
+    const ICOORD upper_right(column->RightAtY(y_bottom), y_top);
+    ColSegment* segment = new ColSegment();
+    segment->InsertBox(TBOX(lower_left, upper_right));
+    seg_it.add_after_then_move(segment);
+  }
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Draws two lines per partition in |win|: its left edge and its right
+// edge, each running from y_top down to y_bottom.
+void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top,
+                                         ScrollView* win) {
+  ColPartition_IT part_it(&parts_);
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    ColPartition* column = part_it.data();
+    // Left edge.
+    win->Line(column->LeftAtY(y_top), y_top,
+              column->LeftAtY(y_bottom), y_bottom);
+    // Right edge.
+    win->Line(column->RightAtY(y_top), y_top,
+              column->RightAtY(y_bottom), y_bottom);
+  }
+}
+
+#endif // !GRAPHICS_DISABLED
+
+// Return the ColumnSpanningType that best explains the columns overlapped
+// by the given coords(left,right,y), with the given margins.
+// Also return the first and last column index touched by the coords and
+// the leftmost spanned column.
+// Column indices are 2n + 1 for real columns (0 based) and even values
+// represent the gaps in between columns, with 0 being left of the leftmost.
+// resolution refers to the ppi resolution of the image.
+// height acts as a tolerance at the outer columns: in the first column the
+// left end is also tried at left + height, and in the last column the right
+// end is also tried at right - height.
+// Results:
+//  CST_FLOWING - both ends fall in the same column.
+//  CST_NOISE   - first and last index coincide and the span is narrower
+//                than kMinColumnWidth * resolution.
+//  CST_HEADING - the margins reach the outer edge of more than one column,
+//                or of the only column in a single-column set.
+//  CST_PULLOUT - everything else.
+// *first_spanned_col stays at -1 if no column is spanned.
+ColumnSpanningType ColPartitionSet::SpanningType(int resolution,
+ int left, int right,
+ int height, int y,
+ int left_margin,
+ int right_margin,
+ int* first_col,
+ int* last_col,
+ int* first_spanned_col) {
+ *first_col = -1;
+ *last_col = -1;
+ *first_spanned_col = -1;
+ // Number of end columns whose outer edge is reached by the margins.
+ int margin_columns = 0;
+ ColPartition_IT it(&parts_);
+ // Odd indices are columns and even indices are gaps, hence the step of 2.
+ int col_index = 1;
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), col_index += 2) {
+ ColPartition* part = it.data();
+ if (part->ColumnContains(left, y) ||
+ (it.at_first() && part->ColumnContains(left + height, y))) {
+ // In the default case, first_col is set, but columns_spanned remains
+ // zero, so first_col will get reset in the first column genuinely
+ // spanned, but we can tell the difference from a noise partition
+ // that touches no column.
+ *first_col = col_index;
+ if (part->ColumnContains(right, y) ||
+ (it.at_last() && part->ColumnContains(right - height, y))) {
+ // Both within a single column.
+ *last_col = col_index;
+ return CST_FLOWING;
+ }
+ if (left_margin <= part->LeftAtY(y)) {
+ // It completely spans this column.
+ *first_spanned_col = col_index;
+ margin_columns = 1;
+ }
+ } else if (part->ColumnContains(right, y) ||
+ (it.at_last() && part->ColumnContains(right - height, y))) {
+ // The right end lies in this column, so the scan finishes here.
+ if (*first_col < 0) {
+ // It started in-between.
+ *first_col = col_index - 1;
+ }
+ if (right_margin >= part->RightAtY(y)) {
+ // It completely spans this column.
+ if (margin_columns == 0)
+ *first_spanned_col = col_index;
+ ++margin_columns;
+ }
+ *last_col = col_index;
+ break;
+ } else if (left < part->LeftAtY(y) && right > part->RightAtY(y)) {
+ // Neither left nor right are contained within, so it spans this
+ // column.
+ if (*first_col < 0) {
+ // It started in between the previous column and the current column.
+ *first_col = col_index - 1;
+ }
+ if (margin_columns == 0)
+ *first_spanned_col = col_index;
+ *last_col = col_index;
+ } else if (right < part->LeftAtY(y)) {
+ // We have gone past the end.
+ *last_col = col_index - 1;
+ if (*first_col < 0) {
+ // It must lie completely between columns =>noise.
+ *first_col = col_index - 1;
+ }
+ break;
+ }
+ }
+ // If the loop ran off the end of the list, col_index is one step past the
+ // last column, so col_index - 1 names the gap right of the last column.
+ if (*first_col < 0)
+ *first_col = col_index - 1; // The last in-between.
+ if (*last_col < 0)
+ *last_col = col_index - 1; // The last in-between.
+ ASSERT_HOST(*first_col >= 0 && *last_col >= 0);
+ ASSERT_HOST(*first_col <= *last_col);
+ if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) {
+ // Neither end was in a column, and it didn't span any, so it lies
+ // entirely between columns, therefore noise.
+ return CST_NOISE;
+ } else if (margin_columns <= 1) {
+ // An exception for headings that stick outside of single-column text.
+ if (margin_columns == 1 && parts_.singleton()) {
+ return CST_HEADING;
+ }
+ // It is a pullout, as left and right were not in the same column, but
+ // it doesn't go to the edge of its start and end.
+ return CST_PULLOUT;
+ }
+ // Its margins went to the edges of first and last columns => heading.
+ return CST_HEADING;
+}
+
+// The column_set has changed. Close down all in-progress WorkingPartSets in
+// columns that do not match and start new ones for the new columns in this.
+// As ColPartitions are turned into BLOCKs, the used ones are put in
+// used_parts, as they still need to be referenced in the grid.
+// bleft, tright and resolution are passed straight through to
+// ExtractCompletedBlocks for each working set that is closed down.
+// On return working_set_list has been rebuilt: for every column of this it
+// holds a between-column set (column == nullptr) followed by that column's
+// set, plus one more between-column set at the end.
+void ColPartitionSet::ChangeWorkColumns(const ICOORD& bleft,
+ const ICOORD& tright,
+ int resolution,
+ ColPartition_LIST* used_parts,
+ WorkingPartSet_LIST* working_set_list) {
+ // Move the input list to a temporary location so we can delete its elements
+ // as we add them to the output working_set.
+ WorkingPartSet_LIST work_src;
+ WorkingPartSet_IT src_it(&work_src);
+ src_it.add_list_after(working_set_list);
+ src_it.move_to_first();
+ WorkingPartSet_IT dest_it(working_set_list);
+ // Completed blocks and to_blocks are accumulated and given to the first new
+ // one whenever we keep a column, or at the end.
+ BLOCK_LIST completed_blocks;
+ TO_BLOCK_LIST to_blocks;
+ // First set created since the last kept column; it receives the blocks
+ // accumulated above.
+ WorkingPartSet* first_new_set = nullptr;
+ WorkingPartSet* working_set = nullptr;
+ ColPartition_IT col_it(&parts_);
+ for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
+ ColPartition* column = col_it.data();
+ // Any existing column to the left of column is completed.
+ // Old between-column sets (column() == nullptr) are completed as well.
+ while (!src_it.empty() &&
+ ((working_set = src_it.data())->column() == nullptr ||
+ working_set->column()->right_key() <= column->left_key())) {
+ src_it.extract();
+ working_set->ExtractCompletedBlocks(bleft, tright, resolution,
+ used_parts, &completed_blocks,
+ &to_blocks);
+ delete working_set;
+ src_it.forward();
+ }
+ // Make a new between-column WorkingSet for before the current column.
+ working_set = new WorkingPartSet(nullptr);
+ dest_it.add_after_then_move(working_set);
+ if (first_new_set == nullptr)
+ first_new_set = working_set;
+ // A matching column gets to stay, and first_new_set gets all the
+ // completed_sets.
+ // The loop above only stops on a set with a non-null column(), so the
+ // dereference of column() below is safe whenever src_it is not empty.
+ working_set = src_it.empty() ? nullptr : src_it.data();
+ if (working_set != nullptr &&
+ working_set->column()->MatchingColumns(*column)) {
+ working_set->set_column(column);
+ dest_it.add_after_then_move(src_it.extract());
+ src_it.forward();
+ first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
+ // Start a fresh run of new sets after the kept column.
+ first_new_set = nullptr;
+ } else {
+ // Just make a new working set for the current column.
+ working_set = new WorkingPartSet(column);
+ dest_it.add_after_then_move(working_set);
+ }
+ }
+ // Complete any remaining src working sets.
+ while (!src_it.empty()) {
+ working_set = src_it.extract();
+ working_set->ExtractCompletedBlocks(bleft, tright, resolution,
+ used_parts, &completed_blocks,
+ &to_blocks);
+ delete working_set;
+ src_it.forward();
+ }
+ // Make a new between-column WorkingSet for after the last column.
+ working_set = new WorkingPartSet(nullptr);
+ dest_it.add_after_then_move(working_set);
+ if (first_new_set == nullptr)
+ first_new_set = working_set;
+ // The first_new_set now gets any accumulated completed_parts/blocks.
+ first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
+}
+
+// Adds this set's statistics to the caller's running totals; nothing is
+// reset here. Each partition adds its ColumnWidth() to *total_width and
+// one to *width_samples. Each adjacent pair adds the key width between
+// the left partition's right key and the right partition's left key to
+// *total_gap, and one to *gap_samples.
+void ColPartitionSet::AccumulateColumnWidthsAndGaps(int* total_width,
+                                                    int* width_samples,
+                                                    int* total_gap,
+                                                    int* gap_samples) {
+  ColPartition_IT part_it(&parts_);
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    ColPartition* column = part_it.data();
+    *total_width += column->ColumnWidth();
+    ++*width_samples;
+    if (part_it.at_last()) {
+      continue;  // The last column has no gap to its right.
+    }
+    ColPartition* right_neighbour = part_it.data_relative(1);
+    const int gap_start = column->right_key();
+    const int gap_end = right_neighbour->left_key();
+    *total_gap += column->KeyWidth(gap_start, gap_end);
+    ++*gap_samples;
+  }
+}
+
+// Debug dump: prints one summary line (part count, good column count,
+// good+bad coverage and the bounding box corners) followed by the Print()
+// output of every partition in list order.
+void ColPartitionSet::Print() {
+  ColPartition_IT part_it(&parts_);
+  const TBOX& box = bounding_box_;
+  tprintf("Partition set of %d parts, %d good, coverage=%d+%d"
+          " (%d,%d)->(%d,%d)\n",
+          part_it.length(), good_column_count_, good_coverage_, bad_coverage_,
+          box.left(), box.bottom(),
+          box.right(), box.top());
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    part_it.data()->Print();
+  }
+}
+
+// PRIVATE CODE.
+
+// Inserts new_part next to the element that |it| currently points at,
+// without moving |it|, after folding its coverage and box into the
+// totals. new_part goes before the current element when it ends
+// (right_key) at or before the current element's left_key, and after it
+// otherwise.
+void ColPartitionSet::AddPartition(ColPartition* new_part,
+                                   ColPartition_IT* it) {
+  AddPartitionCoverageAndBox(*new_part);
+  const int new_right = new_part->right_key();
+  const bool goes_after = it->data()->left_key() < new_right;
+  if (goes_after) {
+    it->add_after_stay_put(new_part);
+  } else {
+    it->add_before_stay_put(new_part);
+  }
+}
+
+// Compute the coverage and good column count. Coverage is the amount of the
+// width of the page (in pixels) that is covered by ColPartitions, which are
+// used to provide candidate column layouts.
+// Coverage is split into good and bad. Good coverage is provided by
+// ColPartitions of a frequent width (according to the callback function
+// provided by TabFinder::WidthCB, which accesses stored statistics on the
+// widths of ColPartitions) and bad coverage is provided by all other
+// ColPartitions, even if they have tab vectors at both sides. Thus:
+// |-----------------------------------------------------------------|
+// | Double width heading |
+// |-----------------------------------------------------------------|
+// |-------------------------------| |-------------------------------|
+// | Common width ColPartition | | Common width ColPartition |
+// |-------------------------------| |-------------------------------|
+// the layout with two common-width columns has better coverage than the
+// double width heading, because the coverage is "good," even though less in
+// total coverage than the heading, because the heading coverage is "bad."
+void ColPartitionSet::ComputeCoverage() {
+  // Start from a clean slate: empty box, zero coverage, zero good columns.
+  bounding_box_ = TBOX();
+  good_coverage_ = 0;
+  bad_coverage_ = 0;
+  good_column_count_ = 0;
+  // Fold every partition currently in the list into the totals.
+  ColPartition_IT part_it(&parts_);
+  for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) {
+    AddPartitionCoverageAndBox(*part_it.data());
+  }
+}
+
+// Folds one partition into the running statistics without touching the
+// list itself. The bounding box always grows to include the partition.
+// A good-width partition adds its column width to good_coverage_ and 2 to
+// good_column_count_. Any other partition adds its column width (halved
+// for image types, i.e. blob_type() < BRT_UNKNOWN) to bad_coverage_, and 1
+// to good_column_count_ if it is a good column.
+void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition& part) {
+  bounding_box_ += part.bounding_box();
+  const int width = part.ColumnWidth();
+  if (part.good_width()) {
+    good_coverage_ += width;
+    good_column_count_ += 2;
+    return;
+  }
+  const bool is_image = part.blob_type() < BRT_UNKNOWN;
+  if (part.good_column()) {
+    ++good_column_count_;
+  }
+  bad_coverage_ += is_image ? width / 2 : width;
+}
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/colpartitionset.h b/tesseract/src/textord/colpartitionset.h
new file mode 100644
index 00000000..57b61b34
--- /dev/null
+++ b/tesseract/src/textord/colpartitionset.h
@@ -0,0 +1,171 @@
+///////////////////////////////////////////////////////////////////////
+// File: colpartitionset.h
+// Description: Class to hold a list of ColPartitions of the page that
+// correspond roughly to columns.
+// Author: Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_COLPARTITIONSET_H_
+#define TESSERACT_TEXTORD_COLPARTITIONSET_H_
+
+#include "colpartition.h" // For ColPartition_LIST.
+#include "genericvector.h" // For GenericVector.
+#include "rect.h" // For TBOX.
+#include "tabvector.h" // For BLOBNBOX_CLIST.
+
+namespace tesseract {
+
+class WorkingPartSet_LIST;
+class ColSegment_LIST;
+class ColPartitionSet;
+using PartSetVector = GenericVector<ColPartitionSet*>;
+
+// ColPartitionSet is a class that holds a list of ColPartitions.
+// Its main use is in holding a candidate partitioning of the width of the
+// image into columns, where each member ColPartition is a single column.
+// ColPartitionSets are used in building the column layout of a page.
+class ColPartitionSet : public ELIST_LINK {
+ public:
+  // A default-constructed set is empty; the coverage statistics below carry
+  // default member initializers so they start at zero instead of being
+  // left indeterminate.
+  ColPartitionSet() = default;
+  explicit ColPartitionSet(ColPartition_LIST* partitions);
+  explicit ColPartitionSet(ColPartition* partition);
+
+  ~ColPartitionSet() = default;
+
+  // Simple accessors.
+  const TBOX& bounding_box() const {
+    return bounding_box_;
+  }
+  bool Empty() const {
+    return parts_.empty();
+  }
+  int ColumnCount() const {
+    return parts_.length();
+  }
+
+  // Returns the number of columns of good width.
+  int GoodColumnCount() const;
+
+  // Return an element of the parts_ list from its index.
+  ColPartition* GetColumnByIndex(int index);
+
+  // Return the ColPartition that contains the given coords, if any, else
+  // nullptr.
+  ColPartition* ColumnContaining(int x, int y);
+
+  // Return the bounding boxes of columns at the given y-range
+  void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments);
+
+  // Extract all the parts from the list, relinquishing ownership.
+  void RelinquishParts();
+
+  // Attempt to improve this by adding partitions or expanding partitions.
+  void ImproveColumnCandidate(WidthCallback cb, PartSetVector* src_sets);
+
+  // If this set is good enough to represent a new partitioning into columns,
+  // add it to the vector of sets, otherwise delete it.
+  void AddToColumnSetsIfUnique(PartSetVector* column_sets, WidthCallback cb);
+
+  // Return true if the partitions in other are all compatible with the columns
+  // in this.
+  bool CompatibleColumns(bool debug, ColPartitionSet* other, WidthCallback cb);
+
+  // Returns the total width of all blobs in the part_set that do not lie
+  // within an approved column. Used as a cost measure for using this
+  // column set over another that might be compatible.
+  int UnmatchedWidth(ColPartitionSet* part_set);
+
+  // Return true if this ColPartitionSet makes a legal column candidate by
+  // having legal individual partitions and non-overlapping adjacent pairs.
+  bool LegalColumnCandidate();
+
+  // Return a copy of this. If good_only will only copy the Good ColPartitions.
+  ColPartitionSet* Copy(bool good_only);
+
+  // Display the edges of the columns at the given y coords.
+  void DisplayColumnEdges(int y_bottom, int y_top, ScrollView* win);
+
+  // Return the ColumnSpanningType that best explains the columns overlapped
+  // by the given coords(left,right,y), with the given margins.
+  // Also return the first and last column index touched by the coords and
+  // the leftmost spanned column.
+  // Column indices are 2n + 1 for real columns (0 based) and even values
+  // represent the gaps in between columns, with 0 being left of the leftmost.
+  // resolution refers to the ppi resolution of the image. It may be 0 if only
+  // the first_col and last_col are required.
+  ColumnSpanningType SpanningType(int resolution,
+                                  int left, int right, int height, int y,
+                                  int left_margin, int right_margin,
+                                  int* first_col, int* last_col,
+                                  int* first_spanned_col);
+
+  // The column_set has changed. Close down all in-progress WorkingPartSets in
+  // columns that do not match and start new ones for the new columns in this.
+  // As ColPartitions are turned into BLOCKs, the used ones are put in
+  // used_parts, as they still need to be referenced in the grid.
+  void ChangeWorkColumns(const ICOORD& bleft, const ICOORD& tright,
+                         int resolution, ColPartition_LIST* used_parts,
+                         WorkingPartSet_LIST* working_set);
+
+  // Accumulate the widths and gaps into the given variables.
+  void AccumulateColumnWidthsAndGaps(int* total_width, int* width_samples,
+                                     int* total_gap, int* gap_samples);
+
+  // Provide debug output for this ColPartitionSet and all the ColPartitions.
+  void Print();
+
+ private:
+  // Add the given partition to the list in the appropriate place.
+  void AddPartition(ColPartition* new_part, ColPartition_IT* it);
+
+  // Compute the coverage and good column count. Coverage is the amount of the
+  // width of the page (in pixels) that is covered by ColPartitions, which are
+  // used to provide candidate column layouts.
+  // Coverage is split into good and bad. Good coverage is provided by
+  // ColPartitions of a frequent width (according to the callback function
+  // provided by TabFinder::WidthCB, which accesses stored statistics on the
+  // widths of ColPartitions) and bad coverage is provided by all other
+  // ColPartitions, even if they have tab vectors at both sides. Thus:
+  // |-----------------------------------------------------------------|
+  // | Double width heading |
+  // |-----------------------------------------------------------------|
+  // |-------------------------------| |-------------------------------|
+  // | Common width ColPartition | | Common width ColPartition |
+  // |-------------------------------| |-------------------------------|
+  // the layout with two common-width columns has better coverage than the
+  // double width heading, because the coverage is "good," even though less in
+  // total coverage than the heading, because the heading coverage is "bad."
+  void ComputeCoverage();
+
+  // Adds the coverage, column count and box for a single partition,
+  // without adding it to the list. (Helper factored from ComputeCoverage.)
+  void AddPartitionCoverageAndBox(const ColPartition& part);
+
+  // The partitions in this column candidate.
+  ColPartition_LIST parts_;
+  // Tally of good columns. Zero until coverage is computed.
+  int good_column_count_ = 0;
+  // Total width of all the good ColPartitions.
+  int good_coverage_ = 0;
+  // Total width of all the bad ColPartitions.
+  int bad_coverage_ = 0;
+  // Bounding box of all partitions in the set.
+  TBOX bounding_box_;
+};
+
+ELISTIZEH(ColPartitionSet)
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_COLPARTITIONSET_H_
diff --git a/tesseract/src/textord/devanagari_processing.cpp b/tesseract/src/textord/devanagari_processing.cpp
new file mode 100644
index 00000000..2ea0d942
--- /dev/null
+++ b/tesseract/src/textord/devanagari_processing.cpp
@@ -0,0 +1,502 @@
+/**********************************************************************
+ * File: devanagari_processing.cpp
+ * Description: Methods to process images containing devanagari symbols,
+ * prior to classification.
+ * Author: Shobhit Saxena
+ * Created: Mon Nov 17 20:26:01 IST 2008
+ *
+ * (C) Copyright 2008, Google Inc.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "devanagari_processing.h"
+
+#include "debugpixa.h"
+#include "statistc.h"
+#include "tordmain.h"
+
+#include "allheaders.h"
+
+namespace tesseract {
+
+// Flags controlling the debugging information for shiro-rekha splitting
+// strategies.
+INT_VAR(devanagari_split_debuglevel, 0,
+ "Debug level for split shiro-rekha process.");
+
+BOOL_VAR(devanagari_split_debugimage, 0,
+ "Whether to create a debug image for split shiro-rekha process.");
+
+// Starts with no images, no segmentation, no xheight estimate and both
+// split strategies disabled.
+ShiroRekhaSplitter::ShiroRekhaSplitter()
+    : orig_pix_(nullptr),
+      splitted_image_(nullptr),
+      pageseg_split_strategy_(NO_SPLIT),
+      ocr_split_strategy_(NO_SPLIT),
+      debug_image_(nullptr),
+      segmentation_block_list_(nullptr),
+      global_xheight_(kUnspecifiedXheight),
+      perform_close_(false) {
+}
+
+// Destruction simply delegates to Clear(), which destroys the held images.
+ShiroRekhaSplitter::~ShiroRekhaSplitter() {
+  Clear();
+}
+
+// Destroys the three images held by this object and puts every setting back
+// to the value the constructor gives it.
+void ShiroRekhaSplitter::Clear() {
+  // Images first.
+  pixDestroy(&orig_pix_);
+  pixDestroy(&splitted_image_);
+  pixDestroy(&debug_image_);
+  // Then the plain settings. The block list pointer is only dropped here,
+  // never deleted.
+  segmentation_block_list_ = nullptr;
+  pageseg_split_strategy_ = NO_SPLIT;
+  ocr_split_strategy_ = NO_SPLIT;
+  global_xheight_ = kUnspecifiedXheight;
+  perform_close_ = false;
+}
+
+// On setting the input image, a clone of it is owned by this class.
+// The clone of the new image is taken BEFORE the previously held clone is
+// released, so that calling set_orig_pix(orig_pix()) with the image that is
+// already held cannot destroy it before it is cloned again.
+void ShiroRekhaSplitter::set_orig_pix(Pix* pix) {
+  Pix* new_clone = pixClone(pix);
+  // NOTE(review): relies on pixDestroy tolerating a pointer that is already
+  // nullptr (the original code guarded it) - confirm against Leptonica.
+  pixDestroy(&orig_pix_);
+  orig_pix_ = new_clone;
+}
+
+// Top-level method to perform splitting based on current settings.
+// Returns true if a split was actually performed.
+// split_for_pageseg should be true if the splitting is being done prior to
+// page segmentation. This mode uses the flag
+// pageseg_devanagari_split_strategy to determine the splitting strategy.
+// Note: once the selected strategy is not NO_SPLIT this function always
+// returns true, even when no region ended up being cleared.
+// The result is left in splitted_image_, which is rebuilt from orig_pix_ on
+// every call. pixa_debug may be nullptr.
+bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa* pixa_debug) {
+ SplitStrategy split_strategy = split_for_pageseg ? pageseg_split_strategy_ :
+ ocr_split_strategy_;
+ if (split_strategy == NO_SPLIT) {
+ return false; // Nothing to do.
+ }
+ ASSERT_HOST(split_strategy == MINIMAL_SPLIT ||
+ split_strategy == MAXIMAL_SPLIT);
+ ASSERT_HOST(orig_pix_);
+ if (devanagari_split_debuglevel > 0) {
+ tprintf("Splitting shiro-rekha ...\n");
+ tprintf("Split strategy = %s\n",
+ split_strategy == MINIMAL_SPLIT ? "Minimal" : "Maximal");
+ tprintf("Initial pageseg available = %s\n",
+ segmentation_block_list_ ? "yes" : "no");
+ }
+ // Create a copy of original image to store the splitting output.
+ pixDestroy(&splitted_image_);
+ splitted_image_ = pixCopy(nullptr, orig_pix_);
+
+ // Initialize debug image if required.
+ if (devanagari_split_debugimage) {
+ pixDestroy(&debug_image_);
+ debug_image_ = pixConvertTo32(orig_pix_);
+ }
+
+ // Determine all connected components in the input image. A close operation
+ // may be required prior to this, depending on the current settings.
+ Pix* pix_for_ccs = pixClone(orig_pix_);
+ if (perform_close_ && global_xheight_ != kUnspecifiedXheight &&
+ !segmentation_block_list_) {
+ if (devanagari_split_debuglevel > 0) {
+ tprintf("Performing a global close operation..\n");
+ }
+ // A global measure is available for xheight, but no local information
+ // exists.
+ // The close is applied to a private copy so that orig_pix_ is untouched.
+ pixDestroy(&pix_for_ccs);
+ pix_for_ccs = pixCopy(nullptr, orig_pix_);
+ PerformClose(pix_for_ccs, global_xheight_);
+ }
+ // NOTE(review): ccs is not initialized here; the null check below assumes
+ // pixConnComp always writes it - confirm against Leptonica.
+ Pixa* ccs;
+ Boxa* tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8);
+ boxaDestroy(&tmp_boxa);
+ pixDestroy(&pix_for_ccs);
+
+ // Iterate over all connected components. Get their bounding boxes and clip
+ // out the image regions corresponding to these boxes from the original image.
+ // Conditionally run splitting on each of them.
+ // The boxes to erase are only collected in this loop; splitted_image_ is
+ // modified afterwards in one pass.
+ Boxa* regions_to_clear = boxaCreate(0);
+ int num_ccs = 0;
+ if (ccs != nullptr) num_ccs = pixaGetCount(ccs);
+ for (int i = 0; i < num_ccs; ++i) {
+ // Direct access to the Pixa internals: box is not a new reference and is
+ // therefore not destroyed in this loop.
+ Box* box = ccs->boxa->box[i];
+ Pix* word_pix = pixClipRectangle(orig_pix_, box, nullptr);
+ ASSERT_HOST(word_pix);
+ int xheight = GetXheightForCC(box);
+ // With a segmentation available, a CC that matched no row is outlined in
+ // the debug image.
+ if (xheight == kUnspecifiedXheight && segmentation_block_list_ &&
+ devanagari_split_debugimage) {
+ pixRenderBoxArb(debug_image_, box, 1, 255, 0, 0);
+ }
+ // If some xheight measure is available, attempt to pre-eliminate small
+ // blobs from the shiro-rekha process. This is primarily to save the CCs
+ // corresponding to punctuation marks/small dots etc which are part of
+ // larger graphemes.
+ if (xheight == kUnspecifiedXheight ||
+ (box->w > xheight / 3 && box->h > xheight / 2)) {
+ SplitWordShiroRekha(split_strategy, word_pix, xheight,
+ box->x, box->y, regions_to_clear);
+ } else if (devanagari_split_debuglevel > 0) {
+ tprintf("CC dropped from splitting: %d,%d (%d, %d)\n",
+ box->x, box->y, box->w, box->h);
+ }
+ pixDestroy(&word_pix);
+ }
+ // Actually clear the boxes now.
+ for (int i = 0; i < boxaGetCount(regions_to_clear); ++i) {
+ Box* box = boxaGetBox(regions_to_clear, i, L_CLONE);
+ pixClearInRect(splitted_image_, box);
+ boxDestroy(&box);
+ }
+ boxaDestroy(&regions_to_clear);
+ pixaDestroy(&ccs);
+ // NOTE(review): debug_image_ is handed to AddPix but stays a member of this
+ // object - confirm whether AddPix copies it or takes ownership.
+ if (devanagari_split_debugimage && pixa_debug != nullptr) {
+ pixa_debug->AddPix(debug_image_,
+ split_for_pageseg ? "pageseg_split" : "ocr_split");
+ }
+ return true;
+}
+
+// Applies an in-place brick close to pix. The brick dimensions passed to
+// pixCloseBrick are xheight_estimate / 8 and xheight_estimate / 3 (integer
+// division).
+void ShiroRekhaSplitter::PerformClose(Pix* pix, int xheight_estimate) {
+  const int first_dim = xheight_estimate / 8;
+  const int second_dim = xheight_estimate / 3;
+  pixCloseBrick(pix, pix, first_dim, second_dim);
+}
+
+// Finds the text row that a connected-component box belongs to and returns
+// that row's xheight. Without a segmentation block list the global xheight
+// is returned; if no row claims the box, kUnspecifiedXheight is returned.
+int ShiroRekhaSplitter::GetXheightForCC(Box* cc_bbox) {
+  if (!segmentation_block_list_) {
+    return global_xheight_;
+  }
+  // Express the box in Tesseract's coordinate system by flipping y against
+  // the height of the original image.
+  const auto image_height = pixGetHeight(orig_pix_);
+  TBOX bbox(cc_bbox->x,
+            image_height - cc_bbox->y - cc_bbox->h - 1,
+            cc_bbox->x + cc_bbox->w,
+            image_height - cc_bbox->y - 1);
+  // Visit every row of every block.
+  BLOCK_IT block_it(segmentation_block_list_);
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
+    ROW_IT row_it(block_it.data()->row_list());
+    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+      ROW* row = row_it.data();
+      if (row->bounding_box().major_overlap(bbox)) {
+        // The row may be skewed or warped, so evaluate its baseline at the
+        // horizontal middle of the box. Build a square probe box of side
+        // xheight sitting on the baseline there; if the input box has a
+        // major overlap with the probe, it belongs to this row.
+        float mid_x = 0.5 * (bbox.left() + bbox.right());
+        int base_y = static_cast<int>(row->base_line(mid_x) + 0.5);
+        TBOX probe(mid_x - row->x_height() / 2,
+                   base_y,
+                   mid_x + row->x_height() / 2,
+                   static_cast<int>(base_y + row->x_height()));
+        if (bbox.major_overlap(probe)) {
+          return row->x_height();
+        }
+      }
+    }
+  }
+  // No row matched this box.
+  return kUnspecifiedXheight;
+}
+
+// Returns a list of regions (boxes) which should be cleared in the original
+// image so as to perform shiro-rekha splitting. Pix is assumed to carry one
+// (or less) word only. Xheight measure could be the global estimate, the row
+// estimate, or unspecified. If unspecified, over splitting may occur, since a
+// conservative estimate of stroke width along with an associated multiplier
+// is used in its place. It is advisable to have a specified xheight when
+// splitting for classification/training.
+// A vertical projection histogram of all the on-pixels in the input pix is
+// computed. The maxima of this histogram is regarded as an approximate location
+// of the shiro-rekha. By descending on the maxima's peak on both sides,
+// stroke width of shiro-rekha is estimated.
+// A horizontal projection histogram is computed for a sub-image of the input
+// image, which extends from just below the shiro-rekha down to a certain
+// leeway. The leeway depends on the input xheight, if provided, else a
+// conservative multiplier on approximate stroke width is used (which may lead
+// to over-splitting).
+// pix is the clipped image of one connected component; word_left/word_top
+// are added to the local split coordinates before a box is appended to
+// regions_to_clear. Nothing is erased here: the function only appends boxes.
+void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy,
+ Pix* pix,
+ int xheight,
+ int word_left,
+ int word_top,
+ Boxa* regions_to_clear) {
+ if (split_strategy == NO_SPLIT) {
+ return;
+ }
+ int width = pixGetWidth(pix);
+ int height = pixGetHeight(pix);
+ // Statistically determine the yextents of the shiro-rekha.
+ int shirorekha_top, shirorekha_bottom, shirorekha_ylevel;
+ GetShiroRekhaYExtents(pix, &shirorekha_top, &shirorekha_bottom,
+ &shirorekha_ylevel);
+ // Since the shiro rekha is also a stroke, its width is equal to the stroke
+ // width.
+ int stroke_width = shirorekha_bottom - shirorekha_top + 1;
+
+ // Some safeguards to protect CCs we do not want to be split.
+ // These are particularly useful when the word wasn't eliminated earlier
+ // because xheight information was unavailable.
+ if (shirorekha_ylevel > height / 2) {
+ // Shirorekha shouldn't be in the bottom half of the word.
+ if (devanagari_split_debuglevel > 0) {
+ tprintf("Skipping splitting CC at (%d, %d): shirorekha in lower half..\n",
+ word_left, word_top);
+ }
+ return;
+ }
+ if (stroke_width > height / 3) {
+ // Even the boldest of fonts shouldn't do this.
+ if (devanagari_split_debuglevel > 0) {
+ tprintf("Skipping splitting CC at (%d, %d): stroke width too huge..\n",
+ word_left, word_top);
+ }
+ return;
+ }
+
+ // Clear the ascender and descender regions of the word.
+ // Obtain a vertical projection histogram for the resulting image.
+ // The first box covers the shiro-rekha band itself: it starts a third of a
+ // stroke width above shirorekha_top and is 5/3 stroke widths tall.
+ // NOTE(review): box_to_clear is dereferenced below without a null check -
+ // confirm boxCreate cannot fail for these arguments.
+ Box* box_to_clear = boxCreate(0, shirorekha_top - stroke_width / 3,
+ width, 5 * stroke_width / 3);
+ Pix* word_in_xheight = pixCopy(nullptr, pix);
+ pixClearInRect(word_in_xheight, box_to_clear);
+ // Also clear any pixels which are below shirorekha_bottom + some leeway.
+ // The leeway is set to xheight if the information is available, else it is a
+ // multiplier applied to the stroke width.
+ int leeway_to_keep = stroke_width * 3;
+ if (xheight != kUnspecifiedXheight) {
+ // This is because the xheight-region typically includes the shiro-rekha
+ // inside it, i.e., the top of the xheight range corresponds to the top of
+ // shiro-rekha.
+ leeway_to_keep = xheight - stroke_width;
+ }
+ // The same Box object is reused for the second (lower) region.
+ box_to_clear->y = shirorekha_bottom + leeway_to_keep;
+ box_to_clear->h = height - box_to_clear->y;
+ pixClearInRect(word_in_xheight, box_to_clear);
+ boxDestroy(&box_to_clear);
+
+ PixelHistogram vert_hist;
+ vert_hist.ConstructVerticalCountHist(word_in_xheight);
+ pixDestroy(&word_in_xheight);
+
+ // If the number of black pixel in any column of the image is less than a
+ // fraction of the stroke width, treat it as noise / a stray mark. Perform
+ // these changes inside the vert_hist data itself, as that is used later on as
+ // a bit vector for the final split decision at every column.
+ for (int i = 0; i < width; ++i) {
+ if (vert_hist.hist()[i] <= stroke_width / 4)
+ vert_hist.hist()[i] = 0;
+ else
+ vert_hist.hist()[i] = 1;
+ }
+ // In order to split the line at any point, we make sure that the width of the
+ // gap is at least half the stroke width.
+ // i scans the columns; cur_component_width counts the occupied columns seen
+ // since the last split that was actually recorded.
+ int i = 0;
+ int cur_component_width = 0;
+ while (i < width) {
+ if (!vert_hist.hist()[i]) {
+ // j becomes the length of the run of empty columns starting at i.
+ int j = 0;
+ while (i + j < width && !vert_hist.hist()[i+j])
+ ++j;
+ if (j >= stroke_width / 2 && cur_component_width >= stroke_width / 2) {
+ // Perform a shiro-rekha split. The intervening region lies from i to
+ // i+j-1.
+ // A minimal single-pixel split makes the estimation of intra- and
+ // inter-word spacing easier during page layout analysis,
+ // whereas a maximal split may be needed for OCR, depending on
+ // how the engine was trained.
+ bool minimal_split = (split_strategy == MINIMAL_SPLIT);
+ int split_width = minimal_split ? 1 : j;
+ int split_left = minimal_split ? i + (j / 2) - (split_width / 2) : i;
+ // A minimal split is skipped when the gap touches the left or right edge
+ // of the image; a maximal split is always recorded.
+ if (!minimal_split || (i != 0 && i + j != width)) {
+ // This box_to_clear is a new local that shadows the one destroyed above.
+ Box* box_to_clear =
+ boxCreate(word_left + split_left,
+ word_top + shirorekha_top - stroke_width / 3,
+ split_width,
+ 5 * stroke_width / 3);
+ if (box_to_clear) {
+ // L_CLONE adds a reference, so the local handle is destroyed below.
+ boxaAddBox(regions_to_clear, box_to_clear, L_CLONE);
+ // Mark this in the debug image if needed.
+ if (devanagari_split_debugimage) {
+ pixRenderBoxArb(debug_image_, box_to_clear, 1, 128, 255, 128);
+ }
+ boxDestroy(&box_to_clear);
+ // The component width restarts only when a split box was recorded.
+ cur_component_width = 0;
+ }
+ }
+ }
+ i += j;
+ } else {
+ ++i;
+ ++cur_component_width;
+ }
+ }
+}
+
+// Rebuilds the words of the segmentation block list from the blobs in
+// new_blobs. A segmentation block list must have been set beforehand.
+// When the debug image is enabled and present, blobs reported as not found
+// and the blobs remaining in new_blobs are outlined on it.
+void ShiroRekhaSplitter::RefreshSegmentationWithNewBlobs(
+    C_BLOB_LIST* new_blobs) {
+  ASSERT_HOST(segmentation_block_list_);
+  if (devanagari_split_debuglevel > 0) {
+    tprintf("Before refreshing blobs:\n");
+    PrintSegmentationStats(segmentation_block_list_);
+    tprintf("New Blobs found: %d\n", new_blobs->length());
+  }
+
+  // Unmatched blobs are only collected when they are going to be drawn.
+  C_BLOB_LIST unmatched_blobs;
+  C_BLOB_LIST* unmatched_sink = nullptr;
+  if (devanagari_split_debugimage && debug_image_) {
+    unmatched_sink = &unmatched_blobs;
+  }
+  RefreshWordBlobsFromNewBlobs(segmentation_block_list_, new_blobs,
+                               unmatched_sink);
+
+  if (devanagari_split_debuglevel > 0) {
+    tprintf("After refreshing blobs:\n");
+    PrintSegmentationStats(segmentation_block_list_);
+  }
+  if (!(devanagari_split_debugimage && debug_image_)) {
+    return;
+  }
+
+  // Outline the original blobs for which no match was found.
+  C_BLOB_IT unmatched_it(&unmatched_blobs);
+  for (unmatched_it.mark_cycle_pt(); !unmatched_it.cycled_list();
+       unmatched_it.forward()) {
+    Box* outline = GetBoxForTBOX(unmatched_it.data()->bounding_box());
+    pixRenderBoxArb(debug_image_, outline, 1, 255, 0, 255);
+    boxDestroy(&outline);
+  }
+
+  // Outline whatever is still left in new_blobs.
+  C_BLOB_IT leftover_it(new_blobs);
+  for (leftover_it.mark_cycle_pt(); !leftover_it.cycled_list();
+       leftover_it.forward()) {
+    Box* outline = GetBoxForTBOX(leftover_it.data()->bounding_box());
+    pixRenderBoxArb(debug_image_, outline, 3, 0, 127, 0);
+    boxDestroy(&outline);
+  }
+}
+
+// Creates a new Box equivalent to tbox, with its y position flipped against
+// the height of the original image. The caller destroys the returned box.
+Box* ShiroRekhaSplitter::GetBoxForTBOX(const TBOX& tbox) const {
+  const auto flipped_top = pixGetHeight(orig_pix_) - tbox.top() - 1;
+  return boxCreate(tbox.left(), flipped_top, tbox.width(), tbox.height());
+}
+
+// Returns the most frequent height among the 8-connected components of pix.
+// Components smaller than 3 pixels in both width and height are ignored.
+int ShiroRekhaSplitter::GetModeHeight(Pix* pix) {
+  Boxa* cc_boxes = pixConnComp(pix, nullptr, 8);
+  STATS height_stats(0, pixGetHeight(pix));
+  height_stats.clear();
+  const int num_boxes = boxaGetCount(cc_boxes);
+  for (int idx = 0; idx < num_boxes; ++idx) {
+    Box* cc_box = boxaGetBox(cc_boxes, idx, L_CLONE);
+    const bool too_small = cc_box->h < 3 && cc_box->w < 3;
+    if (!too_small) {
+      height_stats.add(cc_box->h, 1);
+    }
+    boxDestroy(&cc_box);
+  }
+  boxaDestroy(&cc_boxes);
+  return height_stats.mode();
+}
+
+// Estimates where the shiro-rekha lies in word_pix. Each output pointer is
+// optional and is only written when non-null:
+//   shirorekha_ylevel - the row with the largest on-pixel count,
+//   shirorekha_top    - where the upward walk from that row stopped,
+//   shirorekha_bottom - where the downward walk from that row stopped.
+void ShiroRekhaSplitter::GetShiroRekhaYExtents(Pix* word_pix,
+                                               int* shirorekha_top,
+                                               int* shirorekha_bottom,
+                                               int* shirorekha_ylevel) {
+  // Per-row on-pixel counts of the word image.
+  PixelHistogram row_counts;
+  row_counts.ConstructHorizontalCountHist(word_pix);
+  // The global maximum of that histogram marks the top-line.
+  int peak_count = 0;
+  const int peak_row = row_counts.GetHistogramMaximum(&peak_count);
+
+  // Walk away from the peak in both directions for as long as the row count
+  // stays at or above 70% of the peak count.
+  const int thresh = (peak_count * 70) / 100;
+  const int* counts = row_counts.hist();
+  const int num_rows = pixGetHeight(word_pix);
+  int upper = peak_row;
+  for (; upper > 0 && counts[upper] >= thresh; --upper) {
+  }
+  int lower = peak_row;
+  for (; lower < num_rows && counts[lower] >= thresh; ++lower) {
+  }
+
+  if (shirorekha_top != nullptr) {
+    *shirorekha_top = upper;
+  }
+  if (shirorekha_bottom != nullptr) {
+    *shirorekha_bottom = lower;
+  }
+  if (shirorekha_ylevel != nullptr) {
+    *shirorekha_ylevel = peak_row;
+  }
+}
+
+// This method returns the global-maxima for the histogram, i.e. the index of
+// the first bucket holding the largest value. The frequency of the global
+// maxima is returned in count, if specified.
+// For a histogram with no buckets (never constructed, cleared, or built from
+// an image with no rows/columns) there is nothing to read: index 0 is
+// returned and *count is set to 0 instead of reading hist_[0].
+int PixelHistogram::GetHistogramMaximum(int* count) const {
+  if (hist_ == nullptr || length_ <= 0) {
+    if (count) {
+      *count = 0;
+    }
+    return 0;
+  }
+  int best_value = 0;
+  // Strict '>' keeps the earliest index when several buckets tie.
+  for (int i = 1; i < length_; ++i) {
+    if (hist_[i] > hist_[best_value]) {
+      best_value = i;
+    }
+  }
+  if (count) {
+    *count = hist_[best_value];
+  }
+  return best_value;
+}
+
+// Builds a histogram with one bucket per image column; each bucket holds
+// the number of set bits found in that column. Any previous histogram is
+// discarded first.
+void PixelHistogram::ConstructVerticalCountHist(Pix* pix) {
+  Clear();
+  const int num_cols = pixGetWidth(pix);
+  const int num_rows = pixGetHeight(pix);
+  // Value-initialization zeroes every bucket.
+  hist_ = new int[num_cols]();
+  length_ = num_cols;
+  const int words_per_line = pixGetWpl(pix);
+  l_uint32* image_data = pixGetData(pix);
+  for (int y = 0; y < num_rows; ++y) {
+    l_uint32* row = image_data + y * words_per_line;
+    for (int x = 0; x < num_cols; ++x) {
+      if (GET_DATA_BIT(row, x)) {
+        ++hist_[x];
+      }
+    }
+  }
+}
+
+// Builds a histogram with one bucket per entry of the Numa returned by
+// pixCountPixelsByRow for pix. Any previous histogram is discarded first.
+void PixelHistogram::ConstructHorizontalCountHist(Pix* pix) {
+  Clear();
+  Numa* row_counts = pixCountPixelsByRow(pix, nullptr);
+  length_ = numaGetCount(row_counts);
+  hist_ = new int[length_];
+  for (int row = 0; row < length_; ++row) {
+    // Start from 0 so the bucket is defined even if the lookup writes nothing.
+    l_int32 on_pixels = 0;
+    numaGetIValue(row_counts, row, &on_pixels);
+    hist_[row] = on_pixels;
+  }
+  numaDestroy(&row_counts);
+}
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/devanagari_processing.h b/tesseract/src/textord/devanagari_processing.h
new file mode 100644
index 00000000..cd0bfeb6
--- /dev/null
+++ b/tesseract/src/textord/devanagari_processing.h
@@ -0,0 +1,210 @@
+// Copyright 2008 Google Inc. All Rights Reserved.
+// Author: shobhitsaxena@google.com (Shobhit Saxena)
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
+#define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
+
+#include "allheaders.h"
+#include "ocrblock.h"
+#include "params.h"
+
+struct Pix;
+struct Box;
+struct Boxa;
+
+namespace tesseract {
+
+extern
+INT_VAR_H(devanagari_split_debuglevel, 0,
+ "Debug level for split shiro-rekha process.");
+
+extern
+BOOL_VAR_H(devanagari_split_debugimage, 0,
+ "Whether to create a debug image for split shiro-rekha process.");
+
+class TBOX;
+class DebugPixa;
+
+// Owns a heap-allocated array of integer counts built from a Pix, either
+// per column (ConstructVerticalCountHist) or per row
+// (ConstructHorizontalCountHist).
+// NOTE(review): the class owns a raw array but does not disable copying;
+// copying an instance would make two objects delete the same array.
+class PixelHistogram {
+ public:
+  PixelHistogram() {
+    hist_ = nullptr;
+    length_ = 0;
+  }
+
+  ~PixelHistogram() {
+    Clear();
+  }
+
+  // Releases the histogram storage. The pointer is reset to nullptr so that
+  // Clear() may be called repeatedly (the destructor calls it again) without
+  // deleting the same array twice.
+  void Clear() {
+    delete[] hist_;
+    hist_ = nullptr;
+    length_ = 0;
+  }
+
+  // Direct, mutable access to the buckets; nullptr when nothing has been
+  // constructed or after Clear().
+  int* hist() const { return hist_; }
+
+  // Number of buckets in hist().
+  int length() const {
+    return length_;
+  }
+
+  // Methods to construct histograms from images. These clear any existing data.
+  void ConstructVerticalCountHist(Pix* pix);
+  void ConstructHorizontalCountHist(Pix* pix);
+
+  // This method returns the global-maxima for the histogram. The frequency of
+  // the global maxima is returned in count, if specified.
+  int GetHistogramMaximum(int* count) const;
+
+ private:
+  int* hist_;
+  int length_;
+};
+
+// Splits the shiro-rekha (top line) of connected components in an image.
+// The object holds a clone of the input image, the image produced by the last
+// Split() call and an optional debug image; all three are destroyed by
+// Clear() and by the destructor. The segmentation block list is only
+// referenced, never owned.
+// NOTE(review): the class holds raw Pix pointers but does not disable
+// copying; a copied instance would destroy the same images twice.
+class ShiroRekhaSplitter {
+ public:
+ enum SplitStrategy {
+ NO_SPLIT = 0, // No splitting is performed for the phase.
+ MINIMAL_SPLIT, // Blobs are split minimally.
+ MAXIMAL_SPLIT // Blobs are split maximally.
+ };
+
+ ShiroRekhaSplitter();
+ virtual ~ShiroRekhaSplitter();
+
+ // Top-level method to perform splitting based on current settings.
+ // Returns false if the selected strategy is NO_SPLIT, true otherwise
+ // (also when no region needed clearing).
+ // If split_for_pageseg is true, the pageseg_split_strategy_ is used for
+ // splitting. If false, the ocr_split_strategy_ is used.
+ // pixa_debug may be nullptr; it only receives the debug image.
+ bool Split(bool split_for_pageseg, DebugPixa* pixa_debug);
+
+ // Clears the memory held by this object and resets all settings to the
+ // values set by the constructor.
+ void Clear();
+
+ // Refreshes the words in the segmentation block list by using blobs in the
+ // input blob list.
+ // The segmentation block list must be set.
+ void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs);
+
+ // Returns true if the split strategies for pageseg and ocr are different.
+ bool HasDifferentSplitStrategies() const {
+ return pageseg_split_strategy_ != ocr_split_strategy_;
+ }
+
+ // This only keeps a copy of the block list pointer. At split call, the list
+ // object should still be alive. This block list is used as a golden
+ // segmentation when performing splitting.
+ void set_segmentation_block_list(BLOCK_LIST* block_list) {
+ segmentation_block_list_ = block_list;
+ }
+
+ // Sentinel meaning "no xheight estimate is available".
+ static const int kUnspecifiedXheight = -1;
+
+ // Sets the xheight used for every CC when no segmentation block list is set.
+ void set_global_xheight(int xheight) {
+ global_xheight_ = xheight;
+ }
+
+ // Enables or disables the close operation performed before connected
+ // components are extracted (see perform_close_).
+ void set_perform_close(bool perform) {
+ perform_close_ = perform;
+ }
+
+ // Returns the image obtained from shiro-rekha splitting. The returned object
+ // is owned by this class. Callers may want to clone the returned pix to keep
+ // it alive beyond the life of ShiroRekhaSplitter object.
+ Pix* splitted_image() {
+ return splitted_image_;
+ }
+
+ // On setting the input image, a clone of it is owned by this class.
+ void set_orig_pix(Pix* pix);
+
+ // Returns the input image provided to the object. This object is owned by
+ // this class. Callers may want to clone the returned pix to work with it.
+ Pix* orig_pix() {
+ return orig_pix_;
+ }
+
+ SplitStrategy ocr_split_strategy() const {
+ return ocr_split_strategy_;
+ }
+
+ void set_ocr_split_strategy(SplitStrategy strategy) {
+ ocr_split_strategy_ = strategy;
+ }
+
+ SplitStrategy pageseg_split_strategy() const {
+ return pageseg_split_strategy_;
+ }
+
+ void set_pageseg_split_strategy(SplitStrategy strategy) {
+ pageseg_split_strategy_ = strategy;
+ }
+
+ BLOCK_LIST* segmentation_block_list() {
+ return segmentation_block_list_;
+ }
+
+ // This method returns the computed mode-height of blobs in the pix.
+ // It also prunes very small blobs from calculation. Could be used to provide
+ // a global xheight estimate for images which have the same point-size text.
+ static int GetModeHeight(Pix* pix);
+
+ private:
+ // Method to perform a close operation on the input image. The xheight
+ // estimate decides the size of sel used.
+ static void PerformClose(Pix* pix, int xheight_estimate);
+
+ // This method resolves the cc bbox to a particular row and returns the row's
+ // xheight. This uses segmentation_block_list_ if available, else just
+ // returns the global_xheight_ estimate currently set in the object.
+ int GetXheightForCC(Box* cc_bbox);
+
+ // Returns a list of regions (boxes) which should be cleared in the original
+ // image so as to perform shiro-rekha splitting. Pix is assumed to carry one
+ // (or less) word only. Xheight measure could be the global estimate, the row
+ // estimate, or unspecified. If unspecified, over splitting may occur, since a
+ // conservative estimate of stroke width along with an associated multiplier
+ // is used in its place. It is advisable to have a specified xheight when
+ // splitting for classification/training.
+ // The boxes are appended to regions_to_clear.
+ void SplitWordShiroRekha(SplitStrategy split_strategy,
+ Pix* pix,
+ int xheight,
+ int word_left,
+ int word_top,
+ Boxa* regions_to_clear);
+
+ // Returns a new box object for the corresponding TBOX, based on the original
+ // image's coordinate system.
+ Box* GetBoxForTBOX(const TBOX& tbox) const;
+
+ // This method returns y-extents of the shiro-rekha computed from the input
+ // word image.
+ static void GetShiroRekhaYExtents(Pix* word_pix,
+ int* shirorekha_top,
+ int* shirorekha_bottom,
+ int* shirorekha_ylevel);
+
+ Pix* orig_pix_; // Just a clone of the input image passed.
+ Pix* splitted_image_; // Image produced after the last splitting round. The
+ // object is owned by this class.
+ SplitStrategy pageseg_split_strategy_;
+ SplitStrategy ocr_split_strategy_;
+ Pix* debug_image_; // Debug rendering; destroyed in Clear().
+ // This block list is used as a golden segmentation when performing splitting.
+ BLOCK_LIST* segmentation_block_list_;
+ int global_xheight_; // kUnspecifiedXheight when no estimate is set.
+ bool perform_close_; // Whether a morphological close operation should be
+ // performed before CCs are run through splitting.
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
diff --git a/tesseract/src/textord/drawtord.cpp b/tesseract/src/textord/drawtord.cpp
new file mode 100644
index 00000000..3e02653a
--- /dev/null
+++ b/tesseract/src/textord/drawtord.cpp
@@ -0,0 +1,423 @@
+/**********************************************************************
+ * File: drawtord.cpp (Formerly drawto.c)
+ * Description: Draw things to do with textord.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "drawtord.h"
+
+#include "pithsync.h"
+#include "topitch.h"
+
+namespace tesseract {
+
+// Position and title passed to the ScrollView constructor in create_to_win().
+#define TO_WIN_XPOS 0 //default window pos
+#define TO_WIN_YPOS 0
+#define TO_WIN_NAME "Textord"
+ //title of window
+
+// When set (and the pitch is > 0), plot_word_decisions() also calls
+// plot_fp_cells() to draw the fixed pitch cuts of each word.
+BOOL_VAR (textord_show_fixed_cuts, false,
+"Draw fixed pitch cell boundaries");
+
+// Shared textord debug window; stays nullptr until create_to_win() is called.
+ScrollView* to_win = nullptr;
+
+#ifndef GRAPHICS_DISABLED
+
+/**********************************************************************
+ * create_to_win
+ *
+ * Return the shared textord window, constructing it on the first call.
+ * page_tr supplies the dimensions handed to the ScrollView constructor.
+ * Later calls return the existing window and ignore page_tr.
+ **********************************************************************/
+
+ScrollView* create_to_win(ICOORD page_tr) {
+  if (to_win == nullptr) {
+    to_win = new ScrollView(TO_WIN_NAME, TO_WIN_XPOS, TO_WIN_YPOS,
+                            page_tr.x() + 1, page_tr.y() + 1,
+                            page_tr.x(), page_tr.y(), true);
+  }
+  return to_win;
+}
+
+
+void close_to_win() {
+  // The window is intentionally never deleted (it is leaked) so that the
+  // user can still view its contents; it only gets an Update() call.
+  if (to_win == nullptr) {
+    return;
+  }
+  to_win->Update();
+}
+
+
+/**********************************************************************
+ * plot_box_list
+ *
+ * Plot the bounding box of every blob in the list, using the given pen
+ * colour and no brush.
+ **********************************************************************/
+
+void plot_box_list(                 //draw blob boxes
+    ScrollView* win,                //window to draw in
+    BLOBNBOX_LIST *list,            //blob list
+    ScrollView::Color body_colour   //colour to draw
+    ) {
+  win->Pen(body_colour);
+  win->Brush(ScrollView::NONE);
+
+  BLOBNBOX_IT box_it(list);
+  box_it.mark_cycle_pt();
+  while (!box_it.cycled_list()) {
+    box_it.data()->bounding_box().plot(win);
+    box_it.forward();
+  }
+}
+
+
+/**********************************************************************
+ * plot_to_row
+ *
+ * Plot the blobs of a row in to_win using the given colour, then draw
+ * the fitted line y = line_m * x + line_c from the left of the first
+ * blob to the right of the last blob, after applying the rotation.
+ * A row without blobs is reported with tprintf and nothing is drawn.
+ **********************************************************************/
+
+void plot_to_row(                   //draw a row
+    TO_ROW *row,                    //row to draw
+    ScrollView::Color colour,       //colour to draw in
+    FCOORD rotation                 //rotation for line
+    ) {
+  BLOBNBOX_IT blob_it = row->blob_list();
+  if (blob_it.empty()) {
+    tprintf("No blobs in row at %g\n", row->parallel_c());
+    return;
+  }
+
+  // Horizontal extent of the row, taken from its first and last blobs.
+  const float row_left = blob_it.data()->bounding_box().left();
+  blob_it.move_to_last();
+  const float row_right = blob_it.data()->bounding_box().right();
+
+  plot_blob_list(to_win, row->blob_list(), colour, ScrollView::BROWN);
+  to_win->Pen(colour);
+
+  // Point of the fitted line at abscissa x, rotated for display.
+  auto fitted_point = [row, &rotation](float x) {
+    FCOORD pt(x, row->line_m() * x + row->line_c());
+    pt.rotate(rotation);
+    return pt;
+  };
+
+  const FCOORD line_start = fitted_point(row_left);
+  to_win->SetCursor(line_start.x(), line_start.y());
+  const FCOORD line_end = fitted_point(row_right);
+  to_win->DrawTo(line_end.x(), line_end.y());
+}
+
+
+/**********************************************************************
+ * plot_parallel_row
+ *
+ * Plot the blobs of a row in to_win using the given colour, then draw
+ * two lines (both rotated by the given rotation):
+ *  - a marker at the left edge running from the row's max_y to min_y,
+ *  - the line y = gradient * x + parallel_c from the left edge to the
+ *    right of the last blob in the row.
+ * A row without blobs is reported with tprintf and nothing is drawn,
+ * matching plot_to_row.
+ **********************************************************************/
+
+void plot_parallel_row(             //draw a row
+    TO_ROW *row,                    //row to draw
+    float gradient,                 //gradients of lines
+    int32_t left,                   //edge of block
+    ScrollView::Color colour,       //colour to draw in
+    FCOORD rotation                 //rotation for line
+    ) {
+  FCOORD plot_pt;                   //point to plot
+                                    //blobs
+  BLOBNBOX_IT it = row->blob_list();
+  auto fleft = static_cast<float>(left);  //floating version
+  float right;                      //end of row
+
+  if (it.empty()) {
+    // There is no last blob to take the right hand end of the row from.
+    tprintf("No blobs in row at %g\n", row->parallel_c());
+    return;
+  }
+  it.move_to_last();
+  right = it.data()->bounding_box().right();
+  plot_blob_list(to_win, row->blob_list(), colour, ScrollView::BROWN);
+  to_win->Pen(colour);
+
+  // Marker at the left edge spanning max_y to min_y.
+  plot_pt = FCOORD(fleft, gradient * left + row->max_y());
+  plot_pt.rotate(rotation);
+  to_win->SetCursor(plot_pt.x(), plot_pt.y());
+  plot_pt = FCOORD(fleft, gradient * left + row->min_y());
+  plot_pt.rotate(rotation);
+  to_win->DrawTo(plot_pt.x(), plot_pt.y());
+
+  // Parallel line from the left edge to the end of the row.
+  plot_pt = FCOORD(fleft, gradient * left + row->parallel_c());
+  plot_pt.rotate(rotation);
+  to_win->SetCursor(plot_pt.x(), plot_pt.y());
+  plot_pt = FCOORD(right, gradient * right + row->parallel_c());
+  plot_pt.rotate(rotation);
+  to_win->DrawTo(plot_pt.x(), plot_pt.y());
+}
+
+
+/**********************************************************************
+ * draw_occupation
+ *
+ * Draw the row occupation profile in to_win, one point per y from min_y
+ * to max_y, with points at or above the threshold in white and points
+ * below the threshold in blue. The threshold profile is then drawn over
+ * the same range in steel blue. Both profiles are offset from xleft by
+ * one tenth of the plotted value and start at (xleft, ybottom).
+ **********************************************************************/
+
+void
+draw_occupation (                   //draw projection
+int32_t xleft,                      //edge of block
+int32_t ybottom,                    //bottom of block
+int32_t min_y,                      //coordinate limits
+int32_t max_y, int32_t occupation[],  //projection counts
+int32_t thresholds[]                //for drop out
+) {
+  const auto fleft = static_cast<float>(xleft);  //float version
+
+  // Occupation profile; the pen is only changed when the colour flips.
+  ScrollView::Color pen_colour = ScrollView::WHITE;
+  to_win->Pen(pen_colour);
+  to_win->SetCursor(fleft, static_cast<float>(ybottom));
+  for (int32_t y = min_y; y <= max_y; y++) {
+    const int32_t slot = y - min_y;
+    const ScrollView::Color wanted =
+        occupation[slot] < thresholds[slot] ? ScrollView::BLUE
+                                            : ScrollView::WHITE;
+    if (pen_colour != wanted) {
+      pen_colour = wanted;
+      to_win->Pen(pen_colour);
+    }
+    to_win->DrawTo(fleft + occupation[slot] / 10.0, static_cast<float>(y));
+  }
+
+  // Threshold profile.
+  pen_colour = ScrollView::STEEL_BLUE;
+  to_win->Pen(pen_colour);
+  to_win->SetCursor(fleft, static_cast<float>(ybottom));
+  for (int32_t y = min_y; y <= max_y; y++) {
+    to_win->DrawTo(fleft + thresholds[y - min_y] / 10.0, static_cast<float>(y));
+  }
+}
+
+
+/**********************************************************************
+ * draw_meanlines
+ *
+ * Draw the meanline of every row of the given block in the given
+ * colour. For each row this is the line
+ * y = gradient * x + parallel_c + xheight, running from the left edge
+ * to the right of the row's last blob, rotated by the given rotation.
+ * Rows without blobs are skipped because they have no right hand end.
+ **********************************************************************/
+
+void draw_meanlines(                //draw a block
+    TO_BLOCK *block,                //block to draw
+    float gradient,                 //gradients of lines
+    int32_t left,                   //edge of block
+    ScrollView::Color colour,       //colour to draw in
+    FCOORD rotation                 //rotation for line
+    ) {
+  FCOORD plot_pt;                   //point to plot
+                                    //rows
+  TO_ROW_IT row_it = block->get_rows();
+  TO_ROW *row;                      //current row
+  BLOBNBOX_IT blob_it;              //blobs
+  float right;                      //end of row
+
+  to_win->Pen(colour);
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    row = row_it.data();
+    blob_it.set_to_list(row->blob_list());
+    if (blob_it.empty()) {
+      continue;  // no last blob to take the right edge from
+    }
+    blob_it.move_to_last();
+    right = blob_it.data()->bounding_box().right();
+    plot_pt =
+        FCOORD(static_cast<float>(left),
+               gradient * left + row->parallel_c() + row->xheight);
+    plot_pt.rotate(rotation);
+    to_win->SetCursor(plot_pt.x(), plot_pt.y());
+    plot_pt =
+        FCOORD(right,
+               gradient * right + row->parallel_c() + row->xheight);
+    plot_pt.rotate(rotation);
+    to_win->DrawTo(plot_pt.x(), plot_pt.y());
+  }
+}
+
+
+/**********************************************************************
+ * plot_word_decisions
+ *
+ * Plot a row with words in different colours and fuzzy spaces
+ * highlighted.
+ *
+ * The colour advances whenever the gap before a blob exceeds
+ * row->max_nonspace. Gaps that are still below row->min_space are
+ * filled with a rectangle: GOLDENROD if above row->space_threshold,
+ * CORAL otherwise. If pitch > 0 and textord_show_fixed_cuts is set,
+ * the fixed pitch cuts of each word are drawn with plot_fp_cells.
+ **********************************************************************/
+
+void plot_word_decisions( //draw words
+ ScrollView* win, //window to draw in
+ int16_t pitch, //of block
+ TO_ROW *row //row to draw
+ ) {
+ ScrollView::Color colour = ScrollView::MAGENTA; //current colour
+ ScrollView::Color rect_colour; //fuzzy colour
+ int32_t prev_x; //end of prev blob
+ int16_t blob_count; //blobs in word
+ BLOBNBOX *blob; //current blob
+ TBOX blob_box; //bounding box
+ //iterator
+ BLOBNBOX_IT blob_it = row->blob_list ();
+ BLOBNBOX_IT start_it = blob_it;//word start
+
+ rect_colour = ScrollView::BLACK;
+ // Start far to the left so that the first blob always sees a wide gap.
+ prev_x = -INT16_MAX;
+ blob_count = 0;
+ for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
+ blob = blob_it.data ();
+ blob_box = blob->bounding_box ();
+ if (!blob->joined_to_prev ()
+ && blob_box.left () - prev_x > row->max_nonspace) {
+ // Gap is wide enough to end the word: plot the cuts of the blobs
+ // gathered so far and restart the count at the current blob.
+ if ((blob_box.left () - prev_x >= row->min_space
+ || blob_box.left () - prev_x > row->space_threshold)
+ && blob_count > 0) {
+ if (pitch > 0 && textord_show_fixed_cuts)
+ plot_fp_cells (win, colour, &start_it, pitch, blob_count,
+ &row->projection, row->projection_left,
+ row->projection_right,
+ row->xheight * textord_projection_scale);
+ blob_count = 0;
+ start_it = blob_it;
+ }
+ // Advance to the next colour, wrapping from MAGENTA back to RED.
+ if (colour == ScrollView::MAGENTA)
+ colour = ScrollView::RED;
+ else
+ colour = static_cast<ScrollView::Color>(colour + 1);
+ // Fuzzy gap: above max_nonspace but still below min_space.
+ if (blob_box.left () - prev_x < row->min_space) {
+ if (blob_box.left () - prev_x > row->space_threshold)
+ rect_colour = ScrollView::GOLDENROD;
+ else
+ rect_colour = ScrollView::CORAL;
+ //fill_color_index(win, rect_colour);
+ win->Brush(rect_colour);
+ win->Rectangle (prev_x, blob_box.bottom (),
+ blob_box.left (), blob_box.top ());
+ }
+ }
+ // Blobs joined to their predecessor neither move prev_x nor count.
+ if (!blob->joined_to_prev())
+ prev_x = blob_box.right();
+ if (blob->cblob () != nullptr)
+ blob->cblob ()->plot (win, colour, colour);
+ if (!blob->joined_to_prev() && blob->cblob() != nullptr)
+ blob_count++;
+ }
+ // Plot the cuts of the blobs still pending after the last gap.
+ if (pitch > 0 && textord_show_fixed_cuts && blob_count > 0)
+ plot_fp_cells (win, colour, &start_it, pitch, blob_count,
+ &row->projection, row->projection_left,
+ row->projection_right,
+ row->xheight * textord_projection_scale);
+}
+
+
+/**********************************************************************
+ * plot_fp_cells
+ *
+ * Make a list of fixed pitch cuts for blob_count blobs starting at
+ * blob_it and draw them as vertical lines spanning the bounding box of
+ * those blobs. Cuts flagged as faked are drawn in white; all other cuts
+ * are drawn in the given colour.
+ **********************************************************************/
+
+void plot_fp_cells(                 //draw words
+    ScrollView* win,                //window to draw in
+    ScrollView::Color colour,       //colour of lines
+    BLOBNBOX_IT *blob_it,           //blobs
+    int16_t pitch,                  //of block
+    int16_t blob_count,             //no of real blobs
+    STATS *projection,              //vertical
+    int16_t projection_left,        //edges
+    int16_t projection_right,
+    float projection_scale) {       //scale factor
+  int16_t occupation = 0;           //occupied cells
+  TBOX word_box;                    //bounding box
+  FPSEGPT_LIST seg_list;            //list of cuts
+  FPSEGPT_IT seg_it;
+  FPSEGPT *segpt;                   //current point
+
+  if (pitsync_linear_version)
+    check_pitch_sync2(blob_it, blob_count, pitch, 2, projection,
+                      projection_left, projection_right,
+                      projection_scale, occupation, &seg_list, 0, 0);
+  else
+    check_pitch_sync(blob_it, blob_count, pitch, 2, projection, &seg_list);
+
+  // Accumulate the bounding box of the blobs that make up the word.
+  word_box = blob_it->data()->bounding_box();
+  for (; blob_count > 0; blob_count--)
+    word_box += box_next(blob_it);
+
+  seg_it.set_to_list(&seg_list);
+  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
+    segpt = seg_it.data();
+    // Choose the pen per cut, leaving the colour parameter untouched so
+    // that a faked cut does not turn every later real cut white.
+    win->Pen(segpt->faked ? ScrollView::WHITE : colour);
+    win->Line(segpt->position(), word_box.bottom(),
+              segpt->position(), word_box.top());
+  }
+}
+
+
+/**********************************************************************
+ * plot_fp_cells2
+ *
+ * Draw an existing list of fixed pitch cuts as vertical lines spanning
+ * the bounding box of all the blobs in the row. Cuts flagged as faked
+ * are drawn in white; all other cuts are drawn in the given colour.
+ **********************************************************************/
+
+void plot_fp_cells2(                //draw words
+    ScrollView* win,                //window to draw in
+    ScrollView::Color colour,       //colour of lines
+    TO_ROW *row,                    //for location
+    FPSEGPT_LIST *seg_list          //segments to plot
+    ) {
+  TBOX word_box;                    //bounding box
+  FPSEGPT_IT seg_it = seg_list;
+                                    //blobs in row
+  BLOBNBOX_IT blob_it = row->blob_list();
+  FPSEGPT *segpt;                   //current point
+
+  // Accumulate the bounding box of every blob in the row.
+  word_box = blob_it.data()->bounding_box();
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();)
+    word_box += box_next(&blob_it);
+
+  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
+    segpt = seg_it.data();
+    // Choose the pen per cut, leaving the colour parameter untouched so
+    // that a faked cut does not turn every later real cut white.
+    win->Pen(segpt->faked ? ScrollView::WHITE : colour);
+    win->Line(segpt->position(), word_box.bottom(),
+              segpt->position(), word_box.top());
+  }
+}
+
+
+/**********************************************************************
+ * plot_row_cells
+ *
+ * Draw the given cells as vertical lines in the given colour. Each line
+ * is placed at the cell's x coordinate plus xshift and spans the
+ * bounding box of all the blobs in the row.
+ **********************************************************************/
+
+void plot_row_cells(                //draw words
+    ScrollView* win,                //window to draw in
+    ScrollView::Color colour,       //colour of lines
+    TO_ROW *row,                    //for location
+    float xshift,                   //amount of shift
+    ICOORDELT_LIST *cells           //cells to draw
+    ) {
+  // Bounding box of the whole row; box_next advances the iterator itself.
+  BLOBNBOX_IT blob_it = row->blob_list();
+  TBOX row_box = blob_it.data()->bounding_box();
+  blob_it.mark_cycle_pt();
+  while (!blob_it.cycled_list()) {
+    row_box += box_next(&blob_it);
+  }
+
+  win->Pen(colour);
+  ICOORDELT_IT cell_it = cells;
+  for (cell_it.mark_cycle_pt(); !cell_it.cycled_list(); cell_it.forward()) {
+    const ICOORDELT *cell = cell_it.data();
+    const auto cut_x = cell->x() + xshift;
+    win->Line(cut_x, row_box.bottom(), cut_x, row_box.top());
+  }
+}
+
+#endif // !GRAPHICS_DISABLED
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/drawtord.h b/tesseract/src/textord/drawtord.h
new file mode 100644
index 00000000..e88c4896
--- /dev/null
+++ b/tesseract/src/textord/drawtord.h
@@ -0,0 +1,103 @@
+/**********************************************************************
+ * File: drawtord.h (Formerly drawto.h)
+ * Description: Draw things to do with textord.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef DRAWTORD_H
+#define DRAWTORD_H
+
+#include "params.h"
+#include "scrollview.h"
+#include "pitsync1.h"
+#include "blobbox.h"
+
+namespace tesseract {
+
+#define NO_SMD "none"
+
+extern BOOL_VAR_H (textord_show_fixed_cuts, false,
+"Draw fixed pitch cell boundaries");
+extern STRING_VAR_H (to_debugfile, DEBUG_WIN_NAME, "Name of debugfile");
+extern STRING_VAR_H (to_smdfile, NO_SMD, "Name of SMD file");
+extern ScrollView* to_win;
+extern FILE *to_debug;
+// Creates a static display window for textord, and returns a pointer to it.
+// If the window already exists it is returned unchanged.
+ScrollView* create_to_win(ICOORD page_tr);
+void close_to_win(); // Updates the textord window, if any; it is not destroyed.
+void create_todebug_win(); //make gradients win
+void plot_box_list( //draw blob boxes
+ ScrollView* win, //window to draw in
+ BLOBNBOX_LIST *list, //blob list
+ ScrollView::Color body_colour //colour to draw
+ );
+void plot_to_row( //draw a row
+ TO_ROW *row, //row to draw
+ ScrollView::Color colour, //colour to draw in
+ FCOORD rotation //rotation for line
+ );
+void plot_parallel_row( //draw a row
+ TO_ROW *row, //row to draw
+ float gradient, //gradients of lines
+ int32_t left, //edge of block
+ ScrollView::Color colour, //colour to draw in
+ FCOORD rotation //rotation for line
+ );
+void draw_occupation ( //draw projection
+int32_t xleft, //edge of block
+int32_t ybottom, //bottom of block
+int32_t min_y, //coordinate limits
+int32_t max_y, int32_t occupation[], //projection counts
+int32_t thresholds[] //for drop out
+);
+void draw_meanlines( //draw a block
+ TO_BLOCK *block, //block to draw
+ float gradient, //gradients of lines
+ int32_t left, //edge of block
+ ScrollView::Color colour, //colour to draw in
+ FCOORD rotation //rotation for line
+ );
+void plot_word_decisions( //draw words
+ ScrollView* win, //window to draw in
+ int16_t pitch, //of block
+ TO_ROW *row //row to draw
+ );
+void plot_fp_cells( //draw words
+ ScrollView* win, //window to draw in
+ ScrollView::Color colour, //colour of lines
+ BLOBNBOX_IT *blob_it, //blobs
+ int16_t pitch, //of block
+ int16_t blob_count, //no of real blobs
+ STATS *projection, //vertical
+ int16_t projection_left, //edges
+ int16_t projection_right,
+ float projection_scale); //scale factor
+void plot_fp_cells2( //draw words
+ ScrollView* win, //window to draw in
+ ScrollView::Color colour, //colour of lines
+ TO_ROW *row, //for location
+ FPSEGPT_LIST *seg_list //segments to plot
+ );
+void plot_row_cells( //draw words
+ ScrollView* win, //window to draw in
+ ScrollView::Color colour, //colour of lines
+ TO_ROW *row, //for location
+ float xshift, //amount of shift
+ ICOORDELT_LIST *cells //cells to draw
+ );
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/edgblob.cpp b/tesseract/src/textord/edgblob.cpp
new file mode 100644
index 00000000..4383907f
--- /dev/null
+++ b/tesseract/src/textord/edgblob.cpp
@@ -0,0 +1,462 @@
+/**********************************************************************
+ * File: edgblob.cpp (Formerly edgeloop.c)
+ * Description: Functions to clean up an outline before approximation.
+ * Author: Ray Smith
+ *
+ *(C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0(the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+ // Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "edgblob.h"
+
+#include "scanedg.h"
+#include "edgloop.h"
+
+namespace tesseract {
+
+// Control parameters used in outline_complexity(), which rejects an outline
+// if any one of the 3 conditions is satisfied:
+// - number of children exceeds edges_max_children_per_outline
+// - number of nested layers exceeds edges_max_children_layers
+// - joint complexity exceeds edges_children_count_limit(as in child_count())
+// capture_children() uses outline_complexity() only when
+// edges_use_new_outline_complexity is set, and count_children() otherwise.
+static BOOL_VAR(edges_use_new_outline_complexity, false,
+ "Use the new outline complexity module");
+static INT_VAR(edges_max_children_per_outline, 10,
+ "Max number of children inside a character outline");
+static INT_VAR(edges_max_children_layers, 5,
+ "Max layers of nested children inside a character outline");
+static BOOL_VAR(edges_debug, false,
+ "turn on debugging for this module");
+
+// edges_children_per_grandchild is read by both outline_complexity() and
+// count_children(). edges_children_count_limit is the limit that
+// capture_children() passes to, and applies to the result of, either one.
+// The remaining parameters below are read only by count_children().
+static INT_VAR(edges_children_per_grandchild, 10,
+ "Importance ratio for chucking outlines");
+static INT_VAR(edges_children_count_limit, 45,
+ "Max holes allowed in blob");
+static BOOL_VAR(edges_children_fix, false,
+ "Remove boxy parents of char-like children");
+static INT_VAR(edges_min_nonhole, 12,
+ "Min pixels for potential char in box");
+static INT_VAR(edges_patharea_ratio, 40,
+ "Max lensq/area for acceptable child outline");
+static double_VAR(edges_childarea, 0.5,
+ "Min area fraction of child outline");
+static double_VAR(edges_boxarea, 0.875,
+ "Min area fraction of grandchild for box");
+
+/**
+ * @name OL_BUCKETS::OL_BUCKETS
+ *
+ * Construct an array of buckets for associating outlines into blobs.
+ * The area between the two corners is divided into squares of side
+ * BUCKETSIZE, each of which gets its own (initially empty) outline list.
+ */
+
+OL_BUCKETS::OL_BUCKETS(
+    ICOORD bleft,                   // corners
+    ICOORD tright)
+    : bl(bleft), tr(tright) {
+  const int width = tright.x() - bleft.x();
+  const int height = tright.y() - bleft.y();
+  bxdim = width / BUCKETSIZE + 1;
+  bydim = height / BUCKETSIZE + 1;
+  // One list per bucket, bxdim * bydim in total.
+  buckets.reset(new C_OUTLINE_LIST[bxdim * bydim]);
+  index = 0;
+}
+
+
+/**
+ * @name OL_BUCKETS::operator(
+ *
+ * Return a pointer to the list of C_OUTLINEs held by the bucket that
+ * covers the given pixel coordinates. The coordinates are not range
+ * checked against the corners given to the constructor.
+ */
+
+C_OUTLINE_LIST *
+OL_BUCKETS::operator()(             // array access
+int16_t x,                          // image coords
+int16_t y) {
+  const int bucket_row = (y - bl.y()) / BUCKETSIZE;
+  const int bucket_col = (x - bl.x()) / BUCKETSIZE;
+  return &buckets[bucket_row * bxdim + bucket_col];
+}
+
+
+/**
+ * @name OL_BUCKETS::outline_complexity
+ *
+ * This is the new version of count_child.
+ *
+ * The goal of this function is to determine if an outline and its
+ * interiors could be part of a character blob. This is done by
+ * computing a "complexity" index for the outline, which is the return
+ * value of this function, and checking it against a threshold.
+ * The max_count is used for short-circuiting the recursion and forcing
+ * a rejection that guarantees to fail the threshold test.
+ * The complexity F for outline X with N children X[i] is
+ *   F(X) = N + sum_i F(X[i]) * edges_children_per_grandchild
+ * so each layer of nesting increases complexity exponentially.
+ * An outline can be rejected as a text blob candidate if its complexity
+ * is too high, has too many children(likely a container), or has too
+ * many layers of nested inner loops. This has the side-effect of
+ * flattening out boxed or reversed video text regions.
+ *
+ * @param outline   parent outline whose children are searched for
+ * @param max_count threshold; any return value above it means "reject"
+ * @param depth     recursion depth of the caller (0 at the top level)
+ * @return the complexity, or a value greater than max_count as soon as
+ *         one of the rejection conditions is met
+ */
+
+int32_t OL_BUCKETS::outline_complexity(
+    C_OUTLINE *outline,             // parent outline
+    int32_t max_count,              // max output
+    int16_t depth                   // recursion depth
+    ) {
+  int16_t xmin, xmax;               // coord limits
+  int16_t ymin, ymax;
+  int16_t xindex, yindex;           // current bucket
+  C_OUTLINE *child;                 // current child
+  int32_t child_count;              // no of children
+  int32_t grandchild_count;         // no of grandchildren
+  C_OUTLINE_IT child_it;            // search iterator
+
+  // Range of buckets overlapped by the outline's bounding box.
+  TBOX olbox = outline->bounding_box();
+  xmin = (olbox.left() - bl.x()) / BUCKETSIZE;
+  xmax = (olbox.right() - bl.x()) / BUCKETSIZE;
+  ymin = (olbox.bottom() - bl.y()) / BUCKETSIZE;
+  ymax = (olbox.top() - bl.y()) / BUCKETSIZE;
+  child_count = 0;
+  grandchild_count = 0;
+  if (++depth > edges_max_children_layers)  // nested loops are too deep
+    return max_count + depth;
+
+  for (yindex = ymin; yindex <= ymax; yindex++) {
+    for (xindex = xmin; xindex <= xmax; xindex++) {
+      child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
+      if (child_it.empty())
+        continue;
+      for (child_it.mark_cycle_pt(); !child_it.cycled_list();
+           child_it.forward()) {
+        child = child_it.data();
+        // Only outlines that compare as *child < *outline are children.
+        if (child == outline || !(*child < *outline))
+          continue;
+        child_count++;
+
+        if (child_count > edges_max_children_per_outline) {  // too fragmented
+          if (edges_debug)
+            tprintf("Discard outline on child_count=%d > "
+                    "max_children_per_outline=%d\n",
+                    child_count,
+                    static_cast<int32_t>(edges_max_children_per_outline));
+          return max_count + child_count;
+        }
+
+        // Compute the "complexity" of each child recursively, giving the
+        // recursion only the budget that is still left.
+        int32_t remaining_count = max_count - child_count - grandchild_count;
+        if (remaining_count > 0)
+          grandchild_count += edges_children_per_grandchild *
+                              outline_complexity(child, remaining_count, depth);
+        if (child_count + grandchild_count > max_count) {  // too complex
+          if (edges_debug)
+            tprintf("Discard outline on child_count=%d + grandchild_count=%d "
+                    "> max_count=%d\n",
+                    child_count, grandchild_count, max_count);
+          return child_count + grandchild_count;
+        }
+      }
+    }
+  }
+  return child_count + grandchild_count;
+}
+
+
+/**
+ * @name OL_BUCKETS::count_children
+ *
+ * Find number of descendants of this outline.
+ *
+ * Children are the outlines in the buckets overlapped by the parent's
+ * bounding box that compare as *child < *outline. Each child counts as
+ * one; descendants of children are counted recursively and weighted by
+ * edges_children_per_grandchild when the remaining budget allows.
+ * The search stops early, returning a value greater than max_count, as
+ * soon as the total exceeds max_count or one of the "boxy parent"
+ * tests below fails.
+ */
+// TODO(rays) Merge with outline_complexity.
+int32_t OL_BUCKETS::count_children( // recursive count
+ C_OUTLINE *outline, // parent outline
+ int32_t max_count // max output
+ ) {
+ bool parent_box; // could it be boxy
+ int16_t xmin, xmax; // coord limits
+ int16_t ymin, ymax;
+ int16_t xindex, yindex; // current bucket
+ C_OUTLINE *child; // current child
+ int32_t child_count; // no of children
+ int32_t grandchild_count; // no of grandchildren
+ int32_t parent_area; // potential box
+ float max_parent_area; // potential box
+ int32_t child_area; // current child
+ int32_t child_length; // current child
+ TBOX olbox;
+ C_OUTLINE_IT child_it; // search iterator
+
+ // Range of buckets overlapped by the outline's bounding box.
+ olbox = outline->bounding_box();
+ xmin =(olbox.left() - bl.x()) / BUCKETSIZE;
+ xmax =(olbox.right() - bl.x()) / BUCKETSIZE;
+ ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE;
+ ymax =(olbox.top() - bl.y()) / BUCKETSIZE;
+ child_count = 0;
+ grandchild_count = 0;
+ parent_area = 0;
+ max_parent_area = 0;
+ parent_box = true;
+ for (yindex = ymin; yindex <= ymax; yindex++) {
+ for (xindex = xmin; xindex <= xmax; xindex++) {
+ child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
+ if (child_it.empty())
+ continue;
+ for (child_it.mark_cycle_pt(); !child_it.cycled_list();
+ child_it.forward()) {
+ child = child_it.data();
+ if (child != outline && *child < *outline) {
+ child_count++;
+ if (child_count <= max_count) {
+ // Budget left for this child's own descendants.
+ int max_grand =(max_count - child_count) /
+ edges_children_per_grandchild;
+ if (max_grand > 0)
+ grandchild_count += count_children(child, max_grand) *
+ edges_children_per_grandchild;
+ else
+ grandchild_count += count_children(child, 1);
+ }
+ if (child_count + grandchild_count > max_count) {
+ if (edges_debug)
+ tprintf("Discarding parent with child count=%d, gc=%d\n",
+ child_count,grandchild_count);
+ return child_count + grandchild_count;
+ }
+ // Computed once, on the first child: the parent only stays "boxy"
+ // if its area is at least edges_boxarea of its bounding box area.
+ if (parent_area == 0) {
+ parent_area = outline->outer_area();
+ if (parent_area < 0)
+ parent_area = -parent_area;
+ max_parent_area = outline->bounding_box().area() * edges_boxarea;
+ if (parent_area < max_parent_area)
+ parent_box = false;
+ }
+ if (parent_box &&
+ (!edges_children_fix ||
+ child->bounding_box().height() > edges_min_nonhole)) {
+ child_area = child->outer_area();
+ if (child_area < 0)
+ child_area = -child_area;
+ if (edges_children_fix) {
+ // Parent area minus this child falls below the box limit:
+ // stop treating the parent as a box and move to the next child.
+ if (parent_area - child_area < max_parent_area) {
+ parent_box = false;
+ continue;
+ }
+ if (grandchild_count > 0) {
+ if (edges_debug)
+ tprintf("Discarding parent of area %d, child area=%d, max%g "
+ "with gc=%d\n",
+ parent_area, child_area, max_parent_area,
+ grandchild_count);
+ return max_count + 1;
+ }
+ // Reject when path length squared over area exceeds
+ // edges_patharea_ratio.
+ child_length = child->pathlength();
+ if (child_length * child_length >
+ child_area * edges_patharea_ratio) {
+ if (edges_debug)
+ tprintf("Discarding parent of area %d, child area=%d, max%g "
+ "with child length=%d\n",
+ parent_area, child_area, max_parent_area,
+ child_length);
+ return max_count + 1;
+ }
+ }
+ // Reject when the child fills less than edges_childarea of its
+ // own bounding box.
+ if (child_area < child->bounding_box().area() * edges_childarea) {
+ if (edges_debug)
+ tprintf("Discarding parent of area %d, child area=%d, max%g "
+ "with child rect=%d\n",
+ parent_area, child_area, max_parent_area,
+ child->bounding_box().area());
+ return max_count + 1;
+ }
+ }
+ }
+ }
+ }
+ }
+ return child_count + grandchild_count;
+}
+
+
+
+
+/**
+ * @name OL_BUCKETS::extract_children
+ *
+ * Move every outline that compares as less than the given outline out
+ * of the buckets overlapped by its bounding box, adding each one after
+ * the current position of the destination iterator.
+ */
+
+void OL_BUCKETS::extract_children(  // single level get
+    C_OUTLINE *outline,             // parent outline
+    C_OUTLINE_IT *it                // destination iterator
+    ) {
+  // Range of buckets overlapped by the parent's bounding box.
+  const TBOX parent_box = outline->bounding_box();
+  const int16_t first_col = (parent_box.left() - bl.x()) / BUCKETSIZE;
+  const int16_t last_col = (parent_box.right() - bl.x()) / BUCKETSIZE;
+  const int16_t first_row = (parent_box.bottom() - bl.y()) / BUCKETSIZE;
+  const int16_t last_row = (parent_box.top() - bl.y()) / BUCKETSIZE;
+
+  C_OUTLINE_IT bucket_it;           // search iterator
+  for (int16_t row = first_row; row <= last_row; row++) {
+    for (int16_t col = first_col; col <= last_col; col++) {
+      bucket_it.set_to_list(&buckets[row * bxdim + col]);
+      for (bucket_it.mark_cycle_pt(); !bucket_it.cycled_list();
+           bucket_it.forward()) {
+        if (!(*bucket_it.data() < *outline)) {
+          continue;
+        }
+        it->add_after_then_move(bucket_it.extract());
+      }
+    }
+  }
+}
+
+
+/**
+ * @name extract_edges
+ *
+ * Run the edge detector over the block, then gather the resulting
+ * outlines into blobs within the block's bounding box.
+ */
+
+void extract_edges(Pix* pix,        // thresholded image
+                   BLOCK *block) {  // block to scan
+  C_OUTLINE_LIST outlines;          // outlines in block
+  C_OUTLINE_IT outline_it(&outlines);
+  block_edges(pix, &block->pdblk, &outline_it);
+
+  ICOORD bottom_left;               // block box
+  ICOORD top_right;
+  block->pdblk.bounding_box(bottom_left, top_right);
+
+  // make blobs
+  outlines_to_blobs(block, bottom_left, top_right, &outlines);
+}
+
+
+/**
+ * @name outlines_to_blobs
+ *
+ * Gather together outlines into blobs using the usual bucket sort:
+ * the outlines are first distributed into an OL_BUCKETS covering the
+ * given corners, and the buckets are then emptied into the block.
+ */
+
+void outlines_to_blobs(             // find blobs
+    BLOCK *block,                   // block to scan
+    ICOORD bleft,
+    ICOORD tright,
+    C_OUTLINE_LIST *outlines) {
+  OL_BUCKETS bucket_grid(bleft, tright);  // make buckets
+
+  fill_buckets(outlines, &bucket_grid);
+  empty_buckets(block, &bucket_grid);
+}
+
+
+/**
+ * @name fill_buckets
+ *
+ * Take every outline off the input list and append it to the bucket
+ * selected by the left and bottom of its bounding box.
+ */
+
+void fill_buckets(                  // find blobs
+    C_OUTLINE_LIST *outlines,       // outlines in block
+    OL_BUCKETS *buckets             // output buckets
+    ) {
+  C_OUTLINE_IT src_it(outlines);    // iterator over the input
+  C_OUTLINE_IT dest_it;             // iterator in bucket
+
+  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
+    C_OUTLINE *outline = src_it.extract();  // take off list
+    const TBOX box = outline->bounding_box();
+    dest_it.set_to_list((*buckets)(box.left(), box.bottom()));
+    dest_it.add_to_end(outline);
+  }
+}
+
+
+/**
+ * @name empty_buckets
+ *
+ * Empty the buckets into the block. While the scan still yields a
+ * non-empty bucket, pick an outermost outline from it, move it to a
+ * temporary list, let capture_children() add its children, and pass
+ * the list to C_BLOB::ConstructBlobsFromOutlines() together with the
+ * block's blob list and reject list.
+ */
+
+void empty_buckets( // find blobs
+ BLOCK *block, // block to scan
+ OL_BUCKETS *buckets // output buckets
+ ) {
+ bool good_blob; // healthy blob
+ C_OUTLINE_LIST outlines; // outlines in block
+ // iterator
+ C_OUTLINE_IT out_it = &outlines;
+ C_OUTLINE_IT bucket_it = buckets->start_scan();
+ C_OUTLINE_IT parent_it; // parent outline
+ C_BLOB_IT good_blobs = block->blob_list();
+ C_BLOB_IT junk_blobs = block->reject_blobs();
+
+ while (!bucket_it.empty()) {
+ out_it.set_to_list(&outlines);
+ // Walk round the bucket: each time an entry is found that the current
+ // candidate compares less than, it becomes the new candidate. Stop once
+ // the walk is back at the first entry.
+ do {
+ parent_it = bucket_it; // find outermost
+ do {
+ bucket_it.forward();
+ } while (!bucket_it.at_first() &&
+ !(*parent_it.data() < *bucket_it.data()));
+ } while (!bucket_it.at_first());
+
+ // move to new list
+ out_it.add_after_then_move(parent_it.extract());
+ // good_blob is false when capture_children() rejects the outline.
+ good_blob = capture_children(buckets, &junk_blobs, &out_it);
+ C_BLOB::ConstructBlobsFromOutlines(good_blob, &outlines, &good_blobs,
+ &junk_blobs);
+
+ bucket_it.set_to_list(buckets->scan_next());
+ }
+}
+
+
+/**
+ * @name capture_children
+ *
+ * Measure the outline at the current position of blob_it with either
+ * outline_complexity() or count_children(), depending on
+ * edges_use_new_outline_complexity. If the result exceeds
+ * edges_children_count_limit the outline is declared illegal and false
+ * is returned. Otherwise any children are moved to the output list and
+ * true is returned.
+ */
+
+bool capture_children(              // find children
+    OL_BUCKETS* buckets,            // bucket sort class
+    C_BLOB_IT* reject_it,           // dead grandchildren
+    C_OUTLINE_IT* blob_it           // output outlines
+) {
+  C_OUTLINE *parent = blob_it->data();  // master outline
+
+  const int32_t child_count =
+      edges_use_new_outline_complexity
+          ? buckets->outline_complexity(parent, edges_children_count_limit, 0)
+          : buckets->count_children(parent, edges_children_count_limit);
+
+  if (child_count > edges_children_count_limit) {
+    return false;
+  }
+  if (child_count > 0) {
+    buckets->extract_children(parent, blob_it);
+  }
+  return true;
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/edgblob.h b/tesseract/src/textord/edgblob.h
new file mode 100644
index 00000000..a3b7ac1b
--- /dev/null
+++ b/tesseract/src/textord/edgblob.h
@@ -0,0 +1,100 @@
+/**********************************************************************
+ * File: edgblob.h (Formerly edgeloop.h)
+ * Description: Functions to clean up an outline before approximation.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef EDGBLOB_H
+#define EDGBLOB_H
+
+#include "scrollview.h"
+#include "params.h"
+#include "ocrblock.h"
+#include "coutln.h"
+#include "crakedge.h"
+
+#include <memory>
+
+namespace tesseract {
+
+#define BUCKETSIZE 16
+
+// Array of C_OUTLINE_LIST buckets used to sort outlines by position.
+// Presumably a bxdim x bydim grid over the rectangle (bl, tr) with cells of
+// BUCKETSIZE pixels; the constructor that sizes it is not in this header.
+// Only start_scan() and scan_next() are defined inline here.
+class OL_BUCKETS
+{
+ public:
+ OL_BUCKETS( //constructor
+ ICOORD bleft, //corners
+ ICOORD tright);
+
+ ~OL_BUCKETS () = default;
+
+ C_OUTLINE_LIST *operator () (//array access
+ int16_t x, //image coords
+ int16_t y);
+ // Restart the scan at bucket 0 and skip forward over empty buckets.
+ // Returns the first non-empty bucket. If every bucket before the last one
+ // (index bxdim * bydim - 1) is empty, the last bucket is returned whether
+ // or not it is empty, so the caller must test the returned list itself.
+ C_OUTLINE_LIST *start_scan() {
+ for (index = 0; buckets[index].empty () && index < bxdim * bydim - 1;
+ index++);
+ return &buckets[index];
+ }
+ // Continue the scan from the current index. The index is tested before
+ // it is advanced, so the same bucket keeps being returned until it has
+ // been emptied; after that the scan moves on as in start_scan().
+ C_OUTLINE_LIST *scan_next() {
+ for (; buckets[index].empty () && index < bxdim * bydim - 1; index++);
+ return &buckets[index];
+ }
+ int32_t count_children( //recursive sum
+ C_OUTLINE *outline, //parent outline
+ int32_t max_count); // max output
+ int32_t outline_complexity( // new version of count_children
+ C_OUTLINE *outline, // parent outline
+ int32_t max_count, // max output
+ int16_t depth); // level of recursion
+ void extract_children( //single level get
+ C_OUTLINE *outline, //parent outline
+ C_OUTLINE_IT *it); //destination iterator
+
+ private:
+ std::unique_ptr<C_OUTLINE_LIST[]> buckets; //array of buckets
+ int16_t bxdim; //size of array
+ int16_t bydim;
+ ICOORD bl; //corners
+ ICOORD tr;
+ int32_t index; //for extraction scan (shared by start_scan/scan_next)
+};
+
+// Free functions of edgblob.cpp. Only empty_buckets() and capture_children()
+// are described from their visible definitions; the remaining comments are
+// the original short labels.
+void extract_edges(Pix* pix, // thresholded image
+ BLOCK* block); // block to scan
+void outlines_to_blobs( //find blobs
+ BLOCK *block, //block to scan
+ ICOORD bleft, //block box
+ ICOORD tright,
+ C_OUTLINE_LIST *outlines); //outlines in block
+void fill_buckets( //presumably sorts outlines into buckets; see .cpp
+ C_OUTLINE_LIST *outlines, //outlines in block
+ OL_BUCKETS *buckets //output buckets
+ );
+// Drains the buckets: repeatedly extracts an outermost outline, gathers its
+// children with capture_children() and builds blobs into the block's
+// blob_list() / reject_blobs().
+void empty_buckets( //buckets to blobs
+ BLOCK *block, //block to scan
+ OL_BUCKETS *buckets //buckets to drain
+ );
+// Moves the children of the outline under blob_it to the output list, or
+// returns false if the outline has too many children to be legal.
+bool capture_children( //find children
+ OL_BUCKETS* buckets, //bucket sort class
+ C_BLOB_IT* reject_it, //dead grandchildren
+ C_OUTLINE_IT* blob_it //output outlines
+);
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/edgloop.cpp b/tesseract/src/textord/edgloop.cpp
new file mode 100644
index 00000000..33cf3a02
--- /dev/null
+++ b/tesseract/src/textord/edgloop.cpp
@@ -0,0 +1,162 @@
+/**********************************************************************
+ * File: edgloop.cpp (Formerly edgeloop.c)
+ * Description: Functions to clean up an outline before approximation.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+ // Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "scanedg.h"
+
+#include "edgloop.h"
+
+namespace tesseract {
+
+#define MINEDGELENGTH 8 // min decent length
+
+/**********************************************************************
+ * complete_edge
+ *
+ * Complete the edge by cleaning it up.
+ **********************************************************************/
+
+void complete_edge(CRACKEDGE *start,  // start of loop
+                   C_OUTLINE_IT* outline_it) {
+  // Classify the loop; only RED (black inside) and BLUE (white inside)
+  // verdicts describe a usable outline.
+  const ScrollView::Color verdict = check_path_legal(start);
+  if (verdict != ScrollView::RED && verdict != ScrollView::BLUE) {
+    return;  // too short, too long or illegal: nothing is added
+  }
+
+  // loop_bounding_box() also moves `start` to the top-left edge step, so it
+  // has to run before the outline is constructed from `start`.
+  ICOORD botleft;   // bounding box
+  ICOORD topright;
+  const int16_t looplength = loop_bounding_box(start, botleft, topright);
+
+  // Append the new outline after the iterator's current position.
+  outline_it->add_after_then_move(
+      new C_OUTLINE(start, botleft, topright, looplength));
+}
+
+
+/**********************************************************************
+ * check_path_legal
+ *
+ * Check that the outline is legal for length and for chaincode sum.
+ * The return value is RED for a normal black-inside outline,
+ * BLUE for a white-inside outline, MAGENTA if it is too short,
+ * YELLOW if it is too long, and GREEN if it is illegal.
+ * These colours are used to draw the raw outline.
+ **********************************************************************/
+
+ScrollView::Color check_path_legal(  // certify outline
+    CRACKEDGE *start                 // start of loop
+    ) {
+  constexpr ERRCODE ED_ILLEGAL_SUM("Illegal sum of chain codes");
+
+  int32_t steps = 0;                    // edge steps visited so far
+  int32_t turn_total = 0;               // running sum of direction changes
+  int prev_dir = start->prev->stepdir;  // direction of the preceding step
+  CRACKEDGE *pt = start;                // current edge step
+
+  // Walk the loop until it closes or the length cap is hit.
+  do {
+    steps++;
+    const int dir = pt->stepdir;
+    if (dir != prev_dir) {
+      // Direction change, folded so that 3 becomes -1 and -3 becomes +1.
+      int turn = dir - prev_dir;
+      if (turn > 2) {
+        turn -= 4;
+      } else if (turn < -2) {
+        turn += 4;
+      }
+      turn_total += turn;
+      prev_dir = dir;
+    }
+    pt = pt->next;
+  } while (pt != start && steps < C_OUTLINE::kMaxOutlineLength);
+
+  // The walk stopped on the length cap rather than by closing the loop.
+  if (pt != start) {
+    return ScrollView::YELLOW;
+  }
+  // Closed, but too short to be a decent outline.
+  if (steps < MINEDGELENGTH) {
+    return ScrollView::MAGENTA;
+  }
+  // A legal closed loop must turn by exactly +4 or -4 in total.
+  if (turn_total != 4 && turn_total != -4) {
+    ED_ILLEGAL_SUM.error ("check_path_legal", TESSLOG, "chainsum=%d",
+                          turn_total);
+    return ScrollView::GREEN;
+  }
+  // Sign of the total turn gives the colour on the inside.
+  return turn_total < 0 ? ScrollView::BLUE : ScrollView::RED;
+}
+
+/**********************************************************************
+ * loop_bounding_box
+ *
+ * Find the bounding box of the edge loop.
+ **********************************************************************/
+
+int16_t loop_bounding_box(  // get bounding box
+    CRACKEDGE *&start,      // edge loop; moved to the top-left step on return
+    ICOORD &botleft,        // bounding box (output)
+    ICOORD &topright) {
+  CRACKEDGE *top_left = start;             // leftmost step on the top row
+  int16_t top_left_x = start->pos.x ();    // its x coordinate
+  int16_t count = 0;                       // count length
+
+  // Seed the box with the starting point.
+  botleft = topright = ICOORD (start->pos.x (), start->pos.y ());
+
+  CRACKEDGE *pt = start;
+  do {
+    pt = pt->next;
+    const auto x = pt->pos.x ();
+    const auto y = pt->pos.y ();
+
+    // Grow the box horizontally.
+    if (x < botleft.x ()) {
+      botleft.set_x (x);
+    } else if (x > topright.x ()) {
+      topright.set_x (x);
+    }
+
+    // Grow the box vertically, tracking the leftmost step on the top row.
+    if (y < botleft.y ()) {
+      botleft.set_y (y);
+    } else if (y > topright.y ()) {
+      // New top row: this step is its only member so far.
+      top_left = pt;
+      top_left_x = x;
+      topright.set_y (y);
+    } else if (y == topright.y () && x < top_left_x) {
+      // Same top row, further left.
+      top_left_x = x;
+      top_left = pt;
+    }
+    count++;  // count elements
+  } while (pt != start);
+
+  start = top_left;  // shift it to topleft
+  return count;
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/edgloop.h b/tesseract/src/textord/edgloop.h
new file mode 100644
index 00000000..26cd2f21
--- /dev/null
+++ b/tesseract/src/textord/edgloop.h
@@ -0,0 +1,44 @@
+/**********************************************************************
+ * File: edgloop.h (Formerly edgeloop.h)
+ * Description: Functions to clean up an outline before approximation.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef EDGLOOP_H
+#define EDGLOOP_H
+
+#include "scrollview.h"
+#include "params.h"
+#include "pdblock.h"
+#include "coutln.h"
+#include "crakedge.h"
+
+namespace tesseract {
+
+#define BUCKETSIZE 16
+
+// Checks the loop with check_path_legal() and, if it is a legal black-inside
+// or white-inside loop, appends a new C_OUTLINE for it after outline_it.
+void complete_edge(CRACKEDGE *start, //start of loop
+ C_OUTLINE_IT* outline_it);
+// Returns RED for a normal black-inside outline, BLUE for a white-inside
+// outline, MAGENTA if too short, YELLOW if too long, GREEN if illegal.
+ScrollView::Color check_path_legal( //certify outline
+ CRACKEDGE *start //start of loop
+ );
+// Computes the bounding box of the loop, moves start to the leftmost step
+// on the top row, and returns the number of steps in the loop.
+int16_t loop_bounding_box( //get bounding box
+ CRACKEDGE *&start, //edge loop
+ ICOORD &botleft, //bounding box
+ ICOORD &topright);
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/equationdetectbase.cpp b/tesseract/src/textord/equationdetectbase.cpp
new file mode 100644
index 00000000..1d40ed8e
--- /dev/null
+++ b/tesseract/src/textord/equationdetectbase.cpp
@@ -0,0 +1,64 @@
+///////////////////////////////////////////////////////////////////////
+// File: equationdetectbase.cpp
+// Description: The base class equation detection class.
+// Author: Zongyi (Joe) Liu (joeliu@google.com)
+// Created: Fri Aug 31 11:13:01 PST 2011
+//
+// (C) Copyright 2011, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "allheaders.h"
+#include "blobbox.h"
+#include "equationdetectbase.h"
+
+namespace tesseract {
+
+// Destructor.
+// It is defined here, so the compiler can create a single vtable
+// instead of weak vtables in every compilation unit.
+EquationDetectBase::~EquationDetectBase() = default;
+
+// Debug helper: draws a 5-pixel-wide box around blob's bounding box on pix,
+// coloured by blob->special_text_type():
+//   BSTT_MATH: red, BSTT_DIGIT: cyan, BSTT_ITALIC: green,
+//   BSTT_UNCLEAR: blue, anything else (including BSTT_NONE): yellow.
+// pix must be a non-null 32 bpp image and blob must be non-null; both are
+// enforced with ASSERT_HOST.
+void EquationDetectBase::RenderSpecialText(Pix* pix,
+                                           BLOBNBOX* blob) {
+  ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32 && blob != nullptr);
+  const TBOX& tbox = blob->bounding_box();
+  int height = pixGetHeight(pix);
+  const int box_width = 5;
+
+  // Coordinate translation: tesseract uses the bottom left as the origin,
+  // while leptonica uses the top left as the origin.
+  Box *box = boxCreate(tbox.left(), height - tbox.top(),
+                       tbox.width(), tbox.height());
+  switch (blob->special_text_type()) {
+    case BSTT_MATH:  // Red box.
+      pixRenderBoxArb(pix, box, box_width, 255, 0, 0);
+      break;
+    case BSTT_DIGIT:  // Cyan box.
+      pixRenderBoxArb(pix, box, box_width, 0, 255, 255);
+      break;
+    case BSTT_ITALIC:  // Green box.
+      pixRenderBoxArb(pix, box, box_width, 0, 255, 0);
+      break;
+    case BSTT_UNCLEAR:  // Blue box.
+      // Was (0, 255, 0), which made unclear blobs indistinguishable from
+      // italic ones and contradicted the documented colour.
+      pixRenderBoxArb(pix, box, box_width, 0, 0, 255);
+      break;
+    case BSTT_NONE:
+    default:
+      // Yellow box.
+      pixRenderBoxArb(pix, box, box_width, 255, 255, 0);
+      break;
+  }
+  boxDestroy(&box);
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/equationdetectbase.h b/tesseract/src/textord/equationdetectbase.h
new file mode 100644
index 00000000..7f84bd09
--- /dev/null
+++ b/tesseract/src/textord/equationdetectbase.h
@@ -0,0 +1,59 @@
+///////////////////////////////////////////////////////////////////////
+// File: equationdetectbase.h
+// Description: The base class equation detection class.
+// Author: Zongyi (Joe) Liu (joeliu@google.com)
+// Created: Fri Aug 31 11:13:01 PST 2011
+//
+// (C) Copyright 2011, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_
+#define TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_
+
+class BLOBNBOX_LIST;
+class TO_BLOCK;
+struct Pix;
+
+namespace tesseract {
+
+class ColPartitionGrid;
+class ColPartitionSet;
+
+// Abstract interface for equation detection: two pure virtual hooks plus a
+// static debug renderer.
+class TESS_API EquationDetectBase {
+ public:
+ EquationDetectBase() = default;
+ // Defined out of line in equationdetectbase.cpp so that a single vtable is
+ // emitted.
+ virtual ~EquationDetectBase();
+
+ // Iterate over the blobs inside to_block, and set the blobs that we want to
+ // process to BSTT_NONE. (By default, they should be BSTT_SKIP). The function
+ // returns 0 upon success.
+ virtual int LabelSpecialText(TO_BLOCK* to_block) = 0;
+
+ // Interface to find possible equation partition grid from part_grid. This
+ // should be called after IdentifySpecialText function.
+ // NOTE(review): no IdentifySpecialText is declared in this class; this
+ // presumably refers to LabelSpecialText or a derived-class method - confirm.
+ virtual int FindEquationParts(ColPartitionGrid* part_grid,
+ ColPartitionSet** best_columns) = 0;
+
+ // Debug function: Render a bounding box on pix based on the value of its
+ // special_text_type, specifically:
+ // BSTT_MATH: red box
+ // BSTT_DIGIT: cyan box
+ // BSTT_ITALIC: green box
+ // BSTT_UNCLEAR: blue box
+ // All others: yellow box
+ // The implementation asserts that pix is non-null with depth 32 and that
+ // blob is non-null.
+ static void RenderSpecialText(Pix* pix, BLOBNBOX* blob);
+};
+
+} // namespace tesseract
+
+#endif // TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_
diff --git a/tesseract/src/textord/fpchop.cpp b/tesseract/src/textord/fpchop.cpp
new file mode 100644
index 00000000..91444a4d
--- /dev/null
+++ b/tesseract/src/textord/fpchop.cpp
@@ -0,0 +1,890 @@
+/**********************************************************************
+ * File: fpchop.cpp (Formerly fp_chop.c)
+ * Description: Code to chop fixed pitch text into character cells.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+ // Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "fpchop.h"
+
+#include "blobbox.h"
+#include "statistc.h"
+#include "drawtord.h"
+#include "tovars.h"
+#include "topitch.h"
+
+namespace tesseract {
+
+INT_VAR (textord_fp_chop_error, 2,
+"Max allowed bending of chop cells");
+double_VAR (textord_fp_chop_snap, 0.5,
+"Max distance of chop pt from vertex");
+
+ELISTIZE(C_OUTLINE_FRAG)
+
+// Forward declarations of the file-local (static) helpers used by the
+// fixed-pitch chopping code.
+
+// Moves the next repeated word from rep_it into the word list.
+static WERD* add_repeated_word(
+ WERD_IT* rep_it,
+ int16_t& rep_left,
+ int16_t& prev_chop_coord,
+ uint8_t& blanks,
+ float pitch,
+ WERD_IT* word_it
+);
+
+// Splits a C_BLOB (and any pending right outlines) at chop_coord.
+static void fixed_chop_cblob(
+ C_BLOB* blob,
+ int16_t chop_coord,
+ float pitch_error,
+ C_OUTLINE_LIST* left_outlines,
+ C_OUTLINE_LIST* right_outlines
+);
+
+// Puts one outline on the left or right of chop_coord, chopping if needed.
+static void fixed_split_coutline(
+ C_OUTLINE* srcline,
+ int16_t chop_coord,
+ float pitch_error,
+ C_OUTLINE_IT* left_it,
+ C_OUTLINE_IT* right_it
+);
+
+// Cuts one outline into fragments at chop_coord; false if it was not chopped.
+static bool fixed_chop_coutline(
+ C_OUTLINE* srcline,
+ int16_t chop_coord,
+ float pitch_error,
+ C_OUTLINE_FRAG_LIST* left_frags,
+ C_OUTLINE_FRAG_LIST* right_frags
+);
+
+// Records one fragment (as a head/tail pair) in a fragment list.
+static void save_chop_cfragment(
+ int16_t head_index,
+ ICOORD head_pos,
+ int16_t tail_index,
+ ICOORD tail_pos,
+ C_OUTLINE* srcline,
+ C_OUTLINE_FRAG_LIST* frags
+);
+
+static void add_frag_to_list(
+ C_OUTLINE_FRAG* frag,
+ C_OUTLINE_FRAG_LIST* frags
+);
+
+static void close_chopped_cfragments(
+ C_OUTLINE_FRAG_LIST* frags,
+ C_OUTLINE_LIST* children,
+ float pitch_error,
+ C_OUTLINE_IT* dest_it
+);
+
+static C_OUTLINE* join_chopped_fragments(
+ C_OUTLINE_FRAG* bottom,
+ C_OUTLINE_FRAG* top
+);
+
+static void join_segments(
+ C_OUTLINE_FRAG* bottom,
+ C_OUTLINE_FRAG* top
+);
+
+/**********************************************************************
+ * fixed_pitch_words
+ *
+ * Make a ROW from a fixed pitch TO_ROW.
+ **********************************************************************/
+// Builds the output ROW for a fixed-pitch TO_ROW by walking row->char_cells
+// from the first cell onwards, chopping the row's blobs at each cell boundary
+// and grouping the pieces into WERDs. Words from row->rep_words are moved
+// into the output by add_repeated_word() once their left edge is reached.
+// Returns nullptr if the row has no blobs or fewer than two char cells;
+// otherwise returns a newly allocated ROW holding the words.
+// NOTE(review): `rotation` is not referenced anywhere in this body.
+ROW *fixed_pitch_words( //find lines
+ TO_ROW *row, //row to do
+ FCOORD rotation //for drawing
+ ) {
+ bool bol; //start of line
+ uint8_t blanks; //in front of word
+ uint8_t new_blanks; //blanks in empty cell
+ int16_t chop_coord; //chop boundary
+ int16_t prev_chop_coord; //start of cell
+ int16_t rep_left; //left edge of rep word
+ ROW *real_row; //output row
+ C_OUTLINE_LIST left_coutlines;
+ C_OUTLINE_LIST right_coutlines;
+ C_BLOB_LIST cblobs;
+ C_BLOB_IT cblob_it = &cblobs;
+ WERD_LIST words;
+ WERD_IT word_it = &words; //new words
+ //repeated blobs
+ WERD_IT rep_it = &row->rep_words;
+ WERD *word; //new word
+ // NOTE(review): xstarts is only written in this function, never read.
+ int32_t xstarts[2]; //row ends
+ int32_t prev_x; //end of prev blob
+ //iterator
+ BLOBNBOX_IT box_it = row->blob_list ();
+ //boundaries
+ ICOORDELT_IT cell_it = &row->char_cells;
+
+#ifndef GRAPHICS_DISABLED
+ if (textord_show_page_cuts && to_win != nullptr) {
+ plot_row_cells (to_win, ScrollView::RED, row, 0, &row->char_cells);
+ }
+#endif
+
+ prev_x = -INT16_MAX;
+ bol = true;
+ blanks = 0;
+ // INT16_MAX acts as "no repeated word left": it is never < a chop coord.
+ if (rep_it.empty ())
+ rep_left = INT16_MAX;
+ else
+ rep_left = rep_it.data ()->bounding_box ().left ();
+ if (box_it.empty ())
+ return nullptr; //empty row
+ xstarts[0] = box_it.data ()->bounding_box ().left ();
+ if (rep_left < xstarts[0]) {
+ xstarts[0] = rep_left;
+ }
+ // At least two cell boundaries are required to delimit a cell.
+ if (cell_it.empty () || row->char_cells.singleton ()) {
+ tprintf ("Row without enough char cells!\n");
+ tprintf ("Leftmost blob is at (%d,%d)\n",
+ box_it.data ()->bounding_box ().left (),
+ box_it.data ()->bounding_box ().bottom ());
+ return nullptr;
+ }
+ // Same condition as the early return just above, so this cannot fire.
+ ASSERT_HOST (!cell_it.empty () && !row->char_cells.singleton ());
+ prev_chop_coord = cell_it.data ()->x ();
+ word = nullptr;
+ // Emit repeated words that start before the first cell boundary.
+ while (rep_left < cell_it.data ()->x ()) {
+ word = add_repeated_word (&rep_it, rep_left, prev_chop_coord,
+ blanks, row->fixed_pitch, &word_it);
+ }
+ cell_it.mark_cycle_pt ();
+ // Skip the first boundary unless a repeated word pushed prev_chop_coord
+ // back to the left of it.
+ if (prev_chop_coord >= cell_it.data ()->x ())
+ cell_it.forward ();
+ for (; !cell_it.cycled_list (); cell_it.forward ()) {
+ chop_coord = cell_it.data ()->x ();
+ // Consume every remaining blob whose left edge is at or before this
+ // boundary, splitting it into left/right outline lists.
+ while (!box_it.empty ()
+ && box_it.data ()->bounding_box ().left () <= chop_coord) {
+ if (box_it.data ()->bounding_box ().right () > prev_x)
+ prev_x = box_it.data ()->bounding_box ().right ();
+ split_to_blob (box_it.extract (), chop_coord,
+ textord_fp_chop_error + 0.5f,
+ &left_coutlines,
+ &right_coutlines);
+ box_it.forward ();
+ // Drop following entries that carry no cblob.
+ while (!box_it.empty() && box_it.data()->cblob() == nullptr) {
+ delete box_it.extract();
+ box_it.forward();
+ }
+ }
+ // Leftover right-hand pieces and nothing on the left: re-chop the
+ // leftovers against this boundary (no new blob is supplied).
+ if (!right_coutlines.empty() && left_coutlines.empty())
+ split_to_blob (nullptr, chop_coord,
+ textord_fp_chop_error + 0.5f,
+ &left_coutlines,
+ &right_coutlines);
+ if (!left_coutlines.empty()) {
+ // The cell has content: it becomes one blob of the word being built.
+ cblob_it.add_after_then_move(new C_BLOB(&left_coutlines));
+ } else {
+ // Empty cell: work out how many blanks it represents, measured in
+ // units of the fixed pitch and rounded to the nearest integer, up to
+ // the next repeated word if that starts inside this cell.
+ if (rep_left < chop_coord) {
+ if (rep_left > prev_chop_coord)
+ new_blanks = static_cast<uint8_t>(floor ((rep_left - prev_chop_coord)
+ / row->fixed_pitch + 0.5));
+ else
+ new_blanks = 0;
+ }
+ else {
+ if (chop_coord > prev_chop_coord)
+ new_blanks = static_cast<uint8_t>(floor ((chop_coord - prev_chop_coord)
+ / row->fixed_pitch + 0.5));
+ else
+ new_blanks = 0;
+ }
+ if (!cblob_it.empty()) {
+ // An empty cell ends the word in progress. Force at least one blank
+ // in front of it when it follows a non-repeated word.
+ if (blanks < 1 && word != nullptr && !word->flag (W_REP_CHAR))
+ blanks = 1;
+ word = new WERD (&cblobs, blanks, nullptr);
+ cblob_it.set_to_list (&cblobs);
+ word->set_flag (W_DONT_CHOP, true);
+ word_it.add_after_then_move (word);
+ if (bol) {
+ word->set_flag (W_BOL, true);
+ bol = false;
+ }
+ blanks = new_blanks;
+ }
+ else
+ blanks += new_blanks;
+ // Emit repeated words that start before this boundary.
+ while (rep_left < chop_coord) {
+ word = add_repeated_word (&rep_it, rep_left, prev_chop_coord,
+ blanks, row->fixed_pitch, &word_it);
+ }
+ }
+ if (prev_chop_coord < chop_coord)
+ prev_chop_coord = chop_coord;
+ }
+ // Flush the word still being built when the cells ran out.
+ if (!cblob_it.empty()) {
+ word = new WERD(&cblobs, blanks, nullptr);
+ word->set_flag (W_DONT_CHOP, true);
+ word_it.add_after_then_move (word);
+ if (bol)
+ word->set_flag (W_BOL, true);
+ }
+ ASSERT_HOST (word != nullptr);
+ // Any repeated words not yet placed go at the end.
+ while (!rep_it.empty ()) {
+ add_repeated_word (&rep_it, rep_left, prev_chop_coord,
+ blanks, row->fixed_pitch, &word_it);
+ }
+ //at end of line
+ word_it.data ()->set_flag (W_EOL, true);
+ if (prev_chop_coord > prev_x)
+ prev_x = prev_chop_coord;
+ xstarts[1] = prev_x + 1;
+ real_row = new ROW (row, static_cast<int16_t>(row->kern_size), static_cast<int16_t>(row->space_size));
+ word_it.set_to_list (real_row->word_list ());
+ //put words in row
+ word_it.add_list_after (&words);
+ real_row->recalc_bounding_box ();
+ return real_row;
+}
+
+
+/**********************************************************************
+ * add_repeated_word
+ *
+ * Add repeated word into the row at the given point.
+ **********************************************************************/
+
+static
+WERD *add_repeated_word(         // move repeated word
+    WERD_IT *rep_it,             // repeated words
+    int16_t &rep_left,           // left edge of word (updated to the next one)
+    int16_t &prev_chop_coord,    // previous word end (updated)
+    uint8_t &blanks,             // no of blanks (reset to 0 on return)
+    float pitch,                 // char cell size
+    WERD_IT *word_it             // list of words
+    ) {
+  // Blanks for the gap between the previous chop and this word, in whole
+  // character cells rounded to nearest.
+  if (rep_left > prev_chop_coord) {
+    const int16_t gap_cells = static_cast<uint8_t>(
+        floor ((rep_left - prev_chop_coord) / pitch + 0.5));
+    blanks += gap_cells;
+  }
+
+  // Transfer the word from the repeated list to the output list.
+  WERD *moved = rep_it->extract ();
+  prev_chop_coord = moved->bounding_box ().right ();
+  word_it->add_after_then_move (moved);
+  moved->set_blanks (blanks);
+
+  // Step to the next repeated word; INT16_MAX marks "none left".
+  rep_it->forward ();
+  if (rep_it->empty ()) {
+    rep_left = INT16_MAX;
+  } else {
+    rep_left = rep_it->data ()->bounding_box ().left ();
+  }
+
+  blanks = 0;
+  return moved;
+}
+
+
+/**********************************************************************
+ * split_to_blob
+ *
+ * Split a BLOBNBOX across a vertical chop line and put the pieces
+ * into a left outline list and a right outline list.
+ **********************************************************************/
+
+void split_to_blob(                   // split the blob
+    BLOBNBOX *blob,                   // blob to split (may be nullptr)
+    int16_t chop_coord,               // place to chop
+    float pitch_error,                // allowed deviation
+    C_OUTLINE_LIST *left_coutlines,   // for cblobs
+    C_OUTLINE_LIST *right_coutlines) {
+  // With no BLOBNBOX there is no cblob either; only pending right-hand
+  // outlines can then be re-chopped.
+  C_BLOB *real_cblob = (blob != nullptr) ? blob->cblob() : nullptr;
+
+  // Chop only if there is something to chop: a cblob, or leftovers from an
+  // earlier chop.
+  if (real_cblob != nullptr || !right_coutlines->empty()) {
+    fixed_chop_cblob(real_cblob, chop_coord, pitch_error,
+                     left_coutlines, right_coutlines);
+  }
+
+  // The BLOBNBOX wrapper is always consumed here.
+  delete blob;
+}
+
+/**********************************************************************
+ * fixed_chop_cblob
+ *
+ * Chop the given cblob (if any) and the existing right outlines to
+ * produce a list of outlines left of the chop point and more to the right.
+ **********************************************************************/
+
+// Two phases, both relative to the same chop_coord:
+//  1. every outline already waiting in right_outlines is re-split, going
+//     either to left_outlines or back into right_outlines;
+//  2. if a blob is given, each of its outlines is split the same way and
+//     the blob itself is then deleted.
+static
+void fixed_chop_cblob( //split the blob
+ C_BLOB *blob, //blob to split
+ int16_t chop_coord, //place to chop
+ float pitch_error, //allowed deviation
+ C_OUTLINE_LIST *left_outlines, //left half of chop
+ C_OUTLINE_LIST *right_outlines //right half of chop
+ ) {
+ C_OUTLINE *old_right; //already there
+ C_OUTLINE_LIST new_outlines; //new right ones
+ //output iterator
+ C_OUTLINE_IT left_it = left_outlines;
+ //in/out iterator
+ C_OUTLINE_IT right_it = right_outlines;
+ C_OUTLINE_IT new_it = &new_outlines;
+ C_OUTLINE_IT blob_it; //outlines in blob
+
+ if (!right_it.empty ()) {
+ // Drain right_outlines completely; pieces that stay on the right are
+ // collected in new_outlines so that they are not visited again.
+ while (!right_it.empty ()) {
+ old_right = right_it.extract ();
+ right_it.forward ();
+ fixed_split_coutline(old_right,
+ chop_coord,
+ pitch_error,
+ &left_it,
+ &new_it);
+ }
+ // Put the surviving right-hand pieces back.
+ right_it.add_list_before (&new_outlines);
+ }
+ if (blob != nullptr) {
+ blob_it.set_to_list (blob->out_list ());
+ // Each outline is extracted from the blob before being split, so the
+ // blob's own outline list is emptied by this loop.
+ for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+ blob_it.forward ())
+ fixed_split_coutline (blob_it.extract (), chop_coord, pitch_error,
+ &left_it, &right_it);
+ delete blob;
+ }
+}
+
+
+/**********************************************************************
+ * fixed_split_coutline
+ *
+ * Chop the given outline (if necessary) placing the fragments which
+ * fall either side of the chop line into the appropriate list.
+ **********************************************************************/
+
+// Takes ownership of srcline: it is either added whole to one of the two
+// output lists or chopped into fragments and deleted.
+// "Left" versus "right" is decided by the centre of the bounding box
+// (left + right compared with 2 * chop_coord); an outline is kept whole if
+// it overhangs the chop line by less than pitch_error.
+static
+void fixed_split_coutline( //chop the outline
+ C_OUTLINE *srcline, //source outline
+ int16_t chop_coord, //place to chop
+ float pitch_error, //allowed deviation
+ C_OUTLINE_IT *left_it, //left half of chop
+ C_OUTLINE_IT *right_it //right half of chop
+ ) {
+ C_OUTLINE *child; //child outline
+ TBOX srcbox; //box of outline
+ C_OUTLINE_LIST left_ch; //left children
+ C_OUTLINE_LIST right_ch; //right children
+ C_OUTLINE_FRAG_LIST left_frags;//chopped fragments
+ C_OUTLINE_FRAG_LIST right_frags;;
+ C_OUTLINE_IT left_ch_it = &left_ch;
+ //for whole children
+ C_OUTLINE_IT right_ch_it = &right_ch;
+ //for holes
+ C_OUTLINE_IT child_it = srcline->child ();
+
+ srcbox = srcline->bounding_box();
+ if (srcbox.left() + srcbox.right() <= chop_coord * 2
+ && srcbox.right() < chop_coord + pitch_error) {
+ // Whole outline is in the left side or not far over the chop_coord,
+ // so put the whole thing on the left.
+ left_it->add_after_then_move(srcline);
+ } else if (srcbox.left() + srcbox.right() > chop_coord * 2
+ && srcbox.left () > chop_coord - pitch_error) {
+ // Whole outline is in the right side or not far over the chop_coord,
+ // so put the whole thing on the right.
+ right_it->add_before_stay_put(srcline);
+ } else {
+ // Needs real chopping.
+ if (fixed_chop_coutline(srcline, chop_coord, pitch_error,
+ &left_frags, &right_frags)) {
+ // The parent was chopped: distribute its children. srcbox is reused
+ // for each child's bounding box from here on.
+ for (child_it.mark_cycle_pt(); !child_it.cycled_list();
+ child_it.forward()) {
+ child = child_it.extract();
+ srcbox = child->bounding_box();
+ if (srcbox.right() < chop_coord) {
+ // Whole child is on the left.
+ left_ch_it.add_after_then_move(child);
+ } else if (srcbox.left() > chop_coord) {
+ // Whole child is on the right.
+ right_ch_it.add_after_then_move (child);
+ } else {
+ // No pitch_error is allowed when chopping children to prevent
+ // impossible outlines from being created.
+ if (fixed_chop_coutline(child, chop_coord, 0.0f,
+ &left_frags, &right_frags)) {
+ // The child's fragments now live in the fragment lists.
+ delete child;
+ } else {
+ // Child could not be chopped: assign it whole by its centre.
+ if (srcbox.left() + srcbox.right() <= chop_coord * 2)
+ left_ch_it.add_after_then_move(child);
+ else
+ right_ch_it.add_after_then_move(child);
+ }
+ }
+ }
+ // Presumably joins the fragments into closed outlines on each side and
+ // re-homes the whole children (definition not shown here); the assert
+ // below requires both child lists to have been emptied.
+ close_chopped_cfragments(&left_frags, &left_ch, pitch_error, left_it);
+ close_chopped_cfragments(&right_frags, &right_ch, pitch_error, right_it);
+ ASSERT_HOST(left_ch.empty() && right_ch.empty());
+ // No children left.
+ delete srcline; // Smashed up.
+ } else {
+ // Chop failed. Just use middle coord.
+ if (srcbox.left() + srcbox.right() <= chop_coord * 2)
+ left_it->add_after_then_move(srcline); // Stick whole in left.
+ else
+ right_it->add_before_stay_put(srcline);
+ }
+ }
+}
+
+
+/**********************************************************************
+ * fixed_chop_coutline
+ *
+ * Chop the given coutline (if necessary) placing the fragments which
+ * fall either side of the chop line into the appropriate list.
+ * If the coutline lies too heavily to one side to chop, false is returned.
+ **********************************************************************/
+
+// Walks srcline once, starting from its leftmost point, and cuts it wherever
+// the path reaches x == chop_coord. Stretches saved from the outer loop go
+// to left_frags; stretches that leave the line heading right (+x) go to
+// right_frags. Returns false, having added nothing to either list, if the
+// leftmost point is not at least pitch_error left of chop_coord or if the
+// path never reaches the chop line. Returns true otherwise.
+static
+bool fixed_chop_coutline( //chop the outline
+ C_OUTLINE* srcline, //source outline
+ int16_t chop_coord, //place to chop
+ float pitch_error, //allowed deviation
+ C_OUTLINE_FRAG_LIST* left_frags, //left half of chop
+ C_OUTLINE_FRAG_LIST* right_frags //right half of chop
+) {
+ bool first_frag; //fragment
+ int16_t left_edge; //of outline
+ int16_t startindex; //in first fragment
+ int32_t length; //of outline
+ int16_t stepindex; //into outline
+ int16_t head_index; //start of fragment
+ ICOORD head_pos; //start of fragment
+ int16_t tail_index; //end of fragment
+ ICOORD tail_pos; //end of fragment
+ ICOORD pos; //current point
+ int16_t first_index = 0; //first tail
+ ICOORD first_pos; //first tail
+
+ // Pass 1: locate the leftmost point of the outline (first one on ties).
+ length = srcline->pathlength ();
+ pos = srcline->start_pos ();
+ left_edge = pos.x ();
+ tail_index = 0;
+ tail_pos = pos;
+ for (stepindex = 0; stepindex < length; stepindex++) {
+ if (pos.x () < left_edge) {
+ left_edge = pos.x ();
+ tail_index = stepindex;
+ tail_pos = pos;
+ }
+ pos += srcline->step (stepindex);
+ }
+ if (left_edge >= chop_coord - pitch_error)
+ return false; //not worth it
+
+ // Pass 2: walk the loop from the leftmost point, cutting at the chop line.
+ startindex = tail_index;
+ first_frag = true;
+ head_index = tail_index;
+ head_pos = tail_pos;
+ do {
+ // Advance until the chop line is reached or the walk is back at the
+ // start; indices wrap at `length`.
+ do {
+ tail_pos += srcline->step (tail_index);
+ tail_index++;
+ if (tail_index == length)
+ tail_index = 0;
+ }
+ while (tail_pos.x () != chop_coord && tail_index != startindex);
+ if (tail_index == startindex) {
+ if (first_frag)
+ return false; //doesn't cross line
+ else
+ break;
+ }
+ ASSERT_HOST (head_index != tail_index);
+ if (!first_frag) {
+ save_chop_cfragment(head_index,
+ head_pos,
+ tail_index,
+ tail_pos,
+ srcline,
+ left_frags);
+ }
+ else {
+ // The first left-hand stretch began before startindex, so only its end
+ // is remembered here; it is saved after the loop once its head is known.
+ first_index = tail_index;
+ first_pos = tail_pos;
+ first_frag = false;
+ }
+ // Skip steps with no x movement (they run along the chop line).
+ while (srcline->step (tail_index).x () == 0) {
+ tail_pos += srcline->step (tail_index);
+ tail_index++;
+ if (tail_index == length)
+ tail_index = 0;
+ }
+ head_index = tail_index;
+ head_pos = tail_pos;
+ // While the path leaves the line in +x, follow it until it returns to
+ // the line and save that stretch as a right-hand fragment.
+ while (srcline->step (tail_index).x () > 0) {
+ do {
+ tail_pos += srcline->step (tail_index);
+ tail_index++;
+ if (tail_index == length)
+ tail_index = 0;
+ }
+ while (tail_pos.x () != chop_coord);
+ ASSERT_HOST (head_index != tail_index);
+ save_chop_cfragment(head_index,
+ head_pos,
+ tail_index,
+ tail_pos,
+ srcline,
+ right_frags);
+ while (srcline->step (tail_index).x () == 0) {
+ tail_pos += srcline->step (tail_index);
+ tail_index++;
+ if (tail_index == length)
+ tail_index = 0;
+ }
+ head_index = tail_index;
+ head_pos = tail_pos;
+ }
+ }
+ while (tail_index != startindex);
+ // Close the wrap-around: the last head joins up with the first crossing.
+ save_chop_cfragment(head_index,
+ head_pos,
+ first_index,
+ first_pos,
+ srcline,
+ left_frags);
+ return true; //did some chopping
+}
+
+/**********************************************************************
+ * save_chop_cfragment
+ *
+ * Record the stretch of srcline between the head and tail positions as
+ * a linked pair of entries in frags: a head that owns a copy of the
+ * steps and a tail that only marks the other end. A stretch whose step
+ * count equals the vertical distance between its ends is dropped.
+ **********************************************************************/
+
+static
+void save_chop_cfragment(          //store one fragment
+    int16_t head_index,            //head of fragment
+    ICOORD head_pos,               //head of fragment
+    int16_t tail_index,            //tail of fragment
+    ICOORD tail_pos,               //tail of fragment
+    C_OUTLINE *srcline,            //source of edgesteps
+    C_OUTLINE_FRAG_LIST *frags     //fragment list
+    ) {
+  // Both ends must sit on the same x, and the fragment must not be empty.
+  ASSERT_HOST (tail_pos.x () == head_pos.x ());
+  ASSERT_HOST (tail_index != head_index);
+
+  // Steps from head to tail, unwrapping when the run passes index 0.
+  int16_t step_total = tail_index - head_index;
+  if (step_total < 0)
+    step_total += srcline->pathlength ();
+
+  // Absolute vertical distance between the two ends.
+  int16_t y_gap = tail_pos.y () - head_pos.y ();
+  if (y_gap < 0)
+    y_gap = -y_gap;
+
+  // Equal counts mean the fragment adds nothing: its a nop.
+  if (y_gap != step_total) {
+    C_OUTLINE_FRAG *head_frag = new C_OUTLINE_FRAG (head_pos, tail_pos, srcline,
+                                                    head_index, tail_index);
+    C_OUTLINE_FRAG *tail_frag = new C_OUTLINE_FRAG (head_frag, tail_pos.y ());
+    head_frag->other_end = tail_frag;
+    add_frag_to_list(head_frag, frags);
+    add_frag_to_list(tail_frag, frags);
+  }
+}
+
+
+/**********************************************************************
+ * C_OUTLINE_FRAG::C_OUTLINE_FRAG
+ *
+ * Constructors for C_OUTLINE_FRAG.
+ **********************************************************************/
+
+// Head constructor: copies the step directions of outline from
+// start_index up to (not including) end_index, continuing from index 0
+// when end_index is not greater than start_index.
+C_OUTLINE_FRAG::C_OUTLINE_FRAG(    //record fragment
+    ICOORD start_pt,               //start coord
+    ICOORD end_pt,                 //end coord
+    C_OUTLINE *outline,            //source of steps
+    int16_t start_index,
+    int16_t end_index) {
+  start = start_pt;
+  end = end_pt;
+  ycoord = start_pt.y ();
+  other_end = nullptr;
+
+  const int path_len = outline->pathlength ();
+  stepcount = end_index - start_index;
+  if (stepcount < 0)
+    stepcount += path_len;
+  ASSERT_HOST (stepcount > 0);
+
+  steps = new DIR128[stepcount];
+  int src_index = start_index;
+  for (int dst_index = 0; dst_index < stepcount; ++dst_index) {
+    steps[dst_index] = outline->step_dir(src_index);
+    if (++src_index == path_len)
+      src_index = 0;               // carry on from the start of the outline
+  }
+
+  // Build the closed outline once and throw it away; the result is not
+  // used here. NOTE(review): presumably a sanity check - confirm.
+  delete close();
+}
+
+
+// Tail constructor: makes the step-less partner of head. It shares the
+// head's start and end points, points back at head through other_end,
+// and takes tail_y as its ycoord.
+C_OUTLINE_FRAG::C_OUTLINE_FRAG(    //record fragment
+    C_OUTLINE_FRAG *head,          //other end
+    int16_t tail_y)
+    : start (head->start),
+      end (head->end),
+      steps (nullptr),
+      stepcount (0),
+      other_end (head),
+      ycoord (tail_y) {
+}
+
+
+/**********************************************************************
+ * add_frag_to_list
+ *
+ * Insert frag into frags so that the list stays in ascending ycoord
+ * order. Among entries with the same ycoord, frag is placed in front
+ * only when its other end has a smaller ycoord than frag itself.
+ **********************************************************************/
+
+static
+void add_frag_to_list(             //ordered add
+    C_OUTLINE_FRAG *frag,          //fragment to add
+    C_OUTLINE_FRAG_LIST *frags     //fragment list
+    ) {
+  C_OUTLINE_FRAG_IT frag_it (frags);
+
+  if (frags->empty ()) {
+    frag_it.add_to_end (frag);
+    return;
+  }
+
+  for (frag_it.mark_cycle_pt (); !frag_it.cycled_list ();
+       frag_it.forward ()) {
+    const int16_t here_y = frag_it.data ()->ycoord;
+    bool goes_before = here_y > frag->ycoord;
+    // Only look at the other end to break a tie on ycoord.
+    if (!goes_before && here_y == frag->ycoord)
+      goes_before = frag->other_end->ycoord < frag->ycoord;
+    if (goes_before) {
+      frag_it.add_before_then_move (frag);
+      return;
+    }
+  }
+  // Nothing in the list sorts after frag.
+  frag_it.add_to_end (frag);
+}
+
+
+/**********************************************************************
+ * close_chopped_cfragments
+ *
+ * Clear the given list of fragments joining them up into outlines.
+ * Each outline made soaks up any of the child outlines which it encloses.
+ *
+ * Fragments are taken from the list two at a time (a bottom and a top)
+ * and handed to join_chopped_fragments. A pair that are each other's
+ * ends yields a closed outline; any other pair is spliced together and
+ * yields nothing yet. Closed outlines no wider than pitch_error are
+ * deleted instead of being added after dest_it. Children that no new
+ * outline claimed are moved to dest_it at the end.
+ **********************************************************************/
+
+static
+void close_chopped_cfragments( //join fragments into outlines
+ C_OUTLINE_FRAG_LIST *frags, //list to clear
+ C_OUTLINE_LIST *children, //potential children
+ float pitch_error, //outlines no wider than this are dropped
+ C_OUTLINE_IT *dest_it //output list
+ ) {
+ //iterator
+ C_OUTLINE_FRAG_IT frag_it = frags;
+ C_OUTLINE_FRAG *bottom_frag; //bottom of cut
+ C_OUTLINE_FRAG *top_frag; //top of cut
+ C_OUTLINE *outline; //new outline
+ C_OUTLINE *child; //current child
+ C_OUTLINE_IT child_it = children;
+ C_OUTLINE_IT olchild_it; //children of outline
+
+ while (!frag_it.empty()) { // each pass removes two fragments from the list
+ frag_it.move_to_first();
+ // get bottom one
+ bottom_frag = frag_it.extract();
+ frag_it.forward();
+ top_frag = frag_it.data(); // look at next
+ if ((bottom_frag->steps == nullptr && top_frag->steps == nullptr) // both without steps (tails)
+ || (bottom_frag->steps != nullptr && top_frag->steps != nullptr)) { // or both with steps (heads)
+ if (frag_it.data_relative(1)->ycoord == top_frag->ycoord) // the entry after shares the ycoord
+ frag_it.forward(); // so take that one as the top instead
+ }
+ top_frag = frag_it.extract();
+ if (top_frag->other_end != bottom_frag) { // must be tested before the join: it deletes both fragments
+ outline = join_chopped_fragments(bottom_frag, top_frag);
+ ASSERT_HOST(outline == nullptr); // splicing two different pieces never closes an outline
+ } else {
+ outline = join_chopped_fragments(bottom_frag, top_frag);
+ if (outline != nullptr) { // nullptr when C_OUTLINE_FRAG::close refused (too many steps)
+ olchild_it.set_to_list(outline->child());
+ for (child_it.mark_cycle_pt(); !child_it.cycled_list();
+ child_it.forward()) {
+ child = child_it.data();
+ if (*child < *outline) // child is enclosed by the new outline
+ olchild_it.add_to_end(child_it.extract());
+ }
+ if (outline->bounding_box().width() > pitch_error)
+ dest_it->add_after_then_move(outline);
+ else
+ delete outline; // Make it disappear.
+ }
+ }
+ }
+ while (!child_it.empty ()) { // children no outline claimed go straight to the output
+ dest_it->add_after_then_move (child_it.extract ());
+ child_it.forward ();
+ }
+}
+
+
+/**********************************************************************
+ * join_chopped_fragments
+ *
+ * Consume the two fragment entries bottom and top; both are deleted.
+ * If they are the two ends of one fragment, that fragment is closed and
+ * the resulting outline returned (this may be nullptr if close() gives
+ * up). Otherwise the step-carrying one is appended to the partner of
+ * the step-less one, the two surviving partners are linked to each
+ * other, and nullptr is returned.
+ **********************************************************************/
+
+static
+C_OUTLINE *join_chopped_fragments( //join pieces
+    C_OUTLINE_FRAG *bottom,        //bottom of cut
+    C_OUTLINE_FRAG *top            //top of cut
+    ) {
+  if (bottom->other_end == top) {
+    // Same fragment: close whichever end holds the steps.
+    C_OUTLINE_FRAG *holder = (bottom->steps == nullptr) ? top : bottom;
+    C_OUTLINE *outline = holder->close ();
+    delete top;
+    delete bottom;
+    return outline;
+  }
+
+  // Different fragments: exactly one of the pair may carry steps.
+  if (bottom->steps != nullptr) {
+    ASSERT_HOST (top->steps == nullptr);
+    join_segments (top->other_end, bottom);
+  }
+  else {
+    ASSERT_HOST (top->steps != nullptr);
+    join_segments (bottom->other_end, top);
+  }
+
+  // The partners of the two consumed entries now belong together.
+  top->other_end->other_end = bottom->other_end;
+  bottom->other_end->other_end = top->other_end;
+  delete bottom;
+  delete top;
+  return nullptr;
+}
+
+/**********************************************************************
+ * join_segments
+ *
+ * Replace bottom's steps with bottom's steps, then enough fake steps to
+ * cover the y distance from bottom's end to top's start, then top's
+ * steps. bottom (and its other end) take over top's end point; top
+ * itself is not modified.
+ **********************************************************************/
+
+static
+void join_segments(                //join pieces
+    C_OUTLINE_FRAG *bottom,        //bottom of cut
+    C_OUTLINE_FRAG *top            //top of cut
+    ) {
+  ASSERT_HOST (bottom->end.x () == top->start.x ());
+
+  // Size and direction of the filler run between the two pieces.
+  DIR128 fake_step;
+  int16_t fake_count = top->start.y () - bottom->end.y ();
+  if (fake_count >= 0) {
+    fake_step = 96;
+  }
+  else {
+    fake_count = -fake_count;
+    fake_step = 32;
+  }
+
+  const int32_t joined_count = bottom->stepcount + fake_count + top->stepcount;
+  DIR128 *joined = new DIR128[joined_count];
+
+  // The lengths below are byte counts.
+  // NOTE(review): this relies on DIR128 occupying one byte - confirm.
+  DIR128 *cursor = joined;
+  memmove (cursor, bottom->steps, bottom->stepcount);
+  cursor += bottom->stepcount;
+  memset (cursor, fake_step.get_dir(), fake_count);
+  cursor += fake_count;
+  memmove (cursor, top->steps, top->stepcount);
+
+  delete [] bottom->steps;
+  bottom->steps = joined;
+  bottom->stepcount = joined_count;
+  bottom->end = top->end;
+  bottom->other_end->end = top->end;
+}
+
+
+/**********************************************************************
+ * C_OUTLINE_FRAG::close
+ *
+ * Build a new C_OUTLINE from this fragment's steps followed by enough
+ * fake steps to cover the y distance between its end and its start.
+ * The fragment itself is left unchanged. Returns nullptr when the
+ * total would exceed C_OUTLINE::kMaxOutlineLength; otherwise the caller
+ * receives a newly allocated outline.
+ **********************************************************************/
+
+C_OUTLINE *C_OUTLINE_FRAG::close() { //join pieces
+  ASSERT_HOST (start.x () == end.x ());
+
+  // Size and direction of the closing run.
+  DIR128 fake_step;
+  int16_t fake_count = start.y () - end.y ();
+  if (fake_count >= 0) {
+    fake_step = 96;
+  }
+  else {
+    fake_count = -fake_count;
+    fake_step = 32;
+  }
+
+  const int32_t closed_count = stepcount + fake_count;
+  if (closed_count > C_OUTLINE::kMaxOutlineLength)
+    return nullptr;                // Can't join them
+
+  // Temporary step array; the outline constructor is given it only for
+  // the duration of the call and it is freed straight afterwards.
+  DIR128 *closed_steps = new DIR128[closed_count];
+  memmove(closed_steps, steps, stepcount);
+  memset (closed_steps + stepcount, fake_step.get_dir(), fake_count);
+  C_OUTLINE *result = new C_OUTLINE (start, closed_steps, closed_count);
+  delete [] closed_steps;
+  return result;
+}
+
+
+/**********************************************************************
+ * C_OUTLINE_FRAG::operator=
+ *
+ * Copy the steps, start, end and ycoord of src into this fragment.
+ * other_end is not copied; it keeps whatever value it had.
+ *
+ * Assigning a fragment to itself is a no-op. A source without a step
+ * array (steps == nullptr) leaves this fragment without one too, since
+ * a null step array is how join_chopped_fragments and
+ * close_chopped_cfragments tell a step-less end from a step-carrying one.
+ **********************************************************************/
+
+C_OUTLINE_FRAG & C_OUTLINE_FRAG::operator= (  //assign
+const C_OUTLINE_FRAG & src                    //fragment to copy
+) {
+  // Without this guard the old array would be freed and then read.
+  if (this == &src)
+    return *this;
+
+  // Build the copy before releasing the old array, so this fragment is
+  // untouched if the allocation throws.
+  DIR128 *new_steps = nullptr;
+  if (src.steps != nullptr) {
+    new_steps = new DIR128[src.stepcount];
+    memmove (new_steps, src.steps, src.stepcount);
+  }
+  delete [] steps;
+  steps = new_steps;
+
+  stepcount = src.stepcount;
+  start = src.start;
+  end = src.end;
+  ycoord = src.ycoord;
+  return *this;
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/fpchop.h b/tesseract/src/textord/fpchop.h
new file mode 100644
index 00000000..cc938ba9
--- /dev/null
+++ b/tesseract/src/textord/fpchop.h
@@ -0,0 +1,84 @@
+/**********************************************************************
+ * File: fpchop.h (Formerly fp_chop.h)
+ * Description: Code to chop fixed pitch text into character cells.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef FPCHOP_H
+#define FPCHOP_H
+
+#include "params.h"
+#include "blobbox.h"
+
+namespace tesseract {
+
+// One end of a piece of chopped outline, kept in a list ordered by
+// ycoord. A piece is represented by two linked entries: one that owns
+// the step array and one with steps == nullptr that only marks the
+// other end; each points at its partner through other_end.
+class C_OUTLINE_FRAG : public ELIST_LINK
+{
+ public:
+  // Empty constructor. Every member with no default of its own is given
+  // a defined value so that a default-constructed fragment can be
+  // inspected or destroyed safely.
+  C_OUTLINE_FRAG() {
+    steps = nullptr;
+    stepcount = 0;
+    other_end = nullptr;
+    ycoord = 0;
+  }
+  // Frees the step array, if any. other_end is not owned.
+  ~C_OUTLINE_FRAG () {
+    delete [] steps;
+  }
+  // Makes a step-carrying entry from a run of steps of outline.
+  C_OUTLINE_FRAG(ICOORD start_pt,               //start coord
+                 ICOORD end_pt,                 //end coord
+                 C_OUTLINE *outline,            //source of steps
+                 int16_t start_index,
+                 int16_t end_index);
+  // Makes the step-less partner of head, with ycoord tail_y.
+  C_OUTLINE_FRAG(C_OUTLINE_FRAG *head, int16_t tail_y);
+  C_OUTLINE *close();                           //copy to outline
+  C_OUTLINE_FRAG & operator= (                  //assign
+    const C_OUTLINE_FRAG & src);
+
+  ICOORD start;                                 //start coord
+  ICOORD end;                                   //end coord
+  DIR128 *steps;                                //step array
+  int32_t stepcount;                            //no of steps
+  C_OUTLINE_FRAG *other_end;                    //head if a tail
+  int16_t ycoord;                               //coord of cut pt
+
+ private:
+  // Copy constructor (currently unused, therefore private).
+  C_OUTLINE_FRAG(const C_OUTLINE_FRAG& other);
+};
+
+ELISTIZEH(C_OUTLINE_FRAG)
+
+extern // declaration only; no storage is defined by this header
+INT_VAR_H (textord_fp_chop_error, 2,
+"Max allowed bending of chop cells");
+extern // declaration only; no storage is defined by this header
+double_VAR_H (textord_fp_chop_snap, 0.5,
+"Max distance of chop pt from vertex");
+
+ROW *fixed_pitch_words( //build a ROW from a fixed pitch TO_ROW (body not in this header)
+ TO_ROW *row, //row to do
+ FCOORD rotation //for drawing
+ );
+
+void split_to_blob( //split the blob
+ BLOBNBOX *blob, //blob to split
+ int16_t chop_coord, //place to chop; presumably an x coordinate - confirm
+ float pitch_error, //allowed deviation
+ C_OUTLINE_LIST *left_coutlines, //for cblobs
+ C_OUTLINE_LIST *right_coutlines); //for cblobs
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/gap_map.cpp b/tesseract/src/textord/gap_map.cpp
new file mode 100644
index 00000000..e31328f8
--- /dev/null
+++ b/tesseract/src/textord/gap_map.cpp
@@ -0,0 +1,189 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "gap_map.h"
+
+#include "statistc.h"
+
+namespace tesseract {
+
+BOOL_VAR(gapmap_debug, false, "Say which blocks have tables"); // GAPMAP::GAPMAP prints "Table found" when set
+BOOL_VAR(gapmap_use_ends, false, "Use large space at start and end of rows"); // enables the leading/trailing space passes
+BOOL_VAR(gapmap_no_isolated_quanta, false,
+"Ensure gaps not less than 2quanta wide"); // a full bucket with no full neighbour is zeroed
+double_VAR(gapmap_big_gaps, 1.75, "xht multiplier"); // a gap is wide when it exceeds this times row->xheight
+
+/*************************************************************************
+ * A block gap map is a quantised histogram of whitespace regions in the
+ * block. It is a vertical projection of wide gaps WITHIN lines
+ *
+ * The map is held as an array of counts of rows which have a wide gap
+ * covering that region of the row. Each bucket in the map represents a width
+ * of about half an xheight - (The median of the xhts in the rows is used.)
+ *
+ * The block is considered RECTANGULAR - delimited by the left and right
+ * extremes of the rows in the block. However, ONLY wide gaps WITHIN a row are
+ * counted.
+ *
+ * Degenerate blocks get no map at all: fewer than 3 non-empty rows, no
+ * horizontal extent, or a median x-height so small that the bucket size
+ * would be zero. For these, map stays nullptr and any_tabs stays false, so
+ * table_gap() answers false without touching the map.
+ *
+ *************************************************************************/
+
+GAPMAP::GAPMAP(                    //Constructor
+    TO_BLOCK *block                //block
+    ) {
+  TO_ROW *row;                     //current row
+  BLOBNBOX_IT blob_it;             //iterator
+  TBOX blob_box;
+  TBOX prev_blob_box;
+  int16_t gap_width;
+  int16_t start_of_row;
+  int16_t end_of_row;
+  STATS xht_stats (0, 128);
+  int16_t min_quantum;
+  int16_t max_quantum;
+
+  /*
+    Find left and right extremes and bucket size
+  */
+  map = nullptr;
+  bucket_size = 0;
+  map_max = 0;
+  min_left = INT16_MAX;
+  max_right = -INT16_MAX;
+  total_rows = 0;
+  any_tabs = false;
+
+  // row iterator
+  TO_ROW_IT row_it(block->get_rows());
+  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+    row = row_it.data ();
+    if (!row->blob_list ()->empty ()) {
+      total_rows++;
+      xht_stats.add (static_cast<int16_t>(floor (row->xheight + 0.5)), 1);
+      blob_it.set_to_list (row->blob_list ());
+      start_of_row = blob_it.data ()->bounding_box ().left ();
+      end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
+      if (min_left > start_of_row)
+        min_left = start_of_row;
+      if (max_right < end_of_row)
+        max_right = end_of_row;
+    }
+  }
+
+  bool usable = (total_rows >= 3) && (min_left < max_right);
+  if (usable) {
+    bucket_size = static_cast<int16_t>(floor (xht_stats.median () + 0.5)) / 2;
+    // A median x-height below 2 halves to zero. bucket_size is a divisor
+    // everywhere below and in table_gap(), so such a block is unusable.
+    usable = bucket_size > 0;
+  }
+  if (!usable) {
+    bucket_size = 0;
+    map_max = 0;
+    total_rows = 0;
+    min_left = max_right = 0;
+    return;
+  }
+
+  map_max = (max_right - min_left) / bucket_size;
+  map = new int16_t[map_max + 1]();  // value-initialised: all counts zero
+
+  /*
+    Count, per bucket, the rows that have a wide gap over it
+  */
+  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+    row = row_it.data ();
+    if (row->blob_list ()->empty ())
+      continue;
+    blob_it.set_to_list (row->blob_list ());
+    blob_it.mark_cycle_pt ();
+    blob_box = box_next (&blob_it);
+    prev_blob_box = blob_box;
+    if (gapmap_use_ends) {
+      /* Leading space */
+      gap_width = blob_box.left () - min_left;
+      if ((gap_width > gapmap_big_gaps * row->xheight)
+          && gap_width > 2) {
+        max_quantum = (blob_box.left () - min_left) / bucket_size;
+        if (max_quantum > map_max) max_quantum = map_max;
+        for (int i = 0; i <= max_quantum; i++)
+          map[i]++;
+      }
+    }
+    while (!blob_it.cycled_list ()) {
+      blob_box = box_next (&blob_it);
+      gap_width = blob_box.left () - prev_blob_box.right ();
+      if ((gap_width > gapmap_big_gaps * row->xheight)
+          && gap_width > 2) {
+        min_quantum =
+          (prev_blob_box.right () - min_left) / bucket_size;
+        max_quantum = (blob_box.left () - min_left) / bucket_size;
+        // Clip both ends to the array, as the trailing-space pass and
+        // table_gap() already do.
+        if (min_quantum < 0) min_quantum = 0;
+        if (max_quantum > map_max) max_quantum = map_max;
+        for (int i = min_quantum; i <= max_quantum; i++)
+          map[i]++;
+      }
+      prev_blob_box = blob_box;
+    }
+    if (gapmap_use_ends) {
+      /* Trailing space */
+      gap_width = max_right - prev_blob_box.right ();
+      if ((gap_width > gapmap_big_gaps * row->xheight)
+          && gap_width > 2) {
+        min_quantum =
+          (prev_blob_box.right () - min_left) / bucket_size;
+        if (min_quantum < 0) min_quantum = 0;
+        for (int i = min_quantum; i <= map_max; i++)
+          map[i]++;
+      }
+    }
+  }
+
+  /*
+    Decide whether any bucket is covered by more than half the rows
+  */
+  const int half_rows = total_rows / 2;
+  for (int i = 0; i <= map_max; i++) {
+    if (map[i] <= half_rows)
+      continue;
+    // A missing neighbour (either end of the array) counts as not full,
+    // so a one-bucket map is never indexed outside its bounds. Buckets
+    // zeroed earlier in this sweep are seen as not full by the next one.
+    const bool left_low = (i == 0) || (map[i - 1] <= half_rows);
+    const bool right_low = (i == map_max) || (map[i + 1] <= half_rows);
+    if (gapmap_no_isolated_quanta && left_low && right_low)
+      map[i] = 0;                  //prevent isolated quantum
+    else
+      any_tabs = true;
+  }
+  if (gapmap_debug && any_tabs)
+    tprintf ("Table found\n");
+}
+
+
+/*************************************************************************
+ * GAPMAP::table_gap()
+ * Reports whether any bucket overlapping the range [left, right] has a
+ * wide gap in more than half the rows of the block. Always false when the
+ * constructor found no such bucket anywhere (any_tabs is false).
+ *************************************************************************/
+
+bool GAPMAP::table_gap(            //Is gap a table?
+    int16_t left,                  //From here
+    int16_t right                  //To here
+) {
+  if (!any_tabs)
+    return false;
+
+  int16_t first_quantum = (left - min_left) / bucket_size;
+  int16_t last_quantum = (right - min_left) / bucket_size;
+  // Keep the scan inside map[0..map_max]: the requested range may start
+  // left of the block or (big blob followed by small blob) end beyond it.
+  if (first_quantum < 0)
+    first_quantum = 0;
+  if (last_quantum > map_max)
+    last_quantum = map_max;
+
+  for (int16_t quantum = first_quantum; quantum <= last_quantum; quantum++) {
+    if (map[quantum] > total_rows / 2)
+      return true;
+  }
+  return false;
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/gap_map.h b/tesseract/src/textord/gap_map.h
new file mode 100644
index 00000000..7ed9aae6
--- /dev/null
+++ b/tesseract/src/textord/gap_map.h
@@ -0,0 +1,53 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef GAP_MAP_H
+#define GAP_MAP_H
+
+#include "blobbox.h"
+
+namespace tesseract {
+
+class GAPMAP // per-block histogram of wide within-row gaps, queried through table_gap()
+{
+ public:
+ GAPMAP( //constructor
+ TO_BLOCK *block);
+
+ ~GAPMAP () { //destructor
+ delete[] map; // NOTE(review): raw owning pointer with no copy operations declared; a copied GAPMAP would free map twice - confirm it is never copied
+ }
+
+ bool table_gap( //Is gap a table?
+ int16_t left, //From here
+ int16_t right); //To here
+
+ private:
+ int16_t total_rows; //in block
+ int16_t min_left; //Left extreme
+ int16_t max_right; //Right extreme
+ int16_t bucket_size; // half an x ht
+ int16_t *map; //counts of rows with a wide gap over each bucket
+ int16_t map_max; //map[0..map_max] defined
+ bool any_tabs; //set by the constructor when some bucket exceeds half the rows
+};
+
+/*-----------------------------*/
+
+extern BOOL_VAR_H (gapmap_debug, false, "Say which blocks have tables"); // declaration only
+extern BOOL_VAR_H (gapmap_use_ends, false,
+"Use large space at start and end of rows"); // declaration only
+extern BOOL_VAR_H (gapmap_no_isolated_quanta, false,
+"Ensure gaps not less than 2quanta wide"); // declaration only
+extern double_VAR_H (gapmap_big_gaps, 1.75, "xht multiplier"); // declaration only
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/imagefind.cpp b/tesseract/src/textord/imagefind.cpp
new file mode 100644
index 00000000..dc5f19b9
--- /dev/null
+++ b/tesseract/src/textord/imagefind.cpp
@@ -0,0 +1,1366 @@
+///////////////////////////////////////////////////////////////////////
+// File: imagefind.cpp
+// Description: Function to find image and drawing regions in an image
+// and create a corresponding list of empty blobs.
+// Author: Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "imagefind.h"
+
+#include "colpartitiongrid.h"
+#include "linlsq.h"
+#include "statistc.h"
+#include "params.h"
+
+#include "allheaders.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+static INT_VAR(textord_tabfind_show_images, false, "Show image blobs"); // NOTE(review): an INT_VAR given a bool default and only tested as a flag here - confirm BOOL_VAR was not intended
+
+// Fraction of width or height of on pixels that can be discarded from a
+// roughly rectangular image.
+const double kMinRectangularFraction = 0.125;
+// Fraction of width or height to consider image completely used.
+const double kMaxRectangularFraction = 0.75;
+// Fraction of width or height to allow transition from kMinRectangularFraction
+// to kMaxRectangularFraction, equivalent to a dy/dx skew.
+const double kMaxRectangularGradient = 0.1; // About 6 degrees.
+// Minimum image size to be worth looking for images on. FindImages applies
+// it to the width and height of both the input and its 2x reduction.
+const int kMinImageFindSize = 100;
+// Scale factor for the rms color fit error.
+const double kRMSFitScaling = 8.0;
+// Min color difference to call it two colors.
+const int kMinColorDifference = 16;
+// Pixel padding for noise blobs and partitions when rendering on the image
+// mask to encourage them to join together. Make it too big and images
+// will fatten out too much and have to be clipped to text.
+const int kNoisePadding = 4;
+
+// Finds image regions within the BINARY source pix (page image) and returns
+// the image regions as a mask image.
+// "No images found" is reported as a newly created 1 bpp pix of the input size (see the early returns), not as nullptr.
+// The caller owns the returned pix and must pixDestroy it.
+// If textord_tabfind_show_images, debug images are appended to pixa_debug.
+Pix* ImageFind::FindImages(Pix* pix, DebugPixa* pixa_debug) {
+ // Not worth looking at small images.
+ if (pixGetWidth(pix) < kMinImageFindSize ||
+ pixGetHeight(pix) < kMinImageFindSize)
+ return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
+
+ // Reduce by factor 2.
+ Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0);
+ if (textord_tabfind_show_images && pixa_debug != nullptr)
+ pixa_debug->AddPix(pixr, "CascadeReduced");
+
+ // Get the halftone mask directly from Leptonica.
+ //
+ // Leptonica will print an error message and return nullptr if we call
+ // pixGenHalftoneMask(pixr, nullptr, ...) with too small image, so we
+ // want to bypass that.
+ if (pixGetWidth(pixr) < kMinImageFindSize ||
+ pixGetHeight(pixr) < kMinImageFindSize) {
+ pixDestroy(&pixr);
+ return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
+ }
+ // Get the halftone mask.
+ l_int32 ht_found = 0;
+ Pixa* pixadb = (textord_tabfind_show_images && pixa_debug != nullptr)
+ ? pixaCreate(0)
+ : nullptr; // only collect Leptonica's own debug images when they will be shown
+ Pix* pixht2 = pixGenerateHalftoneMask(pixr, nullptr, &ht_found, pixadb);
+ if (pixadb) {
+ Pix* pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2);
+ if (textord_tabfind_show_images && pixa_debug != nullptr) // always true here: pixadb is only created under the same condition
+ pixa_debug->AddPix(pixdb, "HalftoneMask");
+ pixDestroy(&pixdb);
+ pixaDestroy(&pixadb);
+ }
+ pixDestroy(&pixr);
+ if (!ht_found && pixht2 != nullptr)
+ pixDestroy(&pixht2); // a mask was returned but no halftone found: discard it (pixht2 is tested against nullptr next)
+ if (pixht2 == nullptr)
+ return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
+
+ // Expand back up again.
+ Pix *pixht = pixExpandReplicate(pixht2, 2);
+ if (textord_tabfind_show_images && pixa_debug != nullptr)
+ pixa_debug->AddPix(pixht, "HalftoneReplicated");
+ pixDestroy(&pixht2);
+
+ // Fill to capture pixels near the mask edges that were missed
+ Pix *pixt = pixSeedfillBinary(nullptr, pixht, pix, 8);
+ pixOr(pixht, pixht, pixt);
+ pixDestroy(&pixt);
+
+ // Eliminate lines and bars that may be joined to images.
+ Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3);
+ pixDilateBrick(pixfinemask, pixfinemask, 5, 5);
+ if (textord_tabfind_show_images && pixa_debug != nullptr)
+ pixa_debug->AddPix(pixfinemask, "FineMask");
+ Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1);
+ Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0);
+ pixDestroy(&pixreduced);
+ pixDilateBrick(pixreduced2, pixreduced2, 5, 5);
+ Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8);
+ pixDestroy(&pixreduced2);
+ if (textord_tabfind_show_images && pixa_debug != nullptr)
+ pixa_debug->AddPix(pixcoarsemask, "CoarseMask");
+ // Combine the coarse and fine image masks.
+ pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask);
+ pixDestroy(&pixfinemask);
+ // Dilate a bit to make sure we get everything.
+ pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3);
+ Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16);
+ pixDestroy(&pixcoarsemask);
+ if (textord_tabfind_show_images && pixa_debug != nullptr)
+ pixa_debug->AddPix(pixmask, "MaskDilated");
+ // And the image mask with the line and bar remover.
+ pixAnd(pixht, pixht, pixmask);
+ pixDestroy(&pixmask);
+ if (textord_tabfind_show_images && pixa_debug != nullptr)
+ pixa_debug->AddPix(pixht, "FinalMask");
+ // Make the result image the same size as the input.
+ Pix* result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1);
+ pixOr(result, result, pixht);
+ pixDestroy(&pixht);
+ return result;
+}
+
+// Runs pixConnComp (8-connected) on the binary image mask pix and hands the
+// component boxes and component images back through boxa and pixa.
+// Every component that pixNearlyRectangular accepts is replaced by a solid
+// (all pixels set) pix of the accepted rectangle's size, and its box is
+// moved and resized to that rectangle.
+// Both outputs are reset to nullptr first and may still be nullptr on
+// return, meaning no images found. If not nullptr, they must be destroyed
+// by the caller.
+// Resolution of pix should match the source image (Tesseract::pix_binary_)
+// so the output coordinate systems match.
+void ImageFind::ConnCompAndRectangularize(Pix* pix, DebugPixa* pixa_debug,
+                                          Boxa** boxa, Pixa** pixa) {
+  const bool show_debug =
+      textord_tabfind_show_images && pixa_debug != nullptr;
+  *boxa = nullptr;
+  *pixa = nullptr;
+  if (show_debug)
+    pixa_debug->AddPix(pix, "Conncompimage");
+
+  // Find the individual image regions in the mask image.
+  *boxa = pixConnComp(pix, pixa, 8);
+  const int component_count =
+      (*boxa != nullptr && *pixa != nullptr) ? pixaGetCount(*pixa) : 0;
+
+  // A sharp edge in vertical and/or horizontal occupancy suggests a
+  // rectangular image with unwanted bits merged on, so such components are
+  // clipped to the approximate rectangle.
+  for (int i = 0; i < component_count; ++i) {
+    Pix* component = pixaGetPix(*pixa, i, L_CLONE);
+    if (show_debug)
+      pixa_debug->AddPix(component, "A component");
+
+    int x_start, x_end, y_start, y_end;
+    const bool rectangular =
+        pixNearlyRectangular(component, kMinRectangularFraction,
+                             kMaxRectangularFraction,
+                             kMaxRectangularGradient,
+                             &x_start, &y_start, &x_end, &y_end);
+    if (rectangular) {
+      const int rect_width = x_end - x_start;
+      const int rect_height = y_end - y_start;
+      Pix* solid = pixCreate(rect_width, rect_height, 1);
+      pixSetAll(solid);
+      pixDestroy(&component);
+      // pixaReplacePix takes ownership of the solid pix.
+      pixaReplacePix(*pixa, i, solid, nullptr);
+      component = pixaGetPix(*pixa, i, L_CLONE);
+      // Shift and resize the box so that it matches the new pix.
+      l_int32 x, y, width, height;
+      boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height);
+      Box* solid_box = boxCreate(x + x_start, y + y_start,
+                                 rect_width, rect_height);
+      boxaReplaceBox(*boxa, i, solid_box);
+    }
+    pixDestroy(&component);
+  }
+}
+
+// Walks rows y = *y_start, *y_start + y_step, ... stopping before y_end,
+// counting the set pixels of each row within x = [x_start, x_end).
+// *y_start is input/output.
+// The scan succeeds when the per-row counts follow this pattern:
+//   0 or more rows with count < min_count, then
+//   <= mid_width rows with min_count <= count <= max_count, then
+//   a row with count > max_count.
+// On success true is returned. Whenever a row with count >= min_count is
+// reached, *y_start is set to the first such row, whether or not the scan
+// then succeeds.
+static bool HScanForEdge(uint32_t* data, int wpl, int x_start, int x_end,
+                         int min_count, int mid_width, int max_count,
+                         int y_end, int y_step, int* y_start) {
+  int rows_in_middle = 0;
+  for (int y = *y_start; y != y_end; y += y_step) {
+    // Count the set pixels of this row inside the x range.
+    uint32_t* row = data + wpl * y;
+    int black = 0;
+    for (int x = x_start; x < x_end; ++x) {
+      if (GET_DATA_BIT(row, x))
+        ++black;
+    }
+
+    if (rows_in_middle == 0) {
+      if (black < min_count)
+        continue;                  // Still in the min phase.
+      *y_start = y;                // First row out of the min phase.
+    }
+    if (black > max_count)
+      return true;                 // Found the pattern.
+    if (++rows_in_middle > mid_width)
+      return false;                // Middle too big.
+  }
+  return false;                    // Never found max_count.
+}
+
+// Walks columns x = *x_start, *x_start + x_step, ... stopping before x_end,
+// counting the set pixels of each column within y = [y_start, y_end).
+// *x_start is input/output.
+// The scan succeeds when the per-column counts follow this pattern:
+//   0 or more cols with count < min_count, then
+//   <= mid_width cols with min_count <= count <= max_count, then
+//   a column with count > max_count.
+// On success true is returned. Whenever a column with count >= min_count
+// is reached, *x_start is set to the first such column, whether or not the
+// scan then succeeds.
+static bool VScanForEdge(uint32_t* data, int wpl, int y_start, int y_end,
+                         int min_count, int mid_width, int max_count,
+                         int x_end, int x_step, int* x_start) {
+  int cols_in_middle = 0;
+  for (int x = *x_start; x != x_end; x += x_step) {
+    // Count the set pixels of this column inside the y range.
+    int black = 0;
+    for (int y = y_start; y < y_end; ++y) {
+      uint32_t* row = data + y * wpl;
+      if (GET_DATA_BIT(row, x))
+        ++black;
+    }
+
+    if (cols_in_middle == 0) {
+      if (black < min_count)
+        continue;                  // Still in the min phase.
+      *x_start = x;                // First column out of the min phase.
+    }
+    if (black > max_count)
+      return true;                 // Found the pattern.
+    if (++cols_in_middle > mid_width)
+      return false;                // Middle too big.
+  }
+  return false;                    // Never found max_count.
+}
+
+// Returns true if there is a rectangle in the source pix, such that all
+// pixel rows and column slices outside of it have less than
+// min_fraction of the pixels black, and within max_skew_gradient fraction
+// of the pixels on the inside, there are at least max_fraction of the
+// pixels black. In other words, the inside of the rectangle looks roughly
+// rectangular, and the outside of it looks like extra bits.
+// On return, the rectangle is defined by x_start, y_start, x_end and y_end.
+// Note: the algorithm is iterative, allowing it to slice off pixels from
+// one edge, allowing it to then slice off more pixels from another edge.
+bool ImageFind::pixNearlyRectangular(Pix* pix,
+ double min_fraction, double max_fraction,
+ double max_skew_gradient,
+ int* x_start, int* y_start,
+ int* x_end, int* y_end) {
+ ASSERT_HOST(pix != nullptr);
+ *x_start = 0; // start from the full image; ends are exclusive
+ *x_end = pixGetWidth(pix);
+ *y_start = 0;
+ *y_end = pixGetHeight(pix);
+
+ uint32_t* data = pixGetData(pix);
+ int wpl = pixGetWpl(pix);
+ bool any_cut = false;
+ bool left_done = false;
+ bool right_done = false;
+ bool top_done = false;
+ bool bottom_done = false;
+ do { // repeat until a whole pass finds no new edge
+ any_cut = false;
+ // Find the top/bottom edges.
+ int width = *x_end - *x_start;
+ int min_count = static_cast<int>(width * min_fraction);
+ int max_count = static_cast<int>(width * max_fraction);
+ int edge_width = static_cast<int>(width * max_skew_gradient);
+ if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
+ max_count, *y_end, 1, y_start) && !top_done) { // the scan runs first and may move *y_start even when top_done is already set
+ top_done = true;
+ any_cut = true;
+ }
+ --(*y_end); // the upward scan starts on the last row, so make the end inclusive
+ if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width,
+ max_count, *y_start, -1, y_end) && !bottom_done) {
+ bottom_done = true;
+ any_cut = true;
+ }
+ ++(*y_end); // back to an exclusive end
+
+ // Find the left/right edges.
+ int height = *y_end - *y_start; // uses the rows as just narrowed above
+ min_count = static_cast<int>(height * min_fraction);
+ max_count = static_cast<int>(height * max_fraction);
+ edge_width = static_cast<int>(height * max_skew_gradient);
+ if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
+ max_count, *x_end, 1, x_start) && !left_done) {
+ left_done = true;
+ any_cut = true;
+ }
+ --(*x_end); // the leftward scan starts on the last column, so make the end inclusive
+ if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width,
+ max_count, *x_start, -1, x_end) && !right_done) {
+ right_done = true;
+ any_cut = true;
+ }
+ ++(*x_end); // back to an exclusive end
+ } while (any_cut);
+
+ // All edges must satisfy the condition of sharp gradient in pixel density
+ // in order for the full rectangle to be present.
+ return left_done && right_done && top_done && bottom_done;
+}
+
+ // Tightens the rectangle [*x_start, *x_end) x [*y_start, *y_end) so that it
+ // just encloses the black pixels of pix that lie inside the rectangle as
+ // given on entry.
+ // Returns true and overwrites all four coordinates when such pixels exist.
+ // Returns false and leaves the coordinates untouched when the clip yields
+ // no box, i.e. the rectangle contains no black pixels at all.
+ bool ImageFind::BoundsWithinRect(Pix* pix, int* x_start, int* y_start,
+                                  int* x_end, int* y_end) {
+   Box* search_area = boxCreate(*x_start, *y_start,
+                                *x_end - *x_start, *y_end - *y_start);
+   Box* tight_box = nullptr;
+   pixClipBoxToForeground(pix, search_area, nullptr, &tight_box);
+   // The request box is no longer needed whatever the outcome.
+   boxDestroy(&search_area);
+   if (tight_box == nullptr)
+     return false;
+
+   l_int32 left, top, box_width, box_height;
+   boxGetGeometry(tight_box, &left, &top, &box_width, &box_height);
+   boxDestroy(&tight_box);
+   // Convert origin + size back to the start/end form used by the caller.
+   *x_start = left;
+   *x_end = left + box_width;
+   *y_start = top;
+   *y_end = top + box_height;
+   return true;
+ }
+
+ // Computes the squared Euclidean distance, in 3-D (RGB) space, between
+ // point and the straight line passing through line1 and line2.
+ // All three arguments are indexed by colour channel; the alpha entry of the
+ // line direction is forced to zero so that it does not contribute.
+ // When line1 and line2 have identical colour components the line has no
+ // direction and 0.0 is returned.
+ double ImageFind::ColorDistanceFromLine(const uint8_t* line1,
+                                         const uint8_t* line2,
+                                         const uint8_t* point) {
+   // direction = line2 - line1, offset = point - line1, per channel.
+   int direction[kRGBRMSColors];
+   int offset[kRGBRMSColors];
+   for (int ch = 0; ch < kRGBRMSColors; ++ch) {
+     const int origin = static_cast<int>(line1[ch]);
+     direction[ch] = static_cast<int>(line2[ch]) - origin;
+     offset[ch] = static_cast<int>(point[ch]) - origin;
+   }
+   direction[L_ALPHA_CHANNEL] = 0;
+
+   // |direction|^2. A zero length means there is no line to measure against.
+   double direction_sq = 0.0;
+   for (int ch = 0; ch < kRGBRMSColors; ++ch) {
+     direction_sq += static_cast<double>(direction[ch]) * direction[ch];
+   }
+   if (direction_sq == 0.0) {
+     return 0.0;
+   }
+
+   // normal = direction x offset (3-D cross product over R, G, B).
+   const int dir_r = direction[COLOR_RED];
+   const int dir_g = direction[COLOR_GREEN];
+   const int dir_b = direction[COLOR_BLUE];
+   const int off_r = offset[COLOR_RED];
+   const int off_g = offset[COLOR_GREEN];
+   const int off_b = offset[COLOR_BLUE];
+   int normal[kRGBRMSColors];
+   normal[COLOR_RED] = dir_g * off_b - dir_b * off_g;
+   normal[COLOR_GREEN] = dir_b * off_r - dir_r * off_b;
+   normal[COLOR_BLUE] = dir_r * off_g - dir_g * off_r;
+   normal[L_ALPHA_CHANNEL] = 0;
+
+   double normal_sq = 0.0;
+   for (int ch = 0; ch < kRGBRMSColors; ++ch) {
+     normal_sq += static_cast<double>(normal[ch]) * normal[ch];
+   }
+   // |direction x offset|^2 / |direction|^2 is the squared distance.
+   return normal_sq / direction_sq;
+ }
+
+
+ // Packs the r, g and b values into a single 32-bit pixel using leptonica's
+ // composeRGBPixel and returns the packed value.
+ uint32_t ImageFind::ComposeRGB(uint32_t r, uint32_t g, uint32_t b) {
+   l_uint32 packed_pixel;
+   composeRGBPixel(r, g, b, &packed_pixel);
+   return packed_pixel;
+ }
+
+ // Returns the input value clipped to a uint8_t.
+ // Values at or below 0 give 0, values at or above 255 give 255, and
+ // anything in between is truncated towards zero. NaN gives 0.
+ uint8_t ImageFind::ClipToByte(double pixel) {
+   // Deliberately written as a negated comparison: every ordered comparison
+   // with NaN is false, so NaN is caught here rather than falling through
+   // to the cast below, where converting it to an integer is undefined.
+   if (!(pixel > 0.0))
+     return 0;
+   if (pixel >= 255.0)
+     return 255;
+   return static_cast<uint8_t>(pixel);
+ }
+
+ // Computes the light and dark extremes of color in the given rectangle of
+ // the given pix, which is factor smaller than the coordinate system in rect.
+ // The light and dark points are taken to be the upper and lower 8th-ile of
+ // the most deviant of R, G and B. The value of the other 2 channels are
+ // computed by linear fit against the most deviant.
+ // The colors of the two points are returned in color1 and color2, with the
+ // alpha channel set to a scaled mean rms of the fits.
+ // If color_map1 is not null then it and color_map2 get rect pasted in them
+ // with the two calculated colors, and rms map gets a pasted rect of the rms.
+ // color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
+ //
+ // pix must be non-null and 32 bits deep (asserted).
+ // color1 and color2 are written at the four channel indices COLOR_RED,
+ // COLOR_GREEN, COLOR_BLUE and L_ALPHA_CHANNEL.
+ // If the padded, scaled rectangle is degenerate the function returns early
+ // without writing color1, color2 or any of the maps.
+ // Only color_map1 is tested for null; color_map2 and rms_map are used
+ // unchecked whenever color_map1 is non-null.
+ void ImageFind::ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor,
+ Pix* color_map1, Pix* color_map2,
+ Pix* rms_map,
+ uint8_t* color1, uint8_t* color2) {
+ ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32);
+ // Pad the rectangle outwards by 2 (scaled) pixels if possible to get more
+ // background.
+ int width = pixGetWidth(pix);
+ int height = pixGetHeight(pix);
+ // Left/bottom round down (clamped at 0 before scaling); top/right round up
+ // via the (factor - 1) term and are clamped to the pix size.
+ int left_pad = std::max(rect.left() - 2 * factor, 0) / factor;
+ int top_pad = (rect.top() + 2 * factor + (factor - 1)) / factor;
+ top_pad = std::min(height, top_pad);
+ int right_pad = (rect.right() + 2 * factor + (factor - 1)) / factor;
+ right_pad = std::min(width, right_pad);
+ int bottom_pad = std::max(rect.bottom() - 2 * factor, 0) / factor;
+ int width_pad = right_pad - left_pad;
+ int height_pad = top_pad - bottom_pad;
+ // Nothing has been allocated yet, so a plain return is safe here.
+ if (width_pad < 1 || height_pad < 1 || width_pad + height_pad < 4)
+ return;
+ // Now crop the pix to the rectangle.
+ // The box y origin is height - top_pad: presumably this flips from the
+ // bottom-up coordinates of rect to top-down image rows - confirm.
+ Box* scaled_box = boxCreate(left_pad, height - top_pad,
+ width_pad, height_pad);
+ Pix* scaled = pixClipRectangle(pix, scaled_box, nullptr);
+
+ // Compute stats over the whole image.
+ STATS red_stats(0, 256);
+ STATS green_stats(0, 256);
+ STATS blue_stats(0, 256);
+ uint32_t* data = pixGetData(scaled);
+ // The loops below walk data as one flat run of width_pad * height_pad
+ // words, which is only valid if each row is exactly width_pad words long.
+ ASSERT_HOST(pixGetWpl(scaled) == width_pad);
+ for (int y = 0; y < height_pad; ++y) {
+ for (int x = 0; x < width_pad; ++x, ++data) {
+ int r = GET_DATA_BYTE(data, COLOR_RED);
+ int g = GET_DATA_BYTE(data, COLOR_GREEN);
+ int b = GET_DATA_BYTE(data, COLOR_BLUE);
+ red_stats.add(r, 1);
+ green_stats.add(g, 1);
+ blue_stats.add(b, 1);
+ }
+ }
+ // Find the RGB component with the greatest 8th-ile-range.
+ // 8th-iles are used instead of quartiles to get closer to the true
+ // foreground color, which is going to be faint at best because of the
+ // pre-scaling of the input image.
+ // Start by assuming red is the widest channel (x_color) with green and
+ // blue as the dependent channels (y1_color, y2_color).
+ int best_l8 = static_cast<int>(red_stats.ile(0.125f));
+ int best_u8 = static_cast<int>(ceil(red_stats.ile(0.875f)));
+ int best_i8r = best_u8 - best_l8;
+ int x_color = COLOR_RED;
+ int y1_color = COLOR_GREEN;
+ int y2_color = COLOR_BLUE;
+ int l8 = static_cast<int>(green_stats.ile(0.125f));
+ int u8 = static_cast<int>(ceil(green_stats.ile(0.875f)));
+ // Strictly greater: on a tie the earlier channel is kept.
+ if (u8 - l8 > best_i8r) {
+ best_i8r = u8 - l8;
+ best_l8 = l8;
+ best_u8 = u8;
+ x_color = COLOR_GREEN;
+ y1_color = COLOR_RED;
+ }
+ l8 = static_cast<int>(blue_stats.ile(0.125f));
+ u8 = static_cast<int>(ceil(blue_stats.ile(0.875f)));
+ if (u8 - l8 > best_i8r) {
+ best_i8r = u8 - l8;
+ best_l8 = l8;
+ best_u8 = u8;
+ x_color = COLOR_BLUE;
+ // Both dependents are reset, so this is correct whether red or green was
+ // the previous best.
+ y1_color = COLOR_GREEN;
+ y2_color = COLOR_RED;
+ }
+ if (best_i8r >= kMinColorDifference) {
+ // Two distinct colors: fit each dependent channel as a straight line
+ // against the widest channel over every pixel of the crop.
+ LLSQ line1;
+ LLSQ line2;
+ // Note: this data shadows the outer pointer of the same name and
+ // restarts the walk from the first pixel of scaled.
+ uint32_t* data = pixGetData(scaled);
+ for (int im_y = 0; im_y < height_pad; ++im_y) {
+ for (int im_x = 0; im_x < width_pad; ++im_x, ++data) {
+ int x = GET_DATA_BYTE(data, x_color);
+ int y1 = GET_DATA_BYTE(data, y1_color);
+ int y2 = GET_DATA_BYTE(data, y2_color);
+ line1.add(x, y1);
+ line2.add(x, y2);
+ }
+ }
+ double m1 = line1.m();
+ double c1 = line1.c(m1);
+ double m2 = line2.m();
+ double c2 = line2.c(m2);
+ // The alpha value stored below is the sum of the two fit errors scaled
+ // by kRMSFitScaling.
+ double rms = line1.rms(m1, c1) + line2.rms(m2, c2);
+ rms *= kRMSFitScaling;
+ // Save the results.
+ // The + 0.5 rounds to nearest before ClipToByte truncates.
+ color1[x_color] = ClipToByte(best_l8);
+ color1[y1_color] = ClipToByte(m1 * best_l8 + c1 + 0.5);
+ color1[y2_color] = ClipToByte(m2 * best_l8 + c2 + 0.5);
+ color1[L_ALPHA_CHANNEL] = ClipToByte(rms);
+ color2[x_color] = ClipToByte(best_u8);
+ color2[y1_color] = ClipToByte(m1 * best_u8 + c1 + 0.5);
+ color2[y2_color] = ClipToByte(m2 * best_u8 + c2 + 0.5);
+ color2[L_ALPHA_CHANNEL] = ClipToByte(rms);
+ } else {
+ // There is only one color.
+ // Use the per-channel medians, zero alpha, and make color2 a copy.
+ color1[COLOR_RED] = ClipToByte(red_stats.median());
+ color1[COLOR_GREEN] = ClipToByte(green_stats.median());
+ color1[COLOR_BLUE] = ClipToByte(blue_stats.median());
+ color1[L_ALPHA_CHANNEL] = 0;
+ memcpy(color2, color1, 4);
+ }
+ if (color_map1 != nullptr) {
+ // Paint the padded box in each map: the two colors, and the alpha (rms)
+ // value of color1 in rms_map.
+ pixSetInRectArbitrary(color_map1, scaled_box,
+ ComposeRGB(color1[COLOR_RED],
+ color1[COLOR_GREEN],
+ color1[COLOR_BLUE]));
+ pixSetInRectArbitrary(color_map2, scaled_box,
+ ComposeRGB(color2[COLOR_RED],
+ color2[COLOR_GREEN],
+ color2[COLOR_BLUE]));
+ pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]);
+ }
+ pixDestroy(&scaled);
+ boxDestroy(&scaled_box);
+ }
+
+// ================ CUTTING POLYGONAL IMAGES FROM A RECTANGLE ================
+// The following functions are responsible for cutting a polygonal image from
+// a rectangle: CountPixelsInRotatedBox, AttemptToShrinkBox, CutChunkFromParts
+// with DivideImageIntoParts as the master.
+// Problem statement:
+// We start with a single connected component from the image mask: we get
+// a Pix of the component, and its location on the page (im_box).
+// The objective of cutting a polygonal image from its rectangle is to avoid
+// interfering text, but not text that completely overlaps the image.
+// ------------------------------ ------------------------------
+// | Single input partition | | 1 Cut up output partitions |
+// | | ------------------------------
+// Av|oid | Avoid | |
+// | | |________________________|
+// Int|erfering | Interfering | |
+// | | _____|__________________|
+// T|ext | Text | |
+// | Text-on-image | | Text-on-image |
+// ------------------------------ --------------------------
+// DivideImageIntoParts does this by building a ColPartition_LIST (not in the
+// grid) with each ColPartition representing one of the rectangles needed,
+// starting with a single rectangle for the whole image component, and cutting
+// bits out of it with CutChunkFromParts as needed to avoid text. The output
+// ColPartitions are supposed to be ordered from top to bottom.
+
+// The problem is complicated by the fact that we have rotated the coordinate
+// system to make text lines horizontal, so if we need to look at the component
+// image, we have to rotate the coordinates. Throughout the functions in this
+// section im_box is the rectangle representing the image component in the
+// rotated page coordinates (where we are building our output ColPartitions),
+// rotation is the rotation that we used to get there, and rerotation is the
+// rotation required to get back to original page image coordinates.
+// To get to coordinates in the component image, pix, we rotate the im_box,
+// the point we want to locate, and subtract the rotated point from the top-left
+// of the rotated im_box.
+// im_box is therefore essential to calculating coordinates within the pix.
+
+ // Tests whether the strip of image separating box1 from box2 is free of
+ // black pixels.
+ // The strip runs along whichever axis has the larger gap between the boxes
+ // (x wins a tie) and spans the union of the two boxes on the other axis.
+ // If that gap is zero or negative there is nothing in between, and the
+ // answer is true.
+ // im_box must be the bounding box of pix in tesseract coordinates, which
+ // may be negative because of the rotation that makes textlines horizontal.
+ // rotation is applied to the boxes before they are mapped onto pix and
+ // should undo that rotation.
+ bool ImageFind::BlankImageInBetween(const TBOX& box1, const TBOX& box2,
+                                     const TBOX& im_box, const FCOORD& rotation,
+                                     Pix* pix) {
+   const int gap_x = box1.x_gap(box2);
+   const int gap_y = box1.y_gap(box2);
+   const bool side_by_side = gap_x >= gap_y;
+   const int dominant_gap = side_by_side ? gap_x : gap_y;
+   if (dominant_gap <= 0)
+     return true;
+
+   // Start from the union and squeeze it down to the gap on one axis.
+   TBOX gap_box(box1);
+   gap_box += box2;
+   if (side_by_side) {
+     gap_box.set_left(std::min(box1.right(), box2.right()));
+     gap_box.set_right(std::max(box1.left(), box2.left()));
+   } else {
+     gap_box.set_top(std::max(box1.bottom(), box2.bottom()));
+     gap_box.set_bottom(std::min(box1.top(), box2.top()));
+   }
+   const int black_pixels =
+       CountPixelsInRotatedBox(gap_box, im_box, rotation, pix);
+   return black_pixels == 0;
+ }
+
+ // Returns the number of pixels in box in the pix.
+ // rotation, pix and im_box are defined in the large comment above.
+ // box is first clipped to im_box; an empty intersection gives 0.
+ // Both boxes are then rotated by rotation, and the clipped box is located
+ // in pix relative to the top-left corner of the rotated im_box.
+ // 0 is also returned if the temporary 1 bpp image cannot be created or the
+ // pixel count fails.
+ int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX& im_box,
+                                        const FCOORD& rotation, Pix* pix) {
+   // Intersect it with the image box.
+   box &= im_box;  // This is in-place box intersection.
+   if (box.null_box())
+     return 0;
+   box.rotate(rotation);
+   TBOX rotated_im_box(im_box);
+   rotated_im_box.rotate(rotation);
+   Pix* rect_pix = pixCreate(box.width(), box.height(), 1);
+   if (rect_pix == nullptr)
+     return 0;  // Allocation failed: nothing can be counted.
+   // Copy the region of interest out of pix. The y offset is measured down
+   // from the top of the rotated image box.
+   pixRasterop(rect_pix, 0, 0, box.width(), box.height(),
+               PIX_SRC, pix, box.left() - rotated_im_box.left(),
+               rotated_im_box.top() - box.top());
+   // Initialised so that a failed count can never return an indeterminate
+   // value.
+   l_int32 result = 0;
+   pixCountPixels(rect_pix, &result, nullptr);
+   pixDestroy(&rect_pix);
+   return result;
+ }
+
+ // slice is known to contain some black pixels, though not necessarily all
+ // the way across. Narrows the left and right of slice to the bounds that
+ // ImageFind::BoundsWithinRect reports for the black pixels inside it; the
+ // top and bottom of slice are never changed.
+ // rotation, rerotation, pix and im_box are defined in the large comment above.
+ static void AttemptToShrinkBox(const FCOORD& rotation, const FCOORD& rerotation,
+                                const TBOX& im_box, Pix* pix, TBOX* slice) {
+   // Take both boxes back to image orientation.
+   TBOX image_frame(im_box);
+   image_frame.rotate(rerotation);
+   TBOX slice_in_image(*slice);
+   slice_in_image.rotate(rerotation);
+
+   // Pix coordinates are measured from the top-left of the image frame, with
+   // y increasing downwards.
+   const int origin_x = image_frame.left();
+   const int origin_y = image_frame.top();
+   int pix_left = slice_in_image.left() - origin_x;
+   int pix_right = slice_in_image.right() - origin_x;
+   int pix_top = origin_y - slice_in_image.top();
+   int pix_bottom = origin_y - slice_in_image.bottom();
+   ImageFind::BoundsWithinRect(pix, &pix_left, &pix_top,
+                               &pix_right, &pix_bottom);
+
+   // Map the result back to page coordinates and re-apply the rotation.
+   slice_in_image.set_to_given_coords(pix_left + origin_x,
+                                      origin_y - pix_bottom,
+                                      pix_right + origin_x,
+                                      origin_y - pix_top);
+   slice_in_image.rotate(rotation);
+   // Only the horizontal extent is taken from the shrunken box.
+   slice->set_left(slice_in_image.left());
+   slice->set_right(slice_in_image.right());
+ }
+
+ // The meat of cutting a polygonal image around text.
+ // This function covers the general case of cutting a box out of a box
+ // as shown:
+ //         Input                               Output
+ // ------------------------------     ------------------------------
+ // |   Single input partition   |     | 1 Cut up output partitions |
+ // |                            |     ------------------------------
+ // |         ----------         |     ---------           ----------
+ // |         |  box   |         |     |   2   |   box     |    3   |
+ // |         |        |         |     |       |  is cut   |        |
+ // |         ----------         |     ---------   out     ----------
+ // |                            |     ------------------------------
+ // |                            |     |   4                        |
+ // ------------------------------     ------------------------------
+ // In the context that this function is used, at most 3 of the above output
+ // boxes will be created, as the overlapping box is never contained by the
+ // input.
+ // The above cutting operation is executed for each element of part_list that
+ // is overlapped by the input box. Each modified ColPartition is replaced
+ // in place in the list by the output of the cutting operation in the order
+ // shown above, so iff no holes are ever created, the output will be in
+ // top-to-bottom order, but in extreme cases, hole creation is possible.
+ // In such cases, the output order may cause strange block polygons.
+ // rotation, rerotation, pix and im_box are defined in the large comment above.
+ // part_list must not be empty on entry (asserted). It may be empty on
+ // return, since an overlapped part is always deleted and a replacement
+ // piece is only created where the image has black pixels.
+ static void CutChunkFromParts(const TBOX& box, const TBOX& im_box,
+ const FCOORD& rotation, const FCOORD& rerotation,
+ Pix* pix, ColPartition_LIST* part_list) {
+ ASSERT_HOST(!part_list->empty());
+ ColPartition_IT part_it(part_list);
+ // One full cycle over the list; the loop ends when the iterator is back
+ // at the first element.
+ do {
+ ColPartition* part = part_it.data();
+ TBOX part_box = part->bounding_box();
+ if (part_box.overlap(box)) {
+ // This part must be cut and replaced with the remains. There are
+ // up to 4 pieces to be made. Start with the first one and use
+ // add_before_stay_put. For each piece if it has no black pixels
+ // left, just don't make the box.
+ // Above box.
+ // Piece 1: the full width of the part, from the top of box upwards.
+ if (box.top() < part_box.top()) {
+ TBOX slice(part_box);
+ slice.set_bottom(box.top());
+ if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
+ pix) > 0) {
+ AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
+ part_it.add_before_stay_put(
+ ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE,
+ BTFT_NONTEXT));
+ }
+ }
+ // Left of box.
+ // Piece 2: limited vertically to the rows shared with box, so that it
+ // does not overlap pieces 1 and 4.
+ if (box.left() > part_box.left()) {
+ TBOX slice(part_box);
+ slice.set_right(box.left());
+ if (box.top() < part_box.top())
+ slice.set_top(box.top());
+ if (box.bottom() > part_box.bottom())
+ slice.set_bottom(box.bottom());
+ if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
+ pix) > 0) {
+ AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
+ part_it.add_before_stay_put(
+ ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE,
+ BTFT_NONTEXT));
+ }
+ }
+ // Right of box.
+ // Piece 3: the mirror image of piece 2.
+ if (box.right() < part_box.right()) {
+ TBOX slice(part_box);
+ slice.set_left(box.right());
+ if (box.top() < part_box.top())
+ slice.set_top(box.top());
+ if (box.bottom() > part_box.bottom())
+ slice.set_bottom(box.bottom());
+ if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
+ pix) > 0) {
+ AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
+ part_it.add_before_stay_put(
+ ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE,
+ BTFT_NONTEXT));
+ }
+ }
+ // Below box.
+ // Piece 4: the full width of the part, from the bottom of box downwards.
+ if (box.bottom() > part_box.bottom()) {
+ TBOX slice(part_box);
+ slice.set_top(box.bottom());
+ if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
+ pix) > 0) {
+ AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
+ part_it.add_before_stay_put(
+ ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE,
+ BTFT_NONTEXT));
+ }
+ }
+ // The original part has been replaced by its pieces (if any): release
+ // its boxes, unlink it from the list and free it.
+ part->DeleteBoxes();
+ delete part_it.extract();
+ }
+ part_it.forward();
+ } while (!part_it.at_first());
+ }
+
+ // Builds, in part_list, the set of rectangles that represent the image
+ // component im_box once intersecting text has been cut away.
+ // part_list is seeded with a single BRT_RECTIMAGE partition covering im_box.
+ // Every partition that rectsearch finds over im_box is then handled thus:
+ //  - if it contains im_box and its flow is BTFT_CHAIN or better, the image
+ //    is entirely covered by text and part_list is emptied;
+ //  - if its flow is BTFT_STRONG_CHAIN, it is either cut out of the image
+ //    (after padding it vertically) or, when it lies inside im_box and the
+ //    black pixels under it cover at least half of its area, it is
+ //    relabelled BTFT_TEXT_ON_IMAGE and the image is left alone.
+ // The search stops early once part_list is empty.
+ // For more detail see the large comment above on cutting polygonal images
+ // from a rectangle.
+ // rotation, rerotation, pix and im_box are defined in the large comment above.
+ static void DivideImageIntoParts(const TBOX& im_box, const FCOORD& rotation,
+                                  const FCOORD& rerotation, Pix* pix,
+                                  ColPartitionGridSearch* rectsearch,
+                                  ColPartition_LIST* part_list) {
+   // Seed the list with the whole image rectangle.
+   ColPartition_IT list_it(part_list);
+   list_it.add_after_then_move(
+       ColPartition::FakePartition(im_box, PT_UNKNOWN, BRT_RECTIMAGE,
+                                   BTFT_NONTEXT));
+
+   rectsearch->StartRectSearch(im_box);
+   ColPartition* neighbour;
+   while ((neighbour = rectsearch->NextRectSearch()) != nullptr) {
+     TBOX neighbour_box = neighbour->bounding_box();
+     const bool text_covers_image = neighbour_box.contains(im_box) &&
+                                    neighbour->flow() >= BTFT_CHAIN;
+     if (text_covers_image) {
+       // Throw away every piece of the image built so far.
+       list_it.move_to_first();
+       while (!list_it.empty()) {
+         ColPartition* doomed = list_it.extract();
+         doomed->DeleteBoxes();
+         delete doomed;
+         list_it.forward();
+       }
+     } else if (neighbour->flow() == BTFT_STRONG_CHAIN) {
+       // Strong text touches the image: measure the black pixels under it.
+       TBOX overlap_box = neighbour_box.intersection(im_box);
+       const int black_area = ImageFind::CountPixelsInRotatedBox(
+           overlap_box, im_box, rerotation, pix);
+       const bool text_on_image = im_box.contains(neighbour_box) &&
+                                  black_area * 2 >= neighbour_box.area();
+       if (text_on_image) {
+         // Strong overlap with the black area, so call it text on image.
+         neighbour->set_flow(BTFT_TEXT_ON_IMAGE);
+       } else {
+         // Eat a piece out of the image, padded above and below by half the
+         // text size so that the bite looks decent.
+         const int text_size = neighbour->blob_type() == BRT_VERT_TEXT
+                                   ? neighbour_box.width()
+                                   : neighbour_box.height();
+         const int half_pad = text_size / 2;
+         neighbour_box.set_top(neighbour_box.top() + half_pad);
+         neighbour_box.set_bottom(neighbour_box.bottom() - half_pad);
+         CutChunkFromParts(neighbour_box, im_box, rotation, rerotation,
+                           pix, part_list);
+       }
+     }
+     if (part_list->empty()) {
+       break;
+     }
+   }
+ }
+
+ // Finds how far box may grow to the left before it meets text.
+ // Only partitions with flow BTFT_STRONG_CHAIN or BTFT_CHAIN that overlap
+ // box vertically count as text. Returns the right edge of the rightmost
+ // such partition lying strictly between left_limit and the left of box, or
+ // left_limit unchanged if there is none.
+ static int ExpandImageLeft(const TBOX& box, int left_limit,
+                            ColPartitionGrid* part_grid) {
+   ColPartitionGridSearch search(part_grid);
+   // Phase 1: side search, right to left, stopping at the first text hit.
+   bool met_text = false;
+   search.StartSideSearch(box.left(), box.bottom(), box.top());
+   for (ColPartition* part = search.NextSideSearch(true); part != nullptr;
+        part = search.NextSideSearch(true)) {
+     const auto flow = part->flow();
+     if (flow != BTFT_STRONG_CHAIN && flow != BTFT_CHAIN)
+       continue;
+     const TBOX& part_box(part->bounding_box());
+     if (part_box.y_gap(box) >= 0)
+       continue;  // No vertical overlap.
+     if (part_box.right() > left_limit && part_box.right() < box.left())
+       left_limit = part_box.right();
+     met_text = true;
+     break;
+   }
+   if (!met_text)
+     return left_limit;
+
+   // Phase 2: look for text nearer than the hit, inside the strip between
+   // the limit found so far and the box.
+   TBOX search_box(left_limit, box.bottom(), box.left(), box.top());
+   search.StartRectSearch(search_box);
+   for (ColPartition* part = search.NextRectSearch(); part != nullptr;
+        part = search.NextRectSearch()) {
+     const auto flow = part->flow();
+     if (flow != BTFT_STRONG_CHAIN && flow != BTFT_CHAIN)
+       continue;
+     const TBOX& part_box(part->bounding_box());
+     if (part_box.y_gap(box) >= 0)
+       continue;
+     if (part_box.right() > left_limit && part_box.right() < box.left()) {
+       left_limit = part_box.right();
+     }
+   }
+   return left_limit;
+ }
+
+ // Finds how far box may grow to the right before it meets text.
+ // Only partitions with flow BTFT_STRONG_CHAIN or BTFT_CHAIN that overlap
+ // box vertically count as text. Returns the left edge of the leftmost such
+ // partition lying strictly between the right of box and right_limit, or
+ // right_limit unchanged if there is none.
+ static int ExpandImageRight(const TBOX& box, int right_limit,
+                             ColPartitionGrid* part_grid) {
+   ColPartitionGridSearch search(part_grid);
+   // Phase 1: side search, left to right, stopping at the first text hit.
+   bool met_text = false;
+   search.StartSideSearch(box.right(), box.bottom(), box.top());
+   for (ColPartition* part = search.NextSideSearch(false); part != nullptr;
+        part = search.NextSideSearch(false)) {
+     const auto flow = part->flow();
+     if (flow != BTFT_STRONG_CHAIN && flow != BTFT_CHAIN)
+       continue;
+     const TBOX& part_box(part->bounding_box());
+     if (part_box.y_gap(box) >= 0)
+       continue;  // No vertical overlap.
+     if (part_box.left() < right_limit && part_box.left() > box.right())
+       right_limit = part_box.left();
+     met_text = true;
+     break;
+   }
+   if (!met_text)
+     return right_limit;
+
+   // Phase 2: look for text nearer than the hit. The search rectangle
+   // starts at the left of box; the edge test below discards anything that
+   // does not begin to the right of box.
+   TBOX search_box(box.left(), box.bottom(), right_limit, box.top());
+   search.StartRectSearch(search_box);
+   for (ColPartition* part = search.NextRectSearch(); part != nullptr;
+        part = search.NextRectSearch()) {
+     const auto flow = part->flow();
+     if (flow != BTFT_STRONG_CHAIN && flow != BTFT_CHAIN)
+       continue;
+     const TBOX& part_box(part->bounding_box());
+     if (part_box.y_gap(box) >= 0)
+       continue;
+     if (part_box.left() < right_limit && part_box.left() > box.right())
+       right_limit = part_box.left();
+   }
+   return right_limit;
+ }
+
+ // Finds how far box may grow downwards before it meets text.
+ // Only partitions with flow BTFT_STRONG_CHAIN or BTFT_CHAIN that overlap
+ // box horizontally count as text. Returns the top edge of the topmost such
+ // partition lying strictly between bottom_limit and the bottom of box, or
+ // bottom_limit unchanged if there is none.
+ static int ExpandImageBottom(const TBOX& box, int bottom_limit,
+                              ColPartitionGrid* part_grid) {
+   ColPartitionGridSearch search(part_grid);
+   // Phase 1: vertical search starting from the bottom edge of box,
+   // stopping at the first text hit.
+   bool met_text = false;
+   search.StartVerticalSearch(box.left(), box.right(), box.bottom());
+   for (ColPartition* part = search.NextVerticalSearch(true); part != nullptr;
+        part = search.NextVerticalSearch(true)) {
+     const auto flow = part->flow();
+     if (flow != BTFT_STRONG_CHAIN && flow != BTFT_CHAIN)
+       continue;
+     const TBOX& part_box(part->bounding_box());
+     if (part_box.x_gap(box) >= 0)
+       continue;  // No horizontal overlap.
+     if (part_box.top() > bottom_limit && part_box.top() < box.bottom())
+       bottom_limit = part_box.top();
+     met_text = true;
+     break;
+   }
+   if (!met_text)
+     return bottom_limit;
+
+   // Phase 2: look for text nearer than the hit, inside the strip between
+   // the limit found so far and the box.
+   TBOX search_box(box.left(), bottom_limit, box.right(), box.bottom());
+   search.StartRectSearch(search_box);
+   for (ColPartition* part = search.NextRectSearch(); part != nullptr;
+        part = search.NextRectSearch()) {
+     const auto flow = part->flow();
+     if (flow != BTFT_STRONG_CHAIN && flow != BTFT_CHAIN)
+       continue;
+     const TBOX& part_box(part->bounding_box());
+     if (part_box.x_gap(box) >= 0)
+       continue;
+     if (part_box.top() > bottom_limit && part_box.top() < box.bottom())
+       bottom_limit = part_box.top();
+   }
+   return bottom_limit;
+ }
+
+ // Finds how far box may grow upwards before it meets text.
+ // Only partitions with flow BTFT_STRONG_CHAIN or BTFT_CHAIN that overlap
+ // box horizontally count as text. Returns the bottom edge of the
+ // bottommost such partition lying strictly between the top of box and
+ // top_limit, or top_limit unchanged if there is none.
+ static int ExpandImageTop(const TBOX& box, int top_limit,
+                           ColPartitionGrid* part_grid) {
+   ColPartitionGridSearch search(part_grid);
+   // Phase 1: vertical search starting from the top edge of box, stopping
+   // at the first text hit.
+   bool met_text = false;
+   search.StartVerticalSearch(box.left(), box.right(), box.top());
+   for (ColPartition* part = search.NextVerticalSearch(false); part != nullptr;
+        part = search.NextVerticalSearch(false)) {
+     const auto flow = part->flow();
+     if (flow != BTFT_STRONG_CHAIN && flow != BTFT_CHAIN)
+       continue;
+     const TBOX& part_box(part->bounding_box());
+     if (part_box.x_gap(box) >= 0)
+       continue;  // No horizontal overlap.
+     if (part_box.bottom() < top_limit && part_box.bottom() > box.top())
+       top_limit = part_box.bottom();
+     met_text = true;
+     break;
+   }
+   if (!met_text)
+     return top_limit;
+
+   // Phase 2: look for text nearer than the hit, inside the strip between
+   // the box and the limit found so far.
+   TBOX search_box(box.left(), box.top(), box.right(), top_limit);
+   search.StartRectSearch(search_box);
+   for (ColPartition* part = search.NextRectSearch(); part != nullptr;
+        part = search.NextRectSearch()) {
+     const auto flow = part->flow();
+     if (flow != BTFT_STRONG_CHAIN && flow != BTFT_CHAIN)
+       continue;
+     const TBOX& part_box(part->bounding_box());
+     if (part_box.x_gap(box) >= 0)
+       continue;
+     if (part_box.bottom() < top_limit && part_box.bottom() > box.top())
+       top_limit = part_box.bottom();
+   }
+   return top_limit;
+ }
+
+ // Grows im_box in the single direction dir until it reaches text, but no
+ // further than the matching side of limit_box.
+ // *expanded_box receives the grown box (a copy of im_box with one side
+ // moved). The return value is the area of *expanded_box minus the area of
+ // im_box.
+ // For a direction other than left, right, above or below, *expanded_box is
+ // a plain copy of im_box and 0 is returned.
+ static int ExpandImageDir(BlobNeighbourDir dir, const TBOX& im_box,
+                           const TBOX& limit_box,
+                           ColPartitionGrid* part_grid, TBOX* expanded_box) {
+   *expanded_box = im_box;
+   if (dir == BND_LEFT) {
+     const int new_left = ExpandImageLeft(im_box, limit_box.left(), part_grid);
+     expanded_box->set_left(new_left);
+   } else if (dir == BND_RIGHT) {
+     const int new_right =
+         ExpandImageRight(im_box, limit_box.right(), part_grid);
+     expanded_box->set_right(new_right);
+   } else if (dir == BND_ABOVE) {
+     const int new_top = ExpandImageTop(im_box, limit_box.top(), part_grid);
+     expanded_box->set_top(new_top);
+   } else if (dir == BND_BELOW) {
+     const int new_bottom =
+         ExpandImageBottom(im_box, limit_box.bottom(), part_grid);
+     expanded_box->set_bottom(new_bottom);
+   } else {
+     return 0;
+   }
+   return expanded_box->area() - im_box.area();
+ }
+
+ // Expands the image partition into any non-text until it touches text.
+ // The expansion proceeds in the order of increasing increase in area
+ // as a heuristic to find the best rectangle by expanding in the most
+ // constrained direction first.
+ // On entry *im_box is the box to grow; on return it holds the grown box.
+ // Growth is limited to the bounds of part_grid. Each of the BND_COUNT
+ // directions is applied exactly once: every round tries all directions not
+ // yet used against the current box and commits the one with the smallest
+ // area change.
+ static void MaximalImageBoundingBox(ColPartitionGrid* part_grid, TBOX* im_box) {
+   bool dunnit[BND_COUNT] = {};
+   const TBOX limit_box(part_grid->bleft().x(), part_grid->bleft().y(),
+                        part_grid->tright().x(), part_grid->tright().y());
+   TBOX text_box(*im_box);
+   for (int iteration = 0; iteration < BND_COUNT; ++iteration) {
+     // Find the direction with least area increase.
+     // An explicit flag marks "nothing chosen yet", so that a negative area
+     // change is compared like any other value instead of being mistaken
+     // for the unset state.
+     bool have_best = false;
+     int best_delta = 0;
+     BlobNeighbourDir best_dir = BND_LEFT;
+     TBOX expanded_boxes[BND_COUNT];
+     for (int dir = 0; dir < BND_COUNT; ++dir) {
+       auto bnd = static_cast<BlobNeighbourDir>(dir);
+       if (dunnit[bnd])
+         continue;
+       int area_delta = ExpandImageDir(bnd, text_box, limit_box, part_grid,
+                                       &expanded_boxes[bnd]);
+       if (!have_best || area_delta < best_delta) {
+         have_best = true;
+         best_delta = area_delta;
+         best_dir = bnd;
+       }
+     }
+     // Run the best and remember the direction.
+     dunnit[best_dir] = true;
+     text_box = expanded_boxes[best_dir];
+   }
+   *im_box = text_box;
+ }
+
+ // Destroys part, dealing with its blobs according to who owns them.
+ // An image partition (BRT_RECTIMAGE or BRT_POLYIMAGE) owns its boxes, so
+ // they are deleted along with it.
+ // For any other type the partition is relabelled as non-text noise, the
+ // labels are pushed down to its blobs, and the blobs are disowned so that
+ // they outlive the partition and get swept up later.
+ static void DeletePartition(ColPartition* part) {
+   const BlobRegionType type = part->blob_type();
+   const bool owns_boxes = type == BRT_RECTIMAGE || type == BRT_POLYIMAGE;
+   if (!owns_boxes) {
+     // Once marked, the blobs will be swept up by TidyBlobs.
+     part->set_flow(BTFT_NONTEXT);
+     part->set_blob_type(BRT_NOISE);
+     part->SetBlobTypes();
+     part->DisownBoxes();  // Created before FindImagePartitions.
+   } else {
+     part->DeleteBoxes();  // From a previous iteration.
+   }
+   delete part;
+ }
+
+ // The meat of joining fragmented images and consuming ColPartitions of
+ // uncertain type.
+ // *part_ptr is an input/output BRT_RECTIMAGE ColPartition that is to be
+ // expanded to consume overlapping and nearby ColPartitions of uncertain type
+ // and other BRT_RECTIMAGE partitions, but NOT to be expanded beyond
+ // max_image_box. *part_ptr is NOT in the part_grid.
+ // rectsearch is already constructed on the part_grid, and is used for
+ // searching for overlapping and nearby ColPartitions.
+ // ExpandImageIntoParts is called iteratively until it returns false. Each
+ // time it absorbs the nearest non-contained candidate, and everything that
+ // is fully contained within part_ptr's bounding box.
+ // Returns true if a candidate was merged, in which case *part_ptr has been
+ // replaced by a new, larger partition and the old one deleted. Returns
+ // false if there was nothing to merge, leaving *part_ptr unchanged
+ // (fully contained partitions may still have been deleted).
+ // TODO(rays) what if it just eats everything inside max_image_box in one go?
+ static bool ExpandImageIntoParts(const TBOX& max_image_box,
+ ColPartitionGridSearch* rectsearch,
+ ColPartitionGrid* part_grid,
+ ColPartition** part_ptr) {
+ ColPartition* image_part = *part_ptr;
+ TBOX im_part_box = image_part->bounding_box();
+ if (textord_tabfind_show_images > 1) {
+ tprintf("Searching for merge with image part:");
+ im_part_box.print();
+ tprintf("Text box=");
+ max_image_box.print();
+ }
+ rectsearch->StartRectSearch(max_image_box);
+ ColPartition* part;
+ ColPartition* best_part = nullptr;
+ int best_dist = 0;
+ while ((part = rectsearch->NextRectSearch()) != nullptr) {
+ // Debug output only: this classification is printed but does not drive
+ // the decisions below, which apply their own tests.
+ if (textord_tabfind_show_images > 1) {
+ tprintf("Considering merge with part:");
+ part->Print();
+ if (im_part_box.contains(part->bounding_box()))
+ tprintf("Fully contained\n");
+ else if (!max_image_box.contains(part->bounding_box()))
+ tprintf("Not within text box\n");
+ else if (part->flow() == BTFT_STRONG_CHAIN)
+ tprintf("Too strong text\n");
+ else
+ tprintf("Real candidate\n");
+ }
+ // Never absorb strong text, text already marked as on an image, or
+ // pieces of a polygonal image.
+ if (part->flow() == BTFT_STRONG_CHAIN ||
+ part->flow() == BTFT_TEXT_ON_IMAGE ||
+ part->blob_type() == BRT_POLYIMAGE)
+ continue;
+ TBOX box = part->bounding_box();
+ if (max_image_box.contains(box) && part->blob_type() != BRT_NOISE) {
+ if (im_part_box.contains(box)) {
+ // Eat it completely.
+ // RemoveBBox takes the partition just returned by the search out of
+ // the grid before it is deleted.
+ rectsearch->RemoveBBox();
+ DeletePartition(part);
+ continue;
+ }
+ // Squared distance between the two boxes; gaps are clamped at 0 so
+ // overlap on an axis contributes nothing.
+ int x_dist = std::max(0, box.x_gap(im_part_box));
+ int y_dist = std::max(0, box.y_gap(im_part_box));
+ int dist = x_dist * x_dist + y_dist * y_dist;
+ // "Close" is relative to size: the squared distance may not exceed the
+ // area of either box.
+ if (dist > box.area() || dist > im_part_box.area())
+ continue; // Not close enough.
+ if (best_part == nullptr || dist < best_dist) {
+ // We keep the nearest qualifier, which is not necessarily the nearest.
+ best_part = part;
+ best_dist = dist;
+ }
+ }
+ }
+ if (best_part != nullptr) {
+ // It needs expanding. We can do it without touching text.
+ TBOX box = best_part->bounding_box();
+ if (textord_tabfind_show_images > 1) {
+ tprintf("Merging image part:");
+ im_part_box.print();
+ tprintf("with part:");
+ box.print();
+ }
+ // Replace the image partition with a new one covering the union of the
+ // two boxes, then dispose of both originals.
+ im_part_box += box;
+ *part_ptr = ColPartition::FakePartition(im_part_box, PT_UNKNOWN,
+ BRT_RECTIMAGE,
+ BTFT_NONTEXT);
+ DeletePartition(image_part);
+ // best_part is still in the grid, so it is removed through the grid
+ // itself, and the search iterator is repositioned afterwards.
+ part_grid->RemoveBBox(best_part);
+ DeletePartition(best_part);
+ rectsearch->RepositionIterator();
+ return true;
+ }
+ return false;
+ }
+
+ // Returns the sum, over every partition in part_list, of the area of the
+ // intersection between box and that partition's bounding box.
+ // Partitions that overlap each other inside box are each counted in full.
+ static int IntersectArea(const TBOX& box, ColPartition_LIST* part_list) {
+   int total_area = 0;
+   ColPartition_IT it(part_list);
+   it.mark_cycle_pt();
+   while (!it.cycled_list()) {
+     // Accumulate the overlap of box with this partition.
+     const TBOX overlap = box.intersection(it.data()->bounding_box());
+     total_area += overlap.area();
+     it.forward();
+   }
+   return total_area;
+ }
+
+ // Decides whether part should be consumed by the polygonal image whose
+ // pieces are in part_list and whose overall bounding box is im_box.
+ // Returns true only if all of the following hold:
+ //  - the flow of part is weaker than BTFT_STRONG_CHAIN;
+ //  - the bounding box of part lies within im_box;
+ //  - more than half of the area of part is intersected by the pieces in
+ //    part_list.
+ static bool TestWeakIntersectedPart(const TBOX& im_box,
+                                     ColPartition_LIST* part_list,
+                                     ColPartition* part) {
+   if (part->flow() >= BTFT_STRONG_CHAIN)
+     return false;  // Strong text is never consumed.
+   const TBOX& part_box = part->bounding_box();
+   if (!im_box.contains(part_box))
+     return false;  // Sticks out of the image.
+   const int part_area = part_box.area();
+   const int covered_area = IntersectArea(part_box, part_list);
+   return part_area < 2 * covered_area;
+ }
+
+// A rectangular or polygonal image has been completed, in part_list, bounding
+// box in im_box. Weak partitions (flow below BTFT_STRONG_CHAIN) contained in
+// im_box and more than half covered by part_list are eliminated: image parts
+// in part_grid and all such big_parts are deleted; other grid parts are kept
+// but re-marked as BTFT_NONTEXT / BRT_NOISE.
+static void EliminateWeakParts(const TBOX& im_box,
+ ColPartitionGrid* part_grid,
+ ColPartition_LIST* big_parts,
+ ColPartition_LIST* part_list) {
+ ColPartitionGridSearch rectsearch(part_grid);
+ ColPartition* part;
+ rectsearch.StartRectSearch(im_box); // First pass: partitions in the grid.
+ while ((part = rectsearch.NextRectSearch()) != nullptr) {
+ if (TestWeakIntersectedPart(im_box, part_list, part)) {
+ BlobRegionType type = part->blob_type();
+ if (type == BRT_POLYIMAGE || type == BRT_RECTIMAGE) {
+ rectsearch.RemoveBBox(); // Take it out of the grid before deleting it.
+ DeletePartition(part);
+ } else {
+ // The part is mostly covered, so mark it. Non-image partitions are
+ // kept hanging around to mark the image for pass2
+ part->set_flow(BTFT_NONTEXT);
+ part->set_blob_type(BRT_NOISE);
+ part->SetBlobTypes();
+ }
+ }
+ }
+ ColPartition_IT big_it(big_parts); // Second pass: the big_parts list.
+ for (big_it.mark_cycle_pt(); !big_it.cycled_list(); big_it.forward()) {
+ part = big_it.data();
+ if (TestWeakIntersectedPart(im_box, part_list, part)) {
+ // Once marked, the blobs will be swept up by TidyBlobs.
+ DeletePartition(big_it.extract());
+ }
+ }
+}
+
+// Helper that looks for good text (BTFT_CHAIN or BTFT_STRONG_CHAIN partitions)
+// in the neighbourhood of *box, searching *box padded by kNoisePadding.
+// Returns true, leaving *box untouched, as soon as a good text partition is
+// found that overlaps *box itself.
+// Otherwise returns false, and if no good text was seen anywhere in the padded
+// search, *box is replaced by the padded box.
+static bool ScanForOverlappingText(ColPartitionGrid* part_grid, TBOX* box) {
+  ColPartitionGridSearch gsearch(part_grid);
+  TBOX search_box(*box);
+  search_box.pad(kNoisePadding, kNoisePadding);
+  gsearch.StartRectSearch(search_box);
+  bool text_seen = false;
+  for (ColPartition* candidate = gsearch.NextRectSearch();
+       candidate != nullptr; candidate = gsearch.NextRectSearch()) {
+    const bool is_good_text = candidate->flow() == BTFT_CHAIN ||
+                              candidate->flow() == BTFT_STRONG_CHAIN;
+    if (!is_good_text) continue;
+    // Good text lies somewhere in the padded area.
+    text_seen = true;
+    if (box->overlap(candidate->bounding_box())) return true;
+  }
+  if (!text_seen) {
+    *box = search_box;
+  }
+  return false;
+}
+
+// Extracts every partition from image_parts, renders its box onto image_pix
+// unless it is a non-image partition that overlaps good text in part_grid,
+// and then deletes the partition.
+// Box coordinates are rotated by rerotate to match the image.
+// Does nothing at all (the list is left untouched) if image_pix is nullptr.
+static void MarkAndDeleteImageParts(const FCOORD& rerotate,
+                                    ColPartitionGrid* part_grid,
+                                    ColPartition_LIST* image_parts,
+                                    Pix* image_pix) {
+  if (image_pix == nullptr) {
+    return;
+  }
+  const int pix_height = pixGetHeight(image_pix);
+  ColPartition_IT it(image_parts);
+  while (!it.empty()) {
+    ColPartition* current = it.extract();
+    TBOX render_box = current->bounding_box();
+    const BlobRegionType region_type = current->blob_type();
+    // The scan always runs: besides reporting overlap it may pad render_box.
+    const bool touches_text = ScanForOverlappingText(part_grid, &render_box);
+    const bool is_image =
+        region_type == BRT_RECTIMAGE || region_type == BRT_POLYIMAGE;
+    if (!touches_text || is_image) {
+      // Rotate into image coords, then flip y for the raster operation.
+      render_box.rotate(rerotate);
+      const int left = render_box.left();
+      const int top = render_box.top();
+      pixRasterop(image_pix, left, pix_height - top,
+                  render_box.width(), render_box.height(), PIX_SET, nullptr,
+                  0, 0);
+    }
+    DeletePartition(current);
+    it.forward();
+  }
+}
+
+// Collects every partition in part_grid whose blob type is BRT_NOISE,
+// BRT_RECTIMAGE or BRT_POLYIMAGE (as left behind by a previous call to
+// FindImagePartitions), removes it from the grid, and hands the collection to
+// MarkAndDeleteImageParts to be marked in image_mask and deleted.
+// This makes it possible to call FindImagePartitions again to produce less
+// broken-up and less overlapping image partitions.
+// rerotation specifies how to rotate the partition coords to match
+// the image_mask, since this function is used after orientation correction.
+void ImageFind::TransferImagePartsToImageMask(const FCOORD& rerotation,
+                                              ColPartitionGrid* part_grid,
+                                              Pix* image_mask) {
+  // Temporary holding list for the partitions taken out of the grid.
+  ColPartition_LIST removed_parts;
+  ColPartition_IT removed_it(&removed_parts);
+  ColPartitionGridSearch full_search(part_grid);
+  full_search.StartFullSearch();
+  for (ColPartition* candidate = full_search.NextFullSearch();
+       candidate != nullptr; candidate = full_search.NextFullSearch()) {
+    switch (candidate->blob_type()) {
+      case BRT_NOISE:
+      case BRT_RECTIMAGE:
+      case BRT_POLYIMAGE:
+        removed_it.add_after_then_move(candidate);
+        full_search.RemoveBBox();
+        break;
+      default:
+        break;
+    }
+  }
+  // Render the collected partitions to the image mask and delete them.
+  MarkAndDeleteImageParts(rerotation, part_grid, &removed_parts, image_mask);
+}
+
+// Removes and deletes all image partitions that are too small to be worth
+// keeping. We have to do this as a separate phase after creating the image
+// partitions as the small images are needed to join the larger ones together.
+// A BRT_RECTIMAGE partition is removed when its bounding box is narrower or
+// shorter than kMinImageFindSize. Does nothing if part_grid is nullptr.
+static void DeleteSmallImages(ColPartitionGrid* part_grid) {
+  // Nothing to search without a grid. (This guard used to be inverted, which
+  // skipped the work for every valid grid and let a null grid through.)
+  if (part_grid == nullptr) return;
+  ColPartitionGridSearch gsearch(part_grid);
+  gsearch.StartFullSearch();
+  ColPartition* part;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    // Only delete rectangular images, since if it became a poly image, it
+    // is more evidence that it is somehow important.
+    if (part->blob_type() != BRT_RECTIMAGE) continue;
+    const TBOX& part_box = part->bounding_box();
+    if (part_box.width() < kMinImageFindSize ||
+        part_box.height() < kMinImageFindSize) {
+      // It is too small to keep. Remove it from the grid, then delete it.
+      gsearch.RemoveBBox();
+      DeletePartition(part);
+    }
+  }
+}
+
+// Runs a CC analysis on the image_pix mask image, and creates
+// image partitions from them, cutting out strong text, and merging with
+// nearby image regions such that they don't interfere with text.
+// Rotation and rerotation specify how to rotate image coords to match
+// the blob and partition coords and back again.
+// The input/output part_grid owns all the created partitions, and
+// the partitions own all the fake blobs that belong in the partitions.
+// Since the other blobs in the other partitions will be owned by the block,
+// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
+// situation and collect the image blobs.
+void ImageFind::FindImagePartitions(Pix* image_pix, const FCOORD& rotation,
+ const FCOORD& rerotation, TO_BLOCK* block,
+ TabFind* tab_grid, DebugPixa* pixa_debug,
+ ColPartitionGrid* part_grid,
+ ColPartition_LIST* big_parts) { // NOTE(review): block and tab_grid are not referenced in this body.
+ int imageheight = pixGetHeight(image_pix);
+ Boxa* boxa;
+ Pixa* pixa;
+ ConnCompAndRectangularize(image_pix, pixa_debug, &boxa, &pixa);
+ // Iterate the connected components in the image regions mask.
+ int nboxes = 0;
+ if (boxa != nullptr && pixa != nullptr) nboxes = boxaGetCount(boxa); // Either may be nullptr: then there is nothing to do.
+ for (int i = 0; i < nboxes; ++i) {
+ l_int32 x, y, width, height;
+ boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height);
+ Pix* pix = pixaGetPix(pixa, i, L_CLONE);
+ TBOX im_box(x, imageheight -y - height, x + width, imageheight - y); // y is flipped about the image height.
+ im_box.rotate(rotation); // Now matches all partitions and blobs.
+ ColPartitionGridSearch rectsearch(part_grid);
+ rectsearch.SetUniqueMode(true);
+ ColPartition_LIST part_list;
+ DivideImageIntoParts(im_box, rotation, rerotation, pix,
+ &rectsearch, &part_list);
+ if (textord_tabfind_show_images && pixa_debug != nullptr) {
+ pixa_debug->AddPix(pix, "ImageComponent");
+ tprintf("Component has %d parts\n", part_list.length());
+ }
+ pixDestroy(&pix); // Done with the clone obtained from pixaGetPix above.
+ if (!part_list.empty()) {
+ ColPartition_IT part_it(&part_list);
+ if (part_list.singleton()) {
+ // We didn't have to chop it into a polygon to fit around text, so
+ // try expanding it to merge fragmented image parts, as long as it
+ // doesn't touch strong text.
+ ColPartition* part = part_it.extract();
+ TBOX text_box(im_box);
+ MaximalImageBoundingBox(part_grid, &text_box);
+ while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part)); // Repeats until no further merge is made; part may be replaced.
+ part_it.set_to_list(&part_list);
+ part_it.add_after_then_move(part);
+ im_box = part->bounding_box(); // Use the (possibly expanded) box from here on.
+ }
+ EliminateWeakParts(im_box, part_grid, big_parts, &part_list);
+ // Iterate the part_list and put the parts into the grid.
+ for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
+ ColPartition* image_part = part_it.extract();
+ im_box = image_part->bounding_box(); // NOTE(review): im_box is not read again after this assignment.
+ part_grid->InsertBBox(true, true, image_part);
+ if (!part_it.at_last()) {
+ ColPartition* neighbour = part_it.data_relative(1);
+ image_part->AddPartner(false, neighbour); // Link consecutive parts as partners, in both directions.
+ neighbour->AddPartner(true, image_part);
+ }
+ }
+ }
+ }
+ boxaDestroy(&boxa);
+ pixaDestroy(&pixa);
+ DeleteSmallImages(part_grid);
+#ifndef GRAPHICS_DISABLED
+ if (textord_tabfind_show_images) {
+ ScrollView* images_win_ = part_grid->MakeWindow(1000, 400, "With Images");
+ part_grid->DisplayBoxes(images_win_);
+ }
+#endif
+}
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/imagefind.h b/tesseract/src/textord/imagefind.h
new file mode 100644
index 00000000..57be6990
--- /dev/null
+++ b/tesseract/src/textord/imagefind.h
@@ -0,0 +1,159 @@
+///////////////////////////////////////////////////////////////////////
+// File: imagefind.h
+// Description: Class to find image and drawing regions in an image
+// and create a corresponding list of empty blobs.
+// Author: Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_IMAGEFIND_H_
+#define TESSERACT_TEXTORD_IMAGEFIND_H_
+
+#include "debugpixa.h"
+
+#include <cstdint>
+
+struct Boxa;
+struct Pix;
+struct Pixa;
+
+namespace tesseract {
+
+class ColPartitionGrid;
+class ColPartition_LIST;
+class TabFind;
+class TBOX;
+class FCOORD;
+class TO_BLOCK;
+class BLOBNBOX_LIST;
+
+// The ImageFind class is a simple static function wrapper class that
+// exposes the FindImages function and some useful helper functions.
+class ImageFind {
+ public:
+ // Finds image regions within the BINARY source pix (page image) and returns
+ // the image regions as a mask image.
+ // The returned pix may be nullptr, meaning no images found.
+ // If not nullptr, it must be destroyed (pixDestroy) by the caller.
+ // If textord_tabfind_show_images, debug images are appended to pixa_debug.
+ static Pix* FindImages(Pix* pix, DebugPixa* pixa_debug);
+
+ // Generates a Boxa, Pixa pair from the input binary (image mask) pix,
+ // analogous to pixConnComp, except that connected components which are nearly
+ // rectangular are replaced with solid rectangles.
+ // The returned boxa, pixa may be nullptr, meaning no images found.
+ // If not nullptr, they must be destroyed by the caller.
+ // Resolution of pix should match the source image (Tesseract::pix_binary_)
+ // so the output coordinate systems match.
+ static void ConnCompAndRectangularize(Pix* pix, DebugPixa* pixa_debug,
+ Boxa** boxa, Pixa** pixa);
+
+ // Returns true if there is a rectangle in the source pix, such that all
+ // pixel rows and column slices outside of it have less than
+ // min_fraction of the pixels black, and within max_skew_gradient fraction
+ // of the pixels on the inside, there are at least max_fraction of the
+ // pixels black. In other words, the inside of the rectangle looks roughly
+ // rectangular, and the outside of it looks like extra bits.
+ // On return, the rectangle is defined by x_start, y_start, x_end and y_end.
+ // Note: the algorithm is iterative, allowing it to slice off pixels from
+ // one edge, allowing it to then slice off more pixels from another edge.
+ static bool pixNearlyRectangular(Pix* pix,
+ double min_fraction, double max_fraction,
+ double max_skew_gradient,
+ int* x_start, int* y_start,
+ int* x_end, int* y_end);
+
+ // Given an input pix, and a bounding rectangle, the sides of the rectangle
+ // are shrunk inwards until they bound any black pixels found within the
+ // original rectangle. Returns false if the rectangle contains no black
+ // pixels at all.
+ static bool BoundsWithinRect(Pix* pix, int* x_start, int* y_start,
+ int* x_end, int* y_end);
+
+ // Given a point in 3-D (RGB) space, returns the squared Euclidean distance
+ // of the point from the given line, defined by a pair of points in the 3-D
+ // (RGB) space, line1 and line2.
+ static double ColorDistanceFromLine(const uint8_t* line1, const uint8_t* line2,
+ const uint8_t* point);
+
+ // Returns the leptonica combined code for the given RGB triplet.
+ static uint32_t ComposeRGB(uint32_t r, uint32_t g, uint32_t b);
+
+ // Returns the input value clipped to a uint8_t.
+ static uint8_t ClipToByte(double pixel);
+
+ // Computes the light and dark extremes of color in the given rectangle of
+ // the given pix, which is factor smaller than the coordinate system in rect.
+ // The light and dark points are taken to be the upper and lower 8th-ile of
+ // the most deviant of R, G and B. The values of the other 2 channels are
+ // computed by linear fit against the most deviant.
+ // The colors of the two points are returned in color1 and color2, with the
+ // alpha channel set to a scaled mean rms of the fits.
+ // If color_map1 is not null then it and color_map2 get rect pasted in them
+ // with the two calculated colors, and rms_map gets a pasted rect of the rms.
+ // color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
+ static void ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor,
+ Pix* color_map1, Pix* color_map2,
+ Pix* rms_map,
+ uint8_t* color1, uint8_t* color2);
+
+ // Returns true if there are no black pixels in between the boxes.
+ // The im_box must represent the bounding box of the pix in tesseract
+ // coordinates, which may be negative, due to rotations to make the textlines
+ // horizontal. The boxes are rotated by rotation, which should undo such
+ // rotations, before mapping them onto the pix.
+ static bool BlankImageInBetween(const TBOX& box1, const TBOX& box2,
+ const TBOX& im_box, const FCOORD& rotation,
+ Pix* pix);
+
+ // Returns the number of pixels in box in the pix.
+ // The im_box must represent the bounding box of the pix in tesseract
+ // coordinates, which may be negative, due to rotations to make the textlines
+ // horizontal. The box is rotated by rotation, which should undo such
+ // rotations, before mapping it onto the pix.
+ static int CountPixelsInRotatedBox(TBOX box, const TBOX& im_box,
+ const FCOORD& rotation, Pix* pix);
+
+
+ // Locates all the image partitions in the part_grid, that were found by a
+ // previous call to FindImagePartitions, marks them in the image_mask,
+ // removes them from the grid, and deletes them. This makes it possible to
+ // call FindImagePartitions again to produce less broken-up and less
+ // overlapping image partitions.
+ // rerotation specifies how to rotate the partition coords to match
+ // the image_mask, since this function is used after orientation correction.
+ static void TransferImagePartsToImageMask(const FCOORD& rerotation,
+ ColPartitionGrid* part_grid,
+ Pix* image_mask);
+
+ // Runs a CC analysis on the image_pix mask image, and creates
+ // image partitions from them, cutting out strong text, and merging with
+ // nearby image regions such that they don't interfere with text.
+ // Rotation and rerotation specify how to rotate image coords to match
+ // the blob and partition coords and back again.
+ // The input/output part_grid owns all the created partitions, and
+ // the partitions own all the fake blobs that belong in the partitions.
+ // Since the other blobs in the other partitions will be owned by the block,
+ // ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
+ // situation and collect the image blobs.
+ static void FindImagePartitions(Pix* image_pix, const FCOORD& rotation,
+ const FCOORD& rerotation, TO_BLOCK* block,
+ TabFind* tab_grid, DebugPixa* pixa_debug,
+ ColPartitionGrid* part_grid,
+ ColPartition_LIST* big_parts);
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_IMAGEFIND_H_
diff --git a/tesseract/src/textord/linefind.cpp b/tesseract/src/textord/linefind.cpp
new file mode 100644
index 00000000..d3763f31
--- /dev/null
+++ b/tesseract/src/textord/linefind.cpp
@@ -0,0 +1,769 @@
+///////////////////////////////////////////////////////////////////////
+// File: linefind.cpp
+// Description: Class to find vertical lines in an image and create
+// a corresponding list of empty blobs.
+// Author: Ray Smith
+// Created: Thu Mar 20 09:49:01 PDT 2008
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "linefind.h"
+#include "alignedblob.h"
+#include "tabvector.h"
+#include "blobbox.h"
+#include "edgblob.h"
+#if defined(USE_OPENCL)
+#include "openclwrapper.h" // for OpenclDevice
+#endif
+
+#include "allheaders.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+/// Resolution is divided by this to give the max pixel width of a thin line.
+const int kThinLineFraction = 20;
+/// Resolution is divided by this to give the min length in pixels of a line.
+const int kMinLineLengthFraction = 4;
+/// Spacing of cracks across the page to break up tall vertical lines.
+const int kCrackSpacing = 100;
+/// Grid size used by line finder. Not very critical.
+const int kLineFindGridSize = 50;
+// Min width of a line in pixels to be considered thick. (FilterFalsePositives tests box size and stroke width against it.)
+const int kMinThickLineWidth = 12;
+// Max size of line residue. (The pixels that fail the long thin opening, and
+// therefore don't make it to the candidate line mask, but are nevertheless
+// part of the line.)
+const int kMaxLineResidue = 6;
+// Min length in inches (FilterFalsePositives multiplies it by the resolution) of a line segment that exceeds
+// kMinThickLineWidth in thickness. (Such lines shouldn't break by simple image degradation.)
+const double kThickLengthMultiple = 0.75;
+// Max fraction of line box area that can be occupied by non-line pixels.
+const double kMaxNonLineDensity = 0.25;
+// Max height of a music stave in inches.
+const double kMaxStaveHeight = 1.0;
+// Minimum fraction of pixels in a music rectangle connected to the staves.
+const double kMinMusicPixelFraction = 0.75;
+
+// Clears from line_pix the rectangle of every blob in line_bblobs whose left
+// tab type is not TT_VLINE. horizontal_lines selects how blob boxes are mapped
+// back to Leptonica image coordinates.
+static void RemoveUnusedLineSegments(bool horizontal_lines,
+                                     BLOBNBOX_LIST* line_bblobs,
+                                     Pix* line_pix) {
+  const int pix_height = pixGetHeight(line_pix);
+  BLOBNBOX_IT blob_it(line_bblobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* candidate = blob_it.data();
+    // Blobs marked TT_VLINE are left in the image.
+    if (candidate->left_tab_type() == TT_VLINE) continue;
+    const TBOX& tess_box = candidate->bounding_box();
+    // Horizontal lines are in tess format with x and y flipped (to use
+    // FindVerticalAlignment), so flip x and y back and convert to Leptonica
+    // by height - flipped x (ie the right edge). See GetLineBoxes.
+    // Vertical lines only need flipping upside-down: the Leptonica y of a
+    // box is the distance from the top of the image to the top of the box.
+    Box* erase_box =
+        horizontal_lines
+            ? boxCreate(tess_box.bottom(), pix_height - tess_box.right(),
+                        tess_box.height(), tess_box.width())
+            : boxCreate(tess_box.left(), pix_height - tess_box.top(),
+                        tess_box.width(), tess_box.height());
+    pixClearInRect(line_pix, erase_box);
+    boxDestroy(&erase_box);
+  }
+}
+
+// Helper that removes line_pix from src_pix (in place), together with the
+// residue: components that touch the lines but are not part of the
+// non_line_pix mask. The non_line_pix mask is assumed to have been prepared
+// to the required accuracy already. resolution is not used here.
+static void SubtractLinesAndResidue(Pix* line_pix, Pix* non_line_pix,
+                                    int resolution, Pix* src_pix) {
+  // Step 1: take out the lines themselves.
+  pixSubtract(src_pix, src_pix, line_pix);
+  // Step 2: whatever remains that is not a known non-line is candidate residue.
+  Pix* leftover_pix = pixSubtract(nullptr, src_pix, non_line_pix);
+  // Step 3: grow the lines so that they touch adjacent residue, then seed-fill
+  // from the grown lines through the candidate residue.
+  Pix* grown_line_pix = pixDilateBrick(nullptr, line_pix, 3, 3);
+  pixSeedfillBinary(grown_line_pix, grown_line_pix, leftover_pix, 8);
+  pixDestroy(&leftover_pix);
+  // Step 4: remove the collected residue from the source image.
+  pixSubtract(src_pix, src_pix, grown_line_pix);
+  pixDestroy(&grown_line_pix);
+}
+
+// Returns an estimate of the maximum stroke width in the given binary image:
+// twice the largest value found in its distance-function image.
+static int MaxStrokeWidth(Pix* pix) {
+  Pix* distance_map = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG);
+  const int cols = pixGetWidth(distance_map);
+  const int rows = pixGetHeight(distance_map);
+  const int words_per_line = pixGetWpl(distance_map);
+  l_uint32* line = pixGetData(distance_map);
+  // Scan every byte-sized pixel of the distance image for the largest value.
+  int largest = 0;
+  for (int row = 0; row < rows; ++row, line += words_per_line) {
+    for (int col = 0; col < cols; ++col) {
+      largest = std::max(largest, static_cast<int>(GET_DATA_BYTE(line, col)));
+    }
+  }
+  pixDestroy(&distance_map);
+  return largest * 2;
+}
+
+// Returns the number of connected components of intersection_pix that lie
+// within line_box (components are counted on the clipped rectangle, using
+// 8-connectivity). Returns 0 if intersection_pix is nullptr or if the
+// connected-component analysis yields no Boxa.
+static int NumTouchingIntersections(Box* line_box, Pix* intersection_pix) {
+  if (intersection_pix == nullptr) return 0;
+  Pix* rect_pix = pixClipRectangle(intersection_pix, line_box, nullptr);
+  Boxa* boxa = pixConnComp(rect_pix, nullptr, 8);
+  pixDestroy(&rect_pix);
+  // The function returns a count, so report zero rather than a bool literal.
+  if (boxa == nullptr) return 0;
+  int result = boxaGetCount(boxa);
+  boxaDestroy(&boxa);
+  return result;
+}
+
+// Returns the number of black pixels found in the box made by adding the line
+// width to both sides of the line bounding box. (Increasing the smallest
+// dimension of the bounding box.) The enlarged box is clamped to the bounds
+// of nonline_pix. Returns 0 if the pixel count cannot be obtained.
+static int CountPixelsAdjacentToLine(int line_width, Box* line_box,
+                                     Pix* nonline_pix) {
+  l_int32 x, y, box_width, box_height;
+  boxGetGeometry(line_box, &x, &y, &box_width, &box_height);
+  if (box_width > box_height) {
+    // Horizontal line: grow the box upwards and downwards.
+    int bottom = std::min(pixGetHeight(nonline_pix), y + box_height + line_width);
+    y = std::max(0, y - line_width);
+    box_height = bottom - y;
+  } else {
+    // Vertical line: grow the box to the left and right.
+    int right = std::min(pixGetWidth(nonline_pix), x + box_width + line_width);
+    x = std::max(0, x - line_width);
+    box_width = right - x;
+  }
+  Box* box = boxCreate(x, y, box_width, box_height);
+  Pix* rect_pix = pixClipRectangle(nonline_pix, box, nullptr);
+  boxDestroy(&box);
+  // Start from zero so the returned value never depends on pixCountPixels
+  // having written its output (e.g. if the clip produced no image).
+  l_int32 result = 0;
+  pixCountPixels(rect_pix, &result, nullptr);
+  pixDestroy(&rect_pix);
+  return result;
+}
+
+// Helper that erases false-positive line segments from the input/output
+// line_pix. Each 8-connected component of line_pix is rejected if either:
+// 1. It is thick but short: both box dimensions are at least
+//    kMinThickLineWidth yet below resolution * kThickLengthMultiple, and its
+//    max stroke width exceeds kMinThickLineWidth. (Thick lines shouldn't
+//    really break up.)
+// 2. It touches fewer than 2 intersections (or intersection_pix is nullptr)
+//    and the count of nonline_pix pixels next to it exceeds
+//    kMaxNonLineDensity times its box area. (Perhaps arabic or Hindi words,
+//    or underlines.)
+// Rejected components have their bounding box cleared in line_pix.
+// Returns the number of remaining connected components.
+static int FilterFalsePositives(int resolution, Pix* nonline_pix,
+                                Pix* intersection_pix, Pix* line_pix) {
+  const int min_thick_length =
+      static_cast<int>(resolution * kThickLengthMultiple);
+  Pixa* components = nullptr;
+  Boxa* component_boxes = pixConnComp(line_pix, &components, 8);
+  const int num_components = boxaGetCount(component_boxes);
+  int num_kept = num_components;
+  for (int index = 0; index < num_components; ++index) {
+    Box* comp_box = boxaGetBox(component_boxes, index, L_CLONE);
+    l_int32 left, top, comp_width, comp_height;
+    boxGetGeometry(comp_box, &left, &top, &comp_width, &comp_height);
+    Pix* comp_pix = pixaGetPix(components, index, L_CLONE);
+    const int stroke_width = MaxStrokeWidth(comp_pix);
+    pixDestroy(&comp_pix);
+    // Rule 1: too thick for its length.
+    bool reject = comp_width >= kMinThickLineWidth &&
+                  comp_height >= kMinThickLineWidth &&
+                  comp_width < min_thick_length &&
+                  comp_height < min_thick_length &&
+                  stroke_width > kMinThickLineWidth;
+    // Rule 2: poorly connected and crowded by non-line pixels.
+    if (!reject &&
+        (intersection_pix == nullptr ||
+         NumTouchingIntersections(comp_box, intersection_pix) < 2)) {
+      const int neighbour_pixels =
+          CountPixelsAdjacentToLine(stroke_width, comp_box, nonline_pix);
+      reject = neighbour_pixels > comp_height * comp_width * kMaxNonLineDensity;
+    }
+    if (reject) {
+      pixClearInRect(line_pix, comp_box);
+      --num_kept;
+    }
+    boxDestroy(&comp_box);
+  }
+  boxaDestroy(&component_boxes);
+  pixaDestroy(&components);
+  return num_kept;
+}
+
+// Finds vertical and horizontal line objects in the given pix.
+// Uses the given resolution to determine size thresholds instead of any
+// that may be present in the pix.
+// The output vertical_x and vertical_y contain a sum of the output vectors,
+// thereby giving the mean vertical direction.
+// If pix_music_mask != nullptr, and music is detected, a mask of the staves
+// and anything that is connected (bars, notes etc.) will be returned in
+// pix_music_mask, the mask subtracted from pix, and the lines will not
+// appear in v_lines or h_lines.
+// The output vectors are owned by the list and Frozen (cannot refit) by
+// having no boxes, as there is no need to refit or merge separator lines.
+// The detected lines are removed from the pix.
+void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix* pix,
+ int* vertical_x, int* vertical_y,
+ Pix** pix_music_mask,
+ TabVector_LIST* v_lines,
+ TabVector_LIST* h_lines) {
+ if (pix == nullptr || vertical_x == nullptr || vertical_y == nullptr) {
+ tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n");
+ return;
+ }
+ Pix* pix_vline = nullptr;
+ Pix* pix_non_vline = nullptr;
+ Pix* pix_hline = nullptr;
+ Pix* pix_non_hline = nullptr;
+ Pix* pix_intersections = nullptr;
+ Pixa* pixa_display = debug ? pixaCreate(0) : nullptr; // Debug images are only collected when debug is set.
+ GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline,
+ &pix_non_hline, &pix_intersections, pix_music_mask,
+ pixa_display);
+ // Find lines, convert to TabVector_LIST and remove those that are used.
+ FindAndRemoveVLines(resolution, pix_intersections, vertical_x, vertical_y,
+ &pix_vline, pix_non_vline, pix, v_lines);
+ if (pix_hline != nullptr) {
+ // Recompute intersections and re-filter false positive h-lines.
+ if (pix_vline != nullptr)
+ pixAnd(pix_intersections, pix_vline, pix_hline); // NOTE(review): return value discarded; relies on pix_intersections being non-null here - confirm against GetLineMasks.
+ else
+ pixDestroy(&pix_intersections); // No v-lines remain, so there can be no intersections.
+ if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections,
+ pix_hline)) {
+ pixDestroy(&pix_hline); // No components survived the filter.
+ }
+ }
+ FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y,
+ &pix_hline, pix_non_hline, pix, h_lines);
+ if (pixa_display != nullptr && pix_vline != nullptr)
+ pixaAddPix(pixa_display, pix_vline, L_CLONE);
+ if (pixa_display != nullptr && pix_hline != nullptr)
+ pixaAddPix(pixa_display, pix_hline, L_CLONE);
+ if (pix_vline != nullptr && pix_hline != nullptr) {
+ // Remove joins (intersections) where lines cross, and the residue.
+ // Recalculate the intersections, since some lines have been deleted.
+ pixAnd(pix_intersections, pix_vline, pix_hline); // NOTE(review): same reliance on a non-null pix_intersections as above.
+ // Fatten up the intersections and seed-fill to get the intersection
+ // residue.
+ Pix* pix_join_residue = pixDilateBrick(nullptr, pix_intersections, 5, 5);
+ pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8);
+ // Now remove the intersection residue.
+ pixSubtract(pix, pix, pix_join_residue);
+ pixDestroy(&pix_join_residue);
+ }
+ // Remove any detected music.
+ if (pix_music_mask != nullptr && *pix_music_mask != nullptr) {
+ if (pixa_display != nullptr)
+ pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
+ pixSubtract(pix, pix, *pix_music_mask);
+ }
+ if (pixa_display != nullptr)
+ pixaAddPix(pixa_display, pix, L_CLONE);
+
+ pixDestroy(&pix_vline);
+ pixDestroy(&pix_non_vline);
+ pixDestroy(&pix_hline);
+ pixDestroy(&pix_non_hline);
+ pixDestroy(&pix_intersections);
+ if (pixa_display != nullptr) {
+ pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding",
+ "vhlinefinding.pdf"); // Debug output is written to this fixed file name.
+ pixaDestroy(&pixa_display);
+ }
+}
+
+// Converts the Boxa array to a list of C_BLOB, getting rid of severely
+// overlapping outlines and those that are children of a bigger one.
+// The output is a list of C_BLOBs that are owned by the list.
+// The C_OUTLINEs in the C_BLOBs contain no outline data - just empty
+// bounding boxes. The Boxa is consumed and destroyed.
+void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height,
+ Boxa** boxes, C_BLOB_LIST* blobs) {
+ C_OUTLINE_LIST outlines;
+ C_OUTLINE_IT ol_it = &outlines;
+ // Iterate the boxes to convert to outlines.
+ int nboxes = boxaGetCount(*boxes);
+ for (int i = 0; i < nboxes; ++i) {
+ l_int32 x, y, width, height;
+ boxaGetBoxGeometry(*boxes, i, &x, &y, &width, &height);
+ // Make a C_OUTLINE from the leptonica box. This is a bit of a hack,
+ // as there is no outline, just a bounding box, but with some very
+ // small changes to coutln.cpp, it works nicely.
+ ICOORD top_left(x, y); // The box coordinates are used as given, with no y flip here.
+ ICOORD bot_right(x + width, y + height);
+ CRACKEDGE startpt;
+ startpt.pos = top_left; // Only pos is set on the start edge.
+ auto* outline = new C_OUTLINE(&startpt, top_left, bot_right, 0);
+ ol_it.add_after_then_move(outline); // The outlines list now holds the new outline.
+ }
+ // Use outlines_to_blobs to convert the outlines to blobs and find
+ // overlapping and contained objects. The output list of blobs in the block
+ // has all the bad ones filtered out and deleted.
+ BLOCK block; // Temporary block, used only as the container outlines_to_blobs fills.
+ ICOORD page_tl(0, 0);
+ ICOORD page_br(image_width, image_height);
+ outlines_to_blobs(&block, page_tl, page_br, &outlines);
+ // Transfer the created blobs to the output list.
+ C_BLOB_IT blob_it(blobs);
+ blob_it.add_list_after(block.blob_list());
+ // The boxes aren't needed any more.
+ boxaDestroy(boxes);
+}
+
+// Finds vertical line objects in pix_vline and removes them from src_pix.
+// Size thresholds come from the given resolution rather than from any
+// resolution that may be present in the pix.
+// The output vertical_x and vertical_y contain a sum of the output vectors,
+// thereby giving the mean vertical direction.
+// The output vectors are owned by the list and Frozen (cannot refit) by
+// having no boxes, as there is no need to refit or merge separator lines.
+// If no good lines are found, *pix_vline is destroyed.
+// Does nothing if pix_vline or *pix_vline is nullptr; the other pointers are
+// assumed to be valid.
+void LineFinder::FindAndRemoveVLines(int resolution,
+                                     Pix* pix_intersections,
+                                     int* vertical_x, int* vertical_y,
+                                     Pix** pix_vline, Pix* pix_non_vline,
+                                     Pix* src_pix, TabVector_LIST* vectors) {
+  if (pix_vline == nullptr || *pix_vline == nullptr) {
+    return;
+  }
+  // Turn the candidate line mask into blobs.
+  C_BLOB_LIST line_cblobs;
+  BLOBNBOX_LIST line_bblobs;
+  GetLineBoxes(false, *pix_vline, pix_intersections,
+               &line_cblobs, &line_bblobs);
+  // Fit line vectors over the full extent of the source image.
+  const int src_width = pixGetWidth(src_pix);
+  const int src_height = pixGetHeight(src_pix);
+  ICOORD bleft(0, 0);
+  ICOORD tright(src_width, src_height);
+  FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors);
+  if (vectors->empty()) {
+    // No good lines: drop the mask altogether.
+    pixDestroy(pix_vline);
+    return;
+  }
+  RemoveUnusedLineSegments(false, &line_bblobs, *pix_vline);
+  SubtractLinesAndResidue(*pix_vline, pix_non_vline, resolution, src_pix);
+  ICOORD vertical;
+  vertical.set_with_shrink(*vertical_x, *vertical_y);
+  TabVector::MergeSimilarTabVectors(vertical, vectors, nullptr);
+}
+
+// Finds horizontal line objects in *pix_hline and removes them from src_pix.
+// The given resolution is forwarded to SubtractLinesAndResidue, so size
+// thresholds come from it and not from anything stored in the pix.
+// vertical_x and vertical_y are passed by value: local copies are handed to
+// FindLineVectors and then used for merging, so nothing is returned through
+// them.
+// The search runs in x/y-flipped coordinates (see GetLineBoxes), which is why
+// the search bounds are (height, width); the resulting vectors are flipped
+// back with XYFlip before returning.
+// The vectors added to the list are owned by the list and are Frozen by
+// FindLineVectors.
+// If the vectors list is empty after the search, *pix_hline is destroyed.
+// None of the input pointers may be nullptr; if pix_hline or *pix_hline is
+// nullptr the function returns immediately without doing anything.
+void LineFinder::FindAndRemoveHLines(int resolution,
+                                     Pix* pix_intersections,
+                                     int vertical_x, int vertical_y,
+                                     Pix** pix_hline, Pix* pix_non_hline,
+                                     Pix* src_pix, TabVector_LIST* vectors) {
+  if (pix_hline == nullptr || *pix_hline == nullptr) {
+    return;
+  }
+  // Break the candidate lines into segments and box them up (x/y flipped).
+  C_BLOB_LIST segment_cblobs;
+  BLOBNBOX_LIST segment_bblobs;
+  GetLineBoxes(true, *pix_hline, pix_intersections,
+               &segment_cblobs, &segment_bblobs);
+  const int src_width = pixGetWidth(src_pix);
+  const int src_height = pixGetHeight(src_pix);
+  const ICOORD flipped_bl(0, 0);
+  // Deliberately (height, width): the boxes live in flipped coordinates.
+  const ICOORD flipped_tr(src_height, src_width);
+  FindLineVectors(flipped_bl, flipped_tr, &segment_bblobs,
+                  &vertical_x, &vertical_y, vectors);
+  if (vectors->empty()) {
+    // No usable lines: drop the candidate image.
+    pixDestroy(pix_hline);
+    return;
+  }
+  RemoveUnusedLineSegments(true, &segment_bblobs, *pix_hline);
+  SubtractLinesAndResidue(*pix_hline, pix_non_hline, resolution, src_pix);
+  ICOORD mean_vertical;
+  mean_vertical.set_with_shrink(vertical_x, vertical_y);
+  TabVector::MergeSimilarTabVectors(mean_vertical, vectors, nullptr);
+  // Undo the x/y flip so that callers receive genuinely horizontal vectors.
+  TabVector_IT flip_it(vectors);
+  for (flip_it.mark_cycle_pt(); !flip_it.cycled_list(); flip_it.forward()) {
+    TabVector* line_vector = flip_it.data();
+    line_vector->XYFlip();
+  }
+}
+
+// Finds vertical lines among the given BLOBNBOXes. bleft and tright are the
+// bounds of the image on which the input line_bblobs were found.
+// The blobs stay in line_bblobs; each one only has its tab type and rule
+// limits set before being inserted into a temporary search grid.
+// If the list holds no blobs the function returns early and leaves
+// *vertical_x, *vertical_y and vectors untouched. Otherwise the outputs start
+// from (0, 1) and are updated by each FindVerticalAlignment call.
+// Every vector found is Frozen and appended to vectors.
+void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
+                                 BLOBNBOX_LIST* line_bblobs,
+                                 int* vertical_x, int* vertical_y,
+                                 TabVector_LIST* vectors) {
+  BLOBNBOX_IT blob_it(line_bblobs);
+  bool grid_has_blobs = false;
+  // Load every blob into the alignment grid, marked as a possible member of
+  // an aligned run and bounded by the image edges.
+  AlignedBlob blob_grid(kLineFindGridSize, bleft, tright);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* candidate = blob_it.data();
+    candidate->set_left_tab_type(TT_MAYBE_ALIGNED);
+    candidate->set_left_rule(bleft.x());
+    candidate->set_right_rule(tright.x());
+    candidate->set_left_crossing_rule(bleft.x());
+    candidate->set_right_crossing_rule(tright.x());
+    blob_grid.InsertBBox(false, true, candidate);
+    grid_has_blobs = true;
+  }
+  if (!grid_has_blobs) {
+    return;
+  }
+
+  // Walk the whole grid, growing a vertical alignment from every blob that is
+  // still marked as a candidate.
+  BlobGridSearch full_search(&blob_grid);
+  BLOBNBOX* seed = nullptr;
+  TabVector_IT out_it(vectors);
+  *vertical_x = 0;
+  *vertical_y = 1;
+  full_search.StartFullSearch();
+  while ((seed = full_search.NextFullSearch()) != nullptr) {
+    if (seed->left_tab_type() != TT_MAYBE_ALIGNED) {
+      continue;
+    }
+    const TBOX& seed_box = seed->bounding_box();
+    if (AlignedBlob::WithinTestRegion(2, seed_box.left(), seed_box.bottom())) {
+      tprintf("Finding line vector starting at bbox (%d,%d)\n",
+              seed_box.left(), seed_box.bottom());
+    }
+    AlignedBlobParams align_params(*vertical_x, *vertical_y, seed_box.width());
+    TabVector* found = blob_grid.FindVerticalAlignment(align_params, seed,
+                                                       vertical_x, vertical_y);
+    if (found == nullptr) {
+      continue;
+    }
+    found->Freeze();
+    out_it.add_to_end(found);
+  }
+}
+
+// Returns a Pix music mask if music is detected, otherwise nullptr.
+// A connected component of pix_vline counts as a bar when it touches at least
+// 5 intersections and those joins are dense enough relative to
+// resolution * kMaxStaveHeight. Bars seed a fill through pix_closed, so the
+// whole touching component ends up in the mask. Components whose mask pixel
+// count is below kMinMusicPixelFraction of the pix_closed pixels in the same
+// box are cleared again as false positives.
+// If any music remains, the mask is subtracted from pix_vline and pix_hline
+// and *v_empty / *h_empty are refreshed with pixZero; otherwise they are not
+// written.
+static Pix* FilterMusic(int resolution, Pix* pix_closed,
+                        Pix* pix_vline, Pix* pix_hline,
+                        l_int32* v_empty, l_int32* h_empty) {
+  const int max_stave_height = static_cast<int>(resolution * kMaxStaveHeight);
+  Pix* crossings = pixAnd(nullptr, pix_vline, pix_hline);
+  Boxa* vline_boxes = pixConnComp(pix_vline, nullptr, 8);
+  // Pass 1: paint every vertical component that qualifies as a bar.
+  const int vline_count = boxaGetCount(vline_boxes);
+  Pix* music_mask = nullptr;
+  for (int v = 0; v < vline_count; ++v) {
+    Box* vbox = boxaGetBox(vline_boxes, v, L_CLONE);
+    l_int32 x, y, box_width, box_height;
+    boxGetGeometry(vbox, &x, &y, &box_width, &box_height);
+    const int joins = NumTouchingIntersections(vbox, crossings);
+    // Join density of at least 5 per max_stave_height, i.e.
+    // (joins-1)/box_height >= (5-1)/max_stave_height, kept in integers.
+    const bool is_bar =
+        joins >= 5 && (joins - 1) * max_stave_height >= 4 * box_height;
+    if (is_bar) {
+      if (music_mask == nullptr) {
+        // Created lazily so that pages without bars never allocate a mask.
+        music_mask = pixCreate(pixGetWidth(pix_vline), pixGetHeight(pix_vline),
+                               1);
+      }
+      pixSetInRect(music_mask, vbox);
+    }
+    boxDestroy(&vbox);
+  }
+  boxaDestroy(&vline_boxes);
+  pixDestroy(&crossings);
+  if (music_mask == nullptr) {
+    return nullptr;
+  }
+
+  // Pass 2: grow the bars through pix_closed to pick up the touching staves.
+  pixSeedfillBinary(music_mask, music_mask, pix_closed, 8);
+  // Pass 3: reject components that hold too small a share of the pixels in
+  // their own bounding box.
+  Boxa* music_boxes = pixConnComp(music_mask, nullptr, 8);
+  const int music_count = boxaGetCount(music_boxes);
+  for (int m = 0; m < music_count; ++m) {
+    Box* mbox = boxaGetBox(music_boxes, m, L_CLONE);
+    Pix* clip = pixClipRectangle(music_mask, mbox, nullptr);
+    l_int32 music_pixels;
+    pixCountPixels(clip, &music_pixels, nullptr);
+    pixDestroy(&clip);
+    clip = pixClipRectangle(pix_closed, mbox, nullptr);
+    l_int32 all_pixels;
+    pixCountPixels(clip, &all_pixels, nullptr);
+    pixDestroy(&clip);
+    if (music_pixels < kMinMusicPixelFraction * all_pixels) {
+      // False positive. Delete from the music mask.
+      pixClearInRect(music_mask, mbox);
+    }
+    boxDestroy(&mbox);
+  }
+  boxaDestroy(&music_boxes);
+  l_int32 no_remaining_music;
+  pixZero(music_mask, &no_remaining_music);
+  if (no_remaining_music) {
+    pixDestroy(&music_mask);
+  } else {
+    pixSubtract(pix_vline, pix_vline, music_mask);
+    pixSubtract(pix_hline, pix_hline, music_mask);
+    // The subtraction may have removed every remaining line.
+    pixZero(pix_vline, v_empty);
+    pixZero(pix_hline, h_empty);
+  }
+  return music_mask;
+}
+
+// Most of the heavy lifting of line finding. Given src_pix and its separate
+// resolution (used to size the morphology operations), returns image masks:
+// pix_vline candidate vertical lines.
+// pix_non_vline pixels that didn't look like vertical lines.
+// pix_hline candidate horizontal lines.
+// pix_non_hline pixels that didn't look like horizontal lines.
+// pix_intersections pixels where vertical and horizontal lines meet (only set when both kinds exist).
+// pix_music_mask candidate music staves.
+// This function promises to initialize all the output (2nd level) pointers,
+// but any of the returns that are empty will be nullptr on output.
+// None of the input (1st level) pointers may be nullptr except pix_music_mask,
+// which will disable music detection, and pixa_display, which is for debug.
+void LineFinder::GetLineMasks(int resolution, Pix* src_pix,
+ Pix** pix_vline, Pix** pix_non_vline,
+ Pix** pix_hline, Pix** pix_non_hline,
+ Pix** pix_intersections, Pix** pix_music_mask,
+ Pixa* pixa_display) {
+ Pix* pix_closed = nullptr;
+ Pix* pix_hollow = nullptr;
+
+ int max_line_width = resolution / kThinLineFraction;
+ int min_line_length = resolution / kMinLineLengthFraction;
+ if (pixa_display != nullptr) {
+ tprintf("Image resolution = %d, max line width = %d, min length=%d\n",
+ resolution, max_line_width, min_line_length);
+ }
+ int closing_brick = max_line_width / 3;
+
+// Only use OpenCL if compiled with USE_OPENCL and the selected device is OpenCL.
+#ifdef USE_OPENCL
+ if (OpenclDevice::selectedDeviceIsOpenCL()) {  // NOTE(review): this branch adds no debug images to pixa_display.
+ // OpenCL pixGetLines Operation
+ int clStatus = OpenclDevice::initMorphCLAllocations(pixGetWpl(src_pix),
+ pixGetHeight(src_pix),
+ src_pix);  // NOTE(review): clStatus is never read in this function.
+ bool getpixclosed = pix_music_mask != nullptr;  // pix_closed is only used by FilterMusic below.
+ OpenclDevice::pixGetLinesCL(nullptr, src_pix, pix_vline, pix_hline,
+ &pix_closed, getpixclosed, closing_brick,
+ closing_brick, max_line_width, max_line_width,
+ min_line_length, min_line_length);
+ } else {
+#endif
+ // Close up small holes, making it less likely that false alarms are found
+ // in thickened text (as it will become more solid) and also smoothing over
+ // some line breaks and nicks in the edges of the lines.
+ pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick);
+ if (pixa_display != nullptr)
+ pixaAddPix(pixa_display, pix_closed, L_CLONE);
+ // Open up with a big box to detect solid areas, which can then be subtracted.
+ // This is very generous and will leave in even quite wide lines.
+ Pix* pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width,
+ max_line_width);
+ if (pixa_display != nullptr)
+ pixaAddPix(pixa_display, pix_solid, L_CLONE);
+ pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid);
+
+ pixDestroy(&pix_solid);
+
+ // Now open up in both directions independently to find lines of at least
+ // 1 inch/kMinLineLengthFraction in length.
+ if (pixa_display != nullptr)
+ pixaAddPix(pixa_display, pix_hollow, L_CLONE);
+ *pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length);
+ *pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1);
+
+ pixDestroy(&pix_hollow);
+#ifdef USE_OPENCL
+ }
+#endif
+
+ // Lines are sufficiently rare, that it is worth checking for a zero image.
+ l_int32 v_empty = 0;
+ l_int32 h_empty = 0;
+ pixZero(*pix_vline, &v_empty);
+ pixZero(*pix_hline, &h_empty);
+ if (pix_music_mask != nullptr) {
+ if (!v_empty && !h_empty) {
+ *pix_music_mask = FilterMusic(resolution, pix_closed,
+ *pix_vline, *pix_hline,
+ &v_empty, &h_empty);  // May flip v_empty/h_empty if music removal cleared the lines.
+ } else {
+ *pix_music_mask = nullptr;
+ }
+ }
+ pixDestroy(&pix_closed);  // Its last use was the FilterMusic call above.
+ Pix* pix_nonlines = nullptr;
+ *pix_intersections = nullptr;
+ Pix* extra_non_hlines = nullptr;
+ if (!v_empty) {
+ // Subtract both line candidates from the source to get definite non-lines.
+ pix_nonlines = pixSubtract(nullptr, src_pix, *pix_vline);
+ if (!h_empty) {
+ pixSubtract(pix_nonlines, pix_nonlines, *pix_hline);
+ // Intersections are a useful indicator for likelihood of being a line.
+ *pix_intersections = pixAnd(nullptr, *pix_vline, *pix_hline);
+ // Candidate vlines are not hlines (apart from the intersections)
+ // and vice versa.
+ extra_non_hlines = pixSubtract(nullptr, *pix_vline, *pix_intersections);
+ }
+ *pix_non_vline = pixErodeBrick(nullptr, pix_nonlines, kMaxLineResidue, 1);
+ pixSeedfillBinary(*pix_non_vline, *pix_non_vline, pix_nonlines, 8);  // Erosion result is the seed, pix_nonlines the fill mask.
+ if (!h_empty) {
+ // Candidate hlines are not vlines.
+ pixOr(*pix_non_vline, *pix_non_vline, *pix_hline);
+ pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections);
+ }
+ if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections,
+ *pix_vline))
+ pixDestroy(pix_vline); // No candidates left.
+ } else {
+ // No vertical lines.
+ pixDestroy(pix_vline);
+ *pix_non_vline = nullptr;
+ if (!h_empty) {
+ pix_nonlines = pixSubtract(nullptr, src_pix, *pix_hline);
+ }
+ }
+ if (h_empty) {
+ pixDestroy(pix_hline);
+ *pix_non_hline = nullptr;
+ if (v_empty) {
+ return;  // No lines at all; pix_nonlines and extra_non_hlines were never allocated on this path.
+ }
+ } else {
+ *pix_non_hline = pixErodeBrick(nullptr, pix_nonlines, 1, kMaxLineResidue);
+ pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8);  // Same seed/mask scheme as for pix_non_vline.
+ if (extra_non_hlines != nullptr) {
+ pixOr(*pix_non_hline, *pix_non_hline, extra_non_hlines);
+ pixDestroy(&extra_non_hlines);
+ }
+ if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections,
+ *pix_hline))
+ pixDestroy(pix_hline); // No candidates left.
+ }
+ if (pixa_display != nullptr) {  // Debug only: record whichever masks survived.
+ if (*pix_vline != nullptr) pixaAddPix(pixa_display, *pix_vline, L_CLONE);
+ if (*pix_hline != nullptr) pixaAddPix(pixa_display, *pix_hline, L_CLONE);
+ if (pix_nonlines != nullptr) pixaAddPix(pixa_display, pix_nonlines, L_CLONE);
+ if (*pix_non_vline != nullptr)
+ pixaAddPix(pixa_display, *pix_non_vline, L_CLONE);
+ if (*pix_non_hline != nullptr)
+ pixaAddPix(pixa_display, *pix_non_hline, L_CLONE);
+ if (*pix_intersections != nullptr)
+ pixaAddPix(pixa_display, *pix_intersections, L_CLONE);
+ if (pix_music_mask != nullptr && *pix_music_mask != nullptr)
+ pixaAddPix(pixa_display, *pix_music_mask, L_CLONE);
+ }
+ pixDestroy(&pix_nonlines);  // Local scratch image; not one of the outputs.
+}
+
+// Fills line_cblobs and line_bblobs with one entry per candidate line segment
+// found in pix_lines. pix_lines is modified in place: it is cut at intervals
+// of kCrackSpacing so that long lines split into short segments whose
+// bounding boxes alone describe the direction well enough.
+// Each BLOBNBOX gets its line_crossings set from NumTouchingIntersections, so
+// that the number of intersections touched by a full line can be worked out
+// later. The stored bounding boxes are transformed: y is inverted against the
+// image height, and for horizontal lines x and y are also swapped.
+void LineFinder::GetLineBoxes(bool horizontal_lines,
+                              Pix* pix_lines, Pix* pix_intersections,
+                              C_BLOB_LIST* line_cblobs,
+                              BLOBNBOX_LIST* line_bblobs) {
+  const int words_per_row = pixGetWpl(pix_lines);
+  const int img_width = pixGetWidth(pix_lines);
+  const int img_height = pixGetHeight(pix_lines);
+  l_uint32* raster = pixGetData(pix_lines);
+  if (!horizontal_lines) {
+    // Vertical lines: blank a whole raster row every kCrackSpacing rows.
+    for (int row = kCrackSpacing; row < img_height; row += kCrackSpacing) {
+      memset(raster + words_per_row * row, 0,
+             words_per_row * sizeof(*raster));
+    }
+  } else {
+    // Horizontal lines: clear one pixel every kCrackSpacing columns, in
+    // every row.
+    l_uint32* row_data = raster;
+    for (int row = 0; row < img_height; ++row) {
+      for (int col = kCrackSpacing; col < img_width; col += kCrackSpacing) {
+        CLEAR_DATA_BIT(row_data, col);
+      }
+      row_data += words_per_row;
+    }
+  }
+  // Box up the resulting connected components and turn them into C_BLOBs.
+  Boxa* segment_boxes = pixConnComp(pix_lines, nullptr, 8);
+  ConvertBoxaToBlobs(img_width, img_height, &segment_boxes, line_cblobs);
+  // Wrap each C_BLOB in a BLOBNBOX.
+  C_BLOB_IT cblob_it(line_cblobs);
+  BLOBNBOX_IT bblob_it(line_bblobs);
+  for (cblob_it.mark_cycle_pt(); !cblob_it.cycled_list(); cblob_it.forward()) {
+    auto* segment = new BLOBNBOX(cblob_it.data());
+    bblob_it.add_to_end(segment);
+    // Count the intersections touched by this segment. At this point the box
+    // still holds Leptonica coordinates.
+    const TBOX& lept_box = segment->bounding_box();
+    Box* probe = boxCreate(lept_box.left(), lept_box.bottom(),
+                           lept_box.width(), lept_box.height());
+    segment->set_line_crossings(
+        NumTouchingIntersections(probe, pix_intersections));
+    boxDestroy(&probe);
+    // Transform the box before line finding. Horizontal lines have x and y
+    // swapped so the vertical tab-stop code can be reused; in both cases the
+    // y range is inverted to go from Leptonica to Tesseract coordinates.
+    // Because the TBOX holds Leptonica coordinates, bottom() (the MIN y) is
+    // really the top, so RemoveUnusedLineSegments has to use
+    // height - box.right() as the top to undo the horizontal transform.
+    const TBOX transformed =
+        horizontal_lines
+            ? TBOX(img_height - lept_box.top(), lept_box.left(),
+                   img_height - lept_box.bottom(), lept_box.right())
+            : TBOX(lept_box.left(), img_height - lept_box.top(),
+                   lept_box.right(), img_height - lept_box.bottom());
+    segment->set_bounding_box(transformed);
+  }
+}
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/linefind.h b/tesseract/src/textord/linefind.h
new file mode 100644
index 00000000..93b58e1f
--- /dev/null
+++ b/tesseract/src/textord/linefind.h
@@ -0,0 +1,149 @@
+///////////////////////////////////////////////////////////////////////
+// File: linefind.h
+// Description: Class to find vertical lines in an image and create
+// a corresponding list of empty blobs.
+// Author: Ray Smith
+// Created: Thu Mar 20 09:49:01 PDT 2008
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_LINEFIND_H_
+#define TESSERACT_TEXTORD_LINEFIND_H_
+
+struct Boxa;
+struct Pix;
+struct Pixa;
+
+namespace tesseract {
+
+class TabVector_LIST;
+class C_BLOB_LIST;
+class BLOBNBOX_LIST;
+class ICOORD;
+
+/**
+ * The LineFinder class is a simple static function wrapper class that mainly
+ * exposes the FindAndRemoveLines function.
+ */
+class LineFinder {
+ public:
+ /**
+ * Finds vertical and horizontal line objects in the given pix and removes
+ * them.
+ *
+ * Uses the given resolution to determine size thresholds instead of any
+ * that may be present in the pix.
+ *
+ * The output vertical_x and vertical_y contain a sum of the output vectors,
+ * thereby giving the mean vertical direction.
+ *
+ * If pix_music_mask != nullptr, and music is detected, a mask of the staves
+ * and anything that is connected (bars, notes etc.) will be returned in
+ * pix_music_mask, the mask subtracted from pix, and the lines will not
+ * appear in v_lines or h_lines.
+ *
+ * The output vectors are owned by the list and Frozen (cannot refit) by
+ * having no boxes, as there is no need to refit or merge separator lines.
+ *
+ * The detected lines are removed from the pix.
+ */
+ static void FindAndRemoveLines(int resolution, bool debug, Pix* pix,
+ int* vertical_x, int* vertical_y,
+ Pix** pix_music_mask,
+ TabVector_LIST* v_lines,
+ TabVector_LIST* h_lines);
+
+ /**
+ * Converts the Boxa array to a list of C_BLOB, getting rid of severely
+ * overlapping outlines and those that are children of a bigger one.
+ *
+ * The output is a list of C_BLOBs that are owned by the list.
+ *
+ * The C_OUTLINEs in the C_BLOBs contain no outline data - just empty
+ * bounding boxes. The Boxa is consumed and destroyed.
+ */
+ static void ConvertBoxaToBlobs(int image_width, int image_height,
+ Boxa** boxes, C_BLOB_LIST* blobs);
+
+ private:
+ // Finds vertical line objects in pix_vline and removes them from src_pix.
+ // Uses the given resolution to determine size thresholds instead of any
+ // that may be present in the pix.
+ // The output vertical_x and vertical_y contain a sum of the output vectors,
+ // thereby giving the mean vertical direction.
+ // The output vectors are owned by the list and Frozen (cannot refit) by
+ // having no boxes, as there is no need to refit or merge separator lines.
+ // If no good lines are found, pix_vline is destroyed.
+ static void FindAndRemoveVLines(int resolution,
+ Pix* pix_intersections,
+ int* vertical_x, int* vertical_y,
+ Pix** pix_vline, Pix* pix_non_vline,
+ Pix* src_pix, TabVector_LIST* vectors);
+
+
+ // Finds horizontal line objects in pix_hline and removes them from src_pix.
+ // Uses the given resolution to determine size thresholds instead of any
+ // that may be present in the pix.
+ // vertical_x and vertical_y are passed by value here, so unlike in
+ // FindAndRemoveVLines nothing is returned to the caller through them.
+ // The output vectors are owned by the list and Frozen (cannot refit) by
+ // having no boxes, as there is no need to refit or merge separator lines.
+ // If no good lines are found, pix_hline is destroyed.
+ static void FindAndRemoveHLines(int resolution,
+ Pix* pix_intersections,
+ int vertical_x, int vertical_y,
+ Pix** pix_hline, Pix* pix_non_hline,
+ Pix* src_pix, TabVector_LIST* vectors);
+
+ // Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
+ // are the bounds of the image on which the input line_bblobs were found.
+ // The input line_bblobs list is const really.
+ // The output vertical_x and vertical_y are the total of all the vectors.
+ // The output list of TabVector makes no reference to the input BLOBNBOXes.
+ static void FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
+ BLOBNBOX_LIST* line_bblobs,
+ int* vertical_x, int* vertical_y,
+ TabVector_LIST* vectors);
+
+ // Most of the heavy lifting of line finding. Given src_pix and its separate
+ // resolution, fills in the following image masks through the Pix** output
+ // parameters:
+ // pix_vline candidate vertical lines.
+ // pix_non_vline pixels that didn't look like vertical lines.
+ // pix_hline candidate horizontal lines.
+ // pix_non_hline pixels that didn't look like horizontal lines.
+ // pix_intersections pixels where vertical and horizontal lines meet.
+ // pix_music_mask candidate music staves.
+ // This function promises to initialize all the output (2nd level) pointers,
+ // but any of the returns that are empty will be nullptr on output.
+ // None of the input (1st level) pointers may be nullptr except pix_music_mask,
+ // which will disable music detection, and pixa_display, which is for debug.
+ static void GetLineMasks(int resolution, Pix* src_pix,
+ Pix** pix_vline, Pix** pix_non_vline,
+ Pix** pix_hline, Pix** pix_non_hline,
+ Pix** pix_intersections, Pix** pix_music_mask,
+ Pixa* pixa_display);
+
+ // Returns a list of boxes corresponding to the candidate line segments. Sets
+ // the line_crossings member of the boxes so we can later determine the number
+ // of intersections touched by a full line.
+ static void GetLineBoxes(bool horizontal_lines,
+ Pix* pix_lines, Pix* pix_intersections,
+ C_BLOB_LIST* line_cblobs,
+ BLOBNBOX_LIST* line_bblobs);
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_LINEFIND_H_
diff --git a/tesseract/src/textord/makerow.cpp b/tesseract/src/textord/makerow.cpp
new file mode 100644
index 00000000..0df8243a
--- /dev/null
+++ b/tesseract/src/textord/makerow.cpp
@@ -0,0 +1,2673 @@
+/**********************************************************************
+ * File: makerow.cpp (Formerly makerows.c)
+ * Description: Code to arrange blobs into rows of text.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+ // Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "makerow.h"
+
+#include "blobbox.h"
+#include "ccstruct.h"
+#include "detlinefit.h"
+#include "statistc.h"
+#include "drawtord.h"
+#include "blkocc.h"
+#include "sortflts.h"
+#include "oldbasel.h"
+#include "textord.h"
+#include "tordmain.h"
+#include "underlin.h"
+#include "tprintf.h"
+#include "tovars.h"
+
+#include <algorithm>
+#include <vector> // for std::vector
+
+namespace tesseract {
+
+BOOL_VAR(textord_heavy_nr, false, "Vigorously remove noise");  // Tunables below: presumably (name, default, help text) - confirm against the *_VAR macro definitions.
+BOOL_VAR(textord_show_initial_rows, false, "Display row accumulation");
+BOOL_VAR(textord_show_parallel_rows, false, "Display page correlated rows");
+BOOL_VAR(textord_show_expanded_rows, false, "Display rows after expanding");
+BOOL_VAR(textord_show_final_rows, false, "Display rows after final fitting");
+BOOL_VAR(textord_show_final_blobs, false, "Display blob bounds after pre-ass");
+BOOL_VAR(textord_test_landscape, false, "Tests refer to land/port");
+BOOL_VAR(textord_parallel_baselines, true, "Force parallel baselines");
+BOOL_VAR(textord_straight_baselines, false, "Force straight baselines");
+BOOL_VAR(textord_old_baselines, true, "Use old baseline algorithm");
+BOOL_VAR(textord_old_xheight, false, "Use old xheight algorithm");
+BOOL_VAR(textord_fix_xheight_bug, true, "Use spline baseline");
+BOOL_VAR(textord_fix_makerow_bug, true, "Prevent multiple baselines");
+BOOL_VAR(textord_debug_xheights, false, "Test xheight algorithms");
+static BOOL_VAR(textord_biased_skewcalc, true, "Bias skew estimates with line length");  // static entries: presumably private to this file - confirm macro expansion.
+static BOOL_VAR(textord_interpolating_skew, true, "Interpolate across gaps");
+static INT_VAR(textord_skewsmooth_offset, 4, "For smooth factor");
+static INT_VAR(textord_skewsmooth_offset2, 1, "For smooth factor");
+INT_VAR(textord_test_x, -INT32_MAX, "coord of test pt");  // NOTE(review): -INT32_MAX presumably means "no test point" - confirm at use sites.
+INT_VAR(textord_test_y, -INT32_MAX, "coord of test pt");
+INT_VAR(textord_min_blobs_in_row, 4, "Min blobs before gradient counted");
+INT_VAR(textord_spline_minblobs, 8, "Min blobs in each spline segment");
+INT_VAR(textord_spline_medianwin, 6, "Size of window for spline segmentation");
+static INT_VAR(textord_max_blob_overlaps, 4,
+ "Max number of blobs a big blob can overlap");
+INT_VAR(textord_min_xheight, 10, "Min credible pixel xheight");
+double_VAR(textord_spline_shift_fraction, 0.02,
+ "Fraction of line spacing for quad");
+double_VAR(textord_spline_outlier_fraction, 0.1,
+ "Fraction of line spacing for outlier");
+double_VAR(textord_skew_ile, 0.5, "Ile of gradients for page skew");
+double_VAR(textord_skew_lag, 0.02, "Lag for skew on row accumulation");
+double_VAR(textord_linespace_iqrlimit, 0.2, "Max iqr/median for linespace");
+double_VAR(textord_width_limit, 8, "Max width of blobs to make rows");
+double_VAR(textord_chop_width, 1.5, "Max width before chopping");
+static double_VAR(textord_expansion_factor, 1.0,
+ "Factor to expand rows by in expand_rows");
+static double_VAR(textord_overlap_x, 0.375, "Fraction of linespace for good overlap");
+double_VAR(textord_minxh, 0.25, "fraction of linesize for min xheight");
+double_VAR(textord_min_linesize, 1.25, "* blob height for initial linesize");
+double_VAR(textord_excess_blobsize, 1.3,
+ "New row made if blob makes row this big");
+double_VAR(textord_occupancy_threshold, 0.4, "Fraction of neighbourhood");
+double_VAR(textord_underline_width, 2.0, "Multiple of line_size for underline");
+double_VAR(textord_min_blob_height_fraction, 0.75,
+ "Min blob height/top to include blob top into xheight stats");
+double_VAR(textord_xheight_mode_fraction, 0.4,
+ "Min pile height to make xheight");
+double_VAR(textord_ascheight_mode_fraction, 0.08,
+ "Min pile height to make ascheight");
+static double_VAR(textord_descheight_mode_fraction, 0.08,
+ "Min pile height to make descheight");
+double_VAR(textord_ascx_ratio_min, 1.25, "Min cap/xheight");
+double_VAR(textord_ascx_ratio_max, 1.8, "Max cap/xheight");
+double_VAR(textord_descx_ratio_min, 0.25, "Min desc/xheight");
+double_VAR(textord_descx_ratio_max, 0.6, "Max desc/xheight");
+double_VAR(textord_xheight_error_margin, 0.1, "Accepted variation");
+INT_VAR(textord_lms_line_trials, 12, "Number of linew fits to do");  // NOTE(review): "linew" looks like a typo for "line"; left as is because it is a runtime string.
+BOOL_VAR(textord_new_initial_xheight, true, "Use test xheight mechanism");
+BOOL_VAR(textord_debug_blob, false, "Print test blob information");
+
+#define MAX_HEIGHT_MODES 12  // NOTE(review): use sites are outside this part of the file; meaning to be confirmed.
+
+const int kMinLeaderCount = 5;  // NOTE(review): use sites are outside this part of the file; meaning to be confirmed.
+
+// Helper that moves every blob reachable through blob_it into one TO_ROW.
+// The blobs are first sorted with blob_x_order, then extracted from their
+// list one at a time. The row is created from the first blob and inserted
+// through row_it; all later blobs are added to that same row.
+// Returns the mean of (top - bottom) over the blobs moved, or 0 if the list
+// was empty (in which case no row is created).
+static float MakeRowFromBlobs(float line_size,
+                              BLOBNBOX_IT* blob_it, TO_ROW_IT* row_it) {
+  blob_it->sort(blob_x_order);
+  blob_it->move_to_first();
+  TO_ROW* single_row = nullptr;
+  float height_sum = 0.0f;
+  int num_blobs = 0;
+  // Drain the list: each pass extracts the current blob and steps on.
+  while (!blob_it->empty()) {
+    BLOBNBOX* current = blob_it->extract();
+    const int top = current->bounding_box().top();
+    const int bottom = current->bounding_box().bottom();
+    if (single_row != nullptr) {
+      single_row->add_blob(current, top, bottom, line_size);
+    } else {
+      single_row = new TO_ROW(current, top, bottom, line_size);
+      row_it->add_before_then_move(single_row);
+    }
+    height_sum += top - bottom;
+    ++num_blobs;
+    blob_it->forward();
+  }
+  if (num_blobs > 0) {
+    return height_sum / num_blobs;
+  }
+  return height_sum;
+}
+
+// Helper that builds a row out of the child outlines of a single blob.
+// Each child of the blob's first outline is deep-copied into a new C_BLOB,
+// wrapped in a BLOBNBOX and added to block->small_blobs; that list is then
+// turned into a row by MakeRowFromBlobs.
+// Returns the mean blob size reported by MakeRowFromBlobs, or 0 if the
+// outline has no children.
+static float MakeRowFromSubBlobs(TO_BLOCK* block, C_BLOB* blob,
+                                 TO_ROW_IT* row_it) {
+  BLOBNBOX_IT small_it(&block->small_blobs);
+  C_OUTLINE_IT child_it(blob->out_list());
+  // Re-point the iterator from the top-level outlines to the children.
+  child_it.set_to_list(child_it.data()->child());
+  if (child_it.empty()) {
+    return 0.0f;
+  }
+  for (child_it.mark_cycle_pt(); !child_it.cycled_list(); child_it.forward()) {
+    // The copy is independent of the parent blob's outline.
+    C_BLOB* child_blob = new C_BLOB(C_OUTLINE::deep_copy(child_it.data()));
+    // Correct direction as needed.
+    child_blob->CheckInverseFlagAndDirection();
+    small_it.add_after_then_move(new BLOBNBOX(child_blob));
+  }
+  return MakeRowFromBlobs(block->line_size, &small_it, row_it);
+}
+
+/**
+ * @name make_single_row
+ *
+ * Puts all the blobs of the block (normal, small, noise and large) into a
+ * single row. If there is exactly one blob and allow_sub_blobs is set, a
+ * second row is made first from that blob's child outlines, in case the
+ * top-level blob is only a container of the real blobs to recognize. If
+ * there are no blobs at all, a fake blob covering the block's bounding box
+ * is used instead.
+ *
+ * Returns the gradient computed by compute_page_skew over blocks.
+ * page_tr is not used.
+ */
+float make_single_row(ICOORD page_tr, bool allow_sub_blobs,
+                      TO_BLOCK* block, TO_BLOCK_LIST* blocks) {
+  BLOBNBOX_IT all_blobs_it(&block->blobs);
+  TO_ROW_IT rows_it(block->get_rows());
+
+  // Merge the small, noise and large blobs into the main blob list.
+  all_blobs_it.add_list_after(&block->small_blobs);
+  all_blobs_it.add_list_after(&block->noise_blobs);
+  all_blobs_it.add_list_after(&block->large_blobs);
+  const bool lone_container = block->blobs.singleton() && allow_sub_blobs;
+  if (lone_container) {
+    all_blobs_it.move_to_first();
+    const float sub_size =
+        MakeRowFromSubBlobs(block, all_blobs_it.data()->cblob(), &rows_it);
+    // Only ever grow the line size from the sub-blob estimate.
+    if (sub_size > block->line_size) {
+      block->line_size = sub_size;
+    }
+  } else if (block->blobs.empty()) {
+    // Nothing to work with, so invent a blob the size of the block.
+    C_BLOB* placeholder = C_BLOB::FakeBlob(block->block->pdblk.bounding_box());
+    // The BLOBNBOX takes ownership of the fake blob.
+    all_blobs_it.add_after_then_move(new BLOBNBOX(placeholder));
+  }
+  MakeRowFromBlobs(block->line_size, &all_blobs_it, &rows_it);
+  // Fit an LMS line to each row made above.
+  for (rows_it.mark_cycle_pt(); !rows_it.cycled_list(); rows_it.forward()) {
+    fit_lms_line(rows_it.data());
+  }
+  // Derive the skew from the fitted lines.
+  float gradient;
+  float fit_error;
+  compute_page_skew(blocks, gradient, fit_error);
+  return gradient;
+}
+
+/**
+ * @name make_rows
+ *
+ * Arranges the blobs of every block in port_blocks into rows: initial rows
+ * are made per block, the page skew is computed across all blocks, and each
+ * block's rows are then cleaned up using that skew.
+ *
+ * Returns the page gradient from compute_page_skew.
+ */
+float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) {
+  TO_BLOCK_IT port_it;
+
+  // Pass 1: initial rows for every block.
+  port_it.set_to_list(port_blocks);
+  for (port_it.mark_cycle_pt(); !port_it.cycled_list(); port_it.forward()) {
+    make_initial_textrows(page_tr, port_it.data(), FCOORD(1.0f, 0.0f),
+                          !textord_test_landscape);
+  }
+  // Page-wide skew and noise estimate.
+  float page_gradient;
+  float page_error;
+  compute_page_skew(port_blocks, page_gradient, page_error);
+  // Pass 2: clean up each block's rows using the page skew.
+  port_it.set_to_list(port_blocks);
+  for (port_it.mark_cycle_pt(); !port_it.cycled_list(); port_it.forward()) {
+    TO_BLOCK* port_block = port_it.data();
+    cleanup_rows_making(page_tr, port_block, page_gradient,
+                        FCOORD(1.0f, 0.0f),
+                        port_block->block->pdblk.bounding_box().left(),
+                        !textord_test_landscape);
+  }
+  return page_gradient;
+}
+
+/**
+ * @name make_initial_textrows
+ *
+ * Arrange the good blobs into rows of text and fit an LMS line to each row.
+ * When graphics are enabled and textord_show_initial_rows && testing_on,
+ * the rows are also plotted, cycling through the colours RED..MAGENTA.
+ */
+void make_initial_textrows(ICOORD page_tr,
+                           TO_BLOCK* block,   // block to do
+                           FCOORD rotation,   // for drawing
+                           bool testing_on    // correct orientation
+) {
+  TO_ROW_IT rows = block->get_rows();
+
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_initial_rows && testing_on && to_win == nullptr) {
+    create_to_win(page_tr);
+  }
+#endif
+  // First guess at the rows, with no gradient supplied.
+  assign_blobs_to_rows(block, nullptr, 0, true, true,
+                       textord_show_initial_rows && testing_on);
+  rows.move_to_first();
+  for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
+    fit_lms_line(rows.data());
+  }
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_initial_rows && testing_on) {
+    ScrollView::Color colour = ScrollView::RED;
+    for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
+      plot_to_row(rows.data(), colour, rotation);
+      // Step to the next colour, wrapping back to RED after MAGENTA.
+      const ScrollView::Color next =
+          static_cast<ScrollView::Color>(colour + 1);
+      colour = (next > ScrollView::MAGENTA) ? ScrollView::RED : next;
+    }
+  }
+#endif
+}
+
+
+/**
+ * @name fit_lms_line
+ *
+ * Fit a line to a row using the bottom-centre point of every blob's
+ * bounding box, and store the gradient, offset and fit error on the row.
+ */
+void fit_lms_line(TO_ROW *row) {
+  tesseract::DetLineFit fitter;
+  BLOBNBOX_IT blobs = row->blob_list();
+
+  blobs.mark_cycle_pt();
+  while (!blobs.cycled_list()) {
+    const TBOX& bounds = blobs.data()->bounding_box();
+    const int mid_x = (bounds.left() + bounds.right()) / 2;
+    fitter.Add(ICOORD(mid_x, bounds.bottom()));
+    blobs.forward();
+  }
+
+  float slope, offset;  // fitted line
+  double error = fitter.Fit(&slope, &offset);
+  row->set_line(slope, offset, error);
+}
+
+
+/**
+ * @name compute_page_skew
+ *
+ * Compute the skew over a full page from the fitted gradients of all the
+ * rows in text blocks (blocks whose poly_block is non-text are ignored).
+ *
+ * A sample list of gradients and a parallel list of fit errors is built:
+ *  - with textord_biased_skewcalc each row contributes a number of samples
+ *    derived from its blob count and its (integer, >= 1) fit error;
+ *  - otherwise each row with at least textord_min_blobs_in_row blobs
+ *    contributes one sample;
+ *  - if that yields no samples at all, every row contributes one sample.
+ * page_m and page_err are then each set to the textord_skew_ile fractile of
+ * their own list; they are selected independently, so they need not come
+ * from the same row.
+ *
+ * If there are no rows, page_m and page_err are both set to 0.
+ */
+void compute_page_skew(        // get average gradient
+    TO_BLOCK_LIST *blocks,     // list of blocks
+    float &page_m,             // average gradient
+    float &page_err            // average error
+) {
+  TO_BLOCK_IT block_it = blocks;
+
+  // First pass: count the rows and blobs in text blocks.
+  int32_t row_count = 0;
+  int32_t blob_count = 0;
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+       block_it.forward()) {
+    POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
+    if (pb != nullptr && !pb->IsText())
+      continue;  // Pretend non-text blocks don't exist.
+    row_count += block_it.data()->get_rows()->length();
+    TO_ROW_IT row_it(block_it.data()->get_rows());
+    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward())
+      blob_count += row_it.data()->blob_list()->length();
+  }
+  if (row_count == 0) {
+    page_m = 0.0f;
+    page_err = 0.0f;
+    return;
+  }
+
+  // The sample arrays must hold whichever is larger: the biased scheme
+  // emits at most one sample per blob, while the other schemes (including
+  // the fallback below) emit at most one sample per row. Sizing by blob
+  // count alone overflowed when rows outnumbered blobs.
+  const int32_t capacity = blob_count > row_count ? blob_count : row_count;
+  std::vector<float> gradients(capacity);
+  std::vector<float> errors(capacity);
+
+  // Second pass: collect the samples.
+  int32_t row_index = 0;
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+       block_it.forward()) {
+    POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
+    if (pb != nullptr && !pb->IsText())
+      continue;  // Pretend non-text blocks don't exist.
+    TO_ROW_IT row_it(block_it.data()->get_rows());
+    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+      TO_ROW *row = row_it.data();
+      blob_count = row->blob_list()->length();
+      int32_t row_err = static_cast<int32_t>(ceil(row->line_error()));
+      if (row_err <= 0)
+        row_err = 1;
+      if (textord_biased_skewcalc) {
+        // NOTE(review): blob_count is divided by row_err twice (here and in
+        // the loop initialiser). Kept as-is to preserve existing results;
+        // confirm whether the squared weighting is intended.
+        blob_count /= row_err;
+        for (blob_count /= row_err; blob_count > 0; blob_count--) {
+          gradients[row_index] = row->line_m();
+          errors[row_index] = row->line_error();
+          row_index++;
+        }
+      } else if (blob_count >= textord_min_blobs_in_row) {
+        gradients[row_index] = row->line_m();
+        errors[row_index] = row->line_error();
+        row_index++;
+      }
+    }
+  }
+  if (row_index == 0) {
+    // Desperate: nothing qualified, so take one sample from every row.
+    for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+         block_it.forward()) {
+      POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block();
+      if (pb != nullptr && !pb->IsText())
+        continue;  // Pretend non-text blocks don't exist.
+      TO_ROW_IT row_it(block_it.data()->get_rows());
+      for (row_it.mark_cycle_pt(); !row_it.cycled_list();
+           row_it.forward()) {
+        TO_ROW *row = row_it.data();
+        gradients[row_index] = row->line_m();
+        errors[row_index] = row->line_error();
+        row_index++;
+      }
+    }
+  }
+
+  // Pick the requested fractile of each list independently.
+  row_count = row_index;
+  row_index = choose_nth_item(
+      static_cast<int32_t>(row_count * textord_skew_ile),
+      &gradients[0], row_count);
+  page_m = gradients[row_index];
+  row_index = choose_nth_item(
+      static_cast<int32_t>(row_count * textord_skew_ile),
+      &errors[0], row_count);
+  page_err = errors[row_index];
+}
+
+const double kNoiseSize = 0.5; // Blobs shorter than this fraction of the row's estimated xheight are noise candidates.
+const int kMinSize = 8; // Min blob height in pixels for a blob to count towards the xheight estimate.
+
+/**
+ * Return true if the dot looks like it is part of the i.
+ * Doesn't work for any other diacritical.
+ *
+ * @param dot  the small blob being tested as a possible dot.
+ * @param i    the neighbouring blob that may be the body of an i or !.
+ * @param row  row containing the blobs; its fitted line (line_m, line_c)
+ *             is used as the baseline when computing the target height.
+ * @return false if i is not more than twice as tall as dot or the two do
+ *         not overlap enough horizontally; true if i is more than twice as
+ *         tall as it is wide; otherwise true only if the outline of i has
+ *         a sufficiently tall stretch on both the left and the right of the
+ *         dot's centre, within one dot-width of the dot's edges.
+ */
+static bool dot_of_i(BLOBNBOX* dot, BLOBNBOX* i, TO_ROW* row) {
+ const TBOX& ibox = i->bounding_box();
+ const TBOX& dotbox = dot->bounding_box();
+
+ // Must overlap horizontally by enough and be high enough.
+ int overlap = std::min(dotbox.right(), ibox.right()) -
+ std::max(dotbox.left(), ibox.left());
+ if (ibox.height() <= 2 * dotbox.height() ||
+ (overlap * 2 < ibox.width() && overlap < dotbox.width()))
+ return false;
+
+ // If the i is tall and thin then it is good.
+ if (ibox.height() > ibox.width() * 2)
+ return true; // The i or ! must be tall and thin.
+
+ // It might still be tall and thin, but it might be joined to something.
+ // So search the outline for a piece of large height close to the edges
+ // of the dot.
+ const double kHeightFraction = 0.6;
+ double target_height = std::min(dotbox.bottom(), ibox.top()); // lower of dot bottom and i top
+ target_height -= row->line_m()*dotbox.left() + row->line_c(); // height above the fitted row line at the dot's left edge
+ target_height *= kHeightFraction;
+ int left_min = dotbox.left() - dotbox.width(); // search window: one dot-width left of the dot
+ int middle = (dotbox.left() + dotbox.right())/2; // splits the left window from the right window
+ int right_max = dotbox.right() + dotbox.width(); // search window: one dot-width right of the dot
+ int left_miny = 0;
+ int left_maxy = 0;
+ int right_miny = 0;
+ int right_maxy = 0;
+ bool found_left = false; // a tall enough run has been seen in the left window
+ bool found_right = false; // a tall enough run has been seen in the right window
+ bool in_left = false; // the outline walk is currently inside the left window
+ bool in_right = false; // the outline walk is currently inside the right window
+ C_BLOB* blob = i->cblob();
+ C_OUTLINE_IT o_it = blob->out_list();
+ for (o_it.mark_cycle_pt(); !o_it.cycled_list(); o_it.forward()) {
+ C_OUTLINE* outline = o_it.data();
+ int length = outline->pathlength();
+ ICOORD pos = outline->start_pos();
+ for (int step = 0; step < length; pos += outline->step(step++)) { // walk the outline one step at a time
+ int x = pos.x();
+ int y = pos.y();
+ if (x >= left_min && x < middle && !found_left) {
+ // We are in the left part so find min and max y.
+ if (in_left) {
+ if (y > left_maxy) left_maxy = y;
+ if (y < left_miny) left_miny = y;
+ } else {
+ left_maxy = left_miny = y;
+ in_left = true;
+ }
+ } else if (in_left) {
+ // We just left the left so look for size.
+ if (left_maxy - left_miny > target_height) {
+ if (found_right)
+ return true; // tall runs on both sides of the dot
+ found_left = true;
+ }
+ in_left = false;
+ }
+ if (x <= right_max && x > middle && !found_right) {
+ // We are in the right part so find min and max y.
+ if (in_right) {
+ if (y > right_maxy) right_maxy = y;
+ if (y < right_miny) right_miny = y;
+ } else {
+ right_maxy = right_miny = y;
+ in_right = true;
+ }
+ } else if (in_right) {
+ // We just left the right so look for size.
+ if (right_maxy - right_miny > target_height) {
+ if (found_left)
+ return true; // tall runs on both sides of the dot
+ found_right = true;
+ }
+ in_right = false;
+ }
+ }
+ }
+ return false; // never found tall runs on both sides
+}
+
+/**
+ * vigorous_noise_removal
+ *
+ * For each row of the block, estimate the xheight as the median height of
+ * the blobs that are at least kMinSize tall, then delete every blob shorter
+ * than kNoiseSize times that xheight unless dot_of_i() accepts it as the
+ * dot of the previous kept full-size blob or of the next blob in the row.
+ */
+void vigorous_noise_removal(TO_BLOCK* block) {
+  TO_ROW_IT rows = block->get_rows();
+  for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
+    TO_ROW* row = rows.data();
+    BLOBNBOX_IT blobs = row->blob_list();
+
+    // Find the tallest blob so the height histogram can be sized.
+    int tallest = 0;
+    for (blobs.mark_cycle_pt(); !blobs.cycled_list(); blobs.forward()) {
+      const int h = blobs.data()->bounding_box().height();
+      if (h > tallest) {
+        tallest = h;
+      }
+    }
+
+    // Histogram the heights of blobs big enough to be text.
+    STATS height_stats(0, tallest + 1);
+    for (blobs.mark_cycle_pt(); !blobs.cycled_list(); blobs.forward()) {
+      const int h = blobs.data()->bounding_box().height();
+      if (h >= kMinSize) {
+        height_stats.add(h, 1);
+      }
+    }
+    const float xheight = height_stats.median();
+
+    // Delete small objects.
+    BLOBNBOX* last_big = nullptr;  // most recent blob that was not small
+    for (blobs.mark_cycle_pt(); !blobs.cycled_list(); blobs.forward()) {
+      BLOBNBOX* candidate = blobs.data();
+      const TBOX& box = candidate->bounding_box();
+      if (!(box.height() < kNoiseSize * xheight)) {
+        last_big = candidate;
+        continue;
+      }
+      // Small: keep it only if it looks like the dot of a neighbour.
+      if (last_big != nullptr && dot_of_i(candidate, last_big, row)) {
+        continue;  // Looks OK.
+      }
+      if (!blobs.at_last() &&
+          dot_of_i(candidate, blobs.data_relative(1), row)) {
+        continue;  // Looks OK.
+      }
+      // It might be noise so get rid of it.
+      delete candidate->cblob();
+      delete blobs.extract();
+    }
+  }
+}
+
+/**
+ * cleanup_rows_making
+ *
+ * Remove overlapping rows and fit all the blobs to what's left.
+ * The rows are fitted parallel to the given gradient, rows without good
+ * dropout characteristics are deleted, the survivors are expanded, and
+ * then the blobs are re-assigned to rows in three passes: the ordinary
+ * blobs, then the large blobs, then the noise and small blobs.
+ */
+void cleanup_rows_making(ICOORD page_tr,      // top right
+                         TO_BLOCK* block,     // block to do
+                         float gradient,      // gradient to fit
+                         FCOORD rotation,     // for drawing
+                         int32_t block_edge,  // edge of block
+                         bool testing_on      // correct orientation
+) {
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_parallel_rows && testing_on && to_win == nullptr) {
+    create_to_win(page_tr);
+  }
+#endif
+  // Get the row coords, then weed out and grow the rows.
+  fit_parallel_rows(block, gradient, rotation, block_edge,
+                    textord_show_parallel_rows && testing_on);
+  delete_non_dropout_rows(block, gradient, rotation, block_edge,
+                          textord_show_parallel_rows && testing_on);
+  expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);
+
+  // Give the blobs of every row back to the block.
+  BLOBNBOX_IT block_blobs(&block->blobs);
+  TO_ROW_IT rows(block->get_rows());
+  for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
+    block_blobs.add_list_after(rows.data()->blob_list());
+  }
+  assign_blobs_to_rows(block, &gradient, 1, false, false, false);
+
+  // Now new rows must be genuine, so it is safe to use the big blobs.
+  block_blobs.set_to_list(&block->blobs);
+  block_blobs.add_list_after(&block->large_blobs);
+  assign_blobs_to_rows(block, &gradient, 2, true, true, false);
+
+  // Finally throw all the remaining blobs in.
+  block_blobs.set_to_list(&block->blobs);
+  block_blobs.add_list_after(&block->noise_blobs);
+  block_blobs.add_list_after(&block->small_blobs);
+  assign_blobs_to_rows(block, &gradient, 3, false, false, false);
+}
+
+/**
+ * delete_non_dropout_rows
+ *
+ * Delete the rows for which find_best_dropout_row() returns true (too far
+ * from a dropout, or a neighbouring row is a better candidate).
+ *
+ * The blobs are projected onto the y axis of the deskewed block to get an
+ * occupation profile; thresholds are computed for it and converted into the
+ * distance from each scan line to the nearest dropout. Each row is then
+ * tested using the distance at the scan line of its intercept.
+ * The blobs of deleted rows, and finally of all surviving rows as well,
+ * are appended to block->blobs.
+ * Returns without doing anything if the block has no rows.
+ */
+void delete_non_dropout_rows( //find lines
+ TO_BLOCK* block, //block to do
+ float gradient, //global skew
+ FCOORD rotation, //deskew vector
+ int32_t block_edge, //left edge
+ bool testing_on //correct orientation
+) {
+ TBOX block_box; //deskewed block
+ int32_t max_y; //in block
+ int32_t min_y;
+ int32_t line_index; //of scan line
+ int32_t line_count; //no of scan lines
+ int32_t distance; //to drop-out
+ int32_t xleft; //of block
+ int32_t ybottom; //of block
+ TO_ROW *row; //current row
+ TO_ROW_IT row_it = block->get_rows ();
+ BLOBNBOX_IT blob_it = &block->blobs;
+
+ if (row_it.length () == 0)
+ return; //empty block
+ block_box = deskew_block_coords (block, gradient);
+ xleft = block->block->pdblk.bounding_box ().left ();
+ ybottom = block->block->pdblk.bounding_box ().bottom ();
+ min_y = block_box.bottom () - 1;
+ max_y = block_box.top () + 1;
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+ line_index = static_cast<int32_t>(floor (row_it.data ()->intercept ()));
+ if (line_index <= min_y)
+ min_y = line_index - 1; //widen range to include this row's intercept
+ if (line_index >= max_y)
+ max_y = line_index + 1; //widen range to include this row's intercept
+ }
+ line_count = max_y - min_y + 1;
+ if (line_count <= 0)
+ return; //empty block
+ // change in occupation; later reused to hold thresholds, then dropout distances
+ std::vector<int32_t> deltas(line_count);
+ // of pixel coords
+ std::vector<int32_t> occupation(line_count);
+
+ compute_line_occupation(block, gradient, min_y, max_y, &occupation[0], &deltas[0]);
+ compute_occupation_threshold (static_cast<int32_t>(ceil (block->line_spacing *
+ (tesseract::CCStruct::kDescenderFraction +
+ tesseract::CCStruct::kAscenderFraction))),
+ static_cast<int32_t>(ceil (block->line_spacing *
+ (tesseract::CCStruct::kXHeightFraction +
+ tesseract::CCStruct::kAscenderFraction))),
+ max_y - min_y + 1, &occupation[0], &deltas[0]);
+#ifndef GRAPHICS_DISABLED
+ if (testing_on) {
+ draw_occupation(xleft, ybottom, min_y, max_y, &occupation[0], &deltas[0]);
+ }
+#endif
+ compute_dropout_distances(&occupation[0], &deltas[0], line_count);
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+ row = row_it.data ();
+ line_index = static_cast<int32_t>(floor (row->intercept ()));
+ distance = deltas[line_index - min_y]; //dropout distance at this row's scan line
+ if (find_best_dropout_row (row, distance, block->line_spacing / 2,
+ line_index, &row_it, testing_on)) {
+#ifndef GRAPHICS_DISABLED
+ if (testing_on)
+ plot_parallel_row(row, gradient, block_edge,
+ ScrollView::WHITE, rotation);
+#endif
+ blob_it.add_list_after (row_it.data ()->blob_list ()); //hand the row's blobs back before deleting it
+ delete row_it.extract (); //too far away
+ }
+ }
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+ blob_it.add_list_after (row_it.data ()->blob_list ()); //surviving rows give their blobs back too
+ }
+}
+
+
+/**
+ * @name find_best_dropout_row
+ *
+ * Delete this row if it has a neighbour with better dropout characteristics.
+ * true is returned if the row should be deleted.
+ *
+ * The sign of distance selects which neighbours are examined: negative
+ * distances look at following rows in the list (offset +1, +2, ...),
+ * non-negative distances look at preceding rows (offset -1, -2, ...).
+ * The row is reported for deletion when
+ *  - |distance| exceeds dist_limit, or
+ *  - a neighbour's scan line lies strictly between line_index and
+ *    line_index + 2 * distance, or
+ *  - a neighbour's scan line equals line_index or line_index + 2 * distance
+ *    and that neighbour's believability is at least this row's.
+ * If there is no neighbour in the chosen direction, false is returned.
+ */
+bool find_best_dropout_row( //find neighbours
+ TO_ROW* row, //row to test
+ int32_t distance, //dropout dist
+ float dist_limit, //threshold distance
+ int32_t line_index, //index of row
+ TO_ROW_IT* row_it, //current position
+ bool testing_on //correct orientation
+) {
+ int32_t next_index; // of neighbouring row
+ int32_t row_offset; //from current row
+ int32_t abs_dist; //absolute distance
+ int8_t row_inc; //increment to row_index
+ TO_ROW *next_row; //neighbouring row under test
+
+ if (testing_on)
+ tprintf ("Row at %g(%g), dropout dist=%d,",
+ row->intercept (), row->parallel_c (), distance);
+ if (distance < 0) {
+ row_inc = 1; //search forwards through the list
+ abs_dist = -distance;
+ }
+ else {
+ row_inc = -1; //search backwards through the list
+ abs_dist = distance;
+ }
+ if (abs_dist > dist_limit) {
+ if (testing_on) {
+ tprintf (" too far - deleting\n");
+ }
+ return true;
+ }
+ if ((distance < 0 && !row_it->at_last ())
+ || (distance >= 0 && !row_it->at_first ())) {
+ row_offset = row_inc;
+ do {
+ next_row = row_it->data_relative (row_offset);
+ next_index = static_cast<int32_t>(floor (next_row->intercept ()));
+ if ((distance < 0
+ && next_index < line_index
+ && next_index > line_index + distance + distance)
+ || (distance >= 0
+ && next_index > line_index
+ && next_index < line_index + distance + distance)) {
+ if (testing_on) {
+ tprintf (" nearer neighbour (%d) at %g\n",
+ line_index + distance - next_index,
+ next_row->intercept ());
+ }
+ return true; //other is nearer
+ }
+ else if (next_index == line_index
+ || next_index == line_index + distance + distance) {
+ if (row->believability () <= next_row->believability ()) {
+ if (testing_on) {
+ tprintf (" equal but more believable at %g (%g/%g)\n",
+ next_row->intercept (),
+ row->believability (),
+ next_row->believability ());
+ }
+ return true; //other is more believable
+ }
+ }
+ row_offset += row_inc;
+ }
+ while ((next_index == line_index
+ || next_index == line_index + distance + distance)
+ && row_offset < row_it->length ()); //NOTE(review): when row_inc is -1 row_offset is negative, so this bound never stops the loop - confirm data_relative accepts offsets below -1
+ if (testing_on)
+ tprintf (" keeping\n");
+ }
+ return false;
+}
+
+
+/**
+ * @name deskew_block_coords
+ *
+ * Return the bounding box that the blobs in the block's rows would have if
+ * they were deskewed by the given gradient. Nothing is actually moved:
+ * only a copy of each blob's bounding box is rotated and accumulated.
+ */
+TBOX deskew_block_coords(TO_BLOCK *block,  // block to do
+                         float gradient    // global skew
+) {
+  // Unit vector that rotates the skew away.
+  const float norm = sqrt(gradient * gradient + 1);
+  FCOORD deskew = FCOORD(1 / norm, -gradient / norm);
+
+  TBOX bounds;  // union of all the rotated blob boxes
+  TO_ROW_IT rows = block->get_rows();
+  for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
+    BLOBNBOX_IT blobs(rows.data()->blob_list());
+    for (blobs.mark_cycle_pt(); !blobs.cycled_list(); blobs.forward()) {
+      TBOX box = blobs.data()->bounding_box();
+      box.rotate(deskew);
+      bounds += box;
+    }
+  }
+  return bounds;
+}
+
+
+/**
+ * @name compute_line_occupation
+ *
+ * Project the deskewed blob boxes back on the y axis given the global
+ * skew. For each blob, its width is added to deltas at the scan line of
+ * its bottom and subtracted at the scan line of its top, so deltas is the
+ * 1st derivative and occupation is the running sum of deltas.
+ * Both arrays must hold max_y - min_y + 1 entries; scan line y is stored
+ * at index y - min_y.
+ */
+void compute_line_occupation(TO_BLOCK *block,      // block to do
+                             float gradient,       // global skew
+                             int32_t min_y,        // min coord in block
+                             int32_t max_y,        // in block
+                             int32_t *occupation,  // output projection
+                             int32_t *deltas       // derivative
+) {
+  const int32_t line_count = max_y - min_y + 1;
+  const float norm = sqrt(gradient * gradient + 1);
+  FCOORD deskew = FCOORD(1 / norm, -gradient / norm);  // inverse of skew
+
+  for (int32_t line = 0; line < line_count; ++line) {
+    deltas[line] = 0;
+  }
+
+  TO_ROW_IT rows = block->get_rows();
+  for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
+    BLOBNBOX_IT blobs(rows.data()->blob_list());
+    for (blobs.mark_cycle_pt(); !blobs.cycled_list(); blobs.forward()) {
+      TBOX box = blobs.data()->bounding_box();
+      box.rotate(deskew);
+      const int32_t width = box.right() - box.left();
+      // Occupation rises by the blob width at its bottom edge...
+      int index = box.bottom() - min_y;
+      ASSERT_HOST(index >= 0 && index < line_count);
+      deltas[index] += width;
+      // ...and falls by the same amount at its top edge.
+      index = box.top() - min_y;
+      ASSERT_HOST(index >= 0 && index < line_count);
+      deltas[index] -= width;
+    }
+  }
+
+  // Integrate the derivative to get the projection.
+  occupation[0] = deltas[0];
+  for (int32_t line = 1; line < line_count; ++line) {
+    occupation[line] = occupation[line - 1] + deltas[line];
+  }
+}
+
+
+/**
+ * compute_occupation_threshold
+ *
+ * Compute thresholds for textline or not for the occupation array.
+ *
+ * Each threshold is (sum - min_occ) / divisor + min_occ, where sum and
+ * min_occ are the sum and the minimum of occupation over a window, and
+ * divisor is ceil((low_window + high_window) / textord_occupancy_threshold).
+ * If low_window + high_window < line_count the window holds
+ * low_window + high_window entries and slides along the array: the first
+ * low_window thresholds all use the initial window, and the thresholds
+ * left over once the window reaches the end of the array repeat the last
+ * value. Otherwise a single value computed over the whole array is used
+ * for every line.
+ *
+ * NOTE(review): divisor is 0 when low_window + high_window is 0, which
+ * would make the divisions below divide by zero - confirm callers never
+ * pass two zero windows.
+ */
+void compute_occupation_threshold( //project blobs
+ int32_t low_window, //below result point
+ int32_t high_window, //above result point
+ int32_t line_count, //array sizes
+ int32_t *occupation, //input projection
+ int32_t *thresholds //output thresholds
+ ) {
+ int32_t line_index; //of thresholds line
+ int32_t low_index; //in occupation
+ int32_t high_index; //in occupation
+ int32_t sum; //sum of occupation over the current window
+ int32_t divisor; //to get thresholds
+ int32_t min_index; //of min occ
+ int32_t min_occ; //min in locality
+ int32_t test_index; //for finding min
+
+ divisor =
+ static_cast<int32_t>(ceil ((low_window + high_window) / textord_occupancy_threshold));
+ if (low_window + high_window < line_count) {
+ for (sum = 0, high_index = 0; high_index < low_window; high_index++)
+ sum += occupation[high_index];
+ for (low_index = 0; low_index < high_window; low_index++, high_index++)
+ sum += occupation[high_index]; //sum now covers the first low_window + high_window entries
+ min_occ = occupation[0];
+ min_index = 0;
+ for (test_index = 1; test_index < high_index; test_index++) {
+ if (occupation[test_index] <= min_occ) {
+ min_occ = occupation[test_index];
+ min_index = test_index; //find min in region
+ }
+ }
+ for (line_index = 0; line_index < low_window; line_index++)
+ thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
+ //slide the window one line at a time
+ for (low_index = 0; high_index < line_count; low_index++, high_index++) {
+ sum -= occupation[low_index];
+ sum += occupation[high_index];
+ if (occupation[high_index] <= min_occ) {
+ //find min in region
+ min_occ = occupation[high_index];
+ min_index = high_index;
+ }
+ //lost min from region
+ if (min_index <= low_index) {
+ min_occ = occupation[low_index + 1];
+ min_index = low_index + 1;
+ for (test_index = low_index + 2; test_index <= high_index;
+ test_index++) {
+ if (occupation[test_index] <= min_occ) {
+ min_occ = occupation[test_index];
+ //find min in region
+ min_index = test_index;
+ }
+ }
+ }
+ thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
+ }
+ }
+ else {
+ //window covers the whole array: one sum and one min for all lines
+ min_occ = occupation[0];
+ min_index = 0;
+ for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
+ if (occupation[low_index] < min_occ) {
+ min_occ = occupation[low_index];
+ min_index = low_index;
+ }
+ sum += occupation[low_index];
+ }
+ line_index = 0;
+ }
+ for (; line_index < line_count; line_index++)
+ thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
+ //same out to end
+}
+
+
+/**
+ * @name compute_dropout_distances
+ *
+ * Overwrite thresholds, in place, with distances relative to the points
+ * where the occupation crosses its threshold. Walking up the array, each
+ * entry first receives a running (decreasing) distance counted from the
+ * previous crossing, or from before the start of the array. Whenever a new
+ * crossing is found, the entries just behind it are back-filled with the
+ * positive distances 1, 2, 3, ... to that crossing, as far back as those
+ * are smaller in magnitude than the values already stored.
+ * Both arrays must hold at least line_count (>= 1) entries.
+ */
+void compute_dropout_distances(int32_t *occupation,  // input projection
+                               int32_t *thresholds,  // in: thresholds, out: distances
+                               int32_t line_count    // array sizes
+) {
+  int32_t dist = -line_count;  // running distance from the previous crossing
+  int32_t line = 0;            // next entry to overwrite
+
+  for (;;) {
+    // Run forward, stamping distances, until the next crossing.
+    int32_t old_threshold;  // threshold of the last overwritten entry
+    do {
+      --dist;
+      old_threshold = thresholds[line];
+      thresholds[line] = dist;
+      ++line;
+    } while (line < line_count &&
+             (occupation[line] < thresholds[line] ||
+              occupation[line - 1] >= old_threshold));
+
+    if (line >= line_count) {
+      break;
+    }
+
+    // Back-fill with distances to the crossing just found, stopping once
+    // the distance to the previous crossing is no larger.
+    int32_t fill = 1;
+    for (int32_t back = line - 1; fill < -dist && back >= 0; --back) {
+      thresholds[back] = fill;
+      ++fill;
+      ++dist;
+    }
+    dist = 1;
+  }
+}
+
+
+/**
+ * @name expand_rows
+ *
+ * Expand each row to the least of its allowed size and touching its
+ * neighbours. If the expansion would entirely swallow a neighbouring row
+ * then do so.
+ *
+ * The allowed limits of a row are measured from its intercept:
+ * line_size * textord_expansion_factor * kDescenderFraction below it and
+ * line_size * textord_expansion_factor * (kXHeightFraction +
+ * kAscenderFraction) above it. Rows are visited from the last in the list
+ * back to the first. A swallowed row has its blobs moved into the
+ * swallowing row's blob list and is then deleted.
+ * Returns early, leaving the rows unexpanded, if the block has no rows.
+ */
+void expand_rows( //find lines
+ ICOORD page_tr, //top right
+ TO_BLOCK* block, //block to do
+ float gradient, //gradient to fit
+ FCOORD rotation, //for drawing
+ int32_t block_edge, //edge of block
+ bool testing_on //correct orientation
+) {
+ bool swallowed_row; //eaten a neighbour
+ float y_max, y_min; //new row limits
+ float y_bottom, y_top; //allowed limits
+ TO_ROW *test_row; //next row
+ TO_ROW *row; //current row
+ //iterators
+ BLOBNBOX_IT blob_it = &block->blobs;
+ TO_ROW_IT row_it = block->get_rows ();
+
+#ifndef GRAPHICS_DISABLED
+ if (textord_show_expanded_rows && testing_on) {
+ if (to_win == nullptr)
+ create_to_win(page_tr);
+ }
+#endif
+
+ adjust_row_limits(block); //shift min,max.
+ if (textord_new_initial_xheight) {
+ if (block->get_rows ()->length () == 0)
+ return;
+ compute_row_stats(block, textord_show_expanded_rows && testing_on);
+ }
+ assign_blobs_to_rows (block, &gradient, 4, true, false, false);
+ //get real membership
+ if (block->get_rows ()->length () == 0)
+ return;
+ fit_parallel_rows(block,
+ gradient,
+ rotation,
+ block_edge,
+ textord_show_expanded_rows && testing_on);
+ if (!textord_new_initial_xheight)
+ compute_row_stats(block, textord_show_expanded_rows && testing_on);
+ row_it.move_to_last ();
+ do {
+ row = row_it.data ();
+ y_max = row->max_y (); //get current limits
+ y_min = row->min_y ();
+ y_bottom = row->intercept () - block->line_size * textord_expansion_factor *
+ tesseract::CCStruct::kDescenderFraction;
+ y_top = row->intercept () + block->line_size * textord_expansion_factor *
+ (tesseract::CCStruct::kXHeightFraction +
+ tesseract::CCStruct::kAscenderFraction);
+ if (y_min > y_bottom) { //expansion allowed
+ if (textord_show_expanded_rows && testing_on)
+ tprintf("Expanding bottom of row at %f from %f to %f\n",
+ row->intercept(), y_min, y_bottom);
+ //expandable
+ swallowed_row = true;
+ while (swallowed_row && !row_it.at_last ()) {
+ swallowed_row = false;
+ //get next one
+ test_row = row_it.data_relative (1);
+ //overlaps space
+ if (test_row->max_y () > y_bottom) {
+ if (test_row->min_y () > y_bottom) {
+ if (textord_show_expanded_rows && testing_on)
+ tprintf("Eating row below at %f\n", test_row->intercept());
+ row_it.forward (); //step onto the row being eaten
+#ifndef GRAPHICS_DISABLED
+ if (textord_show_expanded_rows && testing_on)
+ plot_parallel_row(test_row,
+ gradient,
+ block_edge,
+ ScrollView::WHITE,
+ rotation);
+#endif
+ blob_it.set_to_list (row->blob_list ());
+ blob_it.add_list_after (test_row->blob_list ());
+ //swallow complete row
+ delete row_it.extract ();
+ row_it.backward (); //return to the expanding row
+ swallowed_row = true;
+ }
+ else if (test_row->max_y () < y_min) {
+ //shorter limit
+ y_bottom = test_row->max_y ();
+ if (textord_show_expanded_rows && testing_on)
+ tprintf("Truncating limit to %f due to touching row at %f\n",
+ y_bottom, test_row->intercept());
+ }
+ else {
+ y_bottom = y_min; //can't expand it
+ if (textord_show_expanded_rows && testing_on)
+ tprintf("Not expanding limit beyond %f due to touching row at %f\n",
+ y_bottom, test_row->intercept());
+ }
+ }
+ }
+ y_min = y_bottom; //expand it
+ }
+ if (y_max < y_top) { //expansion allowed
+ if (textord_show_expanded_rows && testing_on)
+ tprintf("Expanding top of row at %f from %f to %f\n",
+ row->intercept(), y_max, y_top);
+ swallowed_row = true;
+ while (swallowed_row && !row_it.at_first ()) {
+ swallowed_row = false;
+ //get one above
+ test_row = row_it.data_relative (-1);
+ if (test_row->min_y () < y_top) {
+ if (test_row->max_y () < y_top) {
+ if (textord_show_expanded_rows && testing_on)
+ tprintf("Eating row above at %f\n", test_row->intercept());
+ row_it.backward (); //step onto the row being eaten
+ blob_it.set_to_list (row->blob_list ());
+#ifndef GRAPHICS_DISABLED
+ if (textord_show_expanded_rows && testing_on)
+ plot_parallel_row(test_row,
+ gradient,
+ block_edge,
+ ScrollView::WHITE,
+ rotation);
+#endif
+ blob_it.add_list_after (test_row->blob_list ());
+ //swallow complete row
+ delete row_it.extract ();
+ row_it.forward (); //return to the expanding row
+ swallowed_row = true;
+ }
+ else if (test_row->min_y () < y_max) {
+ //shorter limit
+ y_top = test_row->min_y ();
+ if (textord_show_expanded_rows && testing_on)
+ tprintf("Truncating limit to %f due to touching row at %f\n",
+ y_top, test_row->intercept());
+ }
+ else {
+ y_top = y_max; //can't expand it
+ if (textord_show_expanded_rows && testing_on)
+ tprintf("Not expanding limit beyond %f due to touching row at %f\n",
+ y_top, test_row->intercept());
+ }
+ }
+ }
+ y_max = y_top; //expand it
+ }
+ //new limits
+ row->set_limits (y_min, y_max);
+ row_it.backward ();
+ }
+ while (!row_it.at_last ()); //stop once the iterator has wrapped round to the last row again
+}
+
+
+/**
+ * adjust_row_limits
+ *
+ * Change the limits of rows to suit the default fractions: the current
+ * height of each row (max_y - min_y) is redistributed about the row's
+ * intercept in the proportions kDescenderFraction below it and
+ * kXHeightFraction + kAscenderFraction above it. The merged flag of every
+ * row is cleared.
+ */
+void adjust_row_limits(TO_BLOCK *block  // block to do
+) {
+  if (textord_show_expanded_rows) {
+    tprintf("Adjusting row limits for block(%d,%d)\n",
+            block->block->pdblk.bounding_box().left(),
+            block->block->pdblk.bounding_box().top());
+  }
+
+  TO_ROW_IT rows = block->get_rows();
+  for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
+    TO_ROW *current = rows.data();
+    float height = current->max_y() - current->min_y();
+    if (textord_show_expanded_rows) {
+      tprintf("Row at %f has min %f, max %f, size %f\n",
+              current->intercept(), current->min_y(), current->max_y(),
+              height);
+    }
+    // Normalise the height so the three fractions can be applied directly.
+    height /= tesseract::CCStruct::kXHeightFraction +
+              tesseract::CCStruct::kAscenderFraction +
+              tesseract::CCStruct::kDescenderFraction;
+    const float above = height * (tesseract::CCStruct::kXHeightFraction +
+                                  tesseract::CCStruct::kAscenderFraction);
+    const float below = -height * tesseract::CCStruct::kDescenderFraction;
+    current->set_limits(current->intercept() + below,
+                        current->intercept() + above);
+    current->merged = false;
+  }
+}
+
+
+/**
+ * @name compute_row_stats
+ *
+ * Compute the linespacing and offset.
+ *
+ * Walking the rows from the last in the list to the first, each row except
+ * the final one visited gets its spacing set to the difference between the
+ * intercept of the row visited after it and its own intercept. The row
+ * with the median spacing becomes block->key_row. If there are more than
+ * two spacings and their inter quartile range is below
+ * textord_linespace_iqrlimit times the median spacing, block->line_size,
+ * block->line_spacing and block->max_blob_size are updated from the median
+ * spacing. block->baseline_offset is always updated.
+ * The block must have at least one row: the list is read before any check.
+ */
+void compute_row_stats( //find lines
+ TO_BLOCK* block, //block to do
+ bool testing_on //correct orientation
+) {
+ int32_t row_index; //of median
+ TO_ROW *row; //current row
+ TO_ROW *prev_row; //previous row
+ float iqr; //inter quartile range
+ TO_ROW_IT row_it = block->get_rows ();
+ //number of rows
+ int16_t rowcount = row_it.length ();
+ // for choose nth
+ std::vector<TO_ROW*> rows(rowcount);
+ rowcount = 0; //from here on counts the rows that have a spacing
+ prev_row = nullptr;
+ row_it.move_to_last (); //start at bottom
+ do {
+ row = row_it.data ();
+ if (prev_row != nullptr) {
+ rows[rowcount++] = prev_row;
+ prev_row->spacing = row->intercept () - prev_row->intercept ();
+ if (testing_on)
+ tprintf ("Row at %g yields spacing of %g\n",
+ row->intercept (), prev_row->spacing);
+ }
+ prev_row = row;
+ row_it.backward ();
+ }
+ while (!row_it.at_last ());
+ block->key_row = prev_row; //default: the final row visited (first in the list)
+ block->baseline_offset =
+ fmod (prev_row->parallel_c (), block->line_spacing);
+ if (testing_on)
+ tprintf ("Blob based spacing=(%g,%g), offset=%g",
+ block->line_size, block->line_spacing, block->baseline_offset);
+ if (rowcount > 0) {
+ row_index = choose_nth_item(rowcount * 3 / 4, &rows[0], rowcount,
+ sizeof (TO_ROW *), row_spacing_order); //upper quartile spacing
+ iqr = rows[row_index]->spacing;
+ row_index = choose_nth_item(rowcount / 4, &rows[0], rowcount,
+ sizeof (TO_ROW *), row_spacing_order); //lower quartile spacing
+ iqr -= rows[row_index]->spacing;
+ row_index = choose_nth_item(rowcount / 2, &rows[0], rowcount,
+ sizeof (TO_ROW *), row_spacing_order); //median spacing
+ block->key_row = rows[row_index];
+ if (testing_on)
+ tprintf (" row based=%g(%g)", rows[row_index]->spacing, iqr);
+ if (rowcount > 2
+ && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) {
+ if (!textord_new_initial_xheight) {
+ if (rows[row_index]->spacing < block->line_spacing
+ && rows[row_index]->spacing > block->line_size)
+ //within range
+ block->line_size = rows[row_index]->spacing;
+ //spacing=size
+ else if (rows[row_index]->spacing > block->line_spacing)
+ block->line_size = block->line_spacing;
+ //too big so use max
+ }
+ else {
+ if (rows[row_index]->spacing < block->line_spacing)
+ block->line_size = rows[row_index]->spacing;
+ else
+ block->line_size = block->line_spacing;
+ //too big so use max
+ }
+ if (block->line_size < textord_min_xheight)
+ block->line_size = (float) textord_min_xheight; //never go below the minimum xheight
+ block->line_spacing = rows[row_index]->spacing;
+ block->max_blob_size =
+ block->line_spacing * textord_excess_blobsize;
+ }
+ block->baseline_offset = fmod (rows[row_index]->intercept (),
+ block->line_spacing);
+ }
+ if (testing_on)
+ tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n",
+ block->line_size, block->line_spacing, block->baseline_offset);
+}
+
+
+/**
+ * @name compute_block_xheight
+ *
+ * Compute the xheight of the individual rows, then correlate them
+ * and interpret ascenderless lines, correcting xheights.
+ *
+ * First we compute our best guess of the x-height of each row independently
+ * with compute_row_xheight(), which looks for a pair of commonly occurring
+ * heights that could be x-height and ascender height. This function also
+ * attempts to find descenders of lowercase letters (i.e. not the small
+ * descenders that could appear in upper case letters as Q,J).
+ *
+ * After this computation each row falls into one of the following categories:
+ * ROW_ASCENDERS_FOUND: we found xheight and ascender modes, so this must be
+ * a regular row; we'll use its xheight to compute
+ * xheight and ascrise estimates for the block
+ * ROW_DESCENDERS_FOUND: no ascenders, so we do not have a high confidence in
+ * the xheight of this row (don't use it for estimating
+ * block xheight), but this row can't contain all caps
+ * ROW_UNKNOWN: a row with no ascenders/descenders, could be all lowercase
+ * (or mostly lowercase for fonts with very few ascenders),
+ * all upper case or small caps
+ * ROW_INVALID: no meaningful xheight could be found for this row
+ *
+ * We then run correct_row_xheight() and use the computed xheight and ascrise
+ * averages to correct xheight values of the rows in ROW_DESCENDERS_FOUND,
+ * ROW_UNKNOWN and ROW_INVALID categories.
+ *
+ * @param block block to process: its row list and block->line_size are read
+ * and block->xheight is written. Nothing happens if it has no rows.
+ * @param gradient passed through to compute_row_xheight() and fill_heights().
+ */
+void Textord::compute_block_xheight(TO_BLOCK *block, float gradient) {
+ TO_ROW *row; // current row
+ // Ascender and descender sizes expressed as multiples of the xheight; used
+ // both to size the histograms and as fallbacks when nothing was measured.
+ float asc_frac_xheight = CCStruct::kAscenderFraction /
+ CCStruct::kXHeightFraction;
+ float desc_frac_xheight = CCStruct::kDescenderFraction /
+ CCStruct::kXHeightFraction;
+ int32_t min_height, max_height; // limits on xheight
+ TO_ROW_IT row_it = block->get_rows();
+ if (row_it.empty()) return; // no rows
+
+ // Compute the best guess of xheight of each row individually.
+ // Use xheight and ascrise values of the rows where ascenders were found.
+ get_min_max_xheight(block->line_size, &min_height, &max_height);
+ // One set of histograms per row category (asc = ascenders found,
+ // desc = descenders found, cap = rows of unknown category).
+ STATS row_asc_xheights(min_height, max_height + 1);
+ STATS row_asc_ascrise(static_cast<int>(min_height * asc_frac_xheight),
+ static_cast<int>(max_height * asc_frac_xheight) + 1);
+ int min_desc_height = static_cast<int>(min_height * desc_frac_xheight);
+ int max_desc_height = static_cast<int>(max_height * desc_frac_xheight);
+ STATS row_asc_descdrop(min_desc_height, max_desc_height + 1);
+ STATS row_desc_xheights(min_height, max_height + 1);
+ STATS row_desc_descdrop(min_desc_height, max_desc_height + 1);
+ STATS row_cap_xheights(min_height, max_height + 1);
+ STATS row_cap_floating_xheights(min_height, max_height + 1);
+ for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+ row = row_it.data();
+ // Compute the xheight of this row if it has not been computed before.
+ if (row->xheight <= 0.0) {
+ compute_row_xheight(row, block->block->classify_rotation(),
+ gradient, block->line_size);
+ }
+ ROW_CATEGORY row_category = get_row_category(row);
+ // Each row votes with a weight of xheight_evidence. descdrop is added
+ // negated here and negated back when the medians are read below.
+ if (row_category == ROW_ASCENDERS_FOUND) {
+ row_asc_xheights.add(static_cast<int32_t>(row->xheight),
+ row->xheight_evidence);
+ row_asc_ascrise.add(static_cast<int32_t>(row->ascrise),
+ row->xheight_evidence);
+ row_asc_descdrop.add(static_cast<int32_t>(-row->descdrop),
+ row->xheight_evidence);
+ } else if (row_category == ROW_DESCENDERS_FOUND) {
+ row_desc_xheights.add(static_cast<int32_t>(row->xheight),
+ row->xheight_evidence);
+ row_desc_descdrop.add(static_cast<int32_t>(-row->descdrop),
+ row->xheight_evidence);
+ } else if (row_category == ROW_UNKNOWN) {
+ // For rows of unknown category the individual blob heights are pooled
+ // across rows instead of using the per-row estimate.
+ fill_heights(row, gradient, min_height, max_height,
+ &row_cap_xheights, &row_cap_floating_xheights);
+ }
+ }
+
+ float xheight = 0.0;
+ float ascrise = 0.0;
+ float descdrop = 0.0;
+ // Compute our best guess of xheight of this block.
+ if (row_asc_xheights.get_total() > 0) {
+ // Determine xheight from rows where ascenders were found.
+ xheight = row_asc_xheights.median();
+ ascrise = row_asc_ascrise.median();
+ descdrop = -row_asc_descdrop.median();
+ } else if (row_desc_xheights.get_total() > 0) {
+ // Determine xheight from rows where descenders were found.
+ xheight = row_desc_xheights.median();
+ descdrop = -row_desc_descdrop.median();
+ } else if (row_cap_xheights.get_total() > 0) {
+ // All the rows in the block were (a/de)scenderless.
+ // Try to search for two modes in row_cap_xheights that could
+ // be the xheight and the capheight (e.g. some of the rows
+ // were lowercase, but did not have enough (a/de)scenders).
+ // If two such modes cannot be found, this block is most
+ // likely all caps (or all small caps, in which case the code
+ // still works as intended).
+ compute_xheight_from_modes(&row_cap_xheights, &row_cap_floating_xheights,
+ textord_single_height_mode &&
+ block->block->classify_rotation().y() == 0.0,
+ min_height, max_height, &(xheight), &(ascrise));
+ if (ascrise == 0) { // assume only caps in the whole block
+ xheight = row_cap_xheights.median() * CCStruct::kXHeightCapRatio;
+ }
+ } else { // default block sizes
+ xheight = block->line_size * CCStruct::kXHeightFraction;
+ }
+ // Correct xheight, ascrise and descdrop if necessary.
+ // If the xheight had to be clamped, ascrise/descdrop measured for the old
+ // value are discarded and re-derived from the default fractions.
+ bool corrected_xheight = false;
+ if (xheight < textord_min_xheight) {
+ xheight = static_cast<float>(textord_min_xheight);
+ corrected_xheight = true;
+ }
+ if (corrected_xheight || ascrise <= 0.0) {
+ ascrise = xheight * asc_frac_xheight;
+ }
+ if (corrected_xheight || descdrop >= 0.0) {
+ descdrop = -(xheight * desc_frac_xheight);
+ }
+ block->xheight = xheight;
+
+ if (textord_debug_xheights) {
+ tprintf("Block average xheight=%.4f, ascrise=%.4f, descdrop=%.4f\n",
+ xheight, ascrise, descdrop);
+ }
+ // Correct xheight, ascrise, descdrop of rows based on block averages.
+ for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+ correct_row_xheight(row_it.data(), xheight, ascrise, descdrop);
+ }
+}
+
+/**
+ * @name compute_row_xheight
+ *
+ * Estimate the xheight of a single row, together with its ascender rise
+ * and descender drop. row->xheight_evidence receives the value returned by
+ * compute_xheight_from_modes() (the blob count backing the chosen xheight).
+ *
+ * @param row row to measure; xheight, ascrise, descdrop and
+ * xheight_evidence are overwritten
+ * @param rotation only rotation.y() is inspected: single-height mode is
+ * considered only when it is 0
+ * @param gradient global skew, forwarded to the blob measuring helpers
+ * @param block_line_size line size used to derive the legal xheight range
+ */
+void Textord::compute_row_xheight(TO_ROW *row,          // row to do
+                                  const FCOORD& rotation,
+                                  float gradient,       // global skew
+                                  int block_line_size) {
+  // Repeated characters must be flagged first: the flags are consulted while
+  // gathering height statistics and again later by make_words.
+  if (!row->rep_chars_marked()) {
+    mark_repeated_chars(row);
+  }
+
+  int lowest, highest;
+  get_min_max_xheight(block_line_size, &lowest, &highest);
+
+  // Histogram of all candidate heights and of the "floating" subset.
+  STATS heights(lowest, highest + 1);
+  STATS floating_heights(lowest, highest + 1);
+  fill_heights(row, gradient, lowest, highest, &heights, &floating_heights);
+
+  // Both outputs must start at zero for compute_xheight_from_modes().
+  row->ascrise = 0.0f;
+  row->xheight = 0.0f;
+  const bool top_mode_only =
+      textord_single_height_mode && rotation.y() == 0.0;
+  row->xheight_evidence = compute_xheight_from_modes(
+      &heights, &floating_heights, top_mode_only, lowest, highest,
+      &(row->xheight), &(row->ascrise));
+
+  // A descender drop is only measured when an xheight was actually found.
+  row->descdrop = 0.0f;
+  if (!(row->xheight > 0.0)) {
+    return;
+  }
+  row->descdrop = static_cast<float>(
+      compute_row_descdrop(row, gradient, row->xheight_evidence, &heights));
+}
+
+/**
+ * @name fill_heights
+ *
+ * Fill the given heights with heights of the blobs that are legal
+ * candidates for estimating xheight.
+ *
+ * For every blob that is not joined to its predecessor, the distance from
+ * the baseline to the top of its bounding box is computed. If that distance
+ * lies in [min_height, max_height] it is rounded and added to *heights, and
+ * additionally to *floating_heights when the blob's own height is less than
+ * textord_min_blob_height_fraction of that distance.
+ * When the row has marked repeated-character sets, only the first blob of
+ * each run sharing a repeated_set id is considered.
+ *
+ * @param row row whose blob list is scanned
+ * @param gradient skew used for the baseline when textord_fix_xheight_bug
+ * is off
+ * @param min_height smallest accepted distance from the baseline
+ * @param max_height largest accepted distance from the baseline
+ * @param heights receives one count per accepted blob
+ * @param floating_heights receives one count per accepted "floating" blob
+ */
+void fill_heights(TO_ROW *row, float gradient, int min_height,
+ int max_height, STATS *heights, STATS *floating_heights) {
+ float xcentre; // centre of blob
+ float top; // top y coord of blob
+ float height; // height of blob
+ BLOBNBOX *blob; // current blob
+ int repeated_set;
+ BLOBNBOX_IT blob_it = row->blob_list();
+ if (blob_it.empty()) return; // no blobs in this row
+ bool has_rep_chars =
+ row->rep_chars_marked() && row->num_repeated_sets() > 0;
+ // The loop ends when the iterator is back at the first element.
+ do {
+ blob = blob_it.data();
+ if (!blob->joined_to_prev()) {
+ xcentre = (blob->bounding_box().left() +
+ blob->bounding_box().right()) / 2.0f;
+ top = blob->bounding_box().top();
+ height = blob->bounding_box().height();
+ // Make top relative to the baseline: either the row's baseline
+ // evaluated at xcentre, or the parallel straight line fit.
+ if (textord_fix_xheight_bug)
+ top -= row->baseline.y(xcentre);
+ else
+ top -= gradient * xcentre + row->parallel_c();
+ if (top >= min_height && top <= max_height) {
+ heights->add(static_cast<int32_t>(floor(top + 0.5)), 1);
+ // A blob that is short compared to how high its top sits is counted
+ // as "floating".
+ if (height / top < textord_min_blob_height_fraction) {
+ floating_heights->add(static_cast<int32_t>(floor(top + 0.5)), 1);
+ }
+ }
+ }
+ // Skip repeated chars, since they are likely to skew the height stats.
+ if (has_rep_chars && blob->repeated_set() != 0) {
+ repeated_set = blob->repeated_set();
+ blob_it.forward();
+ // Step past the remaining blobs of the same set, stopping if the
+ // iterator wraps around to the first blob.
+ while (!blob_it.at_first() &&
+ blob_it.data()->repeated_set() == repeated_set) {
+ blob_it.forward();
+ if (textord_debug_xheights)
+ tprintf("Skipping repeated char when computing xheight\n");
+ }
+ } else {
+ blob_it.forward();
+ }
+ } while (!blob_it.at_first());
+}
+
+/**
+ * @name compute_xheight_from_modes
+ *
+ * Given a STATS object heights, looks for two most frequently occurring
+ * heights that look like xheight and xheight + ascrise. If found, sets
+ * the values of *xheight and *ascrise accordingly, otherwise sets xheight
+ * to any most frequently occurring height and sets *ascrise to 0.
+ * Returns the number of times xheight occurred in heights.
+ * For each mode that is considered for being an xheight the count of
+ * floating blobs (stored in floating_heights) is subtracted from the
+ * total count of the blobs of this height. This is done because blobs
+ * that sit far above the baseline could represent valid ascenders, but
+ * it is highly unlikely that such a character's height will be an xheight
+ * (e.g. -, ', =, ^, `, ", ', etc)
+ * If cap_only, then force finding of only the top mode.
+ *
+ * @param heights histogram of candidate heights; temporarily
+ * modified but restored before returning
+ * @param floating_heights histogram of the floating subset of heights
+ * @param cap_only consider at most one mode
+ * @param min_height lowest height bucket (inclusive)
+ * @param max_height highest height bucket (inclusive)
+ * @param xheight in/out: MUST be 0 on entry, because a value of 0
+ * after the pair search is what selects the
+ * single-mode fallback; receives the xheight
+ * @param ascrise out: ascender rise, or 0 in single-mode
+ * @return count backing the chosen xheight, or 0 if heights has no mode
+ * (in which case *xheight and *ascrise are left untouched)
+ */
+int compute_xheight_from_modes(
+    STATS *heights, STATS *floating_heights, bool cap_only, int min_height,
+    int max_height, float *xheight, float *ascrise) {
+  int blob_index = heights->mode();                   // find mode
+  int blob_count = heights->pile_count(blob_index);   // get count of mode
+  if (textord_debug_xheights) {
+    tprintf("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n",
+            min_height, max_height, blob_index, blob_count,
+            heights->get_total());
+    heights->print();
+    floating_heights->print();
+  }
+  if (blob_count == 0) return 0;
+  int modes[MAX_HEIGHT_MODES];  // biggest piles
+  bool in_best_pile = false;
+  int prev_size = -INT32_MAX;
+  int best_count = 0;
+  int mode_count = compute_height_modes(heights, min_height, max_height,
+                                        modes, MAX_HEIGHT_MODES);
+  if (cap_only && mode_count > 1)
+    mode_count = 1;
+  int x;
+  if (textord_debug_xheights) {
+    tprintf("found %d modes: ", mode_count);
+    for (x = 0; x < mode_count; x++) tprintf("%d ", modes[x]);
+    tprintf("\n");
+  }
+
+  // Try every mode as the xheight and every larger mode as the matching
+  // ascender height; keep the best-supported pair.
+  for (x = 0; x < mode_count - 1; x++) {
+    if (modes[x] != prev_size + 1)
+      in_best_pile = false;  // had empty height
+    // Support for modes[x] as an xheight excludes floating blobs.
+    int modes_x_count = heights->pile_count(modes[x]) -
+                        floating_heights->pile_count(modes[x]);
+    if ((modes_x_count >= blob_count * textord_xheight_mode_fraction) &&
+        (in_best_pile || modes_x_count > best_count)) {
+      for (int asc = x + 1; asc < mode_count; asc++) {
+        float ratio =
+            static_cast<float>(modes[asc]) / static_cast<float>(modes[x]);
+        if (textord_ascx_ratio_min < ratio &&
+            ratio < textord_ascx_ratio_max &&
+            (heights->pile_count(modes[asc]) >=
+             blob_count * textord_ascheight_mode_fraction)) {
+          if (modes_x_count > best_count) {
+            in_best_pile = true;
+            best_count = modes_x_count;
+          }
+          if (textord_debug_xheights) {
+            tprintf("X=%d, asc=%d, count=%d, ratio=%g\n",
+                    modes[x], modes[asc]-modes[x], modes_x_count, ratio);
+          }
+          prev_size = modes[x];
+          *xheight = static_cast<float>(modes[x]);
+          *ascrise = static_cast<float>(modes[asc] - modes[x]);
+        }
+      }
+    }
+  }
+  if (*xheight == 0) {  // single mode
+    // Remove counts of the "floating" blobs (the ones whose height is too
+    // small in relation to the top end of their bounding box) from heights
+    // before computing the single-mode xheight.
+    // Restore the counts in heights after the mode is found, since
+    // floating blobs might be useful for determining potential ascenders
+    // in compute_row_descdrop().
+    if (floating_heights->get_total() > 0) {
+      // max_height is itself a valid bucket (the histograms span
+      // min_height..max_height inclusive), so it must be included here;
+      // otherwise floating blobs of exactly max_height could still win
+      // the single-mode vote.
+      for (x = min_height; x <= max_height; ++x) {
+        heights->add(x, -(floating_heights->pile_count(x)));
+      }
+      blob_index = heights->mode();  // find the modified mode
+      for (x = min_height; x <= max_height; ++x) {
+        heights->add(x, floating_heights->pile_count(x));
+      }
+    }
+    *xheight = static_cast<float>(blob_index);
+    *ascrise = 0.0f;
+    best_count = heights->pile_count(blob_index);
+    if (textord_debug_xheights)
+      tprintf("Single mode xheight set to %g\n", *xheight);
+  } else if (textord_debug_xheights) {
+    tprintf("Multi-mode xheight set to %g, asc=%g\n", *xheight, *ascrise);
+  }
+  return best_count;
+}
+
+/**
+ * @name compute_row_descdrop
+ *
+ * Estimate the descender drop of a row from "significant" descenders,
+ * i.e. those deep enough to belong to lowercase letters rather than to
+ * upper case letters such as Q or J.
+ * The most common descender depth is accepted only when its blob count,
+ * added to the number of potential ascenders in asc_heights, reaches the
+ * expected fraction of xheight_blob_count.
+ *
+ * @param row row to examine; row->xheight must already be set
+ * @param gradient skew of the parallel baseline fit
+ * @param xheight_blob_count number of blobs backing the row's xheight
+ * @param asc_heights histogram of blob heights above the baseline
+ * @return the descender drop as a non-positive value, 0 if none was accepted
+ */
+int32_t compute_row_descdrop(TO_ROW *row, float gradient,
+                             int xheight_blob_count, STATS *asc_heights) {
+  // Clamp the ascender search range to heights whose ratio to the xheight
+  // lies between textord_ascx_ratio_min and textord_ascx_ratio_max.
+  int asc_lo = asc_heights->min_bucket();
+  if ((asc_lo / row->xheight) < textord_ascx_ratio_min) {
+    asc_lo = static_cast<int>(
+        floor(row->xheight * textord_ascx_ratio_min + 0.5));
+  }
+  int asc_hi = asc_heights->max_bucket();
+  if ((asc_hi / row->xheight) > textord_ascx_ratio_max) {
+    asc_hi = static_cast<int>(floor(row->xheight * textord_ascx_ratio_max));
+  }
+  int num_potential_asc = 0;
+  for (int bucket = asc_lo; bucket <= asc_hi; ++bucket) {
+    num_potential_asc += asc_heights->pile_count(bucket);
+  }
+
+  // Histogram of how far blobs reach below the baseline, restricted to the
+  // plausible descender range.
+  auto shallowest =
+      static_cast<int32_t>(floor(row->xheight * textord_descx_ratio_min + 0.5));
+  auto deepest =
+      static_cast<int32_t>(floor(row->xheight * textord_descx_ratio_max));
+  STATS depths(shallowest, deepest + 1);
+  BLOBNBOX_IT blob_it = row->blob_list();
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX *blob = blob_it.data();
+    if (blob->joined_to_prev()) {
+      continue;
+    }
+    float xcentre = (blob->bounding_box().left() +
+                     blob->bounding_box().right()) / 2.0f;
+    float depth = (gradient * xcentre + row->parallel_c() -
+                   blob->bounding_box().bottom());
+    if (depth >= shallowest && depth <= deepest) {
+      depths.add(static_cast<int>(floor(depth + 0.5)), 1);
+    }
+  }
+
+  int best_depth = depths.mode();                     // most common depth
+  int desc_count = depths.pile_count(best_depth);     // its support
+  float total_fraction =
+      (textord_descheight_mode_fraction + textord_ascheight_mode_fraction);
+  if (static_cast<float>(desc_count + num_potential_asc) <
+      xheight_blob_count * total_fraction) {
+    desc_count = 0;  // not enough evidence
+  }
+  int descdrop;
+  if (desc_count > 0) {
+    descdrop = -best_depth;
+  } else {
+    descdrop = 0;
+  }
+  if (textord_debug_xheights) {
+    tprintf("Descdrop: %d (potential ascenders %d, descenders %d)\n",
+            descdrop, num_potential_asc, desc_count);
+    depths.print();
+  }
+  return descdrop;
+}
+
+
+/**
+ * @name compute_height_modes
+ *
+ * Find the top maxmodes values in the input array and put their
+ * indices in the output in the order in which they occurred.
+ *
+ * Heights from min_height to max_height (inclusive) are scanned in
+ * increasing order. Non-empty piles are appended to modes until it holds
+ * maxmodes entries. After that, a pile whose count is >= the smallest
+ * count currently kept evicts that smallest entry: the entries after it
+ * are shifted down one slot and the new height is appended at the end.
+ *
+ * @return the number of entries written to modes (at most maxmodes).
+ */
+int32_t compute_height_modes(STATS *heights, // stats to search
+ int32_t min_height, // bottom of range
+ int32_t max_height, // top of range
+ int32_t *modes, // output array
+ int32_t maxmodes) { // size of modes
+ int32_t pile_count; // count in the pile being examined
+ int32_t src_count; // number of heights to scan
+ int32_t src_index; // current offset from min_height
+ int32_t least_count; // smallest count among the kept modes
+ int32_t least_index; // position in modes of that smallest entry
+ int32_t dest_count; // number of entries in modes
+
+ src_count = max_height + 1 - min_height;
+ dest_count = 0;
+ least_count = INT32_MAX;
+ least_index = -1;
+ for (src_index = 0; src_index < src_count; src_index++) {
+ pile_count = heights->pile_count(min_height + src_index);
+ if (pile_count > 0) {
+ if (dest_count < maxmodes) {
+ // Still room: append, tracking the smallest pile seen so far.
+ if (pile_count < least_count) {
+ // find smallest in array
+ least_count = pile_count;
+ least_index = dest_count;
+ }
+ modes[dest_count++] = min_height + src_index;
+ } else if (pile_count >= least_count) {
+ // Full: drop the entry at least_index by closing the gap.
+ while (least_index < maxmodes - 1) {
+ modes[least_index] = modes[least_index + 1];
+ // shuffle up
+ least_index++;
+ }
+ // new one on end
+ modes[maxmodes - 1] = min_height + src_index;
+ if (pile_count == least_count) {
+ // new smallest
+ least_index = maxmodes - 1;
+ } else {
+ // The new entry is bigger than the one dropped, so rescan for the
+ // smallest. dest_count doubles as the loop variable here and
+ // finishes at maxmodes, which is the correct entry count again.
+ least_count = heights->pile_count(modes[0]);
+ least_index = 0;
+ for (dest_count = 1; dest_count < maxmodes; dest_count++) {
+ pile_count = heights->pile_count(modes[dest_count]);
+ if (pile_count < least_count) {
+ // find smallest
+ least_count = pile_count;
+ least_index = dest_count;
+ }
+ }
+ }
+ }
+ }
+ }
+ return dest_count;
+}
+
+
+/**
+ * @name correct_row_xheight
+ *
+ * Reconcile a row's xheight, ascrise and descdrop with the block-level
+ * estimates, depending on the row's category and on whether its own xheight
+ * is close to the block xheight or to the block cap height
+ * (xheight + ascrise).
+ *
+ * @param row row to adjust in place
+ * @param xheight block xheight estimate (used as a divisor)
+ * @param ascrise block ascender rise estimate
+ * @param descdrop block descender drop estimate
+ */
+void correct_row_xheight(TO_ROW *row, float xheight,
+                         float ascrise, float descdrop) {
+  const ROW_CATEGORY category = get_row_category(row);
+  if (textord_debug_xheights) {
+    tprintf("correcting row xheight: row->xheight %.4f"
+            ", row->acrise %.4f row->descdrop %.4f\n",
+            row->xheight, row->ascrise, row->descdrop);
+  }
+  const bool near_xheight =
+      within_error_margin(row->xheight, xheight, textord_xheight_error_margin);
+  const bool near_capheight =
+      within_error_margin(row->xheight, xheight + ascrise,
+                          textord_xheight_error_margin);
+
+  // Overwrite the row's measurements with the block estimates.
+  auto adopt_block_values = [&]() {
+    row->xheight = xheight;
+    row->ascrise = ascrise;
+    row->descdrop = descdrop;
+  };
+  // Same, announced as "average" in the debug output.
+  auto use_average = [&]() {
+    if (textord_debug_xheights) tprintf("using average xheight\n");
+    adopt_block_values();
+  };
+
+  if (category == ROW_ASCENDERS_FOUND) {
+    // Trusted row: only fill in a missing descender drop, scaled to the row.
+    if (row->descdrop >= 0.0) {
+      row->descdrop = row->xheight * (descdrop / xheight);
+    }
+  } else if (category == ROW_INVALID) {
+    // The xheight of the row could not be determined at all.
+    use_average();
+  } else if (category == ROW_DESCENDERS_FOUND) {
+    if (near_xheight || near_capheight) {
+      // Row with descenders (e.g. "many groups", "ISBN 12345 p.3") whose
+      // xheight is close to either the cap height or the average xheight.
+      use_average();
+    } else {
+      // Assume a mostly lowercase row whose xheight is right (this cannot
+      // be told apart from a row with descenders whose most common height
+      // is the cap height).
+      if (textord_debug_xheights) tprintf("lowercase, corrected ascrise\n");
+      row->ascrise = row->xheight * (ascrise / xheight);
+    }
+  } else if (category == ROW_UNKNOWN) {
+    if (near_xheight) {
+      // No ascenders or descenders, but the xheight matches the block
+      // average (e.g. a row with "www.mmm.com").
+      use_average();
+    } else {
+      // Otherwise treat the row as all caps or small caps.
+      row->all_caps = true;
+      if (near_capheight) {  // regular all caps
+        if (textord_debug_xheights) tprintf("all caps\n");
+        adopt_block_values();
+      } else {  // small caps or caps with an odd xheight
+        if (textord_debug_xheights) {
+          if (row->xheight < xheight + ascrise && row->xheight > xheight) {
+            tprintf("small caps\n");
+          } else {
+            tprintf("all caps with irregular xheight\n");
+          }
+        }
+        // Split the measured height into xheight and ascrise using the
+        // block's proportions.
+        row->ascrise = row->xheight * (ascrise / (xheight + ascrise));
+        row->xheight -= row->ascrise;
+        row->descdrop = row->xheight * (descdrop / xheight);
+      }
+    }
+  }
+  if (textord_debug_xheights) {
+    tprintf("corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop"
+            " = %.4f\n", row->xheight, row->ascrise, row->descdrop);
+  }
+}
+
+// Returns how many blobs in the list are at least min_height tall and have
+// a bounding box for which box.major_overlap() holds.
+static int CountOverlaps(const TBOX& box, int min_height,
+                         BLOBNBOX_LIST* blobs) {
+  int count = 0;
+  BLOBNBOX_IT it(blobs);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    const TBOX &candidate = it.data()->bounding_box();
+    if (candidate.height() < min_height) {
+      continue;  // too short to count
+    }
+    if (!box.major_overlap(candidate)) {
+      continue;
+    }
+    ++count;
+  }
+  return count;
+}
+
+/**
+ * @name separate_underlines
+ *
+ * Test wide objects for being potential underlines. If they are then
+ * put them in a separate list in the block.
+ *
+ * Only blobs wider than block->line_size * textord_underline_width are
+ * examined. Such a blob is moved to block->underlines when test_underline()
+ * accepts its rotated outline; otherwise it is moved to block->large_blobs
+ * when it overlaps more than textord_max_blob_overlaps blobs of its row.
+ *
+ * @param block block whose rows are scanned and whose underlines and
+ * large_blobs lists receive the extracted blobs
+ * @param gradient skew gradient, folded into the blob rotation
+ * @param rotation rotation whose inverse is applied to each blob before
+ * testing
+ * @param testing_on enables debug output together with
+ * textord_show_final_rows
+ */
+void separate_underlines(TO_BLOCK* block,    // block to do
+                         float gradient,     // skew angle
+                         FCOORD rotation,    // inverse landscape
+                         bool testing_on) {  // correct orientation
+  BLOBNBOX *blob;         // current blob
+  C_BLOB *rotated_blob;   // rotated blob
+  TO_ROW *row;            // current row
+  float length;           // of g_vec
+  TBOX blob_box;
+  FCOORD blob_rotation;   // inverse of rotation
+  FCOORD g_vec;           // skew rotation
+  BLOBNBOX_IT blob_it;    // iterator
+  BLOBNBOX_IT under_it = &block->underlines;
+  BLOBNBOX_IT large_it = &block->large_blobs;
+  TO_ROW_IT row_it = block->get_rows();
+  int min_blob_height = static_cast<int>(textord_min_blob_height_fraction *
+                                         block->line_size + 0.5);
+  const bool show_debug = testing_on && textord_show_final_rows;
+
+  // Unit vector along the inverse of the skew.
+  length = sqrt(1 + gradient * gradient);
+  g_vec = FCOORD(1 / length, -gradient / length);
+  blob_rotation = FCOORD(rotation.x(), -rotation.y());
+  blob_rotation.rotate(g_vec);  // undoing everything
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    row = row_it.data();
+    // get blobs
+    blob_it.set_to_list(row->blob_list());
+    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
+         blob_it.forward()) {
+      blob = blob_it.data();
+      blob_box = blob->bounding_box();
+      if (blob_box.width() > block->line_size * textord_underline_width) {
+        ASSERT_HOST(blob->cblob() != nullptr);
+        rotated_blob = crotate_cblob (blob->cblob(),
+                                      blob_rotation);
+        if (test_underline(
+                show_debug,
+                rotated_blob, static_cast<int16_t>(row->intercept()),
+                static_cast<int16_t>(
+                    block->line_size *
+                    (tesseract::CCStruct::kXHeightFraction +
+                     tesseract::CCStruct::kAscenderFraction / 2.0f)))) {
+          under_it.add_after_then_move(blob_it.extract());
+          if (show_debug) {
+            tprintf("Underlined blob at:");
+            rotated_blob->bounding_box().print();
+            tprintf("Was:");
+            blob_box.print();
+          }
+        } else {
+          // Count once, while the blob is still in the row list, so the
+          // debug message reports exactly the value that was tested.
+          // (Recounting after extract() would scan a list that no longer
+          // contains the blob and could print a different number.)
+          const int overlap_count =
+              CountOverlaps(blob_box, min_blob_height, row->blob_list());
+          if (overlap_count > textord_max_blob_overlaps) {
+            large_it.add_after_then_move(blob_it.extract());
+            if (show_debug) {
+              tprintf("Large blob overlaps %d blobs at:", overlap_count);
+              blob_box.print();
+            }
+          }
+        }
+        delete rotated_blob;
+      }
+    }
+  }
+}
+
+
+/**
+ * @name pre_associate_blobs
+ *
+ * Associate overlapping blobs and fake chop wide blobs.
+ *
+ * Within each row, a blob is merged with the following blobs for as long
+ * as the accumulated bounding box has a major x-overlap with the next one;
+ * the merged blob is then handed to BLOBNBOX::chop() with a width limit of
+ * block->line_size * kXHeightFraction * textord_chop_width.
+ * When graphics are enabled and both testing_on and
+ * textord_show_final_blobs are set, the resulting boxes are drawn, one
+ * colour per row.
+ *
+ * @param page_tr top right of the page, used only to create the debug window
+ * @param block block whose rows are processed
+ * @param rotation its inverse is passed to chop(); the rotation itself is
+ * applied to boxes before drawing
+ * @param testing_on enables the debug drawing
+ */
+void pre_associate_blobs( //make rough chars
+ ICOORD page_tr, //top right
+ TO_BLOCK* block, //block to do
+ FCOORD rotation, //inverse landscape
+ bool testing_on //correct orientation
+) {
+#ifndef GRAPHICS_DISABLED
+ ScrollView::Color colour; //of boxes
+#endif
+ BLOBNBOX *blob; //current blob
+ BLOBNBOX *nextblob; //next in list
+ TBOX blob_box;
+ FCOORD blob_rotation; //inverse of rotation
+ BLOBNBOX_IT blob_it; //iterator
+ BLOBNBOX_IT start_it; //iterator
+ TO_ROW_IT row_it = block->get_rows ();
+
+#ifndef GRAPHICS_DISABLED
+ colour = ScrollView::RED;
+#endif
+
+ blob_rotation = FCOORD (rotation.x (), -rotation.y ());
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+ //get blobs
+ blob_it.set_to_list (row_it.data ()->blob_list ());
+ for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+ blob_it.forward ()) {
+ blob = blob_it.data ();
+ blob_box = blob->bounding_box ();
+ start_it = blob_it; //save start point
+ // if (testing_on && textord_show_final_blobs)
+ // {
+ // tprintf("Blob at (%d,%d)->(%d,%d), addr=%x, count=%d\n",
+ // blob_box.left(),blob_box.bottom(),
+ // blob_box.right(),blob_box.top(),
+ // (void*)blob,blob_it.length());
+ // }
+ // Keep absorbing the next blob while it overlaps the growing box.
+ // blob_it ends on the last blob merged; start_it stays on the first.
+ bool overlap;
+ do {
+ overlap = false;
+ if (!blob_it.at_last ()) {
+ nextblob = blob_it.data_relative(1);
+ overlap = blob_box.major_x_overlap(nextblob->bounding_box());
+ if (overlap) {
+ blob->merge(nextblob); // merge new blob
+ blob_box = blob->bounding_box(); // get bigger box
+ blob_it.forward();
+ }
+ }
+ }
+ while (overlap);
+ // Hand the merged run [start_it, blob_it] to chop() together with the
+ // maximum width allowed for a single blob.
+ blob->chop (&start_it, &blob_it,
+ blob_rotation,
+ block->line_size * tesseract::CCStruct::kXHeightFraction *
+ textord_chop_width);
+ //attempt chop
+ }
+#ifndef GRAPHICS_DISABLED
+ if (testing_on && textord_show_final_blobs) {
+ // The debug window is created on first use.
+ if (to_win == nullptr)
+ create_to_win(page_tr);
+ to_win->Pen(colour);
+ for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+ blob_it.forward ()) {
+ blob = blob_it.data ();
+ blob_box = blob->bounding_box ();
+ blob_box.rotate (rotation);
+ // Blobs flagged joined_to_prev are not drawn separately.
+ if (!blob->joined_to_prev ()) {
+ to_win->Rectangle (blob_box.left (), blob_box.bottom (),
+ blob_box.right (), blob_box.top ());
+ }
+ }
+ // Next row gets the next colour, wrapping from MAGENTA back to RED.
+ colour = static_cast<ScrollView::Color>(colour + 1);
+ if (colour > ScrollView::MAGENTA)
+ colour = ScrollView::RED;
+ }
+#endif
+ }
+}
+
+
+/**
+ * @name fit_parallel_rows
+ *
+ * Re-fit every row of the block to the given gradient. Rows without blobs
+ * are removed and deleted, all others are fitted with fit_parallel_lms().
+ * Finally the rows are sorted with row_y_order, since the new fits may
+ * have changed their order.
+ *
+ * @param block block whose rows are re-fitted
+ * @param gradient gradient to fit
+ * @param rotation used only for the debug plot
+ * @param block_edge edge of the block, used only for the debug plot
+ * @param testing_on plot the fitted rows (when graphics are enabled)
+ */
+void fit_parallel_rows(                 //find lines
+                       TO_BLOCK* block,      //block to do
+                       float gradient,       //gradient to fit
+                       FCOORD rotation,      //for drawing
+                       int32_t block_edge,   //edge of block
+                       bool testing_on       //correct orientation
+) {
+  TO_ROW_IT row_it = block->get_rows();
+
+  row_it.move_to_first();
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    TO_ROW *row = row_it.data();
+    if (row->blob_list()->empty()) {
+      delete row_it.extract();  // nothing in it
+      continue;
+    }
+    fit_parallel_lms(gradient, row);
+  }
+#ifndef GRAPHICS_DISABLED
+  if (testing_on) {
+    // One colour per row, cycling from RED up to MAGENTA.
+    ScrollView::Color colour = ScrollView::RED;
+    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+      plot_parallel_row(row_it.data(), gradient,
+                        block_edge, colour, rotation);
+      colour = static_cast<ScrollView::Color>(colour + 1);
+      if (colour > ScrollView::MAGENTA) {
+        colour = ScrollView::RED;
+      }
+    }
+  }
+#endif
+  row_it.sort(row_y_order);  // may have gone out of order
+}
+
+
+/**
+ * @name fit_parallel_lms
+ *
+ * Fit a line to the bottoms of a row's blobs.
+ * The fit constrained to the given gradient is stored as the row's
+ * parallel line. The row's general line is the same fit, unless
+ * textord_straight_baselines is set and the row has more than
+ * textord_lms_line_trials usable blobs, in which case an unconstrained fit
+ * is stored instead.
+ *
+ * @param gradient gradient the parallel fit is constrained to
+ * @param row row to fit; both of its line descriptions are updated
+ */
+void fit_parallel_lms(float gradient, TO_ROW *row) {
+  tesseract::DetLineFit fitter;
+  int num_points = 0;  // blobs that contributed a point
+
+  // One point per blob that is not joined to its predecessor:
+  // (horizontal centre, bottom) of its bounding box.
+  BLOBNBOX_IT blob_it = row->blob_list();
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    if (blob_it.data()->joined_to_prev()) {
+      continue;
+    }
+    const TBOX& box = blob_it.data()->bounding_box();
+    fitter.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
+    ++num_points;
+  }
+
+  float intercept;  // fitted line constant
+  double error = fitter.ConstrainedFit(gradient, &intercept);
+  row->set_parallel_line(gradient, intercept, error);
+
+  const bool refit_freely =
+      textord_straight_baselines && num_points > textord_lms_line_trials;
+  if (refit_freely) {
+    error = fitter.Fit(&gradient, &intercept);
+  }
+  // set the other too
+  row->set_line(gradient, intercept, error);
+}
+
+
+/**
+ * @name make_spline_rows
+ *
+ * Give every row of the block a baseline spline. Rows without blobs are
+ * removed and deleted; each remaining row is passed to
+ * make_baseline_spline(). If textord_old_baselines is set,
+ * make_old_baselines() is then run on the block.
+ * With graphics enabled and testing_on set, the baselines are plotted
+ * (both before and after make_old_baselines() when that step runs).
+ *
+ * @param block block whose rows are processed
+ * @param gradient forwarded to make_old_baselines()
+ * @param testing_on enables the debug plots; also forwarded to
+ * make_old_baselines()
+ */
+void Textord::make_spline_rows(TO_BLOCK* block, // block to do
+ float gradient, // gradient to fit
+ bool testing_on) {
+#ifndef GRAPHICS_DISABLED
+ ScrollView::Color colour; //of row
+#endif
+ TO_ROW_IT row_it = block->get_rows ();
+
+ row_it.move_to_first ();
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+ if (row_it.data ()->blob_list ()->empty ())
+ delete row_it.extract (); //nothing in it
+ else
+ make_baseline_spline (row_it.data (), block);
+ }
+ if (textord_old_baselines) {
+#ifndef GRAPHICS_DISABLED
+ // Plot the baselines as they are before make_old_baselines() runs.
+ if (testing_on) {
+ colour = ScrollView::RED;
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
+ row_it.forward ()) {
+ row_it.data ()->baseline.plot (to_win, colour);
+ colour = static_cast<ScrollView::Color>(colour + 1);
+ if (colour > ScrollView::MAGENTA)
+ colour = ScrollView::RED;
+ }
+ }
+#endif
+ make_old_baselines(block, testing_on, gradient);
+ }
+#ifndef GRAPHICS_DISABLED
+ // Plot the final baselines, one colour per row cycling RED..MAGENTA.
+ if (testing_on) {
+ colour = ScrollView::RED;
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+ row_it.data ()->baseline.plot (to_win, colour);
+ colour = static_cast<ScrollView::Color>(colour + 1);
+ if (colour > ScrollView::MAGENTA)
+ colour = ScrollView::RED;
+ }
+ }
+#endif
+}
+
+/**
+ * @name make_baseline_spline
+ *
+ * Build row->baseline as a QSPLINE.
+ * If segment_baseline() reports that a curve is needed and neither
+ * textord_straight_baselines nor textord_parallel_baselines is set, the
+ * coefficients come from linear_spline_baseline(). Otherwise the baseline
+ * is a single segment that reproduces the row's fitted straight line
+ * (line_m, line_c) from the first to the last segment boundary.
+ *
+ * @param row row to fit; its baseline member is replaced
+ * @param block block the row belongs to, forwarded to the helpers
+ */
+void make_baseline_spline(TO_ROW *row,       //row to fit
+                          TO_BLOCK *block) {
+  int32_t segments;  // number of spline segments
+  // Room for one boundary more than there are blobs.
+  auto *xstarts = new int32_t[row->blob_list()->length() + 1];
+
+  const bool curved = segment_baseline(row, block, segments, xstarts)
+                      && !textord_straight_baselines
+                      && !textord_parallel_baselines;
+  double *coeffs;  // quadratic coeffs, 3 per segment
+  if (!curved) {
+    // Collapse everything into one straight segment.
+    xstarts[1] = xstarts[segments];
+    segments = 1;
+    coeffs = new double[3];
+    coeffs[0] = 0;
+    coeffs[1] = row->line_m();
+    coeffs[2] = row->line_c();
+  } else {
+    coeffs = linear_spline_baseline(row, block, segments, xstarts);
+  }
+
+  row->baseline = QSPLINE(segments, xstarts, coeffs);
+  delete[] coeffs;
+  delete[] xstarts;
+}
+
+
+/**
+ * @name segment_baseline
+ *
+ * Divide the baseline up into segments which require a different
+ * quadratic fitted to them.
+ * Return true if enough blobs were far enough away to need a quadratic.
+ *
+ * Each blob's vertical offset of its box bottom from the row's fitted line
+ * (line_m, line_c) is kept in a sliding window of textord_spline_medianwin
+ * blobs. The middle value of that window classifies the position as above
+ * (+1), on (0) or below (-1) the line, using a threshold of
+ * textord_spline_shift_fraction * block->line_size. A new segment is
+ * started where the state changes, provided more than
+ * textord_spline_minblobs blobs were seen since the previous boundary.
+ *
+ * @param row row to segment
+ * @param block block the row came from (only line_size is read)
+ * @param segments out: number of segments found (always at least 1)
+ * @param xstarts out: segment boundaries, xstarts[0]..xstarts[segments]
+ * @return true if any window middle was off the line by more than the
+ * threshold; false also when the row has too few blobs to segment
+ */
+bool
+segment_baseline( //split baseline
+ TO_ROW* row, //row to fit
+ TO_BLOCK* block, //block it came from
+ int32_t& segments, //out: number of segments
+ int32_t* xstarts //out: coords of segment boundaries
+) {
+ bool needs_curve; //needs curved line
+ int blobcount; //no of blobs
+ int blobindex; //current blob
+ int last_state; //above, on , below
+ int state; //of current blob
+ float yshift; //from baseline
+ TBOX box; //blob box
+ TBOX new_box; //new_it box
+ float middle; //xcentre of blob
+ //blobs
+ BLOBNBOX_IT blob_it = row->blob_list ();
+ BLOBNBOX_IT new_it = blob_it; //front end
+ SORTED_FLOATS yshifts; //shifts from baseline
+
+ needs_curve = false;
+ box = box_next_pre_chopped (&blob_it);
+ xstarts[0] = box.left ();
+ segments = 1;
+ blobcount = row->blob_list ()->length ();
+ if (textord_oldbl_debug)
+ tprintf ("Segmenting baseline of %d blobs at (%d,%d)\n",
+ blobcount, box.left (), box.bottom ());
+ // Too few blobs to fill a window or form a segment: one straight segment
+ // spanning the whole row.
+ if (blobcount <= textord_spline_medianwin
+ || blobcount < textord_spline_minblobs) {
+ blob_it.move_to_last ();
+ box = blob_it.data ()->bounding_box ();
+ xstarts[1] = box.right ();
+ return false;
+ }
+ last_state = 0;
+ new_it.mark_cycle_pt ();
+ // Prime the window with the first textord_spline_medianwin shifts.
+ for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) {
+ new_box = box_next_pre_chopped (&new_it);
+ middle = (new_box.left () + new_box.right ()) / 2.0;
+ yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
+ //record shift
+ yshifts.add (yshift, blobindex);
+ // The row ran out while priming the window: single segment.
+ if (new_it.cycled_list ()) {
+ xstarts[1] = new_box.right ();
+ return false;
+ }
+ }
+ // Advance blob_it by half a window so that box tracks the middle of the
+ // window. From here on blobcount counts blobs since the last boundary.
+ for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++)
+ box = box_next_pre_chopped (&blob_it);
+ do {
+ new_box = box_next_pre_chopped (&new_it);
+ //get middle one
+ yshift = yshifts[textord_spline_medianwin / 2];
+ if (yshift > textord_spline_shift_fraction * block->line_size)
+ state = 1;
+ else if (-yshift > textord_spline_shift_fraction * block->line_size)
+ state = -1;
+ else
+ state = 0;
+ if (state != 0)
+ needs_curve = true;
+ // tprintf("State=%d, prev=%d, shift=%g\n",
+ // state,last_state,yshift);
+ if (state != last_state && blobcount > textord_spline_minblobs) {
+ xstarts[segments++] = box.left ();
+ blobcount = 0;
+ }
+ last_state = state;
+ // Slide the window: drop the oldest shift, add the one for new_box.
+ yshifts.remove (blobindex - textord_spline_medianwin);
+ box = box_next_pre_chopped (&blob_it);
+ middle = (new_box.left () + new_box.right ()) / 2.0;
+ yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
+ yshifts.add (yshift, blobindex);
+ blobindex++;
+ blobcount++;
+ }
+ while (!new_it.cycled_list ());
+ // Close the last segment. A tail that is too short is merged into the
+ // previous segment by overwriting that segment's boundary.
+ if (blobcount > textord_spline_minblobs || segments == 1) {
+ xstarts[segments] = new_box.right ();
+ }
+ else {
+ xstarts[--segments] = new_box.right ();
+ }
+ if (textord_oldbl_debug)
+ tprintf ("Made %d segments on row at (%d,%d)\n",
+ segments, box.right (), box.bottom ());
+ return needs_curve;
+}
+
+
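+/**
+ * @name linear_spline_baseline
+ *
+ * Divide the baseline up into segments of roughly textord_spline_medianwin
+ * blobs each and fit a straight line to the blobs of every segment.
+ * @return a new[]-allocated array holding 3 coefficients per segment (a zero
+ * quadratic term followed by the two fitted line parameters).
+ */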
+double *
+linear_spline_baseline ( //split baseline
+TO_ROW * row, //row to fit
+TO_BLOCK * block, //block it came from
+int32_t & segments, //no fo segments
+int32_t xstarts[] //coords of segments
+) {
+ int blobcount; //no of blobs
+ int blobindex; //current blob
+ int index1, index2; //blob numbers
+ int blobs_per_segment; //blobs in each
+ TBOX box; //blob box
+ TBOX new_box; //new_it box
+ //blobs
+ BLOBNBOX_IT blob_it = row->blob_list ();
+ BLOBNBOX_IT new_it = blob_it; //front end
+ float b, c; //fitted curve
+ tesseract::DetLineFit lms;
+ int32_t segment; //current segment
+
+ box = box_next_pre_chopped (&blob_it);
+ xstarts[0] = box.left ();
+ blobcount = 1;
+ while (!blob_it.at_first ()) {
+ blobcount++;
+ box = box_next_pre_chopped (&blob_it);
+ }
+ segments = blobcount / textord_spline_medianwin;
+ if (segments < 1)
+ segments = 1;
+ blobs_per_segment = blobcount / segments;
+ // quadratic coeffs
+ auto *coeffs = new double[segments * 3];
+ if (textord_oldbl_debug)
+ tprintf
+ ("Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n",
+ blobcount, box.left (), box.bottom (), segments, blobs_per_segment);
+ segment = 1;
+ for (index2 = 0; index2 < blobs_per_segment / 2; index2++)
+ box_next_pre_chopped(&new_it);
+ index1 = 0;
+ blobindex = index2;
+ do {
+ blobindex += blobs_per_segment;
+ lms.Clear();
+ while (index1 < blobindex || (segment == segments && index1 < blobcount)) {
+ box = box_next_pre_chopped (&blob_it);
+ int middle = (box.left() + box.right()) / 2;
+ lms.Add(ICOORD(middle, box.bottom()));
+ index1++;
+ if (index1 == blobindex - blobs_per_segment / 2
+ || index1 == blobcount - 1) {
+ xstarts[segment] = box.left ();
+ }
+ }
+ lms.Fit(&b, &c);
+ coeffs[segment * 3 - 3] = 0;
+ coeffs[segment * 3 - 2] = b;
+ coeffs[segment * 3 - 1] = c;
+ segment++;
+ if (segment > segments)
+ break;
+
+ blobindex += blobs_per_segment;
+ lms.Clear();
+ while (index2 < blobindex || (segment == segments && index2 < blobcount)) {
+ new_box = box_next_pre_chopped (&new_it);
+ int middle = (new_box.left() + new_box.right()) / 2;
+ lms.Add(ICOORD (middle, new_box.bottom()));
+ index2++;
+ if (index2 == blobindex - blobs_per_segment / 2
+ || index2 == blobcount - 1) {
+ xstarts[segment] = new_box.left ();
+ }
+ }
+ lms.Fit(&b, &c);
+ coeffs[segment * 3 - 3] = 0;
+ coeffs[segment * 3 - 2] = b;
+ coeffs[segment * 3 - 1] = c;
+ segment++;
+ }
+ while (segment <= segments);
+ return coeffs;
+}
+
+
+/**
+ * @name assign_blobs_to_rows
+ *
+ * Make enough rows to allocate all the given blobs to one.
+ * If a block skew is given, use that, else attempt to track it.
+ * Blobs are taken from block->blobs in blob_x_order. Each one is moved into
+ * an existing row, moved into a new TO_ROW (only when make_new_rows is set
+ * and its height is below block->max_blob_size), or left in block->blobs.
+ * Rows left with no blobs are deleted before returning.
+ */
+void assign_blobs_to_rows( //find lines
+ TO_BLOCK* block, //block to do
+ float* gradient, //block skew, or nullptr to track it
+ int pass, //identification (only used in debug output)
+ bool reject_misses, //chuck big ones out
+ bool make_new_rows, //add rows for unmatched
+ bool drawing_skew //draw smoothed skew
+) {
+ OVERLAP_STATE overlap_result; //what to do with it
+ float ycoord; //current y
+ float top, bottom; //of blob
+ float g_length = 1.0f; //from gradient
+ int16_t row_count; //no of rows
+ int16_t left_x; //left edge
+ int16_t last_x; //previous edge
+ float block_skew; //y delta
+ float smooth_factor; //for new coords
+ float near_dist; //dist to nearest row
+ ICOORD testpt; //testing only
+ BLOBNBOX *blob; //current blob
+ TO_ROW *row; //current row
+ TO_ROW *dest_row = nullptr; //row to put blob in
+ //iterators
+ BLOBNBOX_IT blob_it = &block->blobs;
+ TO_ROW_IT row_it = block->get_rows ();
+
+ ycoord =
+ (block->block->pdblk.bounding_box ().bottom () +
+ block->block->pdblk.bounding_box ().top ()) / 2.0f;
+ if (gradient != nullptr)
+ g_length = sqrt (1 + *gradient * *gradient);
+#ifndef GRAPHICS_DISABLED
+ if (drawing_skew)
+ to_win->SetCursor(block->block->pdblk.bounding_box ().left (), ycoord);
+#endif
+ testpt = ICOORD (textord_test_x, textord_test_y);
+ blob_it.sort (blob_x_order);
+ smooth_factor = 1.0;
+ block_skew = 0.0f;
+ row_count = row_it.length (); //might have rows
+ if (!blob_it.empty ()) {
+ left_x = blob_it.data ()->bounding_box ().left ();
+ }
+ else {
+ left_x = block->block->pdblk.bounding_box ().left ();
+ }
+ last_x = left_x;
+ for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
+ blob = blob_it.data ();
+ if (gradient != nullptr) { //skew comes straight from the given gradient
+ block_skew = (1 - 1 / g_length) * blob->bounding_box ().bottom ()
+ + *gradient / g_length * blob->bounding_box ().left ();
+ }
+ else if (blob->bounding_box ().left () - last_x > block->line_size / 2
+ && last_x - left_x > block->line_size * 2
+ && textord_interpolating_skew) {
+ // tprintf("Interpolating skew from %g",block_skew);
+ block_skew *= static_cast<float>(blob->bounding_box ().left () - left_x)
+ / (last_x - left_x); //scale tracked skew across the x gap
+ // tprintf("to %g\n",block_skew);
+ }
+ last_x = blob->bounding_box ().left ();
+ top = blob->bounding_box ().top () - block_skew; //blob limits with skew removed
+ bottom = blob->bounding_box ().bottom () - block_skew;
+#ifndef GRAPHICS_DISABLED
+ if (drawing_skew)
+ to_win->DrawTo(blob->bounding_box ().left (), ycoord + block_skew);
+#endif
+ if (!row_it.empty ()) {
+ for (row_it.move_to_first ();
+ !row_it.at_last () && row_it.data ()->min_y () > top;
+ row_it.forward ()); //stop at first row with min_y <= top, or at the last row
+ row = row_it.data ();
+ if (row->min_y () <= top && row->max_y () >= bottom) {
+ //any overlap
+ dest_row = row;
+ overlap_result = most_overlapping_row (&row_it, dest_row,
+ top, bottom,
+ block->line_size,
+ blob->bounding_box ().
+ contains (testpt));
+ if (overlap_result == NEW_ROW && !reject_misses)
+ overlap_result = ASSIGN; //weak overlap is accepted unless rejecting misses
+ }
+ else {
+ overlap_result = NEW_ROW;
+ if (!make_new_rows) { //no new rows allowed: try a nearby row instead
+ near_dist = row_it.data_relative (-1)->min_y () - top;
+ //below bottom
+ if (bottom < row->min_y ()) {
+ if (row->min_y () - bottom <=
+ (block->line_spacing -
+ block->line_size) * tesseract::CCStruct::kDescenderFraction) {
+ //done it
+ overlap_result = ASSIGN;
+ dest_row = row;
+ }
+ }
+ else if (near_dist > 0
+ && near_dist < bottom - row->max_y ()) {
+ row_it.backward (); //previous row in the list is nearer
+ dest_row = row_it.data ();
+ if (dest_row->min_y () - bottom <=
+ (block->line_spacing -
+ block->line_size) * tesseract::CCStruct::kDescenderFraction) {
+ //done it
+ overlap_result = ASSIGN;
+ }
+ }
+ else {
+ if (top - row->max_y () <=
+ (block->line_spacing -
+ block->line_size) * (textord_overlap_x +
+ tesseract::CCStruct::kAscenderFraction)) {
+ //done it
+ overlap_result = ASSIGN;
+ dest_row = row;
+ }
+ }
+ }
+ }
+ if (overlap_result == ASSIGN)
+ dest_row->add_blob (blob_it.extract (), top, bottom,
+ block->line_size);
+ if (overlap_result == NEW_ROW) {
+ if (make_new_rows && top - bottom < block->max_blob_size) {
+ dest_row =
+ new TO_ROW (blob_it.extract (), top, bottom,
+ block->line_size);
+ row_count++;
+ if (bottom > row_it.data ()->min_y ())
+ row_it.add_before_then_move (dest_row);
+ //insert in right place
+ else
+ row_it.add_after_then_move (dest_row);
+ smooth_factor =
+ 1.0 / (row_count * textord_skew_lag +
+ textord_skewsmooth_offset);
+ }
+ else
+ overlap_result = REJECT; //blob stays in block->blobs
+ }
+ }
+ else if (make_new_rows && top - bottom < block->max_blob_size) { //no rows yet: start the first one
+ overlap_result = NEW_ROW;
+ dest_row =
+ new TO_ROW(blob_it.extract(), top, bottom, block->line_size);
+ row_count++;
+ row_it.add_after_then_move(dest_row);
+ smooth_factor = 1.0 / (row_count * textord_skew_lag +
+ textord_skewsmooth_offset2);
+ }
+ else
+ overlap_result = REJECT;
+ if (blob->bounding_box ().contains(testpt) && textord_debug_blob) {
+ if (overlap_result != REJECT) {
+ tprintf("Test blob assigned to row at (%g,%g) on pass %d\n",
+ dest_row->min_y(), dest_row->max_y(), pass);
+ }
+ else {
+ tprintf("Test blob assigned to no row on pass %d\n", pass);
+ }
+ }
+ if (overlap_result != REJECT) {
+ while (!row_it.at_first() &&
+ row_it.data()->min_y() > row_it.data_relative(-1)->min_y()) { //move row towards the front while out of order
+ row = row_it.extract();
+ row_it.backward();
+ row_it.add_before_then_move(row);
+ }
+ while (!row_it.at_last() &&
+ row_it.data ()->min_y() < row_it.data_relative (1)->min_y()) {
+ row = row_it.extract();
+ row_it.forward();
+ // Keep rows in order.
+ row_it.add_after_then_move(row);
+ }
+ BLOBNBOX_IT added_blob_it(dest_row->blob_list());
+ added_blob_it.move_to_last();
+ TBOX prev_box = added_blob_it.data_relative(-1)->bounding_box(); //box of the blob before the row's last one
+ if (dest_row->blob_list()->singleton() ||
+ !prev_box.major_x_overlap(blob->bounding_box())) {
+ block_skew = (1 - smooth_factor) * block_skew
+ + smooth_factor * (blob->bounding_box().bottom() -
+ dest_row->initial_min_y()); //blend new offset into the tracked skew
+ }
+ }
+ }
+ for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+ if (row_it.data()->blob_list()->empty())
+ delete row_it.extract(); // Discard empty rows.
+ }
+}
+
+
+/**
+ * @name most_overlapping_row
+ *
+ * Return the row which most overlaps the blob.
+ * Starting at row_it's current row, following rows are examined while they
+ * still intersect the blob's [bottom, top] range. Where the combined extent
+ * of the best row so far and such a row is no more than rowsize, the two
+ * are merged. On return row_it and best_row both refer to the chosen row.
+ * @return ASSIGN, REJECT if two rows each overlap by at least rowsize - 1,
+ * or NEW_ROW if the best overlap is too small (see textord_overlap_x).
+ */
+OVERLAP_STATE most_overlapping_row( //find best row
+ TO_ROW_IT* row_it, //iterator
+ TO_ROW*& best_row, //output row
+ float top, //top of blob
+ float bottom, //bottom of blob
+ float rowsize, //max row size
+ bool testing_blob //test stuff
+) {
+ OVERLAP_STATE result; //result of tests
+ float overlap; //of blob & row
+ float bestover; //nearest row
+ float merge_top, merge_bottom; //size of merged row
+ ICOORD testpt; //testing only
+ TO_ROW *row; //current row
+ TO_ROW *test_row; //for multiple overlaps
+ BLOBNBOX_IT blob_it; //for merging rows
+
+ result = ASSIGN;
+ row = row_it->data ();
+ bestover = top - bottom; //start from the full blob height
+ if (top > row->max_y ())
+ bestover -= top - row->max_y ();
+ if (bottom < row->min_y ())
+ //compute overlap
+ bestover -= row->min_y () - bottom;
+ if (testing_blob && textord_debug_blob) {
+ tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f\n",
+ bottom, top, row->min_y(), row->max_y(), rowsize, bestover);
+ }
+ test_row = row;
+ do {
+ if (!row_it->at_last ()) {
+ row_it->forward ();
+ test_row = row_it->data ();
+ if (test_row->min_y () <= top && test_row->max_y () >= bottom) { //this row also intersects the blob
+ merge_top =
+ test_row->max_y () >
+ row->max_y ()? test_row->max_y () : row->max_y ();
+ merge_bottom =
+ test_row->min_y () <
+ row->min_y ()? test_row->min_y () : row->min_y ();
+ if (merge_top - merge_bottom <= rowsize) { //combined extent fits in one row: merge
+ if (testing_blob && textord_debug_blob) {
+ tprintf ("Merging rows at (%g,%g), (%g,%g)\n",
+ row->min_y (), row->max_y (),
+ test_row->min_y (), test_row->max_y ());
+ }
+ test_row->set_limits (merge_bottom, merge_top);
+ blob_it.set_to_list (test_row->blob_list ());
+ blob_it.add_list_after (row->blob_list ()); //row's blobs go into test_row
+ blob_it.sort (blob_x_order);
+ row_it->backward ();
+ delete row_it->extract (); //deletes the row preceding test_row
+ row_it->forward ();
+ bestover = -1.0f; //force replacement
+ }
+ overlap = top - bottom;
+ if (top > test_row->max_y ())
+ overlap -= top - test_row->max_y ();
+ if (bottom < test_row->min_y ())
+ overlap -= test_row->min_y () - bottom;
+ if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
+ result = REJECT;
+ }
+ if (overlap > bestover) {
+ bestover = overlap; //find biggest overlap
+ row = test_row;
+ }
+ if (testing_blob && textord_debug_blob) {
+ tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f->%f\n",
+ bottom, top, test_row->min_y(), test_row->max_y(),
+ rowsize, overlap, bestover);
+ }
+ }
+ }
+ }
+ while (!row_it->at_last ()
+ && test_row->min_y () <= top && test_row->max_y () >= bottom);
+ while (row_it->data () != row)
+ row_it->backward (); //make it point to row
+ //doesn't overlap much
+ if (top - bottom - bestover > rowsize * textord_overlap_x &&
+ (!textord_fix_makerow_bug || bestover < rowsize * textord_overlap_x)
+ && result == ASSIGN)
+ result = NEW_ROW; //doesn't overlap enough
+ best_row = row;
+ return result;
+}
+
+
+/**
+ * @name blob_x_order
+ *
+ * qsort-style comparator that orders BLOBNBOX pointers by the left edge of
+ * their bounding boxes, smallest x first.
+ * @return -1, 0 or 1 as item1's left edge is below, equal to or above item2's.
+ */
+int blob_x_order(const void *item1, const void *item2) {
+  // Each argument addresses a BLOBNBOX pointer, not the BLOBNBOX itself.
+  const BLOBNBOX *lhs = *static_cast<const BLOBNBOX *const *>(item1);
+  const BLOBNBOX *rhs = *static_cast<const BLOBNBOX *const *>(item2);
+  const auto lhs_left = lhs->bounding_box().left();
+  const auto rhs_left = rhs->bounding_box().left();
+  // Difference of the two comparisons gives exactly -1, 0 or 1.
+  return (lhs_left > rhs_left) - (lhs_left < rhs_left);
+}
+
+
+/**
+ * @name row_y_order
+ *
+ * qsort-style comparator that orders TO_ROW pointers by parallel_c(),
+ * largest value first (rows in y from page top).
+ * @return -1 if item1's parallel_c() is larger, 1 if smaller, 0 otherwise.
+ */
+int row_y_order(const void *item1, const void *item2) {
+  // Each argument addresses a TO_ROW pointer, not the TO_ROW itself.
+  const TO_ROW *lhs = *static_cast<const TO_ROW *const *>(item1);
+  const TO_ROW *rhs = *static_cast<const TO_ROW *const *>(item2);
+  const auto lhs_c = lhs->parallel_c();
+  const auto rhs_c = rhs->parallel_c();
+  // Descending order: the larger intercept sorts first.
+  return (lhs_c < rhs_c) - (lhs_c > rhs_c);
+}
+
+
+/**
+ * @name row_spacing_order
+ *
+ * qsort-style comparator that orders TO_ROW pointers by their spacing
+ * member, smallest first.
+ * @return -1, 0 or 1 as item1's spacing is below, equal to or above item2's.
+ */
+int row_spacing_order(const void *item1, const void *item2) {
+  // Each argument addresses a TO_ROW pointer, not the TO_ROW itself.
+  const TO_ROW *lhs = *static_cast<const TO_ROW *const *>(item1);
+  const TO_ROW *rhs = *static_cast<const TO_ROW *const *>(item2);
+  // Ascending order on spacing.
+  return (lhs->spacing > rhs->spacing) - (lhs->spacing < rhs->spacing);
+}
+
+/**
+ * @name mark_repeated_chars
+ *
+ * Mark blobs marked with BTFT_LEADER in repeated sets using the
+ * repeated_set member of BLOBNBOX.
+ *
+ * Walks the row's blob list once. A run that starts at a BTFT_LEADER blob
+ * which is not joined to its predecessor and has a cblob, and whose counted
+ * length reaches kMinLeaderCount, is given the next set number (1, 2, ...).
+ * Any blob that does not start such a run has its repeated_set reset to 0.
+ * The number of sets found is stored with row->set_num_repeated_sets().
+ */
+void mark_repeated_chars(TO_ROW *row) {
+  BLOBNBOX_IT box_it(row->blob_list());  // Iterator.
+  int num_repeated_sets = 0;
+  if (!box_it.empty()) {
+    do {
+      BLOBNBOX* bblob = box_it.data();
+      int repeat_length = 1;
+      if (bblob->flow() == BTFT_LEADER &&
+          !bblob->joined_to_prev() && bblob->cblob() != nullptr) {
+        // Look ahead with a separate iterator AND a separate blob pointer, so
+        // that bblob keeps referring to the blob box_it is standing on.
+        BLOBNBOX_IT test_it(box_it);
+        for (test_it.forward(); !test_it.at_first();) {
+          BLOBNBOX* ahead = test_it.data();
+          if (ahead->flow() != BTFT_LEADER)
+            break;
+          test_it.forward();
+          ahead = test_it.data();
+          if (ahead->joined_to_prev() || ahead->cblob() == nullptr) {
+            repeat_length = 0;
+            break;
+          }
+          ++repeat_length;
+        }
+      }
+      if (repeat_length >= kMinLeaderCount) {
+        num_repeated_sets++;
+        for (; repeat_length > 0; box_it.forward(), --repeat_length) {
+          bblob = box_it.data();
+          bblob->set_repeated_set(num_repeated_sets);
+        }
+      } else {
+        // Run too short (or broken): clear the mark on the current blob.
+        // bblob is still box_it.data() here; previously the look-ahead loop
+        // overwrote it, so the reset could land on a later blob, or on the
+        // list's first blob after wrap-around, wiping a set number already
+        // assigned there.
+        bblob->set_repeated_set(0);
+        box_it.forward();
+      }
+    } while (!box_it.at_first());  // until all done
+  }
+  row->set_num_repeated_sets(num_repeated_sets);
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/makerow.h b/tesseract/src/textord/makerow.h
new file mode 100644
index 00000000..c9e1e5e6
--- /dev/null
+++ b/tesseract/src/textord/makerow.h
@@ -0,0 +1,291 @@
+/**********************************************************************
+ * File: makerow.h (Formerly makerows.h)
+ * Description: Code to arrange blobs into rows of text.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef MAKEROW_H
+#define MAKEROW_H
+
+#include "params.h"
+#include "ocrblock.h"
+#include "blobs.h"
+#include "blobbox.h"
+#include "statistc.h"
+
+namespace tesseract {
+
+enum OVERLAP_STATE //outcome of matching a blob against rows
+{
+ ASSIGN, //assign it to row
+ REJECT, //reject it - dual overlap
+ NEW_ROW //doesn't overlap enough - needs a new row
+};
+
+enum ROW_CATEGORY { //values returned by get_row_category()
+ ROW_ASCENDERS_FOUND, //xheight > 0 and ascrise > 0
+ ROW_DESCENDERS_FOUND, //xheight > 0, no ascrise, descdrop != 0
+ ROW_UNKNOWN, //xheight > 0 but neither of the above
+ ROW_INVALID, //xheight <= 0
+};
+
+extern BOOL_VAR_H(textord_heavy_nr, false, "Vigorously remove noise");
+extern BOOL_VAR_H (textord_show_initial_rows, false,
+"Display row accumulation");
+extern BOOL_VAR_H (textord_show_parallel_rows, false,
+"Display page correlated rows");
+extern BOOL_VAR_H (textord_show_expanded_rows, false,
+"Display rows after expanding");
+extern BOOL_VAR_H (textord_show_final_rows, false,
+"Display rows after final fitting");
+extern BOOL_VAR_H (textord_show_final_blobs, false,
+"Display blob bounds after pre-ass");
+extern BOOL_VAR_H (textord_test_landscape, false, "Tests refer to land/port");
+extern BOOL_VAR_H (textord_parallel_baselines, true,
+"Force parallel baselines");
+extern BOOL_VAR_H (textord_straight_baselines, false,
+"Force straight baselines");
+extern BOOL_VAR_H (textord_quadratic_baselines, false,
+"Use quadratic splines");
+extern BOOL_VAR_H (textord_old_baselines, true, "Use old baseline algorithm");
+extern BOOL_VAR_H (textord_old_xheight, true, "Use old xheight algorithm");
+extern BOOL_VAR_H (textord_fix_xheight_bug, true, "Use spline baseline");
+extern BOOL_VAR_H (textord_fix_makerow_bug, true,
+"Prevent multiple baselines");
+extern BOOL_VAR_H (textord_cblob_blockocc, true,
+"Use new projection for underlines");
+extern BOOL_VAR_H (textord_debug_xheights, false, "Test xheight algorithms");
+extern INT_VAR_H (textord_test_x, -INT32_MAX, "coord of test pt");
+extern INT_VAR_H (textord_test_y, -INT32_MAX, "coord of test pt");
+extern INT_VAR_H (textord_min_blobs_in_row, 4,
+"Min blobs before gradient counted");
+extern INT_VAR_H (textord_spline_minblobs, 8,
+"Min blobs in each spline segment");
+extern INT_VAR_H (textord_spline_medianwin, 6,
+"Size of window for spline segmentation");
+extern INT_VAR_H (textord_min_xheight, 10, "Min credible pixel xheight");
+extern double_VAR_H (textord_spline_shift_fraction, 0.02,
+"Fraction of line spacing for quad");
+extern double_VAR_H (textord_spline_outlier_fraction, 0.1,
+"Fraction of line spacing for outlier");
+extern double_VAR_H (textord_skew_ile, 0.5, "Ile of gradients for page skew");
+extern double_VAR_H (textord_skew_lag, 0.75,
+"Lag for skew on row accumulation");
+extern double_VAR_H (textord_linespace_iqrlimit, 0.2,
+"Max iqr/median for linespace");
+extern double_VAR_H (textord_width_limit, 8,
+"Max width of blobs to make rows");
+extern double_VAR_H (textord_chop_width, 1.5, "Max width before chopping");
+extern double_VAR_H (textord_minxh, 0.25,
+"fraction of linesize for min xheight");
+extern double_VAR_H (textord_min_linesize, 1.25,
+"* blob height for initial linesize");
+extern double_VAR_H (textord_excess_blobsize, 1.3,
+"New row made if blob makes row this big");
+extern double_VAR_H (textord_occupancy_threshold, 0.4,
+"Fraction of neighbourhood");
+extern double_VAR_H (textord_underline_width, 2.0,
+"Multiple of line_size for underline");
+extern double_VAR_H(textord_min_blob_height_fraction, 0.75,
+"Min blob height/top to include blob top into xheight stats");
+extern double_VAR_H (textord_xheight_mode_fraction, 0.4,
+"Min pile height to make xheight");
+extern double_VAR_H (textord_ascheight_mode_fraction, 0.15,
+"Min pile height to make ascheight");
+extern double_VAR_H (textord_ascx_ratio_min, 1.2, "Min cap/xheight");
+extern double_VAR_H (textord_ascx_ratio_max, 1.7, "Max cap/xheight");
+extern double_VAR_H (textord_descx_ratio_min, 0.15, "Min desc/xheight");
+extern double_VAR_H (textord_descx_ratio_max, 0.6, "Max desc/xheight");
+extern double_VAR_H (textord_xheight_error_margin, 0.1, "Accepted variation");
+extern INT_VAR_H (textord_lms_line_trials, 12, "Number of linew fits to do");
+extern BOOL_VAR_H (textord_new_initial_xheight, true,
+"Use test xheight mechanism");
+extern BOOL_VAR_H(textord_debug_blob, false, "Print test blob information");
+
+// Computes the range of blob heights accepted as an xheight for a block with
+// the given line size. The minimum is block_linesize * textord_minxh rounded
+// down, but never less than textord_min_xheight; the maximum is three line
+// sizes rounded up. Results are written through min_height and max_height.
+inline void get_min_max_xheight(int block_linesize,
+                                int *min_height, int *max_height) {
+  int lower = static_cast<int32_t>(floor(block_linesize * textord_minxh));
+  if (lower < textord_min_xheight) {
+    lower = textord_min_xheight;
+  }
+  *min_height = lower;
+  *max_height = static_cast<int32_t>(ceil(block_linesize * 3.0));
+}
+
+// Classifies a row by which height statistics it carries. A row whose
+// xheight is not positive is ROW_INVALID; otherwise a positive ascrise wins,
+// then a non-zero descdrop, and anything else is ROW_UNKNOWN.
+inline ROW_CATEGORY get_row_category(const TO_ROW *row) {
+  if (row->xheight <= 0) {
+    return ROW_INVALID;
+  }
+  if (row->ascrise > 0) {
+    return ROW_ASCENDERS_FOUND;
+  }
+  if (row->descdrop != 0) {
+    return ROW_DESCENDERS_FOUND;
+  }
+  return ROW_UNKNOWN;
+}
+
+// Returns true when test lies in the closed interval
+// [num * (1 - margin), num * (1 + margin)].
+inline bool within_error_margin(float test, float num, float margin) {
+  // Reject anything not at or above the lower bound first.
+  if (!(test >= num * (1 - margin))) {
+    return false;
+  }
+  return test <= num * (1 + margin);
+}
+
+void fill_heights(TO_ROW *row, float gradient, int min_height,
+ int max_height, STATS *heights, STATS *floating_heights);
+
+float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK* block,
+ TO_BLOCK_LIST* blocks);
+float make_rows(ICOORD page_tr, // top right
+ TO_BLOCK_LIST *port_blocks);
+void make_initial_textrows(ICOORD page_tr,
+ TO_BLOCK* block, // block to do
+ FCOORD rotation, // for drawing
+ bool testing_on); // correct orientation
+void fit_lms_line(TO_ROW *row);
+void compute_page_skew(TO_BLOCK_LIST *blocks, // list of blocks
+ float &page_m, // average gradient
+ float &page_err); // average error
+void vigorous_noise_removal(TO_BLOCK* block);
+void cleanup_rows_making(ICOORD page_tr, // top right
+ TO_BLOCK* block, // block to do
+ float gradient, // gradient to fit
+ FCOORD rotation, // for drawing
+ int32_t block_edge, // edge of block
+ bool testing_on); // correct orientation
+void delete_non_dropout_rows( //find lines
+ TO_BLOCK* block, //block to do
+ float gradient, //global skew
+ FCOORD rotation, //deskew vector
+ int32_t block_edge, //left edge
+ bool testing_on //correct orientation
+);
+bool find_best_dropout_row( //find neighbours
+ TO_ROW* row, //row to test
+ int32_t distance, //dropout dist
+ float dist_limit, //threshold distance
+ int32_t line_index, //index of row
+ TO_ROW_IT* row_it, //current position
+ bool testing_on //correct orientation
+);
+TBOX deskew_block_coords( //block box
+ TO_BLOCK *block, //block to do
+ float gradient //global skew
+ );
+void compute_line_occupation( //project blobs
+ TO_BLOCK *block, //block to do
+ float gradient, //global skew
+ int32_t min_y, //min coord in block
+ int32_t max_y, //in block
+ int32_t *occupation, //output projection
+ int32_t *deltas //derivative
+ );
+void compute_occupation_threshold( //project blobs
+ int32_t low_window, //below result point
+ int32_t high_window, //above result point
+ int32_t line_count, //array sizes
+ int32_t *occupation, //input projection
+ int32_t *thresholds //output thresholds
+ );
+void compute_dropout_distances( //project blobs
+ int32_t *occupation, //input projection
+ int32_t *thresholds, //output thresholds
+ int32_t line_count //array sizes
+ );
+void expand_rows( //find lines
+ ICOORD page_tr, //top right
+ TO_BLOCK* block, //block to do
+ float gradient, //gradient to fit
+ FCOORD rotation, //for drawing
+ int32_t block_edge, //edge of block
+ bool testing_on //correct orientation
+);
+void adjust_row_limits( //tidy limits
+ TO_BLOCK *block //block to do
+ );
+void compute_row_stats( //find lines
+ TO_BLOCK* block, //block to do
+ bool testing_on //correct orientation
+);
+float median_block_xheight( //find lines
+ TO_BLOCK *block, //block to do
+ float gradient //global skew
+ );
+
+int compute_xheight_from_modes(
+ STATS *heights, STATS *floating_heights, bool cap_only, int min_height,
+ int max_height, float *xheight, float *ascrise);
+
+int32_t compute_row_descdrop(TO_ROW *row, // row to do
+ float gradient, // global skew
+ int xheight_blob_count,
+ STATS *heights);
+int32_t compute_height_modes(STATS *heights, // stats to search
+ int32_t min_height, // bottom of range
+ int32_t max_height, // top of range
+ int32_t *modes, // output array
+ int32_t maxmodes); // size of modes
+void correct_row_xheight(TO_ROW *row, // row to fix
+ float xheight, // average values
+ float ascrise,
+ float descdrop);
+void separate_underlines(TO_BLOCK* block, // block to do
+ float gradient, // skew angle
+ FCOORD rotation, // inverse landscape
+ bool testing_on); // correct orientation
+void pre_associate_blobs(ICOORD page_tr, // top right
+ TO_BLOCK* block, // block to do
+ FCOORD rotation, // inverse landscape
+ bool testing_on); // correct orientation
+void fit_parallel_rows(TO_BLOCK* block, // block to do
+ float gradient, // gradient to fit
+ FCOORD rotation, // for drawing
+ int32_t block_edge, // edge of block
+ bool testing_on); // correct orientation
+void fit_parallel_lms(float gradient, // forced gradient
+ TO_ROW *row); // row to fit
+void make_baseline_spline(TO_ROW *row, // row to fit
+ TO_BLOCK *block); // block it came from
+bool segment_baseline( //split baseline
+ TO_ROW* row, //row to fit
+ TO_BLOCK* block, //block it came from
+ int32_t& segments, //no of segments
+ int32_t* xstarts //coords of segments
+);
+double *linear_spline_baseline ( //split baseline
+TO_ROW * row, //row to fit
+TO_BLOCK * block, //block it came from
+int32_t & segments, //no of segments
+int32_t xstarts[] //coords of segments
+);
+void assign_blobs_to_rows( //find lines
+ TO_BLOCK* block, //block to do
+ float* gradient, //block skew
+ int pass, //identification
+ bool reject_misses, //chuck big ones out
+ bool make_new_rows, //add rows for unmatched
+ bool drawing_skew //draw smoothed skew
+);
+ //find best row
+OVERLAP_STATE most_overlapping_row(TO_ROW_IT* row_it, //iterator
+ TO_ROW*& best_row, //output row
+ float top, //top of blob
+ float bottom, //bottom of blob
+ float rowsize, //max row size
+ bool testing_blob //test stuff
+ );
+int blob_x_order( //sort function
+ const void *item1, //items to compare
+ const void *item2);
+int row_y_order( //sort function
+ const void *item1, //items to compare
+ const void *item2);
+int row_spacing_order( //sort function
+ const void *item1, //items to compare
+ const void *item2);
+
+void mark_repeated_chars(TO_ROW *row);
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/oldbasel.cpp b/tesseract/src/textord/oldbasel.cpp
new file mode 100644
index 00000000..f8dadc33
--- /dev/null
+++ b/tesseract/src/textord/oldbasel.cpp
@@ -0,0 +1,1698 @@
+/**********************************************************************
+ * File: oldbasel.cpp (Formerly oldbl.c)
+ * Description: A re-implementation of the old baseline algorithm.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+ // Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "oldbasel.h"
+
+#include "ccstruct.h"
+#include "statistc.h"
+#include "quadlsq.h"
+#include "detlinefit.h"
+#include "makerow.h"
+#include "drawtord.h"
+#include "textord.h"
+#include "tprintf.h"
+
+#include <vector> // for std::vector
+
+#include <algorithm>
+
+namespace tesseract {
+
+// Tunable parameters of the old baseline algorithm. The string given to
+// each *_VAR macro is its description.
+static BOOL_VAR (textord_really_old_xheight, false,
+"Use original wiseowl xheight");
+BOOL_VAR (textord_oldbl_debug, false, "Debug old baseline generation");
+static BOOL_VAR (textord_debug_baselines, false, "Debug baseline generation");
+static BOOL_VAR (textord_oldbl_paradef, true, "Use para default mechanism");
+static BOOL_VAR (textord_oldbl_split_splines, true, "Split stepped splines");
+static BOOL_VAR (textord_oldbl_merge_parts, true, "Merge suspect partitions");
+static BOOL_VAR (oldbl_corrfix, true, "Improve correlation of heights");
+static BOOL_VAR (oldbl_xhfix, false,
+"Fix bug in modes threshold for xheights");
+static BOOL_VAR(textord_ocropus_mode, false, "Make baselines for ocropus");
+static double_VAR (oldbl_xhfract, 0.4, "Fraction of est allowed in calc");
+static INT_VAR (oldbl_holed_losscount, 10,
+"Max lost before fallback line used");
+static double_VAR (oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot");
+static double_VAR (textord_oldbl_jumplimit, 0.15,
+"X fraction for new partition");
+
+#define TURNLIMIT 1 /*min size for turning point */
+#define X_HEIGHT_FRACTION 0.7 /*x-height/caps height */
+#define DESCENDER_FRACTION 0.5 /*descender/x-height */
+#define MIN_ASC_FRACTION 0.20 /*min size of ascenders */
+#define MIN_DESC_FRACTION 0.25 /*min size of descenders */
+#define MINASCRISE 2.0 /*min ascender/desc step */
+#define MAXHEIGHTVARIANCE 0.15 /*accepted variation in x-height */
+#define MAXHEIGHT 300 /*max blob height */
+#define MAXOVERLAP 0.1 /*max 10% missed overlap */
+#define MAXBADRUN 2 /*max non best for failed */
+#define HEIGHTBUCKETS 200 /* Num of buckets */
+#define MODENUM 10
+#define MAXPARTS 6 /*max no of partitions of a row (size of partsizes/partdiffs) */
+#define SPLINESIZE 23 /*size of the turn point arrays; xstarts arrays hold SPLINESIZE + 1 */
+
+// Absolute value. The argument is evaluated twice, so it must be free of
+// side effects.
+#define ABS(x) ((x)<0 ? (-(x)) : (x))
+
+/**********************************************************************
+ * make_old_baselines
+ *
+ * Entry point for making baselines the old way: run find_textlines on
+ * every row of the block, then correlate the rows and copy the block
+ * xheight into the underlying BLOCK.
+ **********************************************************************/
+
+void Textord::make_old_baselines(TO_BLOCK* block,   // block to do
+                                 bool testing_on,   // correct orientation
+                                 float gradient) {
+  // Baseline of the preceding row when that row ended with a positive
+  // xheight, otherwise nullptr.
+  QSPLINE *good_baseline = nullptr;
+  TO_ROW_IT rows = block->get_rows();
+  BLOBNBOX_IT blob_it;
+
+  for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
+    TO_ROW *this_row = rows.data();
+
+    // First try without a starting spline; if that leaves a non-positive
+    // xheight, retry seeded with the previous good baseline (if any).
+    find_textlines(block, this_row, 2, nullptr);
+    if (good_baseline != nullptr && this_row->xheight <= 0) {
+      find_textlines(block, this_row, 2, good_baseline);
+    }
+
+    if (this_row->xheight > 0) {
+      good_baseline = &this_row->baseline;  // was a good one
+      continue;
+    }
+
+    // Failed row: it must not seed the next one.
+    good_baseline = nullptr;
+    blob_it.set_to_list(this_row->blob_list());
+    if (textord_debug_baselines) {
+      tprintf("Row baseline generation failed on row at (%d,%d)\n",
+              blob_it.data()->bounding_box().left(),
+              blob_it.data()->bounding_box().bottom());
+    }
+  }
+
+  correlate_lines(block, gradient);
+  block->block->set_xheight(block->xheight);
+}
+
+
+/**********************************************************************
+ * correlate_lines
+ *
+ * Collect the rows of the block into an array, try to repair rows with
+ * a bad fit via correlate_neighbours, and then set the block xheight:
+ * from correlate_with_stats when one of the old-xheight parameters is
+ * set, otherwise via compute_block_xheight.
+ * A block without rows gets its line_size as xheight.
+ **********************************************************************/
+
+void Textord::correlate_lines(TO_BLOCK *block, float gradient) {
+  TO_ROW_IT row_it = block->get_rows();
+  const int rowcount = row_it.length();
+
+  if (rowcount == 0) {
+    // Nothing to correlate: fall back to the default value.
+    block->xheight = block->line_size;
+    return;
+  }
+
+  // Flatten the row list into an array of pointers.
+  std::vector<TO_ROW *> rows(rowcount);
+  int filled = 0;
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    rows[filled] = row_it.data();
+    ++filled;
+  }
+  TO_ROW **row_array = &rows[0];
+
+  // Try to fix bad lines.
+  correlate_neighbours(block, row_array, rowcount);
+
+  const bool use_old_xheight = textord_really_old_xheight || textord_old_xheight;
+  if (!use_old_xheight) {
+    compute_block_xheight(block, gradient);
+    return;
+  }
+
+  block->xheight =
+      static_cast<float>(correlate_with_stats(row_array, rowcount, block));
+  if (block->xheight <= 0) {
+    block->xheight = block->line_size * tesseract::CCStruct::kXHeightFraction;
+  }
+  if (block->xheight < textord_min_xheight) {
+    block->xheight = (float) textord_min_xheight;
+  }
+}
+
+
+/**********************************************************************
+ * correlate_neighbours
+ *
+ * Try to fix rows that had a bad spline fit by using neighbours.
+ * A row with a negative xheight is re-fitted with find_textlines, seeded
+ * with the baseline of a neighbouring row that has a non-negative
+ * xheight and whose baseline overlaps this row's baseline (MAXOVERLAP):
+ * first at degree 2 from the row above, then at degree 2 from the row
+ * below, and finally at degree 1 from the row above or, failing that,
+ * the row below.
+ * Any xheight that is still negative afterwards is negated, so every
+ * row leaves with a non-negative xheight.
+ **********************************************************************/
+
+void Textord::correlate_neighbours(TO_BLOCK *block,  // block rows are in.
+                                   TO_ROW **rows,    // rows of block.
+                                   int rowcount) {   // no of rows to do.
+  TO_ROW *row;   /*current row */
+  int rowindex;  /*no of row */
+  int otherrow;  /*second row */
+  int upperrow;  /*row above to use */
+  int lowerrow;  /*row below to use */
+
+  for (rowindex = 0; rowindex < rowcount; rowindex++) {
+    row = rows[rowindex];  /*current row */
+    if (row->xheight < 0) {
+      /*quadratic failed */
+      // NOTE(review): the upward search starts at rowindex - 2 (skipping
+      // the row directly above) while the downward search starts at
+      // rowindex + 1 - confirm that this asymmetry is intended.
+      for (otherrow = rowindex - 2;
+           otherrow >= 0
+           && (rows[otherrow]->xheight < 0.0
+               || !row->baseline.overlap (&rows[otherrow]->baseline,
+                                          MAXOVERLAP)); otherrow--) {
+      }
+      upperrow = otherrow;  /*decent row above, or -1 if none */
+      for (otherrow = rowindex + 1;
+           otherrow < rowcount
+           && (rows[otherrow]->xheight < 0.0
+               || !row->baseline.overlap (&rows[otherrow]->baseline,
+                                          MAXOVERLAP)); otherrow++) {
+      }
+      lowerrow = otherrow;  /*decent row below, or rowcount if none */
+      if (upperrow >= 0)
+        find_textlines(block, row, 2, &rows[upperrow]->baseline);
+      if (row->xheight < 0 && lowerrow < rowcount)
+        find_textlines(block, row, 2, &rows[lowerrow]->baseline);
+      if (row->xheight < 0) {
+        // Quadratic re-fits failed too: fall back to a linear fit.
+        if (upperrow >= 0)
+          find_textlines(block, row, 1, &rows[upperrow]->baseline);
+        else if (lowerrow < rowcount)
+          find_textlines(block, row, 1, &rows[lowerrow]->baseline);
+      }
+    }
+  }
+
+  // The previous version also tracked the largest xheight here, but the
+  // value was never read; only the sign fix-up has an effect.
+  for (rowindex = 0; rowindex < rowcount; rowindex++) {
+    row = rows[rowindex];  /*current row */
+    if (row->xheight < 0)  /*linear failed */
+      /*make do */
+      row->xheight = -row->xheight;
+  }
+}
+
+
+/**********************************************************************
+ * correlate_with_stats
+ *
+ * Correlate the x-heights and ascender heights of a block to fill-in
+ * the ascender height and descender height for rows without one.
+ *
+ * Rows with a positive ascrise contribute to the mean x-height and the
+ * mean ascender rise; every other row is counted as a full-height
+ * sample. Rows with a negative descdrop contribute to the mean
+ * descender drop. When the mean x-height is positive the block's
+ * cell_over_xheight is set, and then each row's xheight, ascrise,
+ * descdrop and all_caps are adjusted against the block means.
+ *
+ * Returns the mean x-height truncated to an int.
+ **********************************************************************/
+
+int Textord::correlate_with_stats(TO_ROW **rows, // rows of block.
+ int rowcount, // no of rows to do.
+ TO_BLOCK* block) {
+ TO_ROW *row; /*current row */
+ int rowindex; /*no of row */
+ float lineheight; /*mean x-height */
+ float ascheight; /*average ascenders */
+ float minascheight; /*min allowed ascheight */
+ int xcount; /*no of samples for xheight */
+ float fullheight; /*mean top height */
+ int fullcount; /*no of samples */
+ float descheight; /*mean descender drop */
+ float mindescheight; /*min allowed descheight */
+ int desccount; /*no of samples */
+
+ /*no samples */
+ xcount = fullcount = desccount = 0;
+ lineheight = ascheight = fullheight = descheight = 0.0;
+ // Accumulate sums; each row lands in exactly one of xcount/fullcount.
+ for (rowindex = 0; rowindex < rowcount; rowindex++) {
+ row = rows[rowindex]; /*current row */
+ if (row->ascrise > 0.0) { /*got ascenders? */
+ lineheight += row->xheight;/*average x-heights */
+ ascheight += row->ascrise; /*average ascenders */
+ xcount++;
+ }
+ else {
+ fullheight += row->xheight;/*assume full height */
+ fullcount++;
+ }
+ if (row->descdrop < 0.0) { /*got descenders? */
+ /*average descenders */
+ descheight += row->descdrop;
+ desccount++;
+ }
+ }
+
+ // With oldbl_corrfix set, the ascender-based estimate is only used when
+ // at least as many rows have ascenders as do not.
+ if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) {
+ lineheight /= xcount; /*average x-height */
+ /*average caps height */
+ fullheight = lineheight + ascheight / xcount;
+ /*must be decent size */
+ if (fullheight < lineheight * (1 + MIN_ASC_FRACTION))
+ fullheight = lineheight * (1 + MIN_ASC_FRACTION);
+ }
+ else {
+ // Reached with xcount == 0 or xcount < fullcount, so fullcount is
+ // non-zero whenever rowcount > 0.
+ fullheight /= fullcount; /*average max height */
+ /*guess x-height */
+ lineheight = fullheight * X_HEIGHT_FRACTION;
+ }
+ if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2))
+ descheight /= desccount; /*average descenders */
+ else
+ /*guess descenders */
+ descheight = -lineheight * DESCENDER_FRACTION;
+
+ if (lineheight > 0.0f)
+ block->block->set_cell_over_xheight((fullheight - descheight) / lineheight);
+
+ minascheight = lineheight * MIN_ASC_FRACTION;
+ mindescheight = -lineheight * MIN_DESC_FRACTION;
+ // Second pass: adjust each row against the block means.
+ for (rowindex = 0; rowindex < rowcount; rowindex++) {
+ row = rows[rowindex]; /*do each row */
+ row->all_caps = false;
+ // NOTE(review): divides by row->xheight without a zero check.
+ if (row->ascrise / row->xheight < MIN_ASC_FRACTION) {
+ /*no ascenders */
+ if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
+ && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) {
+ // Row height matches the mean x-height: give it the mean ascender.
+ row->ascrise = fullheight - lineheight;
+ /*set to average */
+ row->xheight = lineheight;
+
+ }
+ else if (row->xheight >= fullheight * (1 - MAXHEIGHTVARIANCE)
+ && row->xheight <= fullheight * (1 + MAXHEIGHTVARIANCE)) {
+ // Row height matches the mean full height: treat it as all caps.
+ row->ascrise = row->xheight - lineheight;
+ /*set to average */
+ row->xheight = lineheight;
+ row->all_caps = true;
+ }
+ else {
+ // Matches neither mean: split the row height in the block's
+ // ascender/full-height proportion.
+ row->ascrise = (fullheight - lineheight) * row->xheight
+ / fullheight;
+ /*scale it */
+ row->xheight -= row->ascrise;
+ row->all_caps = true;
+ }
+ if (row->ascrise < minascheight)
+ row->ascrise =
+ row->xheight * ((1.0 - X_HEIGHT_FRACTION) / X_HEIGHT_FRACTION);
+ }
+ // descdrop is negative for real descenders, so "greater than
+ // mindescheight" means the drop is too small.
+ if (row->descdrop > mindescheight) {
+ if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE)
+ && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE))
+ /*set to average */
+ row->descdrop = descheight;
+ else
+ row->descdrop = -row->xheight * DESCENDER_FRACTION;
+ }
+ }
+ return static_cast<int>(lineheight); //block xheight
+}
+
+
+/**********************************************************************
+ * find_textlines
+ *
+ * Compute the baseline for the given row.
+ *
+ * block  - block the row is in; supplies line_size and the bounding box
+ *          used to extrapolate the baseline.
+ * row    - row to fit; its baseline, xheight, ascrise and descdrop are
+ *          written here or by the helpers called from here.
+ * degree - degree passed to segment_spline and to the QSPLINE fit.
+ * spline - optional starting spline (may be nullptr), handed to
+ *          make_first_baseline / make_holed_baseline.
+ *
+ * A row with fewer than two usable blobs gets xheight -1 and zero
+ * ascrise/descdrop before the final xheight computation runs.
+ **********************************************************************/
+
+void Textord::find_textlines(TO_BLOCK *block, // block row is in
+ TO_ROW *row, // row to do
+ int degree, // required approximation
+ QSPLINE *spline) { // starting spline
+ int partcount; /*no of partitions of */
+ bool holed_line = false; //lost too many blobs
+ int bestpart; /*biggest partition */
+ int partsizes[MAXPARTS]; /*no in each partition */
+ int lineheight; /*guessed x-height */
+ float jumplimit; /*allowed delta change */
+ int blobcount; /*no of blobs on line */
+ int pointcount; /*no of coords */
+ int xstarts[SPLINESIZE + 1]; //segment boundaries
+ int segments; //no of segments
+
+ //no of blobs in row
+ blobcount = row->blob_list ()->length ();
+ // NOTE(review): the &v[0] expressions below assume blobcount > 0;
+ // confirm that rows reaching this function are never empty.
+ // partition no of each blob
+ std::vector<char> partids(blobcount);
+ // useful sample points
+ std::vector<int> xcoords(blobcount);
+ // useful sample points
+ std::vector<int> ycoords(blobcount);
+ // edges of blob rectangles
+ std::vector<TBOX> blobcoords(blobcount);
+ // diffs from 1st approx
+ std::vector<float> ydiffs(blobcount);
+
+ // blobcount is passed by reference and is overwritten with the number
+ // of boxes actually kept in blobcoords.
+ lineheight = get_blob_coords(row, static_cast<int>(block->line_size), &blobcoords[0],
+ holed_line, blobcount);
+ /*limit for line change */
+ jumplimit = lineheight * textord_oldbl_jumplimit;
+ if (jumplimit < MINASCRISE)
+ jumplimit = MINASCRISE;
+
+ if (textord_oldbl_debug) {
+ tprintf
+ ("\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n",
+ block->line_size, lineheight, jumplimit);
+ }
+ // First approximation of the baseline.
+ if (holed_line)
+ make_holed_baseline(&blobcoords[0], blobcount, spline, &row->baseline,
+ row->line_m ());
+ else
+ make_first_baseline(&blobcoords[0], blobcount,
+ &xcoords[0], &ycoords[0], spline, &row->baseline, jumplimit);
+#ifndef GRAPHICS_DISABLED
+ if (textord_show_final_rows)
+ row->baseline.plot (to_win, ScrollView::GOLDENROD);
+#endif
+ if (blobcount > 1) {
+ // Partition the blobs, keep the points of the biggest partition and
+ // segment them for the spline fit.
+ bestpart = partition_line(&blobcoords[0], blobcount,
+ &partcount, &partids[0], partsizes,
+ &row->baseline, jumplimit, &ydiffs[0]);
+ pointcount = partition_coords(&blobcoords[0], blobcount,
+ &partids[0], bestpart, &xcoords[0], &ycoords[0]);
+ segments = segment_spline(&blobcoords[0], blobcount,
+ &xcoords[0], &ycoords[0], degree, pointcount, xstarts);
+ if (!holed_line) {
+ // Refit until split_stepped_spline no longer asks for another pass
+ // (or splitting is disabled).
+ do {
+ row->baseline = QSPLINE(xstarts, segments,
+ &xcoords[0], &ycoords[0], pointcount, degree);
+ }
+ while (textord_oldbl_split_splines
+ && split_stepped_spline (&row->baseline, jumplimit / 2,
+ &xcoords[0], xstarts, segments));
+ }
+ find_lesser_parts(row, &blobcoords[0], blobcount,
+ &partids[0], partsizes, partcount, bestpart);
+
+ }
+ else {
+ row->xheight = -1.0f; /*failed */
+ row->descdrop = 0.0f;
+ row->ascrise = 0.0f;
+ }
+ row->baseline.extrapolate (row->line_m (),
+ block->block->pdblk.bounding_box ().left (),
+ block->block->pdblk.bounding_box ().right ());
+
+ // The xheight computation runs for every row, including those marked
+ // as failed above.
+ if (textord_really_old_xheight) {
+ old_first_xheight (row, &blobcoords[0], lineheight,
+ blobcount, &row->baseline, jumplimit);
+ } else if (textord_old_xheight) {
+ make_first_xheight (row, &blobcoords[0], lineheight, static_cast<int>(block->line_size),
+ blobcount, &row->baseline, jumplimit);
+ } else {
+ compute_row_xheight(row, block->block->classify_rotation(),
+ row->line_m(), block->line_size);
+ }
+}
+
+/**********************************************************************
+ * get_blob_coords
+ *
+ * Fill the blobcoords array with the coordinates of the blobs
+ * in the row. The return value is the first guess at the line height.
+ *
+ * A box is kept when it is the first one, when it is taller than a
+ * quarter of lineheight, when the iterator has cycled, or when it is
+ * roughly square (counts as a dot). Any other box is dropped and
+ * counted as lost. holed_line is set when the longest run of
+ * consecutive losses exceeds oldbl_holed_losscount, and outcount
+ * receives the number of boxes kept.
+ *
+ * For an empty row the function returns 0 and leaves holed_line and
+ * outcount untouched.
+ **********************************************************************/
+
+int get_blob_coords( //get boxes
+ TO_ROW* row, //row to use
+ int32_t lineheight, //block level
+ TBOX* blobcoords, //output boxes
+ bool& holed_line, //lost a lot of blobs
+ int& outcount //no of real blobs
+) {
+ //blobs
+ BLOBNBOX_IT blob_it = row->blob_list ();
+ int blobindex; /*no along text line */
+ int losscount; //lost blobs
+ int maxlosscount; //greatest lost blobs
+ /*height stat collection */
+ STATS heightstat (0, MAXHEIGHT);
+
+ if (blob_it.empty ())
+ return 0; //none
+ maxlosscount = 0;
+ losscount = 0;
+ blob_it.mark_cycle_pt ();
+ blobindex = 0;
+ do {
+ // The next box is always written to slot blobindex; the slot is only
+ // consumed (blobindex advanced) if the box is kept, otherwise the
+ // next iteration overwrites it.
+ blobcoords[blobindex] = box_next_pre_chopped (&blob_it);
+ if (blobcoords[blobindex].height () > lineheight * 0.25)
+ heightstat.add (blobcoords[blobindex].height (), 1);
+ if (blobindex == 0
+ || blobcoords[blobindex].height () > lineheight * 0.25
+ || blob_it.cycled_list ()) {
+ blobindex++; /*no of merged blobs */
+ losscount = 0;
+ }
+ else {
+ // Small box: keep it only if its width and height are within a
+ // factor of oldbl_dot_error_size of each other.
+ if (blobcoords[blobindex].height ()
+ < blobcoords[blobindex].width () * oldbl_dot_error_size
+ && blobcoords[blobindex].width ()
+ < blobcoords[blobindex].height () * oldbl_dot_error_size) {
+ //counts as dot
+ blobindex++;
+ losscount = 0;
+ }
+ else {
+ losscount++; //lost it
+ if (losscount > maxlosscount)
+ //remember max
+ maxlosscount = losscount;
+ }
+ }
+ }
+ while (!blob_it.cycled_list ());
+
+ holed_line = maxlosscount > oldbl_holed_losscount;
+ outcount = blobindex; /*total blobs */
+
+ // With more than one height sample use heightstat.ile (0.25)
+ // (presumably the lower-quartile height - confirm against STATS);
+ // otherwise fall back to the height of the first box.
+ if (heightstat.get_total () > 1)
+ /*guess x-height */
+ return static_cast<int>(heightstat.ile (0.25));
+ else
+ return blobcoords[0].height ();
+}
+
+
+/**********************************************************************
+ * make_first_baseline
+ *
+ * Make the first estimate at a baseline, either by shifting
+ * a supplied previous spline, or by doing a piecewise linear
+ * approximation using all the blobs.
+ *
+ * The supplied spline is used only if it is non-null, has at least 3
+ * segments, and extends to within MAXOVERLAP of both ends of the row.
+ * If it is not usable and textord_oldbl_paradef is set, the function
+ * returns without writing *baseline. Otherwise xcoords/ycoords are
+ * filled with the blob centres/bottoms and a linear fit is made, which
+ * is re-fitted piecewise if the smoothed residuals are wavy enough.
+ **********************************************************************/
+
+void
+make_first_baseline ( //initial approximation
+TBOX blobcoords[], /*blob bounding boxes */
+int blobcount, /*no of blobcoords */
+int xcoords[], /*coords for spline */
+int ycoords[], /*approximator */
+QSPLINE * spline, /*initial spline */
+QSPLINE * baseline, /*output spline */
+float jumplimit /*guess half descenders */
+) {
+ int leftedge; /*left edge of line */
+ int rightedge; /*right edge of line */
+ int blobindex; /*current blob */
+ int segment; /*current segment */
+ float prevy, thisy, nexty; /*3 y coords */
+ float y1, y2, y3; /*3 smooth blobs */
+ float maxmax, minmin; /*absolute limits */
+ int x2 = 0; /*right edge of old y3 */
+ int ycount; /*no of ycoords in use */
+ float yturns[SPLINESIZE]; /*y coords of turn pts */
+ int xturns[SPLINESIZE]; /*xcoords of turn pts */
+ int xstarts[SPLINESIZE + 1];
+ int segments; //no of segments
+ ICOORD shift; //shift of spline
+
+ prevy = 0;
+ // NOTE(review): blobcoords[0] and blobcoords[blobcount - 1] are read
+ // unconditionally, so blobcount is assumed to be at least 1.
+ /*left edge of row */
+ leftedge = blobcoords[0].left ();
+ /*right edge of line */
+ rightedge = blobcoords[blobcount - 1].right ();
+ if (spline == nullptr /*no given spline */
+ || spline->segments < 3 /*or trivial */
+ /*or too non-overlap */
+ || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge)
+ || spline->xcoords[spline->segments - 1] < rightedge
+ - MAXOVERLAP * (rightedge - leftedge)) {
+ if (textord_oldbl_paradef)
+ return; //use default
+ // Single segment spanning the row, fitted to blob centre/bottom points.
+ xstarts[0] = blobcoords[0].left () - 1;
+ for (blobindex = 0; blobindex < blobcount; blobindex++) {
+ xcoords[blobindex] = (blobcoords[blobindex].left ()
+ + blobcoords[blobindex].right ()) / 2;
+ ycoords[blobindex] = blobcoords[blobindex].bottom ();
+ }
+ xstarts[1] = blobcoords[blobcount - 1].right () + 1;
+ segments = 1; /*no of segments */
+
+ /*linear */
+ *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
+
+ if (blobcount >= 3) {
+ // Scan the residuals from the linear fit with a 3-point window and
+ // record turning points (local maxima/minima) of the smooth ones.
+ y1 = y2 = y3 = 0.0f;
+ ycount = 0;
+ segment = 0; /*no of segments */
+ maxmax = minmin = 0.0f;
+ thisy = ycoords[0] - baseline->y (xcoords[0]);
+ nexty = ycoords[1] - baseline->y (xcoords[1]);
+ for (blobindex = 2; blobindex < blobcount; blobindex++) {
+ prevy = thisy; /*shift ycoords */
+ thisy = nexty;
+ nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]);
+ /*middle of smooth y */
+ if (ABS (thisy - prevy) < jumplimit && ABS (thisy - nexty) < jumplimit) {
+ y1 = y2; /*shift window */
+ y2 = y3;
+ y3 = thisy; /*middle point */
+ ycount++;
+ /*local max */
+ if (ycount >= 3 && ((y1 < y2 && y2 >= y3)
+ /*local min */
+ || (y1 > y2 && y2 <= y3))) {
+ // Bounded so that the xstarts array filled from these turning
+ // points below cannot overflow.
+ if (segment < SPLINESIZE - 2) {
+ /*turning pt */
+ xturns[segment] = x2;
+ yturns[segment] = y2;
+ segment++; /*no of spline segs */
+ }
+ }
+ if (ycount == 1) {
+ maxmax = minmin = y3;/*initialise limits */
+ }
+ else {
+ if (y3 > maxmax)
+ maxmax = y3; /*biggest max */
+ if (y3 < minmin)
+ minmin = y3; /*smallest min */
+ }
+ /*possible turning pt */
+ x2 = blobcoords[blobindex - 1].right ();
+ }
+ }
+
+ jumplimit *= 1.2f;
+ /*must be wavy */
+ if (maxmax - minmin > jumplimit) {
+ // Turn the significant turning points into segment boundaries;
+ // xstarts[0] keeps the value set for the linear fit.
+ ycount = segment; /*no of segments */
+ for (blobindex = 0, segment = 1; blobindex < ycount;
+ blobindex++) {
+ if (yturns[blobindex] > minmin + jumplimit
+ || yturns[blobindex] < maxmax - jumplimit) {
+ /*significant peak */
+ if (segment == 1
+ || yturns[blobindex] > prevy + jumplimit
+ || yturns[blobindex] < prevy - jumplimit) {
+ /*different to previous */
+ xstarts[segment] = xturns[blobindex];
+ segment++;
+ prevy = yturns[blobindex];
+ }
+ /*bigger max */
+ else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy)
+ /*smaller min */
+ || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) {
+ xstarts[segment - 1] = xturns[blobindex];
+ /*improved previous */
+ prevy = yturns[blobindex];
+ }
+ }
+ }
+ xstarts[segment] = blobcoords[blobcount - 1].right () + 1;
+ segments = segment; /*no of segments */
+ /*linear */
+ *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
+ }
+ }
+ }
+ else {
+ // Usable starting spline: copy it and shift it vertically so that it
+ // passes through the bottom of the first blob at that blob's right edge.
+ *baseline = *spline; /*copy it */
+ shift = ICOORD (0, static_cast<int16_t>(blobcoords[0].bottom ()
+ - spline->y (blobcoords[0].right ())));
+ baseline->move (shift);
+ }
+}
+
+
+/**********************************************************************
+ * make_holed_baseline
+ *
+ * Make the first estimate at a baseline as a straight line of the given
+ * gradient, fitted through the centre-bottom points of all the blobs.
+ * If a usable starting spline is supplied (non-null, at least 3
+ * segments, reaching to within MAXOVERLAP of both ends of the row), a
+ * copy of it is used instead, shifted vertically to meet the fitted
+ * line at the centre of the row.
+ **********************************************************************/
+
+void
+make_holed_baseline (          //initial approximation
+TBOX blobcoords[],             /*blob bounding boxes */
+int blobcount,                 /*no of blobcoords */
+QSPLINE * spline,              /*initial spline */
+QSPLINE * baseline,            /*output spline */
+float gradient                 //of line
+) {
+  // Horizontal extent of the row.
+  const int row_left = blobcoords[0].left ();
+  const int row_right = blobcoords[blobcount - 1].right ();
+
+  // Fit y = gradient * x + intercept through the blob bottoms.
+  tesseract::DetLineFit line_fit;
+  for (int i = 0; i < blobcount; ++i) {
+    line_fit.Add(ICOORD((blobcoords[i].left() + blobcoords[i].right()) / 2,
+                        blobcoords[i].bottom()));
+  }
+  float intercept;               //line parameter
+  line_fit.ConstrainedFit(gradient, &intercept);
+
+  // Single-segment spline holding the straight line.
+  int32_t span[2];
+  span[0] = row_left;
+  span[1] = row_right;
+  double line_coeffs[3];
+  line_coeffs[0] = 0;
+  line_coeffs[1] = gradient;
+  line_coeffs[2] = intercept;
+  *baseline = QSPLINE (1, span, line_coeffs);
+
+  // The supplied spline must exist, be non-trivial and cover the row.
+  const bool spline_usable =
+      spline != nullptr
+      && spline->segments >= 3
+      && spline->xcoords[1] <= row_left + MAXOVERLAP * (row_right - row_left)
+      && spline->xcoords[spline->segments - 1] >= row_right
+      - MAXOVERLAP * (row_right - row_left);
+  if (!spline_usable) {
+    return;
+  }
+
+  // Replace the line by the spline, moved to agree with the line at the
+  // centre of the row.
+  *baseline = *spline;
+  float centre_x = (row_left + row_right) / 2.0;
+  ICOORD shift;
+  shift = ICOORD (0, static_cast<int16_t>(gradient * centre_x + intercept
+                                          - spline->y (centre_x)));
+  baseline->move (shift);
+}
+
+
+/**********************************************************************
+ * partition_line
+ *
+ * Split the blobs of a row into partitions of continuous y position
+ * relative to the spline. jumplimit is the largest step allowed before
+ * a new partition is started. Partitioning runs forwards from the
+ * start blob chosen by get_ydiffs to the end of the row, then backwards
+ * from the same start blob to the beginning.
+ * partids, partsizes, numparts and ydiffs are outputs.
+ * The return value is the index of the biggest partition.
+ **********************************************************************/
+
+int
+partition_line (               //partition blobs
+TBOX blobcoords[],             //bounding boxes
+int blobcount,                 /*no of blobs on row */
+int *numparts,                 /*number of partitions */
+char partids[],                /*partition no of each blob */
+int partsizes[],               /*no in each partition */
+QSPLINE * spline,              /*curve to fit to */
+float jumplimit,               /*allowed delta change */
+float ydiffs[]                 /*diff from spline */
+) {
+  float partdiffs[MAXPARTS];   /*step between parts */
+
+  /*zero them all */
+  std::fill(partsizes, partsizes + MAXPARTS, 0);
+
+  const int startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs);
+  *numparts = 1;               /*1 partition */
+
+  // Running state shared by both sweeps.
+  int current_part = -1;       /*first point */
+  float drift = 0.0f;
+  float last_delta = 0.0f;
+
+  // Assign one blob to a partition and count it.
+  auto place_blob = [&](int index) {
+    if (textord_oldbl_debug) {
+      tprintf ("%d(%d,%d), ", index,
+               blobcoords[index].left (),
+               blobcoords[index].bottom ());
+    }
+    current_part = choose_partition(ydiffs[index], partdiffs, current_part,
+                                    jumplimit, &drift, &last_delta, numparts);
+    partids[index] = current_part;
+    partsizes[current_part]++;
+  };
+
+  // Forward sweep from the start blob.
+  for (int index = startx; index < blobcount; index++) {
+    place_blob(index);
+  }
+
+  // Backward sweep from the same blob; it is visited again, so take its
+  // first count back out of partition 0.
+  current_part = -1;
+  drift = 0.0f;
+  last_delta = 0.0f;
+  partsizes[0]--;
+  for (int index = startx; index >= 0; index--) {
+    place_blob(index);
+  }
+
+  // Pick the fullest partition; ties go to the higher index.
+  int biggestpart = 0;
+  for (int part = 1; part < *numparts; part++) {
+    if (partsizes[part] >= partsizes[biggestpart]) {
+      biggestpart = part;
+    }
+  }
+
+  if (textord_oldbl_merge_parts) {
+    merge_oldbl_parts(blobcoords, blobcount, partids, partsizes,
+                      biggestpart, jumplimit);
+  }
+  return biggestpart;
+}
+
+
+/**********************************************************************
+ * merge_oldbl_parts
+ *
+ * For any adjacent group of blobs in a different part, put them in the
+ * main part if they fit closely to neighbours in the main part.
+ *
+ * A run of more than MAXBADRUN consecutive blobs outside biggestpart is
+ * fitted with a straight line. The nearest blob(s) of biggestpart on
+ * either side of the run are then compared with that line; if one lies
+ * within jumplimit of it, the whole run is relabelled as biggestpart.
+ * A run is only examined when a later blob changes partition, so a run
+ * that extends to the last blob of the row is not considered.
+ **********************************************************************/
+
+void
+merge_oldbl_parts ( //partition blobs
+TBOX blobcoords[], //bounding boxes
+int blobcount, /*no of blobs on row */
+char partids[], /*partition no of each blob */
+int partsizes[], /*no in each partition */
+int biggestpart, //major partition
+float jumplimit /*allowed delta change */
+) {
+ bool found_one; //found a bestpart blob
+ bool close_one; //found was close enough
+ int blobindex; /*no along text line */
+ int prevpart; //previous iteration
+ int runlength; //no in this part
+ float diff; /*difference from line */
+ int startx; /*index of start blob */
+ int test_blob; //another index
+ FCOORD coord; //blob coordinate
+ float m, c; //fitted line
+ QLSQ stats; //line stuff
+
+ prevpart = biggestpart;
+ runlength = 0;
+ startx = 0;
+ for (blobindex = 0; blobindex < blobcount; blobindex++) {
+ if (partids[blobindex] != prevpart) {
+ // tprintf("Partition change at (%d,%d) from %d to %d after run of %d\n",
+ // blobcoords[blobindex].left(),blobcoords[blobindex].bottom(),
+ // prevpart,partids[blobindex],runlength);
+ // The run [startx, blobindex) has just ended; test it if it was a
+ // long enough run outside the main partition.
+ if (prevpart != biggestpart && runlength > MAXBADRUN) {
+ stats.clear ();
+ for (test_blob = startx; test_blob < blobindex; test_blob++) {
+ coord = FCOORD ((blobcoords[test_blob].left ()
+ + blobcoords[test_blob].right ()) / 2.0,
+ blobcoords[test_blob].bottom ());
+ stats.add (coord.x (), coord.y ());
+ }
+ stats.fit (1);
+ m = stats.get_b ();
+ c = stats.get_c ();
+ if (textord_oldbl_debug)
+ tprintf ("Fitted line y=%g x + %g\n", m, c);
+ found_one = false;
+ close_one = false;
+ // Search outwards from both ends of the run, one step at a time,
+ // until a blob of the main partition is found on either side.
+ for (test_blob = 1; !found_one
+ && (startx - test_blob >= 0
+ || blobindex + test_blob <= blobcount); test_blob++) {
+ if (startx - test_blob >= 0
+ && partids[startx - test_blob] == biggestpart) {
+ found_one = true;
+ coord = FCOORD ((blobcoords[startx - test_blob].left ()
+ + blobcoords[startx -
+ test_blob].right ()) /
+ 2.0,
+ blobcoords[startx -
+ test_blob].bottom ());
+ diff = m * coord.x () + c - coord.y ();
+ if (textord_oldbl_debug)
+ tprintf
+ ("Diff of common blob to suspect part=%g at (%g,%g)\n",
+ diff, coord.x (), coord.y ());
+ if (diff < jumplimit && -diff < jumplimit)
+ close_one = true;
+ }
+ // To the right the first candidate (test_blob == 1) is
+ // blobindex itself, the blob that ended the run.
+ if (blobindex + test_blob <= blobcount
+ && partids[blobindex + test_blob - 1] == biggestpart) {
+ found_one = true;
+ coord =
+ FCOORD ((blobcoords[blobindex + test_blob - 1].
+ left () + blobcoords[blobindex + test_blob -
+ 1].right ()) / 2.0,
+ blobcoords[blobindex + test_blob -
+ 1].bottom ());
+ diff = m * coord.x () + c - coord.y ();
+ if (textord_oldbl_debug)
+ tprintf
+ ("Diff of common blob to suspect part=%g at (%g,%g)\n",
+ diff, coord.x (), coord.y ());
+ if (diff < jumplimit && -diff < jumplimit)
+ close_one = true;
+ }
+ }
+ if (close_one) {
+ if (textord_oldbl_debug)
+ tprintf
+ ("Merged %d blobs back into part %d from %d starting at (%d,%d)\n",
+ runlength, biggestpart, prevpart,
+ blobcoords[startx].left (),
+ blobcoords[startx].bottom ());
+ //switch sides
+ // NOTE(review): only the old partition's count is reduced;
+ // partsizes[biggestpart] is not increased - confirm intended.
+ partsizes[prevpart] -= runlength;
+ for (test_blob = startx; test_blob < blobindex; test_blob++)
+ partids[test_blob] = biggestpart;
+ }
+ }
+ // Start a new run at this blob.
+ prevpart = partids[blobindex];
+ runlength = 1;
+ startx = blobindex;
+ }
+ else
+ runlength++;
+ }
+}
+
+
+/**********************************************************************
+ * get_ydiffs
+ *
+ * Store in ydiffs, for every blob, the difference between the blob
+ * bottom and the spline at the blob centre, corrected by the
+ * accumulated step functions of the spline.
+ * The return value is the index of the middle blob of the three
+ * consecutive blobs with the smallest sum of absolute differences
+ * (0 if the row has fewer than three blobs).
+ **********************************************************************/
+
+int
+get_ydiffs (                   //evaluate differences
+TBOX blobcoords[],             //bounding boxes
+int blobcount,                 /*no of blobs */
+QSPLINE * spline,              /*approximating spline */
+float ydiffs[]                 /*output */
+) {
+  float window_sum = 0.0f;     /*sum of the last three |diff| */
+  int best_index = 0;          /*index of smallest window */
+  float best_sum = static_cast<float>(INT32_MAX);
+  float step_total = 0.0f;     /*sum of spline steps */
+  int prev_x = blobcoords[0].left ();
+
+  for (int index = 0; index < blobcount; index++) {
+    /*centre of blob */
+    const int mid_x = (blobcoords[index].left () + blobcoords[index].right ()) >> 1;
+    //step functions in spline
+    step_total += spline->step (prev_x, mid_x);
+    prev_x = mid_x;
+
+    // Built up in three float steps, as in the reference formulation,
+    // so that intermediate rounding is unchanged.
+    float offset = blobcoords[index].bottom ();
+    offset -= spline->y (mid_x);
+    offset += step_total;
+    ydiffs[index] = offset;
+
+    // Slide the three-blob window: drop the oldest term, add the new one.
+    if (index >= 3) {
+      window_sum -= ABS (ydiffs[index - 3]);
+    }
+    window_sum += ABS (offset);
+    if (index >= 2 && window_sum < best_sum) {
+      best_sum = window_sum;
+      best_index = index - 1;  /*middle of set */
+    }
+  }
+  return best_index;
+}
+
+
+/**********************************************************************
+ * choose_partition
+ *
+ * Choose a partition for a point with the given diff from the spline
+ * and return its index. The point stays in lastpart while its adjusted
+ * diff is within jumplimit / 2 of it; otherwise the nearest existing
+ * partition is taken, or a new one is started if even the nearest is
+ * more than jumplimit away and fewer than MAXPARTS exist.
+ * A negative lastpart marks the first point and resets partdiffs[0],
+ * *drift and *lastdelta. partdiffs, *drift, *lastdelta and *partcount
+ * are updated in place.
+ **********************************************************************/
+
+int
+choose_partition (             //select partition
+float diff,                    /*diff from spline */
+float partdiffs[],             /*diff on all parts */
+int lastpart,                  /*last assigned partition */
+float jumplimit,               /*new part threshold */
+float* drift,
+float* lastdelta,
+int *partcount                 /*no of partitions */
+) {
+  if (lastpart < 0) {
+    /*first point */
+    partdiffs[0] = diff;
+    lastpart = 0;
+    *drift = 0.0f;
+    *lastdelta = 0.0f;
+  }
+
+  /*adjusted diff from part */
+  float delta = diff - partdiffs[lastpart] - *drift;
+  if (textord_oldbl_debug) {
+    tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, *drift);
+  }
+
+  int chosen = lastpart;       /*default: stay where we were */
+  if (ABS (delta) > jumplimit / 2) {
+    // Too far from the last partition: look for the nearest one.
+    chosen = 0;
+    float nearest = diff - partdiffs[0] - *drift;
+    for (int part = 1; part < *partcount; part++) {
+      const float candidate = diff - partdiffs[part] - *drift;
+      if (ABS (candidate) < ABS (nearest)) {
+        nearest = candidate;
+        chosen = part;
+      }
+    }
+    delta = nearest;
+    if (ABS (nearest) > jumplimit && *partcount < MAXPARTS) {
+      // Even the nearest is too far and there is a spare slot:
+      // start a new partition at this point.
+      chosen = (*partcount)++;
+      partdiffs[chosen] = diff - *drift;
+      delta = 0.0f;
+    }
+  }
+
+  const bool same_part = chosen == lastpart;
+  if (same_part
+      && (ABS (delta - *lastdelta) < jumplimit / 2
+          || ABS (delta) < jumplimit / 2)) {
+    /*smooth the drift */
+    // NOTE(review): this evaluates to drift + delta / 3 rather than a
+    // weighted average - confirm the divisor is intended.
+    *drift = (3 * *drift + delta) / 3;
+  }
+  *lastdelta = delta;
+
+  if (textord_oldbl_debug) {
+    tprintf ("P=%d\n", chosen);
+  }
+
+  return chosen;
+}
+
+/**********************************************************************
+ * partition_coords
+ *
+ * Collect one (x,y) point for every blob whose partids entry equals
+ * bestpart: x is the horizontal centre of the blob's box and y is the
+ * bottom of the box. Points are written to xcoords/ycoords in blob
+ * order and the number of points written is returned.
+ **********************************************************************/
+
+int
+partition_coords (                     //find relevant coords
+TBOX blobcoords[],                     //bounding boxes
+int blobcount,                         /*no of blobs in row */
+char partids[],                        /*partition no of each blob */
+int bestpart,                          /*best new partition */
+int xcoords[],                         /*points to work on */
+int ycoords[]                          /*points to work on */
+) {
+  int found = 0;                        // points emitted so far
+
+  for (int i = 0; i < blobcount; ++i) {
+    if (partids[i] != bestpart)
+      continue;                         // blob belongs to another partition
+    TBOX &box = blobcoords[i];
+    xcoords[found] = (box.left () + box.right ()) >> 1;
+    ycoords[found] = box.bottom ();
+    ++found;
+  }
+  return found;
+}
+
+
+/**********************************************************************
+ * segment_spline
+ *
+ * Segment the row at midpoints between maxima and minima of the x,y pairs.
+ * The xstarts of the segments are returned and the number found.
+ *
+ * xstarts[0] is set to one less than the first x coordinate and the final
+ * entry to one more than the last x coordinate. When degree < 2, or there
+ * are no more than 3 points, no turning points are looked for and a single
+ * segment is returned. The return value is the number of segments, so
+ * xstarts receives that many entries plus one.
+ * blobcoords and blobcount are not read by this function.
+ * NOTE(review): xcoords[0] and xcoords[pointcount - 1] are read
+ * unconditionally, so this appears to assume pointcount >= 1 - confirm
+ * against the callers.
+ **********************************************************************/
+
+int
+segment_spline ( //make xstarts
+TBOX blobcoords[], //bounding boxes (unused here)
+int blobcount, /*no of blobs in row (unused here) */
+int xcoords[], /*points to work on */
+int ycoords[], /*points to work on */
+int degree, int pointcount, /*no of points */
+int xstarts[] //result
+) {
+ int ptindex; /*no along text line */
+ int segment; /*partition no */
+ int lastmin, lastmax; /*possible turn points */
+ int turnpoints[SPLINESIZE]; /*good turning points */
+ int turncount; /*no of turning points */
+ int max_x; //max specified coord
+
+ xstarts[0] = xcoords[0] - 1; //leftmost defined pt
+ max_x = xcoords[pointcount - 1] + 1;
+ /*below degree 2 the turning point search is disabled */
+ if (degree < 2)
+ pointcount = 0;
+ turncount = 0; /*no turning points yet */
+ if (pointcount > 3) {
+ ptindex = 1;
+ lastmax = lastmin = 0; /*start with first one */
+ /*scan the interior points; an extreme is only recorded as a turning */
+ /*point once the opposite extreme differs from it by over TURNLIMIT */
+ while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) {
+ /*minimum */
+ if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) {
+ if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) {
+ if (turncount == 0 || turnpoints[turncount - 1] != lastmax)
+ /*new max point */
+ turnpoints[turncount++] = lastmax;
+ lastmin = ptindex; /*latest minimum */
+ }
+ else if (ycoords[ptindex] < ycoords[lastmin]) {
+ lastmin = ptindex; /*lower minimum */
+ }
+ }
+
+ /*maximum */
+ if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) {
+ if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) {
+ if (turncount == 0 || turnpoints[turncount - 1] != lastmin)
+ /*new min point */
+ turnpoints[turncount++] = lastmin;
+ lastmax = ptindex; /*latest maximum */
+ }
+ else if (ycoords[ptindex] > ycoords[lastmax]) {
+ lastmax = ptindex; /*higher maximum */
+ }
+ }
+ ptindex++;
+ }
+ /*ptindex is now the point at which the scan stopped; decide whether */
+ /*it, or the pending extreme, closes the list of turning points */
+ /*possible global min */
+ if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT
+ && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) {
+ if (turncount < SPLINESIZE - 1)
+ /*2 more turns */
+ turnpoints[turncount++] = lastmax;
+ if (turncount < SPLINESIZE - 1)
+ turnpoints[turncount++] = ptindex;
+ }
+ else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT
+ /*possible global max */
+ && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) {
+ if (turncount < SPLINESIZE - 1)
+ /*2 more turns */
+ turnpoints[turncount++] = lastmin;
+ if (turncount < SPLINESIZE - 1)
+ turnpoints[turncount++] = ptindex;
+ }
+ else if (turncount > 0 && turnpoints[turncount - 1] == lastmin
+ && turncount < SPLINESIZE - 1) {
+ /*last turn was a minimum: finish on the higher of end point and lastmax */
+ if (ycoords[ptindex] > ycoords[lastmax])
+ turnpoints[turncount++] = ptindex;
+ else
+ turnpoints[turncount++] = lastmax;
+ }
+ else if (turncount > 0 && turnpoints[turncount - 1] == lastmax
+ && turncount < SPLINESIZE - 1) {
+ /*last turn was a maximum: finish on the lower of end point and lastmin */
+ if (ycoords[ptindex] < ycoords[lastmin])
+ turnpoints[turncount++] = ptindex;
+ else
+ turnpoints[turncount++] = lastmin;
+ }
+ }
+
+ if (textord_oldbl_debug && turncount > 0)
+ tprintf ("First turn is %d at (%d,%d)\n",
+ turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]);
+ /*one segment boundary is placed between each pair of successive turns */
+ for (segment = 1; segment < turncount; segment++) {
+ /*centre y coord (lastmax is reused here as a scratch variable) */
+ lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2;
+
+ /* fix alg so that it works with both rising and falling sections */
+ /* both loops below have empty bodies: they only advance ptindex */
+ if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]])
+ /*find rising y centre */
+ for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++);
+ else
+ /*find falling y centre */
+ for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++);
+
+ /*centre x: rounded mean of the two points around the y centre */
+ /*and the two turning points */
+ xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex]
+ + xcoords[turnpoints[segment - 1]]
+ + xcoords[turnpoints[segment]] + 2) / 4;
+ /*halfway between turns */
+ if (textord_oldbl_debug)
+ tprintf ("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n",
+ segment, turnpoints[segment],
+ xcoords[turnpoints[segment]], ycoords[turnpoints[segment]],
+ ptindex - 1, xcoords[ptindex - 1], xstarts[segment]);
+ }
+
+ /*segment is 1 when no boundaries were added, otherwise turncount */
+ xstarts[segment] = max_x;
+ return segment; /*no of splines */
+}
+
+
+/**********************************************************************
+ * split_stepped_spline
+ *
+ * Re-segment the spline in cases where there is a big step function.
+ * Return true if any were done.
+ *
+ * For every interior boundary xstarts[segment] the value of
+ * baseline->step() between the midpoints of the two adjoining segments
+ * is compared, in magnitude, with jumplimit. When it is larger, and the
+ * two segments together hold at least 3 * textord_spline_medianwin
+ * points, and segments is still below SPLINESIZE, the boundary is
+ * replaced by two boundaries through insert_spline_point, which also
+ * increments segments.
+ * NOTE(review): the while loops that walk xcoords have no upper bound of
+ * their own; they appear to rely on xstarts lying within the range of
+ * xcoords - confirm against the callers.
+ **********************************************************************/
+
+bool
+split_stepped_spline( //make xstarts
+ QSPLINE* baseline, //current shot
+ float jumplimit, //max step function
+ int* xcoords, /*points to work on */
+ int* xstarts, //result
+ int& segments //no of segments
+) {
+ bool doneany; //return value
+ int segment; /*partition no */
+ int startindex, centreindex, endindex;
+ float leftcoord, rightcoord;
+ int leftindex, rightindex;
+ float step; //spline step
+
+ doneany = false;
+ startindex = 0;
+ for (segment = 1; segment < segments - 1; segment++) {
+ step = baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0,
+ (xstarts[segment] + xstarts[segment + 1]) / 2.0);
+ if (step < 0)
+ step = -step;
+ if (step > jumplimit) {
+ /*indices of the first points at or beyond the start of the left */
+ /*segment, the boundary itself, and the end of the right segment; */
+ /*startindex carries over from earlier boundaries */
+ while (xcoords[startindex] < xstarts[segment - 1])
+ startindex++;
+ centreindex = startindex;
+ while (xcoords[centreindex] < xstarts[segment])
+ centreindex++;
+ endindex = centreindex;
+ while (xcoords[endindex] < xstarts[segment + 1])
+ endindex++;
+ if (segments >= SPLINESIZE) {
+ if (textord_debug_baselines)
+ tprintf ("Too many segments to resegment spline!!\n");
+ }
+ else if (endindex - startindex >= textord_spline_medianwin * 3) {
+ /*move the centre so that at least medianwin * 3 / 2 points */
+ /*lie on each side of it */
+ while (centreindex - startindex <
+ textord_spline_medianwin * 3 / 2)
+ centreindex++;
+ while (endindex - centreindex <
+ textord_spline_medianwin * 3 / 2)
+ centreindex--;
+ /*first guesses: a third of the way in from each end, by index */
+ /*and by x coordinate */
+ leftindex = (startindex + startindex + centreindex) / 3;
+ rightindex = (centreindex + endindex + endindex) / 3;
+ leftcoord =
+ (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0;
+ rightcoord =
+ (xcoords[centreindex] + xcoords[endindex] * 2) / 3.0;
+ /*slide leftindex towards leftcoord while keeping a minimum */
+ /*number of points on either side of it */
+ while (xcoords[leftindex] > leftcoord
+ && leftindex - startindex > textord_spline_medianwin)
+ leftindex--;
+ while (xcoords[leftindex] < leftcoord
+ && centreindex - leftindex >
+ textord_spline_medianwin / 2)
+ leftindex++;
+ /*step back one if the previous point is nearer to leftcoord */
+ if (xcoords[leftindex] - leftcoord >
+ leftcoord - xcoords[leftindex - 1])
+ leftindex--;
+ /*the same adjustment for rightindex towards rightcoord */
+ while (xcoords[rightindex] > rightcoord
+ && rightindex - centreindex >
+ textord_spline_medianwin / 2)
+ rightindex--;
+ while (xcoords[rightindex] < rightcoord
+ && endindex - rightindex > textord_spline_medianwin)
+ rightindex++;
+ if (xcoords[rightindex] - rightcoord >
+ rightcoord - xcoords[rightindex - 1])
+ rightindex--;
+ if (textord_debug_baselines)
+ tprintf ("Splitting spline at %d with step %g at (%d,%d)\n",
+ xstarts[segment],
+ baseline->
+ step ((xstarts[segment - 1] +
+ xstarts[segment]) / 2.0,
+ (xstarts[segment] +
+ xstarts[segment + 1]) / 2.0),
+ (xcoords[leftindex - 1] + xcoords[leftindex]) / 2,
+ (xcoords[rightindex - 1] + xcoords[rightindex]) / 2);
+ /*the new boundaries lie halfway between each chosen point and */
+ /*the point before it */
+ insert_spline_point (xstarts, segment,
+ (xcoords[leftindex - 1] +
+ xcoords[leftindex]) / 2,
+ (xcoords[rightindex - 1] +
+ xcoords[rightindex]) / 2, segments);
+ doneany = true;
+ }
+ else if (textord_debug_baselines) {
+ tprintf
+ ("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n",
+ startindex, centreindex, endindex,
+ (int32_t) textord_spline_medianwin);
+ }
+ }
+ // else tprintf("Spline step at %d is %g\n",
+ // xstarts[segment],
+ // baseline->step((xstarts[segment-1]+xstarts[segment])/2.0,
+ // (xstarts[segment]+xstarts[segment+1])/2.0));
+ }
+ return doneany;
+}
+
+
+/**********************************************************************
+ * insert_spline_point
+ *
+ * Replace the boundary xstarts[segment] by the two boundaries coord1 and
+ * coord2. Entries xstarts[segment + 1] .. xstarts[segments] move up one
+ * place to make room and segments is incremented, so the array must have
+ * space for one more element.
+ **********************************************************************/
+
+void
+insert_spline_point (                  //split one boundary in two
+int xstarts[],                         //starts to shuffle
+int segment,                           //insertion pt
+int coord1,                            //coords to add
+int coord2, int &segments              //total segments
+) {
+  // Shift the tail right by one slot, highest entry first so that nothing
+  // is overwritten before it has been copied.
+  int dest = segments + 1;
+  while (dest > segment + 1) {
+    xstarts[dest] = xstarts[dest - 1];
+    --dest;
+  }
+  ++segments;
+
+  xstarts[segment] = coord1;
+  xstarts[segment + 1] = coord2;
+}
+
+
+/**********************************************************************
+ * find_lesser_parts
+ *
+ * For every partition other than bestpart, average the offset of its
+ * blob bottoms from row->baseline, then store in row->descdrop the
+ * average of the most populous partition lying at least MINASCRISE
+ * below the baseline (0 when there is none).
+ * row->xheight is used as a success flag: it is set to -1 when the
+ * longest run of consecutive blobs outside bestpart exceeds MAXBADRUN,
+ * and to 1 otherwise.
+ **********************************************************************/
+
+void
+find_lesser_parts (                    //get descenders
+TO_ROW * row,                          //row to process
+TBOX blobcoords[],                     //bounding boxes
+int blobcount,                         /*no of blobs */
+char partids[],                        /*partition of each blob */
+int partsizes[],                       /*size of each part */
+int partcount,                         /*no of partitions */
+int bestpart                           /*biggest partition */
+) {
+  float partsteps[MAXPARTS];            /*summed, then average, step per part */
+
+  for (int part = 0; part < partcount; part++)
+    partsteps[part] = 0.0;
+
+  // Sum the offsets of the blobs outside bestpart and track the longest
+  // unbroken run of such blobs.
+  int longest_run = 0;
+  int current_run = 0;
+  for (int i = 0; i < blobcount; i++) {
+    const int part_id =
+      static_cast<int>(static_cast<unsigned char>(partids[i]));
+    if (part_id == bestpart) {
+      current_run = 0;
+      continue;
+    }
+    const int xcentre = (blobcoords[i].left ()
+      + blobcoords[i].right ()) >> 1;
+    ++current_run;
+    if (current_run > longest_run)
+      longest_run = current_run;
+    partsteps[part_id] += blobcoords[i].bottom()
+      - row->baseline.y(xcentre);
+  }
+
+  row->xheight = (longest_run > MAXBADRUN) ? -1.0f : 1.0f;
+
+  // Convert the sums to averages and pick the biggest partition on each
+  // side of the baseline.
+  int poscount = 0;                     /*size of best partition above */
+  int negcount = 0;                     /*size of best partition below */
+  float bestneg = 0.0;                  /*average step of that lower one */
+  for (int part = 0; part < partcount; part++) {
+    if (part == bestpart)
+      continue;
+
+    // An empty partition would otherwise divide by zero.
+    if (partsizes[part] == 0)
+      partsteps[part] = 0;
+    else
+      partsteps[part] /= partsizes[part];
+
+    if (partsteps[part] >= MINASCRISE
+        && partsizes[part] > poscount) {
+      poscount = partsizes[part];
+    }
+    if (partsteps[part] <= -MINASCRISE
+        && partsizes[part] > negcount) {
+      bestneg = partsteps[part];
+      negcount = partsizes[part];
+    }
+  }
+
+  // Kept from the original; the value is not read again in this function.
+  partsteps[bestpart] /= blobcount;
+  row->descdrop = bestneg;
+}
+
+
+/**********************************************************************
+ * old_first_xheight
+ *
+ * Estimate the x-height and ascender rise of a row from the heights of
+ * its blob tops above the baseline spline.
+ *
+ * A working line height is taken from the height statistics (the
+ * ile(0.25) value, or ile(0.5) if that is not positive) when more than
+ * 3 heights qualify, from initialheight when too few qualify, and from
+ * the single blob when blobcount is not above 1. Blobs whose top lies
+ * more than jumplimit above that height count as ascenders; those within
+ * jumplimit of it contribute to the mean x-height.
+ * row->xheight is multiplied by the result, so a sign already stored
+ * there is preserved; a zero product is replaced by -1.
+ * row->ascrise is the mean ascender height minus the x-height, or 0 when
+ * no ascenders were seen.
+ **********************************************************************/
+
+void
+old_first_xheight (                    //the wiseowl way
+TO_ROW * row,                          /*current row */
+TBOX blobcoords[],                     /*blob bounding boxes */
+int initialheight,                     //initial guess
+int blobcount,                         /*blobs in blobcoords */
+QSPLINE * baseline,                    /*established */
+float jumplimit                        /*min ascender height */
+) {
+  STATS heightstat (0, MAXHEIGHT);      /*height statistics */
+  int lineheight;                       /*approx xheight */
+
+  if (blobcount > 1) {
+    for (int i = 0; i < blobcount; i++) {
+      const int xcentre = (blobcoords[i].left ()
+        + blobcoords[i].right ()) / 2;
+      const int height = static_cast<int>(blobcoords[i].top () - baseline->y (xcentre) + 0.5);
+      // Only heights that are plausible x-heights enter the statistics.
+      if (height > initialheight * oldbl_xhfract
+          && height > textord_min_xheight)
+        heightstat.add (height, 1);
+    }
+    if (heightstat.get_total () > 3) {
+      lineheight = static_cast<int>(heightstat.ile (0.25));
+      if (lineheight <= 0)
+        lineheight = static_cast<int>(heightstat.ile (0.5));
+    }
+    else {
+      lineheight = initialheight;       // too few samples to trust
+    }
+  }
+  else {
+    // Not more than one blob: its own height is all there is.
+    const int xcentre = (blobcoords[0].left ()
+      + blobcoords[0].right ()) / 2;
+    lineheight = static_cast<int>(blobcoords[0].top ()
+      - baseline->y (xcentre) + 0.5);
+  }
+
+  float ascenders = 0.0f;               /*ascender sum */
+  int asccount = 0;                     /*no of ascenders */
+  float xsum = 0.0f;                    /*xheight sum */
+  int xcount = 0;                       /*xheight count */
+  for (int i = 0; i < blobcount; i++) {
+    const int xcentre = (blobcoords[i].left ()
+      + blobcoords[i].right ()) / 2;
+    const float diff = blobcoords[i].top () - baseline->y (xcentre);
+    if (diff > lineheight + jumplimit) {
+      ascenders += diff;                // clearly taller than x-height
+      asccount++;
+    }
+    else if (diff > lineheight - jumplimit) {
+      xsum += diff;                     // within tolerance of x-height
+      xcount++;
+    }
+  }
+
+  if (xcount > 0)
+    xsum /= xcount;                     /*average xheight */
+  else
+    xsum = static_cast<float>(lineheight);      /*guess it */
+
+  row->xheight *= xsum;
+  row->ascrise = (asccount > 0) ? ascenders / asccount - xsum : 0.0f;
+  if (row->xheight == 0)
+    row->xheight = -1.0f;
+}
+
+
+/**********************************************************************
+ * make_first_xheight
+ *
+ * Estimate the x-height (and ascender rise) of a row from a histogram
+ * of blob heights above the baseline spline.
+ *
+ * Heights of blobs taller than a quarter of init_lineheight that also
+ * pass the lineheight/textord_min_xheight filters are added to the
+ * histogram; for each height bucket the leftmost and rightmost blob
+ * centres are remembered. find_top_modes and pick_x_height then choose
+ * the result, which pick_x_height stores in the row.
+ * If row->xheight was not positive on entry, the sign of the result is
+ * flipped before returning. jumplimit is not read by this function.
+ **********************************************************************/
+
+void
+make_first_xheight (                   //find xheight
+TO_ROW * row,                          /*current row */
+TBOX blobcoords[],                     /*blob bounding boxes */
+int lineheight,                        //initial guess
+int init_lineheight,                   //block level guess
+int blobcount,                         /*blobs in blobcoords */
+QSPLINE * baseline,                    /*established */
+float jumplimit                        /*min ascender height */
+) {
+  const int kBaselineTouch = 2;         // This really should change with resolution.
+  const int kGoodStrength = 8;          // Strength of baseline-touching heights.
+  const float kMinHeight = 0.25;        // Min fraction of lineheight to use.
+
+  STATS heightstat (0, HEIGHTBUCKETS);
+  int lefts[HEIGHTBUCKETS];             // leftmost centre seen per height
+  int rights[HEIGHTBUCKETS];            // rightmost centre seen per height
+  int modelist[MODENUM];
+
+  // Remember whether the incoming flag was non-positive.
+  const bool negate_result = !(row->xheight > 0);
+
+  memset(lefts, 0, sizeof(lefts));
+  memset(rights, 0, sizeof(rights));
+
+  int mode_count = 0;                   //blobs to count in thr
+  for (int i = 0; i < blobcount; i++) {
+    int xcenter = (blobcoords[i].left () +
+      blobcoords[i].right ()) / 2;
+    float base = baseline->y(xcenter);
+    float bottomdiff = fabs(base - blobcoords[i].bottom());
+    // In ocropus mode, blobs sitting on the baseline get extra weight.
+    int strength = 1;
+    if (textord_ocropus_mode && bottomdiff <= kBaselineTouch)
+      strength = kGoodStrength;
+    int height = static_cast<int>(blobcoords[i].top () - base + 0.5);
+
+    if (!(blobcoords[i].height () > init_lineheight * kMinHeight))
+      continue;                         // too small to be considered at all
+
+    if (height > lineheight * oldbl_xhfract
+        && height > textord_min_xheight) {
+      heightstat.add (height, strength);
+      if (height < HEIGHTBUCKETS) {
+        if (xcenter > rights[height])
+          rights[height] = xcenter;
+        // A stored 0 means "nothing recorded yet" for this bucket.
+        if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height]))
+          lefts[height] = xcenter;
+      }
+    }
+    mode_count += strength;
+  }
+
+  const int mode_threshold = (oldbl_dot_error_size > 1 || oldbl_xhfix)
+    ? static_cast<int>(mode_count * 0.1)
+    : static_cast<int>(blobcount * 0.1);
+
+  if (textord_oldbl_debug) {
+    tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n",
+             blobcount, mode_count, mode_threshold);
+  }
+  find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM);
+  if (textord_oldbl_debug) {
+    for (int m = 0; m < MODENUM; m++)
+      tprintf ("mode[%d]=%d ", m, modelist[m]);
+    tprintf ("\n");
+  }
+  pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold);
+
+  if (textord_oldbl_debug)
+    tprintf ("Output xheight=%g\n", row->xheight);
+  if (row->xheight < 0 && textord_oldbl_debug)
+    tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight);
+
+  if (negate_result)
+    row->xheight = -row->xheight;
+}
+
+/**********************************************************************
+ * find_top_modes
+ *
+ * Fill modelist with modenum entries, each the index of a mode of the
+ * input distribution, working downwards from the fullest pile.
+ *
+ * Each pass looks for a pile that is either strictly smaller than the
+ * one found by the previous pass, or equal to it with a higher index.
+ * An entry is stored as 0 when its pile count is not more than the
+ * running sum of the counts found so far divided by the mode factor.
+ **********************************************************************/
+
+const int kMinModeFactorOcropus = 32;
+const int kMinModeFactor = 12;
+
+void
+find_top_modes (                       //get modes
+STATS * stats,                         //stats to hack
+int statnum,                           //no of piles
+int modelist[], int modenum            //no of modes to get
+) {
+  const int mode_factor = textord_ocropus_mode ?
+    kMinModeFactorOcropus : kMinModeFactor;
+  int prev_index = 0;                   // pile chosen by the previous pass
+  int prev_count = INT32_MAX;           // and its count
+  int running_total = 0;                // sum of the counts chosen so far
+
+  for (int slot = 0; slot < modenum; slot++) {
+    int best = 0;
+    for (int i = 0; i < statnum; i++) {
+      const int count = stats->pile_count (i);
+      if (count <= stats->pile_count (best))
+        continue;                       // does not beat the current candidate
+      const bool below_previous = count < prev_count;
+      const bool later_tie = (count == prev_count) && (i > prev_index);
+      if (below_previous || later_tie)
+        best = i;
+    }
+    prev_index = best;
+    prev_count = stats->pile_count (best);
+    running_total += prev_count;
+    // Piles that are too small relative to the total are reported as 0.
+    modelist[slot] = (prev_count <= running_total / mode_factor) ? 0 : best;
+  }
+}
+
+
+/**********************************************************************
+ * pick_x_height
+ *
+ * Choose based on the height modes the best x height value.
+ *
+ * First every ordered pair of modes (x, y) is tried: modelist[x] must be
+ * non-zero with a pile count above mode_threshold, modelist[y] non-zero,
+ * and modelist[y] / modelist[x] strictly between 1.2 and 1.8. The first
+ * such pair gives the x-height (x) and the ascender height (y); both are
+ * then refined and the result is written to row->xheight and
+ * row->ascrise. In ocropus mode the pair must also overlap horizontally
+ * according to the lefts/rights arrays.
+ * If no pair qualifies, modelist[0] is used as a single mode,
+ * row->ascrise is set to 0, and a zero x-height is reported as -1.
+ **********************************************************************/
+
+void pick_x_height(TO_ROW * row, //row to do
+ int modelist[],
+ int lefts[], int rights[],
+ STATS * heightstat,
+ int mode_threshold) {
+ int x;
+ int y;
+ int z;
+ float ratio;
+ int found_one_bigger = false;
+ int best_x_height = 0;
+ int best_asc = 0;
+ int num_in_best;
+
+ for (x = 0; x < MODENUM; x++) {
+ for (y = 0; y < MODENUM; y++) {
+ /* Check for two modes */
+ if (modelist[x] && modelist[y] &&
+ heightstat->pile_count (modelist[x]) > mode_threshold &&
+ (!textord_ocropus_mode ||
+ std::min(rights[modelist[x]], rights[modelist[y]]) >
+ std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
+ ratio = static_cast<float>(modelist[y]) / static_cast<float>(modelist[x]);
+ if (1.2 < ratio && ratio < 1.8) {
+ /* Two modes found */
+ best_x_height = modelist[x];
+ num_in_best = heightstat->pile_count (modelist[x]);
+
+ /* Try to get one higher */
+ /* Repeatedly move up by one while a mode exists at exactly */
+ /* best_x_height + 1 that keeps the ratio and is populous enough */
+ do {
+ found_one_bigger = false;
+ for (z = 0; z < MODENUM; z++) {
+ /* NOTE(review): the overlap test below uses modelist[x] and */
+ /* modelist[y], not modelist[z] - confirm this is intended */
+ if (modelist[z] == best_x_height + 1 &&
+ (!textord_ocropus_mode ||
+ std::min(rights[modelist[x]], rights[modelist[y]]) >
+ std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
+ ratio = static_cast<float>(modelist[y]) / static_cast<float>(modelist[z]);
+ if ((1.2 < ratio && ratio < 1.8) &&
+ /* Should be half of best */
+ heightstat->pile_count (modelist[z]) >
+ num_in_best * 0.5) {
+ best_x_height++;
+ found_one_bigger = true;
+ break;
+ }
+ }
+ }
+ }
+ while (found_one_bigger);
+
+ /* try to get a higher ascender */
+
+ best_asc = modelist[y];
+ num_in_best = heightstat->pile_count (modelist[y]);
+
+ /* Try to get one higher */
+ /* Any larger mode is accepted here, not only best_asc + 1, */
+ /* provided its ratio to best_x_height stays in range */
+ do {
+ found_one_bigger = false;
+ for (z = 0; z < MODENUM; z++) {
+ if (modelist[z] > best_asc &&
+ (!textord_ocropus_mode ||
+ std::min(rights[modelist[x]], rights[modelist[y]]) >
+ std::max(lefts[modelist[x]], lefts[modelist[y]]))) {
+ ratio = static_cast<float>(modelist[z]) / static_cast<float>(best_x_height);
+ if ((1.2 < ratio && ratio < 1.8) &&
+ /* Should be half of best */
+ heightstat->pile_count (modelist[z]) >
+ num_in_best * 0.5) {
+ best_asc = modelist[z];
+ found_one_bigger = true;
+ break;
+ }
+ }
+ }
+ }
+ while (found_one_bigger);
+
+ /* The first qualifying pair wins: store the result and stop */
+ row->xheight = static_cast<float>(best_x_height);
+ row->ascrise = static_cast<float>(best_asc) - best_x_height;
+ return;
+ }
+ }
+ }
+ }
+
+ /* No x-height/ascender pair was found: fall back to the first mode */
+ best_x_height = modelist[0]; /* Single Mode found */
+ num_in_best = heightstat->pile_count (best_x_height);
+ do {
+ /* Try to get one higher */
+ found_one_bigger = false;
+ for (z = 1; z < MODENUM; z++) {
+ /* Should be half of best */
+ if ((modelist[z] == best_x_height + 1) &&
+ (heightstat->pile_count (modelist[z]) > num_in_best * 0.5)) {
+ best_x_height++;
+ found_one_bigger = true;
+ break;
+ }
+ }
+ }
+ while (found_one_bigger);
+
+ row->ascrise = 0.0f;
+ row->xheight = static_cast<float>(best_x_height);
+ /* A zero x-height is reported as -1 */
+ if (row->xheight == 0)
+ row->xheight = -1.0f;
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/oldbasel.h b/tesseract/src/textord/oldbasel.h
new file mode 100644
index 00000000..0e25df0d
--- /dev/null
+++ b/tesseract/src/textord/oldbasel.h
@@ -0,0 +1,164 @@
+/**********************************************************************
+ * File: oldbasel.h (Formerly oldbl.h)
+ * Description: A re-implementation of the old baseline algorithm.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef OLDBASEL_H
+#define OLDBASEL_H
+
+#include "params.h"
+#include "blobbox.h"
+
+namespace tesseract {
+
+// Declarations for the old baseline and x-height fitting code.
+// Array parameters are plain pointers; their required sizes are not
+// expressed in these signatures.
+
+extern BOOL_VAR_H (textord_oldbl_debug, false,
+"Debug old baseline generation");
+
+int get_blob_coords( //get boxes
+ TO_ROW* row, //row to use
+ int32_t lineheight, //block level
+ TBOX* blobcoords, //output boxes
+ bool& holed_line, //lost a lot of blobs
+ int& outcount //no of real blobs
+);
+void make_first_baseline ( //initial approximation
+TBOX blobcoords[], /*blob bounding boxes */
+int blobcount, /*no of blobcoords */
+int xcoords[], /*coords for spline */
+int ycoords[], /*approximator */
+QSPLINE * spline, /*initial spline */
+QSPLINE * baseline, /*output spline */
+float jumplimit /*guess half descenders */
+);
+void make_holed_baseline ( //initial approximation
+TBOX blobcoords[], /*blob bounding boxes */
+int blobcount, /*no of blobcoords */
+QSPLINE * spline, /*initial spline */
+QSPLINE * baseline, /*output spline */
+float gradient //of line
+);
+int partition_line ( //partition blobs
+TBOX blobcoords[], //bounding boxes
+int blobcount, /*no of blobs on row */
+int *numparts, /*number of partitions */
+char partids[], /*partition no of each blob */
+int partsizes[], /*no in each partition */
+QSPLINE * spline, /*curve to fit to */
+float jumplimit, /*allowed delta change */
+float ydiffs[] /*diff from spline */
+);
+void merge_oldbl_parts ( //partition blobs
+TBOX blobcoords[], //bounding boxes
+int blobcount, /*no of blobs on row */
+char partids[], /*partition no of each blob */
+int partsizes[], /*no in each partition */
+int biggestpart, //major partition
+float jumplimit /*allowed delta change */
+);
+int get_ydiffs ( //evaluate differences
+TBOX blobcoords[], //bounding boxes
+int blobcount, /*no of blobs */
+QSPLINE * spline, /*approximating spline */
+float ydiffs[] /*output */
+);
+// Returns the partition index chosen for one point; drift, last_delta
+// and partcount are read and updated in place.
+int choose_partition ( //select partition
+float diff, /*diff from spline */
+float partdiffs[], /*diff on all parts */
+int lastpart, /*last assigned partition */
+float jumplimit, /*new part threshold */
+float* drift,
+float* last_delta,
+int *partcount /*no of partitions */
+);
+// Returns the number of points written to xcoords/ycoords.
+int partition_coords ( //find relevant coords
+TBOX blobcoords[], //bounding boxes
+int blobcount, /*no of blobs in row */
+char partids[], /*partition no of each blob */
+int bestpart, /*best new partition */
+int xcoords[], /*points to work on */
+int ycoords[] /*points to work on */
+);
+// Returns the number of segments; xstarts receives one more entry than that.
+int segment_spline ( //make xstarts
+TBOX blobcoords[], //bounding boxes
+int blobcount, /*no of blobs in row */
+int xcoords[], /*points to work on */
+int ycoords[], /*points to work on */
+int degree, int pointcount, /*no of points */
+int xstarts[] //result
+);
+// Returns true if any segment boundary was split; segments is updated.
+bool split_stepped_spline( //make xstarts
+ QSPLINE* baseline, //current shot
+ float jumplimit, //max step function
+ int* xcoords, /*points to work on */
+ int* xstarts, //result
+ int& segments //no of segments
+);
+// Replaces xstarts[segment] by coord1 and coord2 and increments segments.
+void insert_spline_point ( //split one boundary in two
+int xstarts[], //starts to shuffle
+int segment, //insertion pt
+int coord1, //coords to add
+int coord2, int &segments //total segments
+);
+void find_lesser_parts ( //get descenders
+TO_ROW * row, //row to process
+TBOX blobcoords[], //bounding boxes
+int blobcount, /*no of blobs */
+char partids[], /*partition of each blob */
+int partsizes[], /*size of each part */
+int partcount, /*no of partitions */
+int bestpart /*biggest partition */
+);
+
+void old_first_xheight ( //the wiseowl way
+TO_ROW * row, /*current row */
+TBOX blobcoords[], /*blob bounding boxes */
+int initialheight, //initial guess
+int blobcount, /*blobs in blobcoords */
+QSPLINE * baseline, /*established */
+float jumplimit /*min ascender height */
+);
+
+void make_first_xheight ( //find xheight
+TO_ROW * row, /*current row */
+TBOX blobcoords[], /*blob bounding boxes */
+int lineheight, //initial guess
+int init_lineheight, //block level guess
+int blobcount, /*blobs in blobcoords */
+QSPLINE * baseline, /*established */
+float jumplimit /*min ascender height */
+);
+
+// NOTE(review): no definition of make_height_array was seen alongside the
+// other functions declared here - confirm it is still implemented.
+int *make_height_array ( //get array of heights
+TBOX blobcoords[], /*blob bounding boxes */
+int blobcount, /*blobs in blobcoords */
+QSPLINE * baseline /*established */
+);
+
+// Fills modelist with modenum mode indices taken from stats.
+void find_top_modes ( //get modes
+STATS * stats, //stats to hack
+int statnum, //no of piles
+int modelist[], int modenum //no of modes to get
+);
+
+// Writes the chosen x-height and ascender rise into row.
+void pick_x_height(TO_ROW * row, //row to do
+int modelist[],
+int lefts[], int rights[],
+STATS * heightstat,
+int mode_threshold);
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/pithsync.cpp b/tesseract/src/textord/pithsync.cpp
new file mode 100644
index 00000000..462f0b3c
--- /dev/null
+++ b/tesseract/src/textord/pithsync.cpp
@@ -0,0 +1,693 @@
+/**********************************************************************
+ * File: pithsync.cpp (Formerly pitsync2.c)
+ * Description: Code to find the optimum fixed pitch segmentation of some blobs.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "pithsync.h"
+
+#include "makerow.h"
+#include "pitsync1.h"
+#include "topitch.h"
+#include "tprintf.h"
+
+#include <cmath>
+#include <cfloat> // for FLT_MAX
+#include <vector> // for std::vector
+
+namespace tesseract {
+
+/**********************************************************************
+ * FPCUTPT::setup
+ * Initialise this cut point as a path start at x: no predecessor,
+ * cost = offset squared, and the balance bit masks seeded from projection.
+ **********************************************************************/
+
+void FPCUTPT::setup( //init a start point
+ FPCUTPT *cutpts, //predecessors (entry x - 1 is read when x != array_origin)
+ int16_t array_origin, //start coord (x coord of cutpts[0])
+ STATS *projection, //vertical occupation
+ int16_t zero_count, //official zero
+ int16_t pitch, //proposed pitch
+ int16_t x, //position
+ int16_t offset //dist to gap
+ ) {
+ //half of pitch, clamped below to a valid bit index 0..31
+ int16_t half_pitch = pitch / 2 - 1;
+ uint32_t lead_flag; //new flag
+ int32_t ind; //current position
+
+ if (half_pitch > 31)
+ half_pitch = 31;
+ else if (half_pitch < 0)
+ half_pitch = 0;
+ lead_flag = static_cast<uint32_t>(1) << half_pitch; //unsigned shift: 1 << 31 overflows a signed int
+
+ pred = nullptr; //start of a path
+ mean_sum = 0;
+ sq_sum = offset * offset;
+ cost = sq_sum;
+ faked = false;
+ terminal = false;
+ fake_count = 0;
+ xpos = x;
+ region_index = 0;
+ mid_cuts = 0;
+ if (x == array_origin) {
+ //first array entry: build fwd_balance from scratch, one bit per pile
+ back_balance = 0;
+ fwd_balance = 0;
+ for (ind = 0; ind <= half_pitch; ind++) {
+ fwd_balance >>= 1;
+ if (projection->pile_count (ind) > zero_count) //NOTE(review): uses absolute index ind, not x + ind - confirm intent
+ fwd_balance |= lead_flag;
+ }
+ }
+ else {
+ //slide both masks one step on from the entry for x - 1
+ back_balance = cutpts[x - 1 - array_origin].back_balance << 1;
+ back_balance &= lead_flag + (lead_flag - 1); //keep the low half_pitch + 1 bits
+ if (projection->pile_count (x) > zero_count)
+ back_balance |= 1;
+ fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1;
+ if (projection->pile_count (x + half_pitch) > zero_count)
+ fwd_balance |= lead_flag;
+ }
+}
+
+
+/**********************************************************************
+ * FPCUTPT::assign
+ * Evaluate a cut at x: try every predecessor within pitch +/- pitch_error
+ * behind x and keep the least-cost one. cost stays FLT_MAX if none is usable.
+ **********************************************************************/
+
+void FPCUTPT::assign( //evaluate cut
+ FPCUTPT* cutpts, //predecessors (entry x - 1 is read, so x must exceed array_origin)
+ int16_t array_origin, //start coord (x coord of cutpts[0])
+ int16_t x, //position
+ bool faking, //faking this one
+ bool mid_cut, //cheap cut.
+ int16_t offset, //dist to gap
+ STATS* projection, //vertical occupation
+ float projection_scale, //scaling
+ int16_t zero_count, //official zero
+ int16_t pitch, //proposed pitch
+ int16_t pitch_error //allowed tolerance
+) {
+ int index; //test index
+ int balance_index; //for balance factor
+ int16_t balance_count; //ding factor
+ int16_t r_index; //test cut number
+ FPCUTPT *segpt; //segment point
+ int32_t dist; //from prev segment
+ double sq_dist; //squared distance
+ double mean; //mean pitch
+ double total; //total dists
+ double factor; //cost function
+ //half of pitch, clamped below to a valid bit index 0..31
+ int16_t half_pitch = pitch / 2 - 1;
+ uint32_t lead_flag; //new flag
+
+ if (half_pitch > 31)
+ half_pitch = 31;
+ else if (half_pitch < 0)
+ half_pitch = 0;
+ lead_flag = static_cast<uint32_t>(1) << half_pitch; //unsigned shift: 1 << 31 overflows a signed int
+
+ back_balance = cutpts[x - 1 - array_origin].back_balance << 1;
+ back_balance &= lead_flag + (lead_flag - 1); //keep the low half_pitch + 1 bits
+ if (projection->pile_count (x) > zero_count)
+ back_balance |= 1;
+ fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1;
+ if (projection->pile_count (x + half_pitch) > zero_count)
+ fwd_balance |= lead_flag;
+
+ xpos = x;
+ cost = FLT_MAX; //no path found yet
+ pred = nullptr;
+ faked = faking;
+ terminal = false;
+ region_index = 0;
+ fake_count = INT16_MAX; //marks "no usable predecessor"
+ for (index = x - pitch - pitch_error; index <= x - pitch + pitch_error;
+ index++) {
+ if (index >= array_origin) { //skip candidates before the array start
+ segpt = &cutpts[index - array_origin];
+ dist = x - segpt->xpos;
+ if (!segpt->terminal && segpt->fake_count < INT16_MAX) {
+ balance_count = 0;
+ if (textord_balance_factor > 0) {
+ if (textord_fast_pitch_test) {
+ //count differing bits between the two balance masks
+ lead_flag = back_balance ^ segpt->fwd_balance;
+ balance_count = 0;
+ while (lead_flag != 0) {
+ balance_count++;
+ lead_flag &= lead_flag - 1; //clear lowest set bit
+ }
+ }
+ else {
+ //walk inwards from both ends, counting occupancy mismatches
+ for (balance_index = 0;
+ index + balance_index < x - balance_index;
+ balance_index++)
+ balance_count +=
+ (projection->pile_count (index + balance_index) <=
+ zero_count) ^ (projection->pile_count (x -
+ balance_index)
+ <= zero_count);
+ }
+ balance_count =
+ static_cast<int16_t>(balance_count * textord_balance_factor /
+ projection_scale);
+ }
+ r_index = segpt->region_index + 1;
+ total = segpt->mean_sum + dist;
+ balance_count += offset;
+ sq_dist =
+ dist * dist + segpt->sq_sum + balance_count * balance_count;
+ mean = total / r_index;
+ factor = mean - pitch; //cost = (mean - pitch)^2 + sq_dist / r_index - mean^2
+ factor *= factor;
+ factor += sq_dist / (r_index) - mean * mean;
+ if (factor < cost && segpt->fake_count + faked <= fake_count) {
+ cost = factor; //find least cost
+ pred = segpt; //save path
+ mean_sum = total;
+ sq_sum = sq_dist;
+ fake_count = segpt->fake_count + faked;
+ mid_cuts = segpt->mid_cuts + mid_cut;
+ region_index = r_index;
+ }
+ }
+ }
+ }
+}
+
+
+/**********************************************************************
+ * FPCUTPT::assign_cheap
+ * Evaluate a cut at x on the cheap: only the single predecessor exactly
+ * pitch behind x is tried. pitch_error is accepted but not used here.
+ **********************************************************************/
+
+void FPCUTPT::assign_cheap( //evaluate cut
+ FPCUTPT *cutpts, //predecessors (entry x - 1 is read, so x must exceed array_origin)
+ int16_t array_origin, //start coord (x coord of cutpts[0])
+ int16_t x, //position
+ bool faking, //faking this one
+ bool mid_cut, //cheap cut.
+ int16_t offset, //dist to gap
+ STATS *projection, //vertical occupation
+ float projection_scale, //scaling
+ int16_t zero_count, //official zero
+ int16_t pitch, //proposed pitch
+ int16_t pitch_error //allowed tolerance
+ ) {
+ int index; //test index
+ int16_t balance_count; //ding factor
+ int16_t r_index; //test cut number
+ FPCUTPT *segpt; //segment point
+ int32_t dist; //from prev segment
+ double sq_dist; //squared distance
+ double mean; //mean pitch
+ double total; //total dists
+ double factor; //cost function
+ //half of pitch, clamped below to a valid bit index 0..31
+ int16_t half_pitch = pitch / 2 - 1;
+ uint32_t lead_flag; //new flag
+
+ if (half_pitch > 31)
+ half_pitch = 31;
+ else if (half_pitch < 0)
+ half_pitch = 0;
+ lead_flag = static_cast<uint32_t>(1) << half_pitch; //unsigned shift: 1 << 31 overflows a signed int
+
+ back_balance = cutpts[x - 1 - array_origin].back_balance << 1;
+ back_balance &= lead_flag + (lead_flag - 1); //keep the low half_pitch + 1 bits
+ if (projection->pile_count (x) > zero_count)
+ back_balance |= 1;
+ fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1;
+ if (projection->pile_count (x + half_pitch) > zero_count)
+ fwd_balance |= lead_flag;
+
+ xpos = x;
+ cost = FLT_MAX; //no path found yet
+ pred = nullptr;
+ faked = faking;
+ terminal = false;
+ region_index = 0;
+ fake_count = INT16_MAX; //marks "no usable predecessor"
+ index = x - pitch; //the only candidate predecessor
+ if (index >= array_origin) {
+ segpt = &cutpts[index - array_origin];
+ dist = x - segpt->xpos;
+ if (!segpt->terminal && segpt->fake_count < INT16_MAX) {
+ balance_count = 0;
+ if (textord_balance_factor > 0) {
+ lead_flag = back_balance ^ segpt->fwd_balance; //differing mask bits
+ balance_count = 0;
+ while (lead_flag != 0) {
+ balance_count++;
+ lead_flag &= lead_flag - 1; //clear lowest set bit
+ }
+ balance_count = static_cast<int16_t>(balance_count * textord_balance_factor
+ / projection_scale);
+ }
+ r_index = segpt->region_index + 1;
+ total = segpt->mean_sum + dist;
+ balance_count += offset;
+ sq_dist =
+ dist * dist + segpt->sq_sum + balance_count * balance_count;
+ mean = total / r_index;
+ factor = mean - pitch; //cost = (mean - pitch)^2 + sq_dist / r_index - mean^2
+ factor *= factor;
+ factor += sq_dist / (r_index) - mean * mean;
+ cost = factor; //only one candidate, so accept it unconditionally
+ pred = segpt; //save path
+ mean_sum = total;
+ sq_sum = sq_dist;
+ fake_count = segpt->fake_count + faked;
+ mid_cuts = segpt->mid_cuts + mid_cut;
+ region_index = r_index;
+ }
+ }
+}
+
+
+/**********************************************************************
+ * check_pitch_sync2
+ * Blob-box version; defers to check_pitch_sync3 if pitsync_linear_version >= 4.
+ * Construct the lattice of possible segmentation points and choose the
+ * optimal path. Return the optimal path only.
+ * The return value is a measure of goodness of the sync.
+ **********************************************************************/
+
+double check_pitch_sync2( //find segmentation
+ BLOBNBOX_IT *blob_it, //blobs to do
+ int16_t blob_count, //no of blobs
+ int16_t pitch, //pitch estimate
+ int16_t pitch_error, //tolerance
+ STATS *projection, //vertical
+ int16_t projection_left, //edges
+ int16_t projection_right,
+ float projection_scale, //scale factor
+ int16_t &occupation_count, //no of occupied cells
+ FPSEGPT_LIST *seg_list, //output list
+ int16_t start, //start of good range
+ int16_t end //end of good range
+ ) {
+ bool faking; //illegal cut pt
+ bool mid_cut; //cheap cut pt.
+ int16_t x; //current coord
+ int16_t blob_index; //blob number
+ int16_t left_edge; //of word
+ int16_t right_edge; //of word
+ int16_t array_origin; //x coord of array
+ int16_t offset; //dist to legal area
+ int16_t zero_count; //projection zero
+ int16_t best_left_x = 0; //for equals
+ int16_t best_right_x = 0; //right edge
+ TBOX this_box; //bounding box
+ TBOX next_box; //box of next blob
+ FPSEGPT *segpt; //segment point
+ double best_cost; //best path
+ double mean_sum; //computes result
+ FPCUTPT *best_end; //end of best path
+ int16_t best_fake; //best fake level
+ int16_t best_count; //no of cuts
+ BLOBNBOX_IT this_it; //copy iterator
+ FPSEGPT_IT seg_it = seg_list; //output iterator
+
+ // tprintf("Computing sync on word of %d blobs with pitch %d\n",
+ // blob_count, pitch);
+ // if (blob_count==8 && pitch==27)
+ // projection->print(stdout,true);
+ zero_count = 0;
+ if (pitch < 3)
+ pitch = 3; //nothing ludicrous
+ if ((pitch - 3) / 2 < pitch_error)
+ pitch_error = (pitch - 3) / 2; //cap the tolerance by the pitch
+ this_it = *blob_it;
+ this_box = box_next (&this_it);//get box
+ // left_edge=this_box.left(); //left of word
+ // right_edge=this_box.right();
+ // for (blob_index=1;blob_index<blob_count;blob_index++)
+ // {
+ // this_box=box_next(&this_it);
+ // if (this_box.right()>right_edge)
+ // right_edge=this_box.right();
+ // }
+ for (left_edge = projection_left; projection->pile_count (left_edge) == 0
+ && left_edge < projection_right; left_edge++); //skip empty piles on the left
+ for (right_edge = projection_right; projection->pile_count (right_edge) == 0
+ && right_edge > left_edge; right_edge--); //skip empty piles on the right
+ ASSERT_HOST (right_edge >= left_edge);
+ if (pitsync_linear_version >= 4)
+ return check_pitch_sync3 (projection_left, projection_right, zero_count,
+ pitch, pitch_error, projection,
+ projection_scale, occupation_count, seg_list,
+ start, end);
+ array_origin = left_edge - pitch;
+ // array of points, covering left_edge - pitch .. right_edge + pitch
+ std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
+ for (x = array_origin; x < left_edge; x++)
+ //free cuts
+ cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
+ zero_count, pitch, x, 0);
+ for (offset = 0; offset <= pitch_error; offset++, x++)
+ //not quite free
+ cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
+ zero_count, pitch, x, offset);
+
+ this_it = *blob_it;
+ best_cost = FLT_MAX;
+ best_end = nullptr;
+ this_box = box_next (&this_it);//first box
+ next_box = box_next (&this_it);//second box
+ blob_index = 1;
+ while (x < right_edge - pitch_error) {
+ if (x > this_box.right () + pitch_error && blob_index < blob_count) {
+ this_box = next_box; //move on to the next blob
+ next_box = box_next (&this_it);
+ blob_index++;
+ }
+ faking = false;
+ mid_cut = false;
+ if (x <= this_box.left ())
+ offset = 0;
+ else if (x <= this_box.left () + pitch_error)
+ offset = x - this_box.left ();
+ else if (x >= this_box.right ())
+ offset = 0;
+ else if (x >= next_box.left () && blob_index < blob_count) {
+ offset = x - next_box.left ();
+ if (this_box.right () - x < offset)
+ offset = this_box.right () - x; //take the nearer of the two edges
+ }
+ else if (x >= this_box.right () - pitch_error)
+ offset = this_box.right () - x;
+ else if (x - this_box.left () > pitch * pitsync_joined_edge
+ && this_box.right () - x > pitch * pitsync_joined_edge) {
+ mid_cut = true; //far enough inside a big blob
+ offset = 0;
+ }
+ else {
+ faking = true;
+ offset = projection->pile_count (x);
+ }
+ cutpts[x - array_origin].assign (&cutpts[0], array_origin, x,
+ faking, mid_cut, offset, projection,
+ projection_scale, zero_count, pitch,
+ pitch_error);
+ x++;
+ }
+
+ best_fake = INT16_MAX;
+ best_cost = INT32_MAX;
+ best_count = INT16_MAX;
+ while (x < right_edge + pitch) {
+ offset = x < right_edge ? right_edge - x : 0;
+ cutpts[x - array_origin].assign (&cutpts[0], array_origin, x,
+ false, false, offset, projection,
+ projection_scale, zero_count, pitch,
+ pitch_error);
+ cutpts[x - array_origin].terminal = true; //candidate end of path
+ if (cutpts[x - array_origin].index () +
+ cutpts[x - array_origin].fake_count <= best_count + best_fake) {
+ if (cutpts[x - array_origin].fake_count < best_fake
+ || (cutpts[x - array_origin].fake_count == best_fake
+ && cutpts[x - array_origin].cost_function () < best_cost)) {
+ best_fake = cutpts[x - array_origin].fake_count;
+ best_cost = cutpts[x - array_origin].cost_function ();
+ best_left_x = x;
+ best_right_x = x;
+ best_count = cutpts[x - array_origin].index ();
+ }
+ else if (cutpts[x - array_origin].fake_count == best_fake
+ && x == best_right_x + 1
+ && cutpts[x - array_origin].cost_function () == best_cost) {
+ //exactly equal
+ best_right_x = x;
+ }
+ }
+ x++;
+ }
+ ASSERT_HOST (best_fake < INT16_MAX);
+
+ best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin]; //middle of the run of equal-cost ends
+ if (this_box.right () == textord_test_x
+ && this_box.top () == textord_test_y) {
+ for (x = left_edge - pitch; x < right_edge + pitch; x++) {
+ tprintf ("x=%d, C=%g, s=%g, sq=%g, prev=%d\n",
+ x, cutpts[x - array_origin].cost_function (),
+ cutpts[x - array_origin].sum (),
+ cutpts[x - array_origin].squares (),
+ cutpts[x - array_origin].previous () != nullptr ? cutpts[x - array_origin].previous ()->position () : -1); //pred is null for start points; print -1
+ }
+ }
+ occupation_count = -1;
+ do {
+ for (x = best_end->position () - pitch + pitch_error;
+ x < best_end->position () - pitch_error
+ && projection->pile_count (x) == 0; x++);
+ if (x < best_end->position () - pitch_error)
+ occupation_count++; //found a non-empty pile in this cell
+ //copy it
+ segpt = new FPSEGPT (best_end);
+ seg_it.add_before_then_move (segpt);
+ best_end = best_end->previous ();
+ }
+ while (best_end != nullptr);
+ seg_it.move_to_last ();
+ mean_sum = seg_it.data ()->sum ();
+ mean_sum = mean_sum * mean_sum / best_count;
+ if (seg_it.data ()->squares () - mean_sum < 0)
+ tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n",
+ seg_it.data ()->squares (), seg_it.data ()->sum (), best_count);
+ // tprintf("blob_count=%d, pitch=%d, sync=%g, occ=%d\n",
+ // blob_count,pitch,seg_it.data()->squares()-mean_sum,
+ // occupation_count);
+ return seg_it.data ()->squares () - mean_sum;
+}
+
+
+/**********************************************************************
+ * check_pitch_sync3
+ * Projection-only version (no blob boxes are consulted).
+ * Construct the lattice of possible segmentation points and choose the
+ * optimal path. Return the optimal path only.
+ * The return value is a measure of goodness of the sync.
+ **********************************************************************/
+
+double check_pitch_sync3( //find segmentation
+ int16_t projection_left, //edges
+ int16_t projection_right,
+ int16_t zero_count, //to be considered 0
+ int16_t pitch, //pitch estimate
+ int16_t pitch_error, //tolerance
+ STATS *projection, //vertical
+ float projection_scale, //scale factor
+ int16_t &occupation_count, //no of occupied cells
+ FPSEGPT_LIST *seg_list, //output list
+ int16_t start, //start of good range
+ int16_t end //end of good range
+ ) {
+ bool faking; //illegal cut pt
+ bool mid_cut; //cheap cut pt.
+ int16_t left_edge; //of word
+ int16_t right_edge; //of word
+ int16_t x; //current coord
+ int16_t array_origin; //x coord of array
+ int16_t offset; //dist to legal area
+ int16_t projection_offset; //from scaled projection
+ int16_t prev_zero; //previous zero dist
+ int16_t next_zero; //next zero dist
+ int16_t zero_offset; //scan window
+ int16_t best_left_x = 0; //for equals
+ int16_t best_right_x = 0; //right edge
+ FPSEGPT *segpt; //segment point
+ int minindex; //next input position
+ int test_index; //index to mins
+ double best_cost; //best path
+ double mean_sum; //computes result
+ FPCUTPT *best_end; //end of best path
+ int16_t best_fake; //best fake level
+ int16_t best_count; //no of cuts
+ FPSEGPT_IT seg_it = seg_list; //output iterator
+
+ if (pitch < 3)
+ pitch = 3; //nothing ludicrous; clamp before pitch is used as a divisor
+ end = (end - start) % pitch; //same modulus as the (x - ...) % pitch test below
+ if ((pitch - 3) / 2 < pitch_error)
+ pitch_error = (pitch - 3) / 2; //cap the tolerance by the pitch
+ //min dist of zero
+ zero_offset = static_cast<int16_t>(pitch * pitsync_joined_edge);
+ for (left_edge = projection_left; projection->pile_count (left_edge) == 0
+ && left_edge < projection_right; left_edge++); //skip empty piles on the left
+ for (right_edge = projection_right; projection->pile_count (right_edge) == 0
+ && right_edge > left_edge; right_edge--); //skip empty piles on the right
+ array_origin = left_edge - pitch;
+ // array of points, covering left_edge - pitch .. right_edge + pitch
+ std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1);
+ // local min results, used as a circular buffer indexed by minindex
+ std::vector<bool> mins(pitch_error * 2 + 1);
+ for (x = array_origin; x < left_edge; x++)
+ //free cuts
+ cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
+ zero_count, pitch, x, 0);
+ prev_zero = left_edge - 1;
+ for (offset = 0; offset <= pitch_error; offset++, x++)
+ //not quite free
+ cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection,
+ zero_count, pitch, x, offset);
+
+ best_cost = FLT_MAX;
+ best_end = nullptr;
+ for (offset = -pitch_error, minindex = 0; offset < pitch_error;
+ offset++, minindex++)
+ mins[minindex] = projection->local_min (x + offset); //prefill all but the last slot
+ next_zero = x + zero_offset + 1;
+ for (offset = next_zero - 1; offset >= x; offset--) {
+ if (projection->pile_count (offset) <= zero_count) {
+ next_zero = offset;
+ break;
+ }
+ }
+ while (x < right_edge - pitch_error) {
+ mins[minindex] = projection->local_min (x + pitch_error);
+ minindex++;
+ if (minindex > pitch_error * 2)
+ minindex = 0; //wrap the circular buffer
+ faking = false;
+ mid_cut = false;
+ offset = 0;
+ if (projection->pile_count (x) <= zero_count) {
+ prev_zero = x;
+ }
+ else {
+ //distance to the nearest zero pile within pitch_error either side
+ for (offset = 1; offset <= pitch_error; offset++)
+ if (projection->pile_count (x + offset) <= zero_count
+ || projection->pile_count (x - offset) <= zero_count)
+ break;
+ }
+ if (offset > pitch_error) { //no zero pile within tolerance
+ if (x - prev_zero > zero_offset && next_zero - x > zero_offset) {
+ //look for a local min within pitch_error either side
+ for (offset = 0; offset <= pitch_error; offset++) {
+ test_index = minindex + pitch_error + offset;
+ if (test_index > pitch_error * 2)
+ test_index -= pitch_error * 2 + 1;
+ if (mins[test_index])
+ break;
+ test_index = minindex + pitch_error - offset;
+ if (test_index > pitch_error * 2)
+ test_index -= pitch_error * 2 + 1;
+ if (mins[test_index])
+ break;
+ }
+ }
+ if (offset > pitch_error) { //no local min either: fake the cut
+ offset = projection->pile_count (x);
+ faking = true;
+ }
+ else {
+ projection_offset =
+ static_cast<int16_t>(projection->pile_count (x) / projection_scale);
+ if (projection_offset > offset)
+ offset = projection_offset;
+ mid_cut = true;
+ }
+ }
+ if ((start == 0 && end == 0)
+ || !textord_fast_pitch_test
+ || (x - projection_left - start) % pitch <= end)
+ cutpts[x - array_origin].assign(&cutpts[0], array_origin, x,
+ faking, mid_cut, offset, projection,
+ projection_scale, zero_count, pitch,
+ pitch_error);
+ else
+ cutpts[x - array_origin].assign_cheap(&cutpts[0], array_origin, x,
+ faking, mid_cut, offset,
+ projection, projection_scale,
+ zero_count, pitch,
+ pitch_error);
+ x++;
+ if (next_zero < x || next_zero == x + zero_offset)
+ next_zero = x + zero_offset + 1;
+ if (projection->pile_count (x + zero_offset) <= zero_count)
+ next_zero = x + zero_offset;
+ }
+
+ best_fake = INT16_MAX;
+ best_cost = INT32_MAX;
+ best_count = INT16_MAX;
+ while (x < right_edge + pitch) {
+ offset = x < right_edge ? right_edge - x : 0;
+ cutpts[x - array_origin].assign(&cutpts[0], array_origin, x,
+ false, false, offset, projection,
+ projection_scale, zero_count, pitch,
+ pitch_error);
+ cutpts[x - array_origin].terminal = true; //candidate end of path
+ if (cutpts[x - array_origin].index () +
+ cutpts[x - array_origin].fake_count <= best_count + best_fake) {
+ if (cutpts[x - array_origin].fake_count < best_fake
+ || (cutpts[x - array_origin].fake_count == best_fake
+ && cutpts[x - array_origin].cost_function () < best_cost)) {
+ best_fake = cutpts[x - array_origin].fake_count;
+ best_cost = cutpts[x - array_origin].cost_function ();
+ best_left_x = x;
+ best_right_x = x;
+ best_count = cutpts[x - array_origin].index ();
+ }
+ else if (cutpts[x - array_origin].fake_count == best_fake
+ && x == best_right_x + 1
+ && cutpts[x - array_origin].cost_function () == best_cost) {
+ //exactly equal
+ best_right_x = x;
+ }
+ }
+ x++;
+ }
+ ASSERT_HOST (best_fake < INT16_MAX);
+
+ best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin]; //middle of the run of equal-cost ends
+ // for (x=left_edge-pitch;x<right_edge+pitch;x++)
+ // {
+ // tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n",
+ // x,cutpts[x-array_origin].cost_function(),
+ // cutpts[x-array_origin].sum(),
+ // cutpts[x-array_origin].squares(),
+ // cutpts[x-array_origin].previous()->position());
+ // }
+ occupation_count = -1;
+ do {
+ for (x = best_end->position () - pitch + pitch_error;
+ x < best_end->position () - pitch_error
+ && projection->pile_count (x) == 0; x++);
+ if (x < best_end->position () - pitch_error)
+ occupation_count++; //found a non-empty pile in this cell
+ //copy it
+ segpt = new FPSEGPT (best_end);
+ seg_it.add_before_then_move (segpt);
+ best_end = best_end->previous ();
+ }
+ while (best_end != nullptr);
+ seg_it.move_to_last ();
+ mean_sum = seg_it.data ()->sum ();
+ mean_sum = mean_sum * mean_sum / best_count;
+ if (seg_it.data ()->squares () - mean_sum < 0)
+ tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n",
+ seg_it.data ()->squares (), seg_it.data ()->sum (), best_count);
+ return seg_it.data ()->squares () - mean_sum;
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/pithsync.h b/tesseract/src/textord/pithsync.h
new file mode 100644
index 00000000..f6309f19
--- /dev/null
+++ b/tesseract/src/textord/pithsync.h
@@ -0,0 +1,136 @@
+/**********************************************************************
+ * File: pithsync.h (Formerly pitsync2.h)
+ * Description: Code to find the optimum fixed pitch segmentation of some blobs.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef PITHSYNC_H
+#define PITHSYNC_H
+
+#include "blobbox.h"
+#include "params.h"
+#include "statistc.h"
+
+namespace tesseract {
+
+class FPSEGPT_LIST;
+
+class FPCUTPT //one candidate cut position in the fixed pitch lattice
+{
+ public:
+ FPCUTPT() = default;
+ void setup ( //start of cut
+ FPCUTPT cutpts[], //predecessors
+ int16_t array_origin, //start coord
+ STATS * projection, //occupation
+ int16_t zero_count, //official zero
+ int16_t pitch, //proposed pitch
+ int16_t x, //position
+ int16_t offset); //dist to gap
+
+ void assign( //evaluate cut
+ FPCUTPT cutpts[], //predecessors
+ int16_t array_origin, //start coord
+ int16_t x, //position
+ bool faking, //faking this one
+ bool mid_cut, //doing free cut
+ int16_t offset, //extra cost dist
+ STATS* projection, //occupation
+ float projection_scale, //scaling
+ int16_t zero_count, //official zero
+ int16_t pitch, //proposed pitch
+ int16_t pitch_error); //allowed tolerance
+
+ void assign_cheap ( //evaluate cut
+ FPCUTPT cutpts[], //predecessors
+ int16_t array_origin, //start coord
+ int16_t x, //position
+ bool faking, //faking this one
+ bool mid_cut, //doing free cut
+ int16_t offset, //extra cost dist
+ STATS * projection, //occupation
+ float projection_scale, //scaling
+ int16_t zero_count, //official zero
+ int16_t pitch, //proposed pitch
+ int16_t pitch_error); //allowed tolerance
+
+ int32_t position() const { // access func
+ return xpos;
+ }
+ double cost_function() const { //cost of best path to here
+ return cost;
+ }
+ double squares() const { //sum of squares on best path
+ return sq_sum;
+ }
+ double sum() const { //sum of dists on best path
+ return mean_sum;
+ }
+ FPCUTPT *previous() const { //nullptr at the start of a path
+ return pred;
+ }
+ int16_t cheap_cuts() const { //no of mid cuts
+ return mid_cuts;
+ }
+ int16_t index() const { //cut serial number
+ return region_index;
+ }
+
+ bool faked; //faked split point
+ bool terminal; //successful end
+ int16_t fake_count; //total fakes to here
+
+ private:
+ int16_t region_index; //cut serial number
+ int16_t mid_cuts; //no of cheap cuts
+ int32_t xpos; //location
+ uint32_t back_balance; //proj backwards
+ uint32_t fwd_balance; //proj forwards
+ FPCUTPT *pred; //optimal previous
+ double mean_sum; //sum of dists so far
+ double sq_sum; //summed distances
+ double cost; //cost function
+};
+double check_pitch_sync2( //find segmentation
+ BLOBNBOX_IT *blob_it, //blobs to do
+ int16_t blob_count, //no of blobs
+ int16_t pitch, //pitch estimate
+ int16_t pitch_error, //tolerance
+ STATS *projection, //vertical
+ int16_t projection_left, //edges
+ int16_t projection_right,
+ float projection_scale, //scale factor
+ int16_t &occupation_count, //no of occupied cells
+ FPSEGPT_LIST *seg_list, //output list
+ int16_t start, //start of good range
+ int16_t end //end of good range
+ );
+double check_pitch_sync3( //find segmentation
+ int16_t projection_left, //edges
+ int16_t projection_right,
+ int16_t zero_count, //to be considered 0
+ int16_t pitch, //pitch estimate
+ int16_t pitch_error, //tolerance
+ STATS *projection, //vertical
+ float projection_scale, //scale factor
+ int16_t &occupation_count, //no of occupied cells
+ FPSEGPT_LIST *seg_list, //output list
+ int16_t start, //start of good range
+ int16_t end //end of good range
+ );
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/pitsync1.cpp b/tesseract/src/textord/pitsync1.cpp
new file mode 100644
index 00000000..ca46dc84
--- /dev/null
+++ b/tesseract/src/textord/pitsync1.cpp
@@ -0,0 +1,422 @@
+/**********************************************************************
+ * File: pitsync1.cpp (Formerly pitsync.c)
+ * Description: Code to find the optimum fixed pitch segmentation of some blobs.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "pitsync1.h"
+
+#include <cfloat> // for FLT_MAX
+#include <cmath>
+
+namespace tesseract {
+
+ELISTIZE (FPSEGPT) CLISTIZE (FPSEGPT_LIST) // NOTE(review): presumably the bodies matching ELISTIZEH/CLISTIZEH in pitsync1.h - confirm
+
+INT_VAR(pitsync_linear_version, 6, "Use new fast algorithm"); // not read by the code in this file
+double_VAR(pitsync_joined_edge, 0.75, "Dist inside big blob for chopping"); // multiplied by pitch in check_pitch_sync
+double_VAR(pitsync_offset_freecut_fraction, 0.25,
+ "Fraction of cut for free cuts"); // not read by live code in this file
+INT_VAR(pitsync_fake_depth, 1, "Max advance fake generation"); // not read by the code in this file
+
+/**********************************************************************
+ * FPSEGPT::FPSEGPT
+ *
+ * Build an FPSEGPT that mirrors an FPCUTPT: position, cost, running sums,
+ * fake/terminal flags and cheap-cut count are copied; pred starts out null.
+ **********************************************************************/
+
+FPSEGPT::FPSEGPT(FPCUTPT *cutpt)   // cut point to copy from
+    : faked(cutpt->faked),
+      terminal(cutpt->terminal),
+      fake_count(cutpt->fake_count),
+      mid_cuts(cutpt->cheap_cuts()),
+      xpos(cutpt->position()),
+      pred(nullptr),
+      mean_sum(cutpt->sum()),
+      sq_sum(cutpt->squares()),
+      cost(cutpt->cost_function()) {
+  // Everything is set in the initializer list, in member declaration order,
+  // so there is nothing left to do here.
+}
+
+
+/**********************************************************************
+ * FPSEGPT::FPSEGPT
+ *
+ * Build an FPSEGPT at position x with no predecessor: zero cost, zero sums,
+ * zero fake and cheap-cut counts, and both flags cleared.
+ **********************************************************************/
+
+FPSEGPT::FPSEGPT(int16_t x)   // position
+    : faked(false),
+      terminal(false),
+      fake_count(0),
+      mid_cuts(0),
+      xpos(x),
+      pred(nullptr),
+      mean_sum(0),
+      sq_sum(0),
+      cost(0) {
+}
+
+
+/**********************************************************************
+ * FPSEGPT::FPSEGPT
+ *
+ * Build an FPSEGPT at x and link it to the cheapest compatible point of
+ * prev_list. A previous point is compatible when it is not terminal and
+ * lies within pitch +/- pitch_error to the left of x. If none is found,
+ * or the chosen path has too many fakes, previous() is left null.
+ **********************************************************************/
+
+FPSEGPT::FPSEGPT(
+    int16_t x,               // position
+    bool faking,             // faking this one
+    int16_t offset,          // dist to gap
+    int16_t region_index,    // segment number
+    int16_t pitch,           // proposed pitch
+    int16_t pitch_error,     // allowed tolerance
+    FPSEGPT_LIST *prev_list  // previous segment
+    )
+    : fake_count(0),
+      xpos(x),
+      mean_sum(0.0),
+      sq_sum(0.0) {
+  faked = faking;
+  terminal = false;
+  mid_cuts = 0;
+  pred = nullptr;
+  cost = FLT_MAX;            // any real candidate beats this
+
+  int16_t fewest_fakes = INT16_MAX;  // least fake_count seen in prev_list
+  FPSEGPT_IT prev_it = prev_list;
+  for (prev_it.mark_cycle_pt(); !prev_it.cycled_list(); prev_it.forward()) {
+    FPSEGPT *cand = prev_it.data();
+    // Every previous point counts here, compatible or not.
+    if (cand->fake_count < fewest_fakes) {
+      fewest_fakes = cand->fake_count;
+    }
+    const int32_t gap = x - cand->xpos;
+    if (cand->terminal || gap < pitch - pitch_error ||
+        gap > pitch + pitch_error) {
+      continue;              // finished path or outside the pitch window
+    }
+    // Running totals along the path through cand.
+    const double gap_total = cand->mean_sum + gap;
+    const double sq_total = gap * gap + cand->sq_sum + offset * offset;
+    const double mean_gap = gap_total / region_index;
+    // Squared error of the mean gap, plus sq_total / regions - mean^2.
+    double score = mean_gap - pitch;
+    score *= score;
+    score += sq_total / (region_index) - mean_gap * mean_gap;
+    if (score < cost) {
+      cost = score;          // find least cost
+      pred = cand;           // save path
+      mean_sum = gap_total;
+      sq_sum = sq_total;
+      fake_count = cand->fake_count + faked;
+    }
+  }
+  if (fake_count > fewest_fakes + 1) {
+    pred = nullptr;          // too many fakes on this path: fail it
+  }
+}
+
+/**********************************************************************
+ * check_pitch_sync
+ *
+ * Construct the lattice of possible segmentation points and choose the
+ * optimal path. Only the points on that path are moved into seg_list.
+ * Returns squares() - sum()^2 / best_region_index for the last path point.
+ **********************************************************************/
+
+double check_pitch_sync( //find segmentation
+ BLOBNBOX_IT *blob_it, //blobs to do
+ int16_t blob_count, //no of blobs
+ int16_t pitch, //pitch estimate
+ int16_t pitch_error, //tolerance
+ STATS *projection, //vertical
+ FPSEGPT_LIST *seg_list //output list
+ ) {
+ int16_t x; //current coord
+ int16_t min_index; //blob number
+ int16_t max_index; //blob number
+ int16_t left_edge; //of word
+ int16_t right_edge; //of word
+ int16_t right_max; //max allowed x
+ int16_t min_x; //in this region
+ int16_t max_x;
+ int16_t region_index;
+ int16_t best_region_index = 0; //for best result
+ int16_t offset; //dist to legal area
+ int16_t left_best_x; //edge of good region
+ int16_t right_best_x; //right edge
+ TBOX min_box; //bounding box
+ TBOX max_box; //bounding box
+ TBOX next_box; //box of next blob
+ FPSEGPT *segpt; //segment point
+ FPSEGPT_LIST *segpts; //points in a segment
+ double best_cost; //best path
+ double mean_sum; //computes result
+ FPSEGPT *best_end; //end of best path
+ BLOBNBOX_IT min_it; //copy iterator
+ BLOBNBOX_IT max_it; //copy iterator
+ FPSEGPT_IT segpt_it; //iterator
+ //output segments
+ FPSEGPT_IT outseg_it = seg_list;
+ FPSEGPT_LIST_CLIST lattice; //list of lists
+ //region iterator
+ FPSEGPT_LIST_C_IT lattice_it = &lattice;
+
+ // Clamp the inputs before building the lattice: pitch is raised to at
+ // least 3 and pitch_error is capped at (pitch - 3) / 2. Blob boxes are
+ // read with box_next(); the first lattice column gets a zero-cost
+ // FPSEGPT(x) for every x in [min_x, max_x].
+ if (pitch < 3)
+ pitch = 3; //nothing ludicrous
+ if ((pitch - 3) / 2 < pitch_error)
+ pitch_error = (pitch - 3) / 2;
+ min_it = *blob_it;
+ min_box = box_next (&min_it); //get box
+ // left_edge is the left side of the first box moved right by
+ // pitch_error; right_edge (set after the loop below) is the right
+ // side of the box returned by the last box_next() call, i.e. after
+ // blob_count calls in total.
+ //left of word
+ left_edge = min_box.left () + pitch_error;
+ for (min_index = 1; min_index < blob_count; min_index++) {
+ min_box = box_next (&min_it);
+ // Nothing is accumulated here: the loop only advances min_it so
+ // that min_box ends up holding the last box of the word.
+ // (min_it and min_box are reset to the first box again before the
+ // main loop starts.)
+ }
+ right_edge = min_box.right (); //end of word
+ max_x = left_edge;
+ //min permissible
+ min_x = max_x - pitch + pitch_error * 2 + 1;
+ right_max = right_edge + pitch - pitch_error - 1;
+ segpts = new FPSEGPT_LIST; //list of points
+ segpt_it.set_to_list (segpts);
+ for (x = min_x; x <= max_x; x++) {
+ segpt = new FPSEGPT (x); //make a new one
+ //put in list
+ segpt_it.add_after_then_move (segpt);
+ }
+ //first segment
+ lattice_it.add_before_then_move (segpts);
+ min_index = 0;
+ region_index = 1;
+ best_cost = FLT_MAX;
+ best_end = nullptr;
+ min_it = *blob_it;
+ min_box = box_next (&min_it); //first box
+ do {
+ left_best_x = -1;
+ right_best_x = -1;
+ segpts = new FPSEGPT_LIST; //list of points
+ segpt_it.set_to_list (segpts);
+ min_x += pitch - pitch_error;//next limits
+ max_x += pitch + pitch_error;
+ while (min_box.right () < min_x && min_index < blob_count) {
+ min_index++;
+ min_box = box_next (&min_it);
+ }
+ max_it = min_it;
+ max_index = min_index;
+ max_box = min_box;
+ next_box = box_next (&max_it);
+ for (x = min_x; x <= max_x && x <= right_max; x++) {
+ while (x < right_edge && max_index < blob_count
+ && x > max_box.right ()) {
+ max_index++;
+ max_box = next_box;
+ next_box = box_next (&max_it);
+ }
+ if (x <= max_box.left () + pitch_error
+ || x >= max_box.right () - pitch_error || x >= right_edge
+ || (max_index < blob_count - 1 && x >= next_box.left ())
+ || (x - max_box.left () > pitch * pitsync_joined_edge
+ && max_box.right () - x > pitch * pitsync_joined_edge)) {
+ // not faked: near a box side, past the word/next box, or deep in a wide box
+ if (x - max_box.left () > 0
+ && x - max_box.left () <= pitch_error)
+ //dist to real break
+ offset = x - max_box.left ();
+ else if (max_box.right () - x > 0
+ && max_box.right () - x <= pitch_error
+ && (max_index >= blob_count - 1
+ || x < next_box.left ()))
+ offset = max_box.right () - x;
+ else
+ offset = 0;
+ // offset=pitsync_offset_freecut_fraction*projection->pile_count(x);
+ segpt = new FPSEGPT (x, false, offset, region_index,
+ pitch, pitch_error, lattice_it.data ());
+ }
+ else { //faked cut: offset comes from projection->pile_count(x)
+ offset = projection->pile_count (x);
+ segpt = new FPSEGPT (x, true, offset, region_index,
+ pitch, pitch_error, lattice_it.data ());
+ }
+ if (segpt->previous () != nullptr) {
+ segpt_it.add_after_then_move (segpt);
+ if (x >= right_edge - pitch_error) {
+ segpt->terminal = true;//no more wanted
+ if (segpt->cost_function () < best_cost) {
+ best_cost = segpt->cost_function ();
+ //find least
+ best_end = segpt;
+ best_region_index = region_index;
+ left_best_x = x;
+ right_best_x = x;
+ }
+ else if (segpt->cost_function () == best_cost
+ && right_best_x == x - 1)
+ right_best_x = x;
+ }
+ }
+ else {
+ delete segpt; //no good
+ }
+ }
+ if (segpts->empty ()) {
+ if (best_end != nullptr)
+ break; //already found one
+ make_illegal_segment (lattice_it.data (), min_box, min_it,
+ region_index, pitch, pitch_error, segpts);
+ }
+ else {
+ if (right_best_x > left_best_x + 1) {
+ left_best_x = (left_best_x + right_best_x + 1) / 2;
+ for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list ()
+ && segpt_it.data ()->position () != left_best_x;
+ segpt_it.forward ());
+ if (segpt_it.data ()->position () == left_best_x)
+ //middle of region
+ best_end = segpt_it.data ();
+ }
+ }
+ //new segment
+ lattice_it.add_before_then_move (segpts);
+ region_index++;
+ }
+ while (min_x < right_edge);
+ ASSERT_HOST (best_end != nullptr);//must always find some
+
+ for (lattice_it.mark_cycle_pt (); !lattice_it.cycled_list ();
+ lattice_it.forward ()) {
+ segpts = lattice_it.data ();
+ segpt_it.set_to_list (segpts);
+ // Trace the best path back through the lattice. In each list the
+ // point equal to best_end (if present) is extracted and added to
+ // seg_list via outseg_it.add_before_then_move(), and best_end
+ // then moves to that point's previous(). Lists were inserted with
+ // add_before_then_move(); NOTE(review): so this walk presumably
+ // visits them from the newest region back to the first - confirm
+ // against the CLIST iterator semantics.
+ // Points left behind in the lattice are handled by
+ // lattice.deep_clear() further down.
+ // The asserts after the loop require the walk to have reached a
+ // point with no predecessor and to have produced some output.
+ for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list ()
+ && segpt_it.data () != best_end; segpt_it.forward ());
+ if (segpt_it.data () == best_end) {
+ //save good one
+ segpt = segpt_it.extract ();
+ outseg_it.add_before_then_move (segpt);
+ best_end = segpt->previous ();
+ }
+ }
+ ASSERT_HOST (best_end == nullptr);
+ ASSERT_HOST (!outseg_it.empty ());
+ outseg_it.move_to_last ();
+ mean_sum = outseg_it.data ()->sum ();
+ mean_sum = mean_sum * mean_sum / best_region_index;
+ if (outseg_it.data ()->squares () - mean_sum < 0)
+ tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n",
+ outseg_it.data ()->squares (), outseg_it.data ()->sum (),
+ best_region_index);
+ lattice.deep_clear (); //shift the lot
+ return outseg_it.data ()->squares () - mean_sum;
+}
+
+
+/**********************************************************************
+ * make_illegal_segment
+ *
+ * Make a fake set of chop points due to having no legal places: candidates
+ * sit one pitch (+/- pitch_error) right of prev_list's least-cost point(s).
+ **********************************************************************/
+
+void make_illegal_segment(        //make fake chop points
+    FPSEGPT_LIST *prev_list,      //previous segments
+    TBOX blob_box,                //bounding box
+    BLOBNBOX_IT blob_it,          //iterator
+    int16_t region_index,         //number of segment
+    int16_t pitch,                //pitch estimate
+    int16_t pitch_error,          //tolerance
+    FPSEGPT_LIST *seg_list        //output list
+    ) {
+  int16_t x;                      //current coord
+  int16_t min_x = 0;              //in this region
+  int16_t max_x = 0;
+  int16_t offset;                 //dist to edge
+  FPSEGPT *segpt;                 //segment point
+  FPSEGPT *prevpt;                //previous point
+  // double, like FPSEGPT::cost_function(): a float copy could round the
+  // cost, so equal-cost points would no longer compare equal below.
+  double best_cost;               //best path
+  FPSEGPT_IT segpt_it = seg_list; //iterator
+  FPSEGPT_IT prevpt_it = prev_list;  //previous points
+
+  best_cost = FLT_MAX;
+  for (prevpt_it.mark_cycle_pt (); !prevpt_it.cycled_list ();
+       prevpt_it.forward ()) {
+    prevpt = prevpt_it.data ();
+    if (prevpt->cost_function () < best_cost) {
+      best_cost = prevpt->cost_function ();  //find least
+      min_x = prevpt->position ();
+      max_x = min_x;              //limits on coords
+    }
+    else if (prevpt->cost_function () == best_cost) {
+      max_x = prevpt->position ();  //stretch range over the tie
+    }
+  }
+  min_x += pitch - pitch_error;
+  max_x += pitch + pitch_error;
+  for (x = min_x; x <= max_x; x++) {
+    while (x > blob_box.right ()) {
+      blob_box = box_next (&blob_it);  //advance until a box reaches x
+    }
+    offset = x - blob_box.left ();     //dist to the nearer box side
+    if (blob_box.right () - x < offset)
+      offset = blob_box.right () - x;
+    segpt = new FPSEGPT (x, false, offset,
+                         region_index, pitch, pitch_error, prev_list);
+    if (segpt->previous () != nullptr) {
+      ASSERT_HOST (offset >= 0);
+      fprintf (stderr, "made fake at %d\n", x);
+      segpt_it.add_after_then_move (segpt);  //make one up
+      segpt->faked = true;
+      segpt->fake_count++;
+    }
+    else
+      delete segpt;               //no predecessor: discard
+  }
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/pitsync1.h b/tesseract/src/textord/pitsync1.h
new file mode 100644
index 00000000..310a6d8a
--- /dev/null
+++ b/tesseract/src/textord/pitsync1.h
@@ -0,0 +1,125 @@
+/**********************************************************************
+ * File: pitsync1.h (Formerly pitsync.h)
+ * Description: Code to find the optimum fixed pitch segmentation of some blobs.
+ * Author: Ray Smith
+ * Created: Thu Nov 19 11:48:05 GMT 1992
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef PITSYNC1_H
+#define PITSYNC1_H
+
+#include "elst.h"
+#include "clst.h"
+#include "blobbox.h"
+#include "params.h"
+#include "statistc.h"
+#include "pithsync.h"
+
+namespace tesseract {
+
+class FPSEGPT_LIST;
+
+// A candidate segmentation point, chained to its best predecessor.
+class FPSEGPT : public ELIST_LINK {
+ public:
+  FPSEGPT() = default;         // members take the defaults given below
+  FPSEGPT(                     //constructor
+      int16_t x);              //position
+  FPSEGPT(                     //constructor
+      int16_t x,               //position
+      bool faking,             //faking this one
+      int16_t offset,          //extra cost dist
+      int16_t region_index,    //segment number
+      int16_t pitch,           //proposed pitch
+      int16_t pitch_error,     //allowed tolerance
+      FPSEGPT_LIST *prev_list);  //previous segment
+  FPSEGPT(FPCUTPT *cutpt);     //build from new type
+
+  int32_t position() const {   //x coordinate; const like cheap_cuts()
+    return xpos;
+  }
+  double cost_function() const {  //cost of best path to here
+    return cost;
+  }
+  double squares() const {     //running sum of squares
+    return sq_sum;
+  }
+  double sum() const {         //running sum of distances
+    return mean_sum;
+  }
+  FPSEGPT *previous() const {  //best predecessor, or nullptr
+    return pred;
+  }
+  int16_t cheap_cuts() const { //no of cheap cuts
+    return mid_cuts;
+  }
+
+  bool faked = false;          //faked split point
+  bool terminal = false;       //successful end
+  int16_t fake_count = 0;      //total fakes to here
+
+ private:
+  int16_t mid_cuts = 0;        //no of cheap cuts
+  int32_t xpos = 0;            //location
+  FPSEGPT *pred = nullptr;     //optimal previous
+  double mean_sum = 0.0;       //mean so far
+  double sq_sum = 0.0;         //summed distances
+  double cost = 0.0;           //cost function
+};
+
+ELISTIZEH (FPSEGPT) CLISTIZEH (FPSEGPT_LIST)
+extern
+INT_VAR_H (pitsync_linear_version, 6, "Use new fast algorithm"); // value matches the INT_VAR definition in pitsync1.cpp
+extern
+double_VAR_H (pitsync_joined_edge, 0.75,
+"Dist inside big blob for chopping");
+extern
+double_VAR_H (pitsync_offset_freecut_fraction, 0.25,
+"Fraction of cut for free cuts");
+extern
+INT_VAR_H (pitsync_fake_depth, 1, "Max advance fake generation");
+double check_pitch_sync( //find segmentation; returns goodness of the sync
+ BLOBNBOX_IT *blob_it, //blobs to do
+ int16_t blob_count, //no of blobs
+ int16_t pitch, //pitch estimate
+ int16_t pitch_error, //tolerance
+ STATS *projection, //vertical projection, read via pile_count()
+ FPSEGPT_LIST *seg_list //output list: receives the best path
+ );
+void make_illegal_segment( //make fake chop points
+ FPSEGPT_LIST *prev_list, //previous segments
+ TBOX blob_box, //bounding box
+ BLOBNBOX_IT blob_it, //iterator, passed by value
+ int16_t region_index, //number of segment
+ int16_t pitch, //pitch estimate
+ int16_t pitch_error, //tolerance
+ FPSEGPT_LIST *seg_list //output list
+ );
+int16_t vertical_torow_projection( //project whole row; not defined in pitsync1.cpp
+ TO_ROW *row, //row to do
+ STATS *projection //output
+ );
+void vertical_cblob_projection( //project outlines; not defined in pitsync1.cpp
+ C_BLOB *blob, //blob to project
+ STATS *stats //output
+ );
+void vertical_coutline_projection( //project outlines; not defined in pitsync1.cpp
+ C_OUTLINE *outline, //outline to project
+ STATS *stats //output
+ );
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/scanedg.cpp b/tesseract/src/textord/scanedg.cpp
new file mode 100644
index 00000000..fa0608cb
--- /dev/null
+++ b/tesseract/src/textord/scanedg.cpp
@@ -0,0 +1,405 @@
+/**********************************************************************
+ * File: scanedg.cpp (Formerly scanedge.c)
+ * Description: Raster scanning crack based edge extractor.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "scanedg.h"
+
+#include "crakedge.h"
+#include "edgloop.h"
+#include "pdblock.h"
+
+#include "allheaders.h"
+
+#include <memory> // std::unique_ptr
+
+namespace tesseract {
+
+#define WHITE_PIX 1 /* thresholded colours; also used as the margin value */
+#define BLACK_PIX 0
+// Flips between WHITE_PIX and BLACK_PIX (1 - 1 == 0 and 1 - 0 == 1).
+#define FLIP_COLOUR(pix) (1-(pix))
+
+struct CrackPos { // What h_edge()/v_edge() need to make an edge.
+ CRACKEDGE** free_cracks; // Freelist head; edges are taken from it before using new.
+ int x; // x position of new edge.
+ int y; // y position of new edge.
+};
+
+static void free_crackedges(CRACKEDGE* start); // Deletes every edge reached through next.
+// Joins two edges; an already closed loop is passed to complete_edge().
+static void join_edges(CRACKEDGE* edge1, CRACKEDGE* edge2,
+ CRACKEDGE** free_cracks,
+ C_OUTLINE_IT* outline_it);
+// Scans one line of pixels and updates the edges in progress.
+static void line_edges(int16_t x, int16_t y, int16_t xext, uint8_t uppercolour,
+ uint8_t* bwpos,
+ CRACKEDGE** prevline, CRACKEDGE** free_cracks,
+ C_OUTLINE_IT* outline_it);
+// Sets the non-text pixels of line y to margin.
+static void make_margins(PDBLK* block, BLOCK_LINE_IT* line_it,
+ uint8_t* pixels, uint8_t margin,
+ int16_t left, int16_t right, int16_t y);
+// Each creates a horizontal / vertical CRACKEDGE and links it to join.
+static CRACKEDGE* h_edge(int sign, CRACKEDGE* join, CrackPos* pos);
+static CRACKEDGE* v_edge(int sign, CRACKEDGE* join, CrackPos* pos);
+
+/**********************************************************************
+ * block_edges
+ *
+ * Scan the thresholded image over the bounding box of a PDBLK, from the
+ * highest y downwards, handing each line of pixels to line_edges.
+ **********************************************************************/
+
+void block_edges(Pix *t_pix,                  // thresholded image
+                 PDBLK *block,                // block in image
+                 C_OUTLINE_IT* outline_it) {
+  BLOCK_LINE_IT line_it = block;              // line iterator
+  const int width = pixGetWidth(t_pix);
+  const int height = pixGetHeight(t_pix);
+  const int wpl = pixGetWpl(t_pix);
+  // width + 1 slots for the lines in progress.
+  std::unique_ptr<CRACKEDGE*[]> ptrline(new CRACKEDGE*[width + 1]);
+  CRACKEDGE *free_cracks = nullptr;           // recycled edges
+
+  ICOORD bleft;                               // bounding box corners
+  ICOORD tright;
+  block->bounding_box(bleft, tright);
+  ASSERT_HOST(tright.x() <= width);
+  ASSERT_HOST(tright.y() <= height);
+  const int block_width = tright.x() - bleft.x();
+  for (int slot = 0; slot <= block_width; ++slot) {
+    ptrline[slot] = nullptr;                  // no lines in progress
+  }
+
+  std::unique_ptr<uint8_t[]> bwline(new uint8_t[width]);
+  const uint8_t margin = WHITE_PIX;
+  // Runs one line past the bottom of the box; that extra line is all margin.
+  for (int y = tright.y() - 1; y >= bleft.y() - 1; --y) {
+    if (y < bleft.y()) {
+      memset(bwline.get(), margin, block_width * sizeof(uint8_t));
+    } else {
+      // Row index is flipped (height - 1 - y) and each bit is inverted.
+      l_uint32* row = pixGetData(t_pix) + wpl * (height - 1 - y);
+      for (int col = 0; col < block_width; ++col) {
+        bwline[col] = GET_DATA_BIT(row, col + bleft.x()) ^ 1;
+      }
+      make_margins(block, &line_it, bwline.get(), margin,
+                   bleft.x(), tright.x(), y);
+    }
+    line_edges(bleft.x(), y, block_width, margin, bwline.get(),
+               ptrline.get(), &free_cracks, outline_it);
+  }
+  free_crackedges(free_cracks);               // really free them
+}
+
+
+/**********************************************************************
+ * make_margins
+ *
+ * Set to margin the pixels of line y that lie outside the block: outside
+ * the poly block's segments, or outside the extent given by line_it.
+ **********************************************************************/
+
+static void make_margins(          //get a line
+    PDBLK *block,                  //block in image
+    BLOCK_LINE_IT *line_it,        //for old style
+    uint8_t *pixels,               //pixels to strip
+    uint8_t margin,                //white-out pixel
+    int16_t left,                  //block edges
+    int16_t right,
+    int16_t y                      //line coord
+    ) {
+  if (block->poly_block() == nullptr) {
+    int16_t span;                  //extent kept on this line
+    const int32_t first = line_it->get_line(y, span);
+    for (int col = left; col < first; ++col) {
+      pixels[col - left] = margin;
+    }
+    for (int col = first + span; col < right; ++col) {
+      pixels[col - left] = margin;
+    }
+    return;
+  }
+  // Polygonal block: each list element holds a start x and an extent.
+  PB_LINE_IT poly_lines(block->poly_block());
+  const std::unique_ptr<ICOORDELT_LIST> segments(poly_lines.get_line(y));
+  if (segments->empty()) {
+    for (int col = left; col < right; ++col) {
+      pixels[col - left] = margin;
+    }
+    return;
+  }
+  ICOORDELT_IT seg_it;
+  seg_it.set_to_list(segments.get());
+  seg_it.mark_cycle_pt();
+  int32_t seg_start = seg_it.data()->x();
+  int16_t seg_len = seg_it.data()->y();
+  int col = left;
+  while (col < right) {
+    if (col >= seg_start && !seg_it.cycled_list()) {
+      col = seg_start + seg_len - 1;   // skip to the segment's last pixel
+      seg_it.forward();
+      seg_start = seg_it.data()->x();
+      seg_len = seg_it.data()->y();
+    } else {
+      pixels[col - left] = margin;
+    }
+    ++col;
+  }
+}
+
+/**********************************************************************
+ * line_edges
+ *
+ * Scan one line of pixels for edges and update the edges in progress.
+ * join_edges() is called where edges meet; it hands closed loops on.
+ **********************************************************************/
+
+static
+void line_edges(int16_t x, // coord of line start
+ int16_t y, // coord of line
+ int16_t xext, // width of line
+ uint8_t uppercolour, // colour at start of prev line
+ uint8_t * bwpos, // thresholded line
+ CRACKEDGE ** prevline, // edges in progress
+ CRACKEDGE **free_cracks, // freelist, passed on to the edge helpers
+ C_OUTLINE_IT* outline_it) { // passed through to join_edges
+ CrackPos pos = {free_cracks, x, y };
+ int xmax; // max x coord
+ int prevcolour; // of previous pixel
+ CRACKEDGE *current; // current h edge
+ CRACKEDGE *newcurrent; // new h edge
+
+ xmax = x + xext; // max allowable coord
+ prevcolour = uppercolour; // forced plain margin
+ current = nullptr; // nothing yet
+
+ // do each pixel
+ for (; pos.x < xmax; pos.x++, prevline++) {
+ const int colour = *bwpos++; // current pixel
+ if (*prevline != nullptr) {
+ // changed above
+ // change colour
+ uppercolour = FLIP_COLOUR(uppercolour);
+ if (colour == prevcolour) {
+ if (colour == uppercolour) {
+ // finish a line
+ join_edges(current, *prevline, free_cracks, outline_it);
+ current = nullptr; // no edge now
+ } else {
+ // new horiz edge
+ current = h_edge(uppercolour - colour, *prevline, &pos);
+ }
+ *prevline = nullptr; // no change this time
+ } else {
+ if (colour == uppercolour)
+ *prevline = v_edge(colour - prevcolour, *prevline, &pos);
+ // 8 vs 4 connection
+ else if (colour == WHITE_PIX) {
+ join_edges(current, *prevline, free_cracks, outline_it);
+ current = h_edge(uppercolour - colour, nullptr, &pos);
+ *prevline = v_edge(colour - prevcolour, current, &pos);
+ } else {
+ newcurrent = h_edge(uppercolour - colour, *prevline, &pos);
+ *prevline = v_edge(colour - prevcolour, current, &pos);
+ current = newcurrent; // right going h edge
+ }
+ prevcolour = colour; // remember new colour
+ }
+ } else {
+ if (colour != prevcolour) {
+ *prevline = current = v_edge(colour - prevcolour, current, &pos);
+ prevcolour = colour;
+ }
+ if (colour != uppercolour)
+ current = h_edge(uppercolour - colour, current, &pos);
+ else
+ current = nullptr; // no edge now
+ }
+ }
+ if (current != nullptr) {
+ // out of block
+ if (*prevline != nullptr) { // got one to join to?
+ join_edges(current, *prevline, free_cracks, outline_it);
+ *prevline = nullptr; // tidy now
+ } else {
+ // fake vertical
+ *prevline = v_edge(FLIP_COLOUR(prevcolour)-prevcolour, current, &pos);
+ }
+ } else if (*prevline != nullptr) {
+ //continue fake
+ *prevline = v_edge(FLIP_COLOUR(prevcolour)-prevcolour, *prevline, &pos);
+ }
+}
+
+
+/**********************************************************************
+ * h_edge
+ *
+ * Create a new horizontal CRACKEDGE at y = pos->y + 1 and link it to the
+ * given edge; with no edge to join, it is linked to itself instead.
+ **********************************************************************/
+
+static
+CRACKEDGE *h_edge(int sign,         // sign of edge
+                  CRACKEDGE* join,  // edge to join to
+                  CrackPos* pos) {
+  // Reuse the head of the freelist when there is one, else allocate.
+  CRACKEDGE *edge = *pos->free_cracks;
+  if (edge == nullptr) {
+    edge = new CRACKEDGE;
+  } else {
+    *pos->free_cracks = edge->next;
+  }
+
+  // Positive sign: start at x + 1 with stepx -1; otherwise start at x
+  // with stepx +1.
+  const bool leftward = sign > 0;
+  edge->stepy = 0;                                  // edge is horizontal
+  edge->stepx = leftward ? -1 : 1;
+  edge->stepdir = leftward ? 0 : 2;
+  edge->pos.set_x(leftward ? pos->x + 1 : pos->x);  // start location
+  edge->pos.set_y(pos->y + 1);
+
+  if (join == nullptr) {
+    edge->next = edge;                              // ptrs to other ends
+    edge->prev = edge;
+    return edge;
+  }
+  const bool ends_at_join = edge->pos.x() + edge->stepx == join->pos.x() &&
+                            edge->pos.y() == join->pos.y();
+  if (ends_at_join) {
+    // The new edge is placed in front of join.
+    edge->prev = join->prev;
+    edge->prev->next = edge;
+    edge->next = join;
+    join->prev = edge;
+  } else {
+    // The new edge is placed after join.
+    edge->next = join->next;
+    edge->next->prev = edge;
+    edge->prev = join;
+    join->next = edge;
+  }
+  return edge;
+}
+
+
+/**********************************************************************
+ * v_edge
+ *
+ * Create a new vertical CRACKEDGE at x = pos->x and link it to the given
+ * edge; with no edge to join, it is linked to itself instead.
+ **********************************************************************/
+
+static
+CRACKEDGE *v_edge(int sign,         // sign of edge
+                  CRACKEDGE* join,  // edge to join to
+                  CrackPos* pos) {
+  // Reuse the head of the freelist when there is one, else allocate.
+  CRACKEDGE *edge = *pos->free_cracks;
+  if (edge == nullptr) {
+    edge = new CRACKEDGE;
+  } else {
+    *pos->free_cracks = edge->next;
+  }
+
+  // Positive sign: start at y with stepy +1; otherwise start at y + 1
+  // with stepy -1.
+  const bool upward = sign > 0;
+  edge->stepx = 0;                                // edge is vertical
+  edge->stepy = upward ? 1 : -1;
+  edge->stepdir = upward ? 3 : 1;
+  edge->pos.set_x(pos->x);
+  edge->pos.set_y(upward ? pos->y : pos->y + 1);  // start location
+
+  if (join == nullptr) {
+    edge->next = edge;                            // ptrs to other ends
+    edge->prev = edge;
+    return edge;
+  }
+  const bool ends_at_join = edge->pos.x() == join->pos.x() &&
+                            edge->pos.y() + edge->stepy == join->pos.y();
+  if (ends_at_join) {
+    // The new edge is placed in front of join.
+    edge->prev = join->prev;
+    edge->prev->next = edge;
+    edge->next = join;
+    join->prev = edge;
+  } else {
+    // The new edge is placed after join.
+    edge->next = join->next;
+    edge->next->prev = edge;
+    edge->prev = join;
+    join->next = edge;
+  }
+  return edge;
+}
+
+
+/**********************************************************************
+ * join_edges
+ *
+ * Join 2 edges together. If they already form a closed loop, the loop is
+ * passed to complete_edge() and its edges are put on the freelist.
+ **********************************************************************/
+
+static
+void join_edges(CRACKEDGE *edge1,          // edges to join
+                CRACKEDGE *edge2,          // no specific order
+                CRACKEDGE **free_cracks,
+                C_OUTLINE_IT* outline_it) {
+  // Keep the given order only if one step from edge1 lands on edge2;
+  // in every other case the two roles are swapped.
+  const bool in_order = edge1->pos.x() + edge1->stepx == edge2->pos.x() &&
+                        edge1->pos.y() + edge1->stepy == edge2->pos.y();
+  CRACKEDGE *head = in_order ? edge1 : edge2;
+  CRACKEDGE *tail = in_order ? edge2 : edge1;
+
+  if (head->next != tail) {
+    // update opposite ends
+    tail->prev->next = head->next;
+    head->next->prev = tail->prev;
+    head->next = tail;                     // make joins
+    tail->prev = head;
+    return;
+  }
+
+  // already closed
+  complete_edge(head, outline_it);
+  head->prev->next = *free_cracks;         // attach freelist to end
+  *free_cracks = head;                     // and free list
+}
+
+
+/**********************************************************************
+ * free_crackedges
+ *
+ * Really free the CRACKEDGEs: delete each edge reached by following next
+ * from start until a null pointer is met.
+ **********************************************************************/
+
+static void free_crackedges(CRACKEDGE *start) {
+  CRACKEDGE *edge = start;
+  while (edge != nullptr) {
+    CRACKEDGE *following = edge->next;  // read before the edge is deleted
+    delete edge;
+    edge = following;
+  }
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/scanedg.h b/tesseract/src/textord/scanedg.h
new file mode 100644
index 00000000..96bf6478
--- /dev/null
+++ b/tesseract/src/textord/scanedg.h
@@ -0,0 +1,38 @@
+/**********************************************************************
+ * File: scanedg.h (Formerly scanedge.h)
+ * Description: Raster scanning crack based edge extractor.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1991, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef SCANEDG_H
+#define SCANEDG_H
+
+#include "params.h"
+#include "scrollview.h"
+
+struct Pix;
+
+namespace tesseract {
+
+class C_OUTLINE_IT;
+class PDBLK;
+
+void block_edges(Pix* t_image, // thresholded image
+ PDBLK* block, // block in image
+ C_OUTLINE_IT* outline_it); // NOTE(review): presumably receives the extracted outlines - confirm
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/sortflts.cpp b/tesseract/src/textord/sortflts.cpp
new file mode 100644
index 00000000..01548e9f
--- /dev/null
+++ b/tesseract/src/textord/sortflts.cpp
@@ -0,0 +1,81 @@
+/**********************************************************************
+ * File: sortflts.cpp (Formerly sfloats.c)
+ * Description: Code to maintain a sorted list of floats.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "sortflts.h"
+
+namespace tesseract {
+
+ELISTIZE (SORTED_FLOAT)
+/**
+ * @name SORTED_FLOATS::add
+ *
+ * Allocates a new SORTED_FLOAT for (value, key) and links it into the list
+ * in front of the first existing entry that is not less than value, or at
+ * the tail if every existing entry is less than value.
+ */
+void SORTED_FLOATS::add(float value, int32_t key) {
+  auto *node = new SORTED_FLOAT(value, key);
+
+  // An empty list has no position to search for.
+  if (list.empty()) {
+    it.add_after_stay_put(node);
+    return;
+  }
+
+  // Advance from the head until the last element is reached or an entry
+  // that is not less than value is found.
+  for (it.move_to_first(); !it.at_last() && it.data()->entry < value;) {
+    it.forward();
+  }
+
+  // The scan can stop on the last element without having compared it, so
+  // the comparison decides which side of the current element to insert on.
+  if (it.data()->entry < value) {
+    it.add_after_stay_put(node);
+  } else {
+    it.add_before_stay_put(node);
+  }
+}
+
+
+/**
+ * @name SORTED_FLOATS::remove
+ *
+ * Deletes the first entry encountered whose key equals the given key.
+ * Does nothing if the list is empty or no entry matches.
+ */
+
+void SORTED_FLOATS::remove(int32_t key) {
+  if (list.empty()) {
+    return;
+  }
+  it.mark_cycle_pt();
+  while (!it.cycled_list()) {
+    if (it.data()->address == key) {
+      // Unlink the element from the list and free it; only one is removed.
+      delete it.extract();
+      return;
+    }
+    it.forward();
+  }
+}
+
+
+/**
+ * @name SORTED_FLOATS::operator[]
+ *
+ * Returns the float stored index positions after the first list element.
+ * The built-in iterator is repositioned to the first element as a side
+ * effect. No range check is made on index here.
+ */
+
+float SORTED_FLOATS::operator[](int32_t index) {
+  it.move_to_first();
+  SORTED_FLOAT *element = it.data_relative(index);
+  return element->entry;
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/sortflts.h b/tesseract/src/textord/sortflts.h
new file mode 100644
index 00000000..710a7a3d
--- /dev/null
+++ b/tesseract/src/textord/sortflts.h
@@ -0,0 +1,76 @@
+/**********************************************************************
+ * File: sortflts.h (Formerly sfloats.h)
+ * Description: Code to maintain a sorted list of floats.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef SORTFLTS_H
+#define SORTFLTS_H
+
+#include "elst.h"
+
+namespace tesseract {
+
+/**
+ * One element of a SORTED_FLOATS list: a float value paired with the integer
+ * key under which it was added. Both fields are private; SORTED_FLOATS is a
+ * friend and reads them directly.
+ */
+class SORTED_FLOAT : public ELIST_LINK
+{
+  friend class SORTED_FLOATS;
+
+ public:
+  /** Creates an element with value 0 and key 0. */
+  SORTED_FLOAT() = default;
+  /**
+   * Creates an element.
+   * @param value value of entry
+   * @param key reference stored alongside the value
+   */
+  SORTED_FLOAT(float value, int32_t key)
+      : entry(value), address(key) {}
+
+ private:
+  // Default member initializers keep a default-constructed element from
+  // holding indeterminate values.
+  float entry = 0.0f;   // value of float
+  int32_t address = 0;  // key
+};
+
+ELISTIZEH (SORTED_FLOAT)
+/**
+ * A list of SORTED_FLOAT elements with a built-in iterator.
+ * Elements are inserted by value with add(), removed by key with remove()
+ * and read by position with operator[].
+ */
+class SORTED_FLOATS
+{
+ public:
+  /** Creates an empty list with the built-in iterator attached to it. */
+  SORTED_FLOATS() : it(&list) {}
+
+  /**
+   * Adds a sample.
+   * @param value sample float
+   * @param key retrieval key
+   */
+  void add(float value, int32_t key);
+
+  /**
+   * Deletes a sample.
+   * @param key key to delete
+   */
+  void remove(int32_t key);
+
+  /**
+   * Indexes into the list.
+   * @param index item to get
+   */
+  float operator[](int32_t index);
+
+ private:
+  // Declaration order matters: list must exist before it is bound to it.
+  SORTED_FLOAT_LIST list;  // list of floats
+  SORTED_FLOAT_IT it;      // iterator built-in
+};
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/strokewidth.cpp b/tesseract/src/textord/strokewidth.cpp
new file mode 100644
index 00000000..6543c6ac
--- /dev/null
+++ b/tesseract/src/textord/strokewidth.cpp
@@ -0,0 +1,2030 @@
+///////////////////////////////////////////////////////////////////////
+// File: strokewidth.cpp
+// Description: Subclass of BBGrid to find uniformity of strokewidth.
+// Author: Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "strokewidth.h"
+
+#include <algorithm>
+#include <cmath>
+
+#include "blobbox.h"
+#include "colpartition.h"
+#include "colpartitiongrid.h"
+#include "imagefind.h"
+#include "linlsq.h"
+#include "statistc.h"
+#include "tabfind.h"
+#include "textlineprojection.h"
+#include "tordmain.h" // For SetBlobStrokeWidth.
+
+namespace tesseract {
+
+// Debug switch for displaying stroke widths. Both preprocessor branches define
+// the same variable with the same default (0); only the help text differs,
+// mentioning ScrollView when GRAPHICS_DISABLED is not defined.
+#ifndef GRAPHICS_DISABLED
+static INT_VAR(textord_tabfind_show_strokewidths, 0, "Show stroke widths (ScrollView)");
+#else
+static INT_VAR(textord_tabfind_show_strokewidths, 0, "Show stroke widths");
+#endif
+// When set and widths_win_ exists, ~StrokeWidth() calls exit(0).
+static BOOL_VAR(textord_tabfind_only_strokewidths, false, "Only run stroke widths");
+
+/** Allowed proportional change in stroke width to be the same font. */
+const double kStrokeWidthFractionTolerance = 0.125;
+/**
+ * Allowed constant change in stroke width to be the same font.
+ * Really 1.5 pixels.
+ */
+const double kStrokeWidthTolerance = 1.5;
+// Same but for CJK we are a bit more generous.
+// FixBrokenCJK passes this pair to BLOBNBOX::MatchingStrokeWidth.
+const double kStrokeWidthFractionCJK = 0.25;
+const double kStrokeWidthCJK = 2.0;
+// Radius in grid cells of search for broken CJK. Doesn't need to be very
+// large as the grid size should be about the size of a character anyway.
+const int kCJKRadius = 2;
+// Max distance fraction of size to join close but broken CJK characters.
+const double kCJKBrokenDistanceFraction = 0.25;
+// Max number of components in a broken CJK character.
+// FixBrokenCJK rejects a merge when the candidate count is >= this value.
+const int kCJKMaxComponents = 8;
+// Max aspect ratio of CJK broken characters when put back together.
+const double kCJKAspectRatio = 1.25;
+// Max increase in aspect ratio of CJK broken characters when merged.
+const double kCJKAspectRatioIncrease = 1.0625;
+// Max multiple of the grid size that will be used in computing median CJKsize.
+// Used as the upper bound of the STATS range in UpperQuartileCJKSize.
+const int kMaxCJKSizeRatio = 5;
+// Min fraction of blobs broken CJK to iterate and run it again.
+const double kBrokenCJKIterationFraction = 0.125;
+// Multiple of gridsize as x-padding for a search box for diacritic base
+// characters.
+const double kDiacriticXPadRatio = 7.0;
+// Multiple of gridsize as y-padding for a search box for diacritic base
+// characters.
+const double kDiacriticYPadRatio = 1.75;
+// Min multiple of diacritic height that a neighbour must be to be a
+// convincing base character.
+const double kMinDiacriticSizeRatio = 1.0625;
+// Max multiple of a textline's median height as a threshold for the sum of
+// a diacritic's farthest x and y distances (gap + size).
+const double kMaxDiacriticDistanceRatio = 1.25;
+// Max x-gap between a diacritic and its base char as a fraction of the height
+// of the base char (allowing other blobs to fill the gap.)
+const double kMaxDiacriticGapToBaseCharHeight = 1.0;
+// Ratio between longest side of a line and longest side of a character.
+// (neighbor_min > blob_min * kLineTrapShortest &&
+// neighbor_max < blob_max / kLineTrapLongest)
+// => neighbor is a grapheme and blob is a line.
+const int kLineTrapLongest = 4;
+// Ratio between shortest side of a line and shortest side of a character.
+const int kLineTrapShortest = 2;
+// Max aspect ratio of the total box before CountNeighbourGaps
+// decides immediately based on the aspect ratio.
+const int kMostlyOneDirRatio = 3;
+// Aspect ratio for a blob to be considered as line residue.
+// RemoveLineResidue skips blobs whose height is less than width * this value.
+const double kLineResidueAspectRatio = 8.0;
+// Padding ratio for line residue search box.
+// The padding is the candidate's height multiplied by this value.
+const int kLineResiduePadRatio = 3;
+// Min multiple of neighbour size for a line residue to be genuine.
+const double kLineResidueSizeRatio = 1.75;
+// Aspect ratio filter for OSD.
+// CollectHorizVertBlobs only saves blobs whose longer-side/shorter-side ratio
+// is <= this value.
+const float kSizeRatioToReject = 2.0;
+// Expansion factor for search box for good neighbours.
+const double kNeighbourSearchFactor = 2.5;
+// Factor of increase of overlap when adding diacritics to make an image noisy.
+const double kNoiseOverlapGrowthFactor = 4.0;
+// Fraction of the image size to add overlap when adding diacritics for an
+// image to qualify as noisy.
+const double kNoiseOverlapAreaFactor = 1.0 / 512;
+
+// Constructs the grid over the rectangle (bleft, tright) with the given
+// gridsize. The per-call working pointers start out null, grid_box_ is set
+// to the same rectangle and rerotation_ to (1, 0).
+StrokeWidth::StrokeWidth(int gridsize,
+                         const ICOORD& bleft, const ICOORD& tright)
+    : BlobGrid(gridsize, bleft, tright),
+      nontext_map_(nullptr),
+      projection_(nullptr),
+      denorm_(nullptr),
+      grid_box_(bleft, tright),
+      rerotation_(1.0f, 0.0f) {
+  // All debug window pointers start out null.
+  smoothed_win_ = nullptr;
+  textlines_win_ = nullptr;
+  diacritics_win_ = nullptr;
+  chains_win_ = nullptr;
+  initial_widths_win_ = nullptr;
+  widths_win_ = nullptr;
+  leaders_win_ = nullptr;
+}
+
+// Releases the debug windows held by this object.
+// If widths_win_ exists and graphics are compiled in, the destructor first
+// calls widths_win_->AwaitEvent(SVET_DESTROY) and deletes the object it
+// returns (NOTE(review): presumably this waits for the window to be closed --
+// confirm against ScrollView). If textord_tabfind_only_strokewidths is set,
+// the process then exits with status 0 from inside the destructor, so none of
+// the deletes below that point run.
+StrokeWidth::~StrokeWidth() {
+ if (widths_win_ != nullptr) {
+ #ifndef GRAPHICS_DISABLED
+ delete widths_win_->AwaitEvent(SVET_DESTROY);
+ #endif // !GRAPHICS_DISABLED
+ if (textord_tabfind_only_strokewidths)
+ exit(0);
+ delete widths_win_;
+ }
+ // delete on a null pointer is a no-op, so these need no guard.
+ delete leaders_win_;
+ delete initial_widths_win_;
+ delete chains_win_;
+ delete textlines_win_;
+ delete smoothed_win_;
+ delete diacritics_win_;
+}
+
+// Sets the neighbours member of the medium-sized blobs in the block.
+// The blobs of block->blobs are inserted into the grid, SetNeighbours is run
+// on each of them (searching on 4 sides for similar-sized,
+// similar-strokewidth blobs), and the grid is emptied again before returning.
+void StrokeWidth::SetNeighboursOnMediumBlobs(TO_BLOCK* block) {
+  BLOBNBOX_LIST* medium_blobs = &block->blobs;
+  // Preliminary strokewidth neighbour detection needs the blobs in the grid.
+  InsertBlobList(medium_blobs);
+  BLOBNBOX_IT it(medium_blobs);
+  it.mark_cycle_pt();
+  while (!it.cycled_list()) {
+    SetNeighbours(false, false, it.data());
+    it.forward();
+  }
+  Clear();
+}
+
+// Sets the neighbour/textline writing direction members of the medium
+// and large blobs, optionally repairing broken CJK characters first.
+// The repair comes first because broken CJK characters can fool the
+// textline direction detection algorithm.
+// The grid is filled on entry and cleared again before returning.
+void StrokeWidth::FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode,
+                                                       bool cjk_merge,
+                                                       TO_BLOCK* input_block) {
+  // Fill the grid with the remaining (non-noise) blobs.
+  InsertBlobs(input_block);
+  if (cjk_merge) {
+    // FixBrokenCJK reports whether another pass may be worthwhile, so keep
+    // calling it until it says no.
+    while (FixBrokenCJK(input_block)) {
+    }
+  }
+  // Grade blobs by inspection of neighbours.
+  FindTextlineFlowDirection(pageseg_mode, false);
+  // Leave the grid empty, ready for rotation or leader finding.
+  Clear();
+}
+
+// Helper that walks input_blobs once, counting the uniquely vertical and
+// uniquely horizontal blobs into *num_vertical_blobs / *num_horizontal_blobs
+// (the counters are incremented, not reset). Blobs whose longer-to-shorter
+// side ratio is at most kSizeRatioToReject are also appended to the matching
+// output list: vertical, horizontal, or nondescript for everything else.
+static void CollectHorizVertBlobs(BLOBNBOX_LIST* input_blobs,
+                                  int* num_vertical_blobs,
+                                  int* num_horizontal_blobs,
+                                  BLOBNBOX_CLIST* vertical_blobs,
+                                  BLOBNBOX_CLIST* horizontal_blobs,
+                                  BLOBNBOX_CLIST* nondescript_blobs) {
+  BLOBNBOX_C_IT vertical_it(vertical_blobs);
+  BLOBNBOX_C_IT horizontal_it(horizontal_blobs);
+  BLOBNBOX_C_IT nondescript_it(nondescript_blobs);
+  BLOBNBOX_IT input_it(input_blobs);
+  for (input_it.mark_cycle_pt(); !input_it.cycled_list(); input_it.forward()) {
+    BLOBNBOX* blob = input_it.data();
+    const TBOX& box = blob->bounding_box();
+    const float height_over_width =
+        static_cast<float>(box.height()) / box.width();
+    const float width_over_height = 1.0f / height_over_width;
+    // Whichever of the two ratios is >= 1.0.
+    const float ratio = width_over_height > height_over_width
+                            ? width_over_height
+                            : height_over_width;
+    // Only blobs with a small aspect ratio are saved for osd.
+    const bool keep_for_osd = ratio <= kSizeRatioToReject;
+
+    // Classify first (counting as we go), then store if the blob qualifies.
+    BLOBNBOX_C_IT* destination = nullptr;
+    if (blob->UniquelyVertical()) {
+      ++*num_vertical_blobs;
+      destination = &vertical_it;
+    } else if (blob->UniquelyHorizontal()) {
+      ++*num_horizontal_blobs;
+      destination = &horizontal_it;
+    } else {
+      destination = &nondescript_it;
+    }
+    if (keep_for_osd) {
+      destination->add_after_then_move(blob);
+    }
+  }
+}
+
+
+// Types all the blobs as vertical or horizontal text or unknown and
+// returns true if the vertical count reaches find_vertical_text_ratio of the
+// combined vertical + horizontal count.
+// If the blobs are rotated, it is necessary to call CorrectForRotation
+// after rotating everything, otherwise the work done here will be enough.
+// If osd_blobs is not null, it receives the blobs of the winning direction
+// for use in orientation and script detection; when no blob is uniquely
+// vertical or horizontal it receives the nondescript blobs instead and the
+// result is false.
+bool StrokeWidth::TestVerticalTextDirection(double find_vertical_text_ratio,
+                                            TO_BLOCK* block,
+                                            BLOBNBOX_CLIST* osd_blobs) {
+  int num_vertical = 0;
+  int num_horizontal = 0;
+  BLOBNBOX_CLIST vertical_blobs;
+  BLOBNBOX_CLIST horizontal_blobs;
+  BLOBNBOX_CLIST nondescript_blobs;
+  // Tally the normal blobs, then the large ones, into the same counters and
+  // lists.
+  CollectHorizVertBlobs(&block->blobs, &num_vertical, &num_horizontal,
+                        &vertical_blobs, &horizontal_blobs, &nondescript_blobs);
+  CollectHorizVertBlobs(&block->large_blobs, &num_vertical, &num_horizontal,
+                        &vertical_blobs, &horizontal_blobs, &nondescript_blobs);
+  if (textord_debug_tabfind) {
+    tprintf("TextDir hbox=%d vs vbox=%d, %dH, %dV, %dN osd blobs\n",
+            num_horizontal, num_vertical,
+            horizontal_blobs.length(), vertical_blobs.length(),
+            nondescript_blobs.length());
+  }
+
+  const bool want_osd = osd_blobs != nullptr;
+  if (want_osd && num_vertical == 0 && num_horizontal == 0) {
+    // Only nondescript blobs available, so hand those over.
+    BLOBNBOX_C_IT osd_it(osd_blobs);
+    osd_it.add_list_after(&nondescript_blobs);
+    return false;
+  }
+
+  const int min_vert_boxes = static_cast<int>(
+      (num_vertical + num_horizontal) * find_vertical_text_ratio);
+  const bool mostly_vertical = num_vertical >= min_vert_boxes;
+  if (want_osd) {
+    BLOBNBOX_C_IT osd_it(osd_blobs);
+    osd_it.add_list_after(mostly_vertical ? &vertical_blobs
+                                          : &horizontal_blobs);
+  }
+  return mostly_vertical;
+}
+
+// Corrects the data structures for the given rotation: the grid is
+// re-initialised to the geometry of part_grid, grid_box_ is refreshed from
+// the new corners, and rerotation_ becomes (rotation.x, -rotation.y).
+void StrokeWidth::CorrectForRotation(const FCOORD& rotation,
+                                     ColPartitionGrid* part_grid) {
+  Init(part_grid->gridsize(), part_grid->bleft(), part_grid->tright());
+  grid_box_ = TBOX(bleft(), tright());
+  // Same x with negated y.
+  rerotation_ = FCOORD(rotation.x(), -rotation.y());
+}
+
+// Finds leader partitions and inserts them into the given part_grid.
+// On return the grid holds the blobs of block->blobs, and the blobs next to
+// either side of a leader have been marked via MarkLeaderNeighbours.
+void StrokeWidth::FindLeaderPartitions(TO_BLOCK* block,
+                                       ColPartitionGrid* part_grid) {
+  Clear();
+  // Find and isolate leaders in the noise list.
+  ColPartition_LIST leader_parts;
+  FindLeadersAndMarkNoise(block, &leader_parts);
+  // Fill the strokewidth grid with the block's remaining (non-noise) blobs.
+  InsertBlobList(&block->blobs);
+  // Drain the temporary list: each partition is unlinked, its neighbours on
+  // both sides are marked, and it is then inserted into part_grid.
+  ColPartition_IT leader_it(&leader_parts);
+  while (!leader_it.empty()) {
+    ColPartition* leader = leader_it.extract();
+    leader->ClaimBoxes();
+    MarkLeaderNeighbours(leader, LR_LEFT);
+    MarkLeaderNeighbours(leader, LR_RIGHT);
+    part_grid->InsertBBox(true, true, leader);
+    leader_it.forward();
+  }
+}
+
+// Finds blobs that look like bits of vertical lines, which would otherwise
+// screw up layout analysis, and passes each one to
+// ColPartition::MakeBigPartition together with big_part_list.
+// A blob qualifies when its height is at least kLineResidueAspectRatio times
+// its width and exceeds kLineResidueSizeRatio times the height of the tallest
+// other blob found in a padded search box around it.
+void StrokeWidth::RemoveLineResidue(ColPartition_LIST* big_part_list) {
+  BlobGridSearch full_search(this);
+  full_search.StartFullSearch();
+  for (BLOBNBOX* candidate = full_search.NextFullSearch();
+       candidate != nullptr; candidate = full_search.NextFullSearch()) {
+    TBOX box = candidate->bounding_box();
+    // Only tall, thin boxes are line-like.
+    if (box.height() < box.width() * kLineResidueAspectRatio) {
+      continue;
+    }
+    // Pad the box on all sides by a multiple of its height to look at the
+    // neighbourhood.
+    int padding = box.height() * kLineResiduePadRatio;
+    TBOX search_box = box;
+    search_box.pad(padding, padding);
+    bool debug = AlignedBlob::WithinTestRegion(2, box.left(),
+                                               box.bottom());
+    // Height of the tallest blob in the search box other than the candidate.
+    int max_height = 0;
+    BlobGridSearch rect_search(this);
+    rect_search.StartRectSearch(search_box);
+    for (BLOBNBOX* other = rect_search.NextRectSearch(); other != nullptr;
+         other = rect_search.NextRectSearch()) {
+      if (other != candidate) {
+        max_height = std::max<int>(max_height,
+                                   other->bounding_box().height());
+      }
+    }
+    if (debug) {
+      tprintf("Max neighbour size=%d for candidate line box at:", max_height);
+      box.print();
+    }
+    if (max_height * kLineResidueSizeRatio >= box.height()) {
+      continue;  // A neighbour is big enough; not line residue.
+    }
+    #ifndef GRAPHICS_DISABLED
+    if (leaders_win_ != nullptr) {
+      // We are debugging, so outline the removed blob in pink in the same
+      // window that is used to display leader detection.
+      leaders_win_->Pen(ScrollView::PINK);
+      leaders_win_->Rectangle(box.left(), box.bottom(),
+                              box.right(), box.top());
+    }
+    #endif  // !GRAPHICS_DISABLED
+    ColPartition::MakeBigPartition(candidate, big_part_list);
+  }
+}
+
+// Types all the blobs as vertical text or horizontal text or unknown and
+// puts them into initial ColPartitions in the supplied part_grid.
+// rerotation determines how to get back to the image coordinates from the
+// blob coordinates (since they may have been rotated for vertical text).
+// block is the single block for the whole page or rectangle to be OCRed.
+// nontext_pix (full-size), is a binary mask used to prevent merges across
+// photo/text boundaries. It is not kept beyond this function.
+// denorm provides a mapping back to the image from the current blob
+// coordinate space.
+// projection provides a measure of textline density over the image and
+// provides functions to assist with diacritic detection. It should be a
+// pointer to a new TextlineProjection, and will be setup here.
+// part_grid is the output grid of textline partitions.
+// Large blobs that cause overlap are put in separate partitions and added
+// to the big_parts list.
+// nontext_map_, projection_ and denorm_ are only set for the duration of this
+// call; all three are reset to nullptr before returning.
+void StrokeWidth::GradeBlobsIntoPartitions(
+ PageSegMode pageseg_mode, const FCOORD& rerotation, TO_BLOCK* block,
+ Pix* nontext_pix, const DENORM* denorm, bool cjk_script,
+ TextlineProjection* projection, BLOBNBOX_LIST* diacritic_blobs,
+ ColPartitionGrid* part_grid, ColPartition_LIST* big_parts) {
+ nontext_map_ = nontext_pix;
+ projection_ = projection;
+ denorm_ = denorm;
+ // Clear and re Insert to take advantage of the tab stops in the blobs.
+ Clear();
+ // Setup the strokewidth grid with the remaining non-noise, non-leader blobs.
+ InsertBlobs(block);
+
+ // Run FixBrokenCJK() again if the page is CJK.
+ // Only a single pass is made here; the return value is ignored.
+ if (cjk_script) {
+ FixBrokenCJK(block);
+ }
+ FindTextlineFlowDirection(pageseg_mode, false);
+ projection_->ConstructProjection(block, rerotation, nontext_map_);
+#ifndef GRAPHICS_DISABLED
+ if (textord_tabfind_show_strokewidths) {
+ // NOTE(review): line_blobs_win is neither stored nor deleted in this
+ // function -- confirm who owns the window returned by MakeWindow.
+ ScrollView* line_blobs_win = MakeWindow(0, 0, "Initial textline Blobs");
+ projection_->PlotGradedBlobs(&block->blobs, line_blobs_win);
+ projection_->PlotGradedBlobs(&block->small_blobs, line_blobs_win);
+ }
+#endif
+ projection_->MoveNonTextlineBlobs(&block->blobs, &block->noise_blobs);
+ projection_->MoveNonTextlineBlobs(&block->small_blobs, &block->noise_blobs);
+ // Clear and re Insert to take advantage of the removed diacritics.
+ Clear();
+ InsertBlobs(block);
+ // skew is passed to FindInitialPartitions; it is not read afterwards here.
+ FCOORD skew;
+ FindTextlineFlowDirection(pageseg_mode, true);
+ PartitionFindResult r =
+ FindInitialPartitions(pageseg_mode, rerotation, true, block,
+ diacritic_blobs, part_grid, big_parts, &skew);
+ if (r == PFR_NOISE) {
+ // NOTE(review): diacritic_blobs is dereferenced here without a null check.
+ tprintf("Detected %d diacritics\n", diacritic_blobs->length());
+ // Noise was found, and removed.
+ // Rebuild the grid and retry, this time passing false as the third
+ // argument. The result of the retry is stored in r but not inspected.
+ Clear();
+ InsertBlobs(block);
+ FindTextlineFlowDirection(pageseg_mode, true);
+ r = FindInitialPartitions(pageseg_mode, rerotation, false, block,
+ diacritic_blobs, part_grid, big_parts, &skew);
+ }
+ nontext_map_ = nullptr;
+ projection_ = nullptr;
+ denorm_ = nullptr;
+}
+
+// Debug helper: prints the bounding box of the given blob followed by its
+// horizontal stroke width, vertical stroke width and the perimeter-based
+// width 2 * area / perimeter of its cblob, each with one decimal place.
+// A blob without a cblob prints 0.0 for the perimeter-based width rather
+// than dereferencing a null pointer; HandleClick passes neighbour blobs whose
+// cblob has not been checked.
+static void PrintBoxWidths(BLOBNBOX* neighbour) {
+  const TBOX& nbox = neighbour->bounding_box();
+  auto* cblob = neighbour->cblob();
+  double perimeter_width = 0.0;
+  if (cblob != nullptr) {
+    perimeter_width = 2.0 * cblob->area() / cblob->perimeter();
+  }
+  // The last conversion was "%1.f" (width 1, precision 0), which dropped the
+  // decimal that the other two widths show; "%.1f" matches them.
+  tprintf("Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%.1f\n",
+          nbox.left(), nbox.bottom(), nbox.right(), nbox.top(),
+          neighbour->horz_stroke_width(), neighbour->vert_stroke_width(),
+          perimeter_width);
+}
+
+/**
+ * Handles a click event in a display window.
+ * After the base grid handler has run, the first blob found by a radius-1
+ * search whose box contains the click and which has a cblob is reported:
+ * its stroke widths, those of its left/right/above/below neighbours where
+ * present, its neighbour gaps and its direction/neighbour flags.
+ */
+void StrokeWidth::HandleClick(int x, int y) {
+  BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>::HandleClick(x, y);
+  // Prints the widths of a neighbour pointer that may be absent.
+  auto print_if_present = [](BLOBNBOX* candidate) {
+    if (candidate != nullptr) {
+      PrintBoxWidths(candidate);
+    }
+  };
+  // Run a radial search for blobs that overlap.
+  BlobGridSearch radsearch(this);
+  radsearch.StartRadSearch(x, y, 1);
+  FCOORD click(static_cast<float>(x), static_cast<float>(y));
+  for (BLOBNBOX* hit = radsearch.NextRadSearch(); hit != nullptr;
+       hit = radsearch.NextRadSearch()) {
+    TBOX hit_box = hit->bounding_box();
+    if (!hit_box.contains(click) || hit->cblob() == nullptr) {
+      continue;
+    }
+    PrintBoxWidths(hit);
+    print_if_present(hit->neighbour(BND_LEFT));
+    print_if_present(hit->neighbour(BND_RIGHT));
+    print_if_present(hit->neighbour(BND_ABOVE));
+    print_if_present(hit->neighbour(BND_BELOW));
+    int gaps[BND_COUNT];
+    hit->NeighbourGaps(gaps);
+    tprintf("Left gap=%d, right=%d, above=%d, below=%d, horz=%d, vert=%d\n"
+            "Good= %d %d %d %d\n",
+            gaps[BND_LEFT], gaps[BND_RIGHT],
+            gaps[BND_ABOVE], gaps[BND_BELOW],
+            hit->horz_possible(),
+            hit->vert_possible(),
+            hit->good_stroke_neighbour(BND_LEFT),
+            hit->good_stroke_neighbour(BND_RIGHT),
+            hit->good_stroke_neighbour(BND_ABOVE),
+            hit->good_stroke_neighbour(BND_BELOW));
+    return;  // Only the first matching blob is reported.
+  }
+}
+
+// Detects and marks leader dots/dashes.
+// Leaders are horizontal chains of small or noise blobs that look
+// monospace according to ColPartition::MarkAsLeaderIfMonospaced().
+// Detected leaders become the only occupants of the block->small_blobs list.
+// Non-leader small blobs get moved to the blobs list.
+// Non-leader noise blobs remain singletons in the noise list.
+// All small and noise blobs in high density regions are marked BTFT_NONTEXT.
+// block is the single block for the whole page or rectangle to be OCRed.
+// leader_parts is the output.
+// The grid is used as scratch space: small and noise blobs are inserted on
+// entry and the grid is cleared again before returning.
+void StrokeWidth::FindLeadersAndMarkNoise(TO_BLOCK* block,
+ ColPartition_LIST* leader_parts) {
+ InsertBlobList(&block->small_blobs);
+ InsertBlobList(&block->noise_blobs);
+ BlobGridSearch gsearch(this);
+ BLOBNBOX* bbox;
+ // For every bbox in the grid, set its neighbours.
+ gsearch.StartFullSearch();
+ while ((bbox = gsearch.NextFullSearch()) != nullptr) {
+ SetNeighbours(true, false, bbox);
+ }
+ // Second pass: build a candidate partition from each chain of linked blobs.
+ ColPartition_IT part_it(leader_parts);
+ gsearch.StartFullSearch();
+ while ((bbox = gsearch.NextFullSearch()) != nullptr) {
+ // Only blobs whose flow is still BTFT_NONE can start a chain.
+ if (bbox->flow() == BTFT_NONE) {
+ // A blob with no neighbour on either side cannot be part of a chain.
+ if (bbox->neighbour(BND_RIGHT) == nullptr &&
+ bbox->neighbour(BND_LEFT) == nullptr)
+ continue;
+ // Put all the linked blobs into a ColPartition.
+ ColPartition* part = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
+ BLOBNBOX* blob;
+ // Walk right starting at bbox itself, then left starting at bbox's left
+ // neighbour; each walk stops at a missing neighbour or at a blob whose
+ // flow is no longer BTFT_NONE.
+ for (blob = bbox; blob != nullptr && blob->flow() == BTFT_NONE;
+ blob = blob->neighbour(BND_RIGHT))
+ part->AddBox(blob);
+ for (blob = bbox->neighbour(BND_LEFT); blob != nullptr &&
+ blob->flow() == BTFT_NONE;
+ blob = blob->neighbour(BND_LEFT))
+ part->AddBox(blob);
+ // Keep the partition only if it was accepted as a leader; otherwise the
+ // partition object is discarded.
+ if (part->MarkAsLeaderIfMonospaced())
+ part_it.add_after_then_move(part);
+ else
+ delete part;
+ }
+ }
+#ifndef GRAPHICS_DISABLED
+ if (textord_tabfind_show_strokewidths) {
+ leaders_win_ = DisplayGoodBlobs("LeaderNeighbours", 0, 0);
+ }
+#endif
+ // Move any non-leaders from the small to the blobs list, as they are
+ // most likely dashes or broken characters.
+ BLOBNBOX_IT blob_it(&block->blobs);
+ BLOBNBOX_IT small_it(&block->small_blobs);
+ for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
+ BLOBNBOX* blob = small_it.data();
+ if (blob->flow() != BTFT_LEADER) {
+ // A BTFT_NEIGHBOURS flow is reset to BTFT_NONE before the blob moves on;
+ // other flow values are left as they are.
+ if (blob->flow() == BTFT_NEIGHBOURS)
+ blob->set_flow(BTFT_NONE);
+ blob->ClearNeighbours();
+ blob_it.add_to_end(small_it.extract());
+ }
+ }
+ // Move leaders from the noise list to the small list, leaving the small
+ // list exclusively leaders, so they don't get processed further,
+ // and the remaining small blobs all in the noise list.
+ BLOBNBOX_IT noise_it(&block->noise_blobs);
+ for (noise_it.mark_cycle_pt(); !noise_it.cycled_list(); noise_it.forward()) {
+ BLOBNBOX* blob = noise_it.data();
+ // Blobs flagged joined_to_prev are moved along with the leaders.
+ if (blob->flow() == BTFT_LEADER || blob->joined_to_prev()) {
+ small_it.add_to_end(noise_it.extract());
+ } else if (blob->flow() == BTFT_NEIGHBOURS) {
+ blob->set_flow(BTFT_NONE);
+ blob->ClearNeighbours();
+ }
+ }
+ // Clear the grid as we don't want the small stuff hanging around in it.
+ Clear();
+}
+
+/**
+ * Puts the block's normal blobs and then its large blobs into this grid.
+ * The block keeps ownership of the blobs.
+ */
+void StrokeWidth::InsertBlobs(TO_BLOCK* block) {
+  BLOBNBOX_LIST* normal_blobs = &block->blobs;
+  BLOBNBOX_LIST* large_blobs = &block->large_blobs;
+  InsertBlobList(normal_blobs);
+  InsertBlobList(large_blobs);
+}
+
+// Looks to the given side of the leader partition for the blob with the
+// smallest x-gap that overlaps the partition in y, giving up once a blob
+// more than two grid cells away is met. If one is found, its (opposite)
+// flag is set: leader_on_right for a blob on the left of the leader,
+// leader_on_left for a blob on the right.
+void StrokeWidth::MarkLeaderNeighbours(const ColPartition* part,
+                                       LeftOrRight side) {
+  const bool search_left = side == LR_LEFT;
+  const TBOX& part_box = part->bounding_box();
+  BlobGridSearch blobsearch(this);
+  // Start from the edge of the leader that faces the requested side.
+  blobsearch.StartSideSearch(search_left ? part_box.left()
+                                         : part_box.right(),
+                             part_box.bottom(), part_box.top());
+  const int max_gap = 2 * gridsize();
+  BLOBNBOX* nearest = nullptr;
+  int nearest_gap = 0;
+  while (BLOBNBOX* blob = blobsearch.NextSideSearch(search_left)) {
+    const TBOX& blob_box = blob->bounding_box();
+    if (!blob_box.y_overlap(part_box)) {
+      continue;
+    }
+    const int x_gap = blob_box.x_gap(part_box);
+    if (x_gap > max_gap) {
+      break;  // Too far away; stop searching.
+    }
+    if (nearest == nullptr || x_gap < nearest_gap) {
+      nearest = blob;
+      nearest_gap = x_gap;
+    }
+  }
+  if (nearest == nullptr) {
+    return;
+  }
+  if (search_left) {
+    nearest->set_leader_on_right(true);
+  } else {
+    nearest->set_leader_on_left(true);
+  }
+  #ifndef GRAPHICS_DISABLED
+  if (leaders_win_ != nullptr) {
+    // Red for a blob left of the leader, green for one to the right.
+    leaders_win_->Pen(search_left ? ScrollView::RED : ScrollView::GREEN);
+    const TBOX& marked_box = nearest->bounding_box();
+    leaders_win_->Rectangle(marked_box.left(), marked_box.bottom(),
+                            marked_box.right(), marked_box.top());
+  }
+  #endif  // !GRAPHICS_DISABLED
+}
+
+// Helper returning the upper quartile (rounded to the nearest int) of the
+// heights of the square-ish blobs in the list. A blob counts as square-ish
+// when neither side exceeds the other by more than kCJKAspectRatio. The
+// statistics range is 0 to gridsize * kMaxCJKSizeRatio.
+static int UpperQuartileCJKSize(int gridsize, BLOBNBOX_LIST* blobs) {
+  STATS sizes(0, gridsize * kMaxCJKSizeRatio);
+  BLOBNBOX_IT blob_it(blobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    const TBOX& box = blob_it.data()->bounding_box();
+    const int width = box.width();
+    const int height = box.height();
+    const bool not_too_wide = width <= height * kCJKAspectRatio;
+    const bool not_too_tall = height < width * kCJKAspectRatio;
+    if (not_too_wide && not_too_tall) {
+      sizes.add(height, 1);
+    }
+  }
+  return static_cast<int>(sizes.ile(0.75f) + 0.5);
+}
+
+// Fix broken CJK characters, using the fake joined blobs mechanism.
+// Blobs are really merged, ie the master takes all the outlines and the
+// others are deleted.
+// Returns true if sufficient blobs are merged that it may be worth running
+// again, due to a better estimate of character size.
+// Only block->blobs is examined. "Sufficient" means the number of merges
+// exceeds kBrokenCJKIterationFraction of the blobs that still have outlines.
+bool StrokeWidth::FixBrokenCJK(TO_BLOCK* block) {
+ BLOBNBOX_LIST* blobs = &block->blobs;
+ // Despite the name, this is the upper quartile from UpperQuartileCJKSize.
+ int median_height = UpperQuartileCJKSize(gridsize(), blobs);
+ int max_dist = static_cast<int>(median_height * kCJKBrokenDistanceFraction);
+ int max_height = static_cast<int>(median_height * kCJKAspectRatio);
+ int num_fixed = 0;
+ BLOBNBOX_IT blob_it(blobs);
+
+ for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+ BLOBNBOX* blob = blob_it.data();
+ // Skip blobs with no cblob or no outlines.
+ if (blob->cblob() == nullptr || blob->cblob()->out_list()->empty())
+ continue;
+ TBOX bbox = blob->bounding_box();
+ bool debug = AlignedBlob::WithinTestRegion(3, bbox.left(),
+ bbox.bottom());
+ if (debug) {
+ tprintf("Checking for Broken CJK (max size=%d):", max_height);
+ bbox.print();
+ }
+ // Generate a list of blobs that overlap or are near enough to merge.
+ // bbox is passed by pointer and comes back expanded to the union of the
+ // accepted boxes.
+ BLOBNBOX_CLIST overlapped_blobs;
+ AccumulateOverlaps(blob, debug, max_height, max_dist,
+ &bbox, &overlapped_blobs);
+ if (!overlapped_blobs.empty()) {
+ // There are overlapping blobs, so qualify them as being satisfactory
+ // before removing them from the grid and replacing them with the union.
+ // The final box must be roughly square.
+ if (bbox.width() > bbox.height() * kCJKAspectRatio ||
+ bbox.height() > bbox.width() * kCJKAspectRatio) {
+ if (debug) {
+ tprintf("Bad final aspectratio:");
+ bbox.print();
+ }
+ continue;
+ }
+ // There can't be too many blobs to merge.
+ if (overlapped_blobs.length() >= kCJKMaxComponents) {
+ if (debug)
+ tprintf("Too many neighbours: %d\n", overlapped_blobs.length());
+ continue;
+ }
+ // The strokewidths must match amongst the join candidates.
+ // The loop breaks at the first mismatch, leaving n_it short of a full
+ // cycle; that is what the cycled_list() test after the loop detects.
+ BLOBNBOX_C_IT n_it(&overlapped_blobs);
+ for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
+ BLOBNBOX* neighbour = nullptr;
+ neighbour = n_it.data();
+ if (!blob->MatchingStrokeWidth(*neighbour, kStrokeWidthFractionCJK,
+ kStrokeWidthCJK))
+ break;
+ }
+ if (!n_it.cycled_list()) {
+ if (debug) {
+ tprintf("Bad stroke widths:");
+ PrintBoxWidths(blob);
+ }
+ continue; // Not good enough.
+ }
+
+ // Merge all the candidates into blob.
+ // We must remove blob from the grid and reinsert it after merging
+ // to maintain the integrity of the grid.
+ RemoveBBox(blob);
+ // Everything else will be calculated later.
+ for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
+ BLOBNBOX* neighbour = n_it.data();
+ RemoveBBox(neighbour);
+ // Mark empty blob for deletion.
+ neighbour->set_region_type(BRT_NOISE);
+ blob->really_merge(neighbour);
+ // The box is rotated only when rerotation_ differs from (1, 0).
+ if (rerotation_.x() != 1.0f || rerotation_.y() != 0.0f) {
+ blob->rotate_box(rerotation_);
+ }
+ }
+ InsertBBox(true, true, blob);
+ ++num_fixed;
+ if (debug) {
+ // Prints the union box accumulated above, not a freshly read
+ // blob->bounding_box().
+ tprintf("Done! Final box:");
+ bbox.print();
+ }
+ }
+ }
+ // Count remaining blobs.
+ int num_remaining = 0;
+ for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+ BLOBNBOX* blob = blob_it.data();
+ if (blob->cblob() != nullptr && !blob->cblob()->out_list()->empty()) {
+ ++num_remaining;
+ }
+ }
+ // Permanently delete all the marked blobs after first removing all
+ // references in the neighbour members.
+ block->DeleteUnownedNoise();
+ return num_fixed > num_remaining * kBrokenCJKIterationFraction;
+}
+
+// Helper that decides whether bbox and nbox may reasonably be merged when
+// repairing broken CJK. The merge is acceptable when both gaps are at most
+// max_dist, the merged box fits within max_size in both dimensions, and the
+// merged box's elongation (longer side over shorter side) is no more than
+// kCJKAspectRatioIncrease times that of bbox.
+// *x_gap and *y_gap are always written with the gaps between the two boxes.
+static bool AcceptableCJKMerge(const TBOX& bbox, const TBOX& nbox,
+                               bool debug, int max_size, int max_dist,
+                               int* x_gap, int* y_gap) {
+  // Ratio of the longer side to the shorter side, so always >= 1 for a
+  // finite, non-degenerate box.
+  auto elongation = [](const TBOX& box) {
+    double ratio = static_cast<double>(box.width()) / box.height();
+    if (ratio < 1.0) ratio = 1.0 / ratio;
+    return ratio;
+  };
+
+  *x_gap = bbox.x_gap(nbox);
+  *y_gap = bbox.y_gap(nbox);
+  TBOX merged(nbox);
+  merged += bbox;
+  if (debug) {
+    tprintf("gaps = %d, %d, merged_box:", *x_gap, *y_gap);
+    merged.print();
+  }
+  const bool close_enough = *x_gap <= max_dist && *y_gap <= max_dist;
+  const bool small_enough =
+      merged.width() <= max_size && merged.height() <= max_size;
+  if (!close_enough || !small_enough) {
+    return false;
+  }
+  // Close enough to call overlapping, so the aspect ratios decide.
+  const double old_ratio = elongation(bbox);
+  const double new_ratio = elongation(merged);
+  return new_ratio <= old_ratio * kCJKAspectRatioIncrease;
+}
+
+ // Collect blobs that overlap or are within max_dist of the input bbox, using a radial grid search.
+ // Return them in the list of blobs (sorted by box left, unique) and expand the bbox to be the union
+ // of all the boxes. not_this is excluded from the search, as are blobs that fail AcceptableCJKMerge.
+ // If the final bbox overlaps a rejected nearest neighbour, blobs is emptied (bbox stays expanded).
+ void StrokeWidth::AccumulateOverlaps(const BLOBNBOX* not_this, bool debug,
+ int max_size, int max_dist,
+ TBOX* bbox, BLOBNBOX_CLIST* blobs) {
+ // While searching, nearests holds the nearest failed blob in each
+ // direction. When we have a nearest in each of the 4 directions, then
+ // the search is over, and at this point the final bbox must not overlap
+ // any of the nearests.
+ BLOBNBOX* nearests[BND_COUNT];
+ for (auto & nearest : nearests) {
+ nearest = nullptr;
+ }
+ int x = (bbox->left() + bbox->right()) / 2; // Search centre: middle of the input box.
+ int y = (bbox->bottom() + bbox->top()) / 2;
+ // Run a radial search for blobs that overlap or are sufficiently close.
+ BlobGridSearch radsearch(this);
+ radsearch.StartRadSearch(x, y, kCJKRadius);
+ BLOBNBOX* neighbour;
+ while ((neighbour = radsearch.NextRadSearch()) != nullptr) {
+ if (neighbour == not_this) continue;
+ TBOX nbox = neighbour->bounding_box();
+ int x_gap, y_gap; // Always set by AcceptableCJKMerge before being read below.
+ if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, max_dist,
+ &x_gap, &y_gap)) {
+ // Close enough to call overlapping. Merge boxes.
+ *bbox += nbox;
+ blobs->add_sorted(SortByBoxLeft<BLOBNBOX>, true, neighbour);
+ if (debug) {
+ tprintf("Added:");
+ nbox.print();
+ }
+ // Since we merged, re-test the nearests, as some might now be mergeable.
+ for (int dir = 0; dir < BND_COUNT; ++dir) {
+ if (nearests[dir] == nullptr) continue;
+ nbox = nearests[dir]->bounding_box();
+ if (AcceptableCJKMerge(*bbox, nbox, debug, max_size,
+ max_dist, &x_gap, &y_gap)) {
+ // Close enough to call overlapping. Merge boxes.
+ *bbox += nbox;
+ blobs->add_sorted(SortByBoxLeft<BLOBNBOX>, true, nearests[dir]);
+ if (debug) {
+ tprintf("Added:");
+ nbox.print();
+ }
+ nearests[dir] = nullptr;
+ dir = -1; // Restart the search (the loop increment brings dir back to 0).
+ }
+ }
+ } else if (x_gap < 0 && x_gap <= y_gap) {
+ // A vertical neighbour (negative x_gap is the dominant one). Record the nearest.
+ BlobNeighbourDir dir = nbox.top() > bbox->top() ? BND_ABOVE : BND_BELOW;
+ if (nearests[dir] == nullptr ||
+ y_gap < bbox->y_gap(nearests[dir]->bounding_box())) {
+ nearests[dir] = neighbour;
+ }
+ } else if (y_gap < 0 && y_gap <= x_gap) {
+ // A horizontal neighbour (negative y_gap is the dominant one). Record the nearest.
+ BlobNeighbourDir dir = nbox.left() > bbox->left() ? BND_RIGHT : BND_LEFT;
+ if (nearests[dir] == nullptr ||
+ x_gap < bbox->x_gap(nearests[dir]->bounding_box())) {
+ nearests[dir] = neighbour;
+ }
+ }
+ // If all nearests are non-null, then we have finished.
+ if (nearests[BND_LEFT] && nearests[BND_RIGHT] &&
+ nearests[BND_ABOVE] && nearests[BND_BELOW])
+ break;
+ }
+ // Final overlap with a nearest is not allowed.
+ for (auto & nearest : nearests) {
+ if (nearest == nullptr) continue;
+ const TBOX& nbox = nearest->bounding_box();
+ if (debug) {
+ tprintf("Testing for overlap with:");
+ nbox.print();
+ }
+ if (bbox->overlap(nbox)) {
+ blobs->shallow_clear(); // Abandon the whole merge: report no blobs to the caller.
+ if (debug)
+ tprintf("Final box overlaps nearest\n");
+ return;
+ }
+ }
+ }
+
+ // Determines, for each blob in this grid, whether its textline direction is
+ // horizontal or vertical, using the distance to its neighbours and its
+ // 1st and 2nd order neighbours. Non-text tends to end up with no definite
+ // direction.
+ // The outcome is stored in the neighbours and the vert_possible /
+ // horz_possible flags of the BLOBNBOXes currently in this grid.
+ // This function is called more than once if page orientation is uncertain,
+ // so display_if_debugging is true on the final call to display the results.
+ void StrokeWidth::FindTextlineFlowDirection(PageSegMode pageseg_mode,
+                                             bool display_if_debugging) {
+   BlobGridSearch gsearch(this);
+   BLOBNBOX* bbox;
+   // Pass 1: set the neighbours of every blob in the grid.
+   for (gsearch.StartFullSearch();
+        (bbox = gsearch.NextFullSearch()) != nullptr;) {
+     SetNeighbours(false, display_if_debugging, bbox);
+   }
+   // Pass 2: where one direction wins by a big margin, drop the other.
+   for (gsearch.StartFullSearch();
+        (bbox = gsearch.NextFullSearch()) != nullptr;) {
+     SimplifyObviousNeighbours(bbox);
+   }
+   // Pass 3: make blobs only vertical or only horizontal, either because the
+   // page segmentation mode forces it, or from the neighbours' evidence.
+   for (gsearch.StartFullSearch();
+        (bbox = gsearch.NextFullSearch()) != nullptr;) {
+     const bool vertical_only = FindingVerticalOnly(pageseg_mode);
+     if (!vertical_only && !FindingHorizontalOnly(pageseg_mode)) {
+       SetNeighbourFlows(bbox);
+       continue;
+     }
+     // A forced direction: exactly one of the two flags is set.
+     bbox->set_vert_possible(vertical_only);
+     bbox->set_horz_possible(!vertical_only);
+   }
+#ifndef GRAPHICS_DISABLED
+   if ((textord_tabfind_show_strokewidths && display_if_debugging) ||
+       textord_tabfind_show_strokewidths > 1) {
+     initial_widths_win_ = DisplayGoodBlobs("InitialStrokewidths", 400, 0);
+   }
+#endif
+   // Smoothing passes: the first only touches ambiguous blobs; the next two
+   // are also allowed to reset firm values, to fix renegades.
+   const bool reset_all_schedule[] = {false, true, true};
+   for (bool reset_all : reset_all_schedule) {
+     for (gsearch.StartFullSearch();
+          (bbox = gsearch.NextFullSearch()) != nullptr;) {
+       SmoothNeighbourTypes(pageseg_mode, reset_all, bbox);
+     }
+   }
+#ifndef GRAPHICS_DISABLED
+   if ((textord_tabfind_show_strokewidths && display_if_debugging) ||
+       textord_tabfind_show_strokewidths > 1) {
+     widths_win_ = DisplayGoodBlobs("ImprovedStrokewidths", 800, 0);
+   }
+#endif
+ }
+
+ // Fills in the neighbours and good_stroke_neighbours members of the blob by
+ // running FindGoodNeighbour in each of the 4 directions. The leaders flag is
+ // passed through, as leader dots/dashes use a slightly different rule for
+ // what makes a good neighbour.
+ // If activate_line_trap is set and any direction reported line-trap hits,
+ // the blob is treated as a line: its neighbours are cleared and its region
+ // type becomes BRT_HLINE or BRT_VLINE according to its longer dimension.
+ void StrokeWidth::SetNeighbours(bool leaders, bool activate_line_trap,
+                                 BLOBNBOX* blob) {
+   int trap_hits = 0;
+   for (int d = 0; d < BND_COUNT; ++d) {
+     trap_hits += FindGoodNeighbour(static_cast<BlobNeighbourDir>(d), leaders,
+                                    blob);
+   }
+   if (trap_hits <= 0 || !activate_line_trap) {
+     return;
+   }
+   // It looks like a line so isolate it by clearing its neighbours.
+   blob->ClearNeighbours();
+   const TBOX& box = blob->bounding_box();
+   if (box.width() > box.height()) {
+     blob->set_region_type(BRT_HLINE);
+   } else {
+     blob->set_region_type(BRT_VLINE);
+   }
+ }
+
+
+ // Sets the good_stroke_neighbours member of the blob if it has a
+ // GoodNeighbour on the given side (dir).
+ // Also sets the neighbour in the blob, whether or not a good one is found (nullptr if no candidate).
+ // Returns the number of blobs in the nearby search area that would lead us to
+ // believe that this blob is a line separator (0 if dir is BND_COUNT).
+ // Leaders get extra special lenient treatment: the minimum overlaps drop to 1.
+ int StrokeWidth::FindGoodNeighbour(BlobNeighbourDir dir, bool leaders,
+ BLOBNBOX* blob) {
+ // Search for neighbours that overlap vertically.
+ TBOX blob_box = blob->bounding_box();
+ bool debug = AlignedBlob::WithinTestRegion(2, blob_box.left(),
+ blob_box.bottom());
+ if (debug) {
+ tprintf("FGN in dir %d for blob:", dir);
+ blob_box.print();
+ }
+ int top = blob_box.top();
+ int bottom = blob_box.bottom();
+ int left = blob_box.left();
+ int right = blob_box.right();
+ int width = right - left;
+ int height = top - bottom;
+
+ // A trap to detect lines tests for the min dimension of neighbours
+ // being larger than a multiple of the min dimension of the line
+ // and the larger dimension being smaller than a fraction of the max
+ // dimension of the line.
+ int line_trap_max = std::max(width, height) / kLineTrapLongest;
+ int line_trap_min = std::min(width, height) * kLineTrapShortest;
+ int line_trap_count = 0;
+
+ int min_good_overlap = (dir == BND_LEFT || dir == BND_RIGHT)
+ ? height / 2 : width / 2;
+ int min_decent_overlap = (dir == BND_LEFT || dir == BND_RIGHT)
+ ? height / 3 : width / 3;
+ if (leaders)
+ min_good_overlap = min_decent_overlap = 1;
+
+ int search_pad = static_cast<int>(
+ sqrt(static_cast<double>(width * height)) * kNeighbourSearchFactor);
+ if (gridsize() > search_pad)
+ search_pad = gridsize(); // Never search less than one grid cell away.
+ TBOX search_box = blob_box;
+ // Pad the search in the appropriate direction.
+ switch (dir) {
+ case BND_LEFT:
+ search_box.set_left(search_box.left() - search_pad);
+ break;
+ case BND_RIGHT:
+ search_box.set_right(search_box.right() + search_pad);
+ break;
+ case BND_BELOW:
+ search_box.set_bottom(search_box.bottom() - search_pad);
+ break;
+ case BND_ABOVE:
+ search_box.set_top(search_box.top() + search_pad);
+ break;
+ case BND_COUNT:
+ return 0; // Not a real direction: nothing is searched and no neighbour is set.
+ }
+
+ BlobGridSearch rectsearch(this);
+ rectsearch.StartRectSearch(search_box);
+ BLOBNBOX* best_neighbour = nullptr;
+ double best_goodness = 0.0;
+ bool best_is_good = false;
+ BLOBNBOX* neighbour;
+ while ((neighbour = rectsearch.NextRectSearch()) != nullptr) {
+ TBOX nbox = neighbour->bounding_box();
+ if (neighbour == blob)
+ continue;
+ int mid_x = (nbox.left() + nbox.right()) / 2;
+ if (mid_x < blob->left_rule() || mid_x > blob->right_rule())
+ continue; // In a different column.
+ if (debug) {
+ tprintf("Neighbour at:");
+ nbox.print();
+ }
+
+ // Last-minute line detector. There is a small upper limit to the line
+ // width accepted by the morphological line detector.
+ int n_width = nbox.width();
+ int n_height = nbox.height();
+ if (std::min(n_width, n_height) > line_trap_min &&
+ std::max(n_width, n_height) < line_trap_max)
+ ++line_trap_count;
+ // Heavily joined text, such as Arabic may have very different sizes when
+ // looking at the maxes, but the heights may be almost identical, so check
+ // for a difference in height if looking sideways or width vertically.
+ if (TabFind::VeryDifferentSizes(std::max(n_width, n_height),
+ std::max(width, height)) &&
+ (((dir == BND_LEFT || dir ==BND_RIGHT) &&
+ TabFind::DifferentSizes(n_height, height)) ||
+ ((dir == BND_BELOW || dir ==BND_ABOVE) &&
+ TabFind::DifferentSizes(n_width, width)))) {
+ if (debug) tprintf("Bad size\n");
+ continue; // Could be a different font size or non-text.
+ }
+ // Amount of overlap between the blobs, measured perpendicular to the search direction.
+ int overlap;
+ // If the overlap is along the short side of the neighbour, and it
+ // is fully overlapped, then perp_overlap holds the length of the long
+ // side of the neighbour. A measure to include hyphens and dashes as
+ // legitimate neighbours.
+ int perp_overlap;
+ int gap;
+ if (dir == BND_LEFT || dir == BND_RIGHT) {
+ overlap = std::min(static_cast<int>(nbox.top()), top) - std::max(static_cast<int>(nbox.bottom()), bottom);
+ if (overlap == nbox.height() && nbox.width() > nbox.height())
+ perp_overlap = nbox.width();
+ else
+ perp_overlap = overlap;
+ gap = dir == BND_LEFT ? left - nbox.left() : nbox.right() - right;
+ if (gap <= 0) {
+ if (debug) tprintf("On wrong side\n");
+ continue; // On the wrong side.
+ }
+ gap -= n_width; // Now the distance between the facing edges (negative if the boxes overlap in x).
+ } else {
+ overlap = std::min(static_cast<int>(nbox.right()), right) - std::max(static_cast<int>(nbox.left()), left);
+ if (overlap == nbox.width() && nbox.height() > nbox.width())
+ perp_overlap = nbox.height();
+ else
+ perp_overlap = overlap;
+ gap = dir == BND_BELOW ? bottom - nbox.bottom() : nbox.top() - top;
+ if (gap <= 0) {
+ if (debug) tprintf("On wrong side\n");
+ continue; // On the wrong side.
+ }
+ gap -= n_height; // Now the distance between the facing edges (negative if the boxes overlap in y).
+ }
+ if (-gap > overlap) {
+ if (debug) tprintf("Overlaps wrong way\n");
+ continue; // Overlaps the wrong way.
+ }
+ if (perp_overlap < min_decent_overlap) {
+ if (debug) tprintf("Doesn't overlap enough\n");
+ continue; // Doesn't overlap enough.
+ }
+ bool bad_sizes = TabFind::DifferentSizes(height, n_height) &&
+ TabFind::DifferentSizes(width, n_width);
+ bool is_good = overlap >= min_good_overlap && !bad_sizes &&
+ blob->MatchingStrokeWidth(*neighbour,
+ kStrokeWidthFractionTolerance,
+ kStrokeWidthTolerance);
+ // Best is a fuzzy combination of gap, overlap and is good.
+ // Basically if you make one thing twice as good without making
+ // anything else twice as bad, then it is better.
+ if (gap < 1) gap = 1; // Clamp so the division below is never by zero or a negative gap.
+ double goodness = (1.0 + is_good) * overlap / gap;
+ if (debug) {
+ tprintf("goodness = %g vs best of %g, good=%d, overlap=%d, gap=%d\n",
+ goodness, best_goodness, is_good, overlap, gap);
+ }
+ if (goodness > best_goodness) {
+ best_neighbour = neighbour;
+ best_goodness = goodness;
+ best_is_good = is_good;
+ }
+ }
+ blob->set_neighbour(dir, best_neighbour, best_is_good);
+ return line_trap_count;
+ }
+
+ // Helper that adds every non-null 1st-order neighbour of blob (one per
+ // direction) to neighbours, keeping the list sorted by box left and unique.
+ static void ListNeighbours(const BLOBNBOX* blob,
+                            BLOBNBOX_CLIST* neighbours) {
+   for (int d = 0; d < BND_COUNT; ++d) {
+     BLOBNBOX* adjacent = blob->neighbour(static_cast<BlobNeighbourDir>(d));
+     if (adjacent == nullptr) {
+       continue;  // Nothing recorded on this side.
+     }
+     neighbours->add_sorted(SortByBoxLeft<BLOBNBOX>, true, adjacent);
+   }
+ }
+
+ // Helper that collects the 1st-order neighbours of blob, followed by the
+ // 1st-order neighbours of each of those, into neighbours.
+ static void List2ndNeighbours(const BLOBNBOX* blob,
+                               BLOBNBOX_CLIST* neighbours) {
+   ListNeighbours(blob, neighbours);
+   for (int d = 0; d < BND_COUNT; ++d) {
+     BLOBNBOX* adjacent = blob->neighbour(static_cast<BlobNeighbourDir>(d));
+     if (adjacent == nullptr) {
+       continue;
+     }
+     ListNeighbours(adjacent, neighbours);
+   }
+ }
+
+ // Helper that collects the 1st and 2nd order neighbours of blob, followed by
+ // the 1st and 2nd order neighbours of each direct neighbour, into neighbours.
+ static void List3rdNeighbours(const BLOBNBOX* blob,
+                               BLOBNBOX_CLIST* neighbours) {
+   List2ndNeighbours(blob, neighbours);
+   for (int d = 0; d < BND_COUNT; ++d) {
+     BLOBNBOX* adjacent = blob->neighbour(static_cast<BlobNeighbourDir>(d));
+     if (adjacent == nullptr) {
+       continue;
+     }
+     List2ndNeighbours(adjacent, neighbours);
+   }
+ }
+
+ // Helper that tallies the evidence for horizontalness or verticalness in a
+ // list of neighbours, adding to *pure_h_count and *pure_v_count.
+ // Nothing is counted if the list has kMostlyOneDirRatio entries or fewer.
+ static void CountNeighbourGaps(bool debug, BLOBNBOX_CLIST* neighbours,
+                                int* pure_h_count, int* pure_v_count) {
+   if (neighbours->length() <= kMostlyOneDirRatio) {
+     return;
+   }
+   BLOBNBOX_C_IT it(neighbours);
+   it.mark_cycle_pt();
+   while (!it.cycled_list()) {
+     BLOBNBOX* candidate = it.data();
+     int min_hgap, max_hgap, min_vgap, max_vgap;
+     candidate->MinMaxGapsClipped(&min_hgap, &max_hgap, &min_vgap, &max_vgap);
+     if (debug) {
+       tprintf("Hgaps [%d,%d], vgaps [%d,%d]:",
+               min_hgap, max_hgap, min_vgap, max_vgap);
+     }
+     // Horizontal wins if its gaps are clearly smaller, or a leader is
+     // attached on either side; vertical is only considered otherwise.
+     const bool horizontal_wins = max_hgap < min_vgap ||
+                                  candidate->leader_on_left() ||
+                                  candidate->leader_on_right();
+     const bool vertical_wins = !horizontal_wins && max_vgap < min_hgap;
+     if (horizontal_wins) {
+       // Count a pure horizontal.
+       ++*pure_h_count;
+     } else if (vertical_wins) {
+       // Count a pure vertical.
+       ++*pure_v_count;
+     }
+     if (debug) {
+       if (horizontal_wins) {
+         tprintf("Horz at:");
+       } else if (vertical_wins) {
+         tprintf("Vert at:");
+       } else {
+         tprintf("Neither at:");
+       }
+       candidate->bounding_box().print();
+     }
+     it.forward();
+   }
+ }
+
+ // Restricts the blob to being only horizontal or only vertical where the
+ // gap evidence from its neighbours (up to 3rd order) is clear. Blobs that
+ // report DefiniteIndividualFlow are left untouched, and a blob with no
+ // neighbours at all gets both flags cleared.
+ void StrokeWidth::SetNeighbourFlows(BLOBNBOX* blob) {
+   if (blob->DefiniteIndividualFlow()) {
+     return;
+   }
+   const bool debug = AlignedBlob::WithinTestRegion(
+       2, blob->bounding_box().left(), blob->bounding_box().bottom());
+   if (debug) {
+     tprintf("SetNeighbourFlows (current flow=%d, type=%d) on:",
+             blob->flow(), blob->region_type());
+     blob->bounding_box().print();
+   }
+   BLOBNBOX_CLIST neighbours;
+   List3rdNeighbours(blob, &neighbours);
+   // Votes from neighbours whose gaps are purely horizontal / vertical.
+   int h_votes = 0;
+   int v_votes = 0;
+   CountNeighbourGaps(debug, &neighbours, &h_votes, &v_votes);
+   if (debug) {
+     HandleClick(blob->bounding_box().left() + 1,
+                 blob->bounding_box().bottom() + 1);
+     tprintf("SetFlows: h_count=%d, v_count=%d\n", h_votes, v_votes);
+   }
+   if (neighbours.empty()) {
+     // Lonely blob. Can't tell its flow direction.
+     blob->set_vert_possible(false);
+     blob->set_horz_possible(false);
+     return;
+   }
+   // Start with both directions allowed, then remove the clear loser, if any.
+   blob->set_vert_possible(true);
+   blob->set_horz_possible(true);
+   if (h_votes > 2 * v_votes) {
+     // Horizontal gaps are clear winners.
+     blob->set_vert_possible(false);
+   } else if (v_votes > 2 * h_votes) {
+     // Vertical gaps are clear winners.
+     blob->set_horz_possible(false);
+   }
+ }
+
+
+ // Helper that adds to *pure_h_count the number of uniquely horizontal blobs
+ // in the list, and to *pure_v_count the number of uniquely vertical ones.
+ static void CountNeighbourTypes(BLOBNBOX_CLIST* neighbours,
+                                 int* pure_h_count, int* pure_v_count) {
+   BLOBNBOX_C_IT it(neighbours);
+   it.mark_cycle_pt();
+   while (!it.cycled_list()) {
+     BLOBNBOX* candidate = it.data();
+     if (candidate->UniquelyHorizontal()) {
+       ++*pure_h_count;
+     }
+     if (candidate->UniquelyVertical()) {
+       ++*pure_v_count;
+     }
+     it.forward();
+   }
+ }
+
+ // Removes the blob's neighbours in the wrong directions where the direction
+ // is clear-cut, either from the blob's own shape or from a distance margin
+ // between its horizontal and vertical gaps. Good for isolating vertical
+ // text from neighbouring horizontal text.
+ void StrokeWidth::SimplifyObviousNeighbours(BLOBNBOX* blob) {
+   auto drop_vertical_neighbours = [blob]() {
+     blob->set_neighbour(BND_ABOVE, nullptr, false);
+     blob->set_neighbour(BND_BELOW, nullptr, false);
+   };
+   auto drop_horizontal_neighbours = [blob]() {
+     blob->set_neighbour(BND_LEFT, nullptr, false);
+     blob->set_neighbour(BND_RIGHT, nullptr, false);
+   };
+
+   // Case 1: We have text that is likely several characters, blurry and
+   // joined together: the blob is complex (not stick-like) and elongated.
+   const bool complex_shape =
+       blob->bounding_box().width() > 3 * blob->area_stroke_width() &&
+       blob->bounding_box().height() > 3 * blob->area_stroke_width();
+   if (complex_shape) {
+     if (blob->bounding_box().width() > 4 * blob->bounding_box().height()) {
+       // Horizontal conjoined text.
+       drop_vertical_neighbours();
+       return;
+     }
+     if (blob->bounding_box().height() > 4 * blob->bounding_box().width()) {
+       // Vertical conjoined text.
+       drop_horizontal_neighbours();
+       return;
+     }
+   }
+
+   // Case 2: This blob is likely a single character. Compare the gaps.
+   const int margin = gridsize() / 2;
+   int min_hgap, max_hgap, min_vgap, max_vgap;
+   blob->MinMaxGapsClipped(&min_hgap, &max_hgap, &min_vgap, &max_vgap);
+   if ((max_hgap + margin < min_vgap && max_hgap < margin / 2) ||
+       blob->leader_on_left() || blob->leader_on_right()) {
+     // Horizontal gaps are clear winners.
+     drop_vertical_neighbours();
+   } else if (max_vgap + margin < min_hgap && max_vgap < margin / 2) {
+     // Vertical gaps are clear winners.
+     drop_horizontal_neighbours();
+   }
+ }
+
+ // Smooths the vertical/horizontal type of the blob by a majority vote of
+ // its 2nd-order neighbours. With reset_all true every blob is processed;
+ // otherwise only blobs that are still both vert_possible and horz_possible.
+ // A direction excluded by pageseg_mode is never chosen.
+ void StrokeWidth::SmoothNeighbourTypes(PageSegMode pageseg_mode, bool reset_all,
+                                        BLOBNBOX* blob) {
+   const bool ambiguous = blob->vert_possible() && blob->horz_possible();
+   if (!ambiguous && !reset_all) {
+     // Nothing to fix; only report it when debugging this blob.
+     if (AlignedBlob::WithinTestRegion(2, blob->bounding_box().left(),
+                                       blob->bounding_box().bottom())) {
+       HandleClick(blob->bounding_box().left() + 1,
+                   blob->bounding_box().bottom() + 1);
+       tprintf("Clean on pass 3!\n");
+     }
+     return;
+   }
+   BLOBNBOX_CLIST neighbours;
+   List2ndNeighbours(blob, &neighbours);
+   // Votes from uniquely horizontal / uniquely vertical neighbours.
+   int h_votes = 0;
+   int v_votes = 0;
+   CountNeighbourTypes(&neighbours, &h_votes, &v_votes);
+   if (AlignedBlob::WithinTestRegion(2, blob->bounding_box().left(),
+                                     blob->bounding_box().bottom())) {
+     HandleClick(blob->bounding_box().left() + 1,
+                 blob->bounding_box().bottom() + 1);
+     tprintf("pure_h=%d, pure_v=%d\n", h_votes, v_votes);
+   }
+   if (h_votes > v_votes && !FindingVerticalOnly(pageseg_mode)) {
+     // Horizontal neighbours are in the majority.
+     blob->set_vert_possible(false);
+     blob->set_horz_possible(true);
+   } else if (v_votes > h_votes && !FindingHorizontalOnly(pageseg_mode)) {
+     // Vertical neighbours are in the majority.
+     blob->set_horz_possible(false);
+     blob->set_vert_possible(true);
+   }
+ }
+
+ // Partition creation. Builds vertical and horizontal text chains (as the
+ // page segmentation mode allows), splits/merges the resulting partitions,
+ // handles diacritics, puts the remaining blobs in as partitions, and
+ // repeatedly smooths partition types with their neighbours using the
+ // nontext_map_ image. rerotation is used to rotate the coordinate space
+ // back to the nontext_map_ image.
+ // If find_problems is true and diacritic_blobs is non-null, possible noise
+ // pollution is detected from the partition overlap created by the
+ // diacritics. If excessive, the noise is separated out into diacritic_blobs
+ // and PFR_NOISE is returned.
+ // [TODO(rays): if the partition overlap is caused by heavy skew, deskews
+ // the components, saves the skew_angle and returns PFR_SKEW.] If the return
+ // is not PFR_OK, the job is incomplete, and FindInitialPartitions must be
+ // called again after cleaning up the partly done work.
+ PartitionFindResult StrokeWidth::FindInitialPartitions(
+     PageSegMode pageseg_mode, const FCOORD& rerotation, bool find_problems,
+     TO_BLOCK* block, BLOBNBOX_LIST* diacritic_blobs,
+     ColPartitionGrid* part_grid, ColPartition_LIST* big_parts,
+     FCOORD* skew_angle) {
+   // Seed the grid with text chains in each permitted direction.
+   if (!FindingHorizontalOnly(pageseg_mode)) {
+     FindVerticalTextChains(part_grid);
+   }
+   if (!FindingVerticalOnly(pageseg_mode)) {
+     FindHorizontalTextChains(part_grid);
+   }
+#ifndef GRAPHICS_DISABLED
+   if (textord_tabfind_show_strokewidths) {
+     chains_win_ = MakeWindow(0, 400, "Initial text chains");
+     part_grid->DisplayBoxes(chains_win_);
+     projection_->DisplayProjection();
+   }
+#endif
+   if (find_problems) {
+     // TODO(rays) Do something to find skew, set skew_angle and return if there
+     // is some.
+   }
+   part_grid->SplitOverlappingPartitions(big_parts);
+   EasyMerges(part_grid);
+   RemoveLargeUnusedBlobs(block, part_grid, big_parts);
+   TBOX grid_box(bleft(), tright());
+   // Each smoothing step is repeated until it reports no further change.
+   bool changed;
+   do {
+     changed = part_grid->GridSmoothNeighbours(BTFT_CHAIN, nontext_map_,
+                                               grid_box, rerotation);
+   } while (changed);
+   do {
+     changed = part_grid->GridSmoothNeighbours(BTFT_NEIGHBOURS, nontext_map_,
+                                               grid_box, rerotation);
+   } while (changed);
+   // Record the overlap before diacritics are merged, for noise detection.
+   int pre_overlap = part_grid->ComputeTotalOverlap(nullptr);
+   TestDiacritics(part_grid, block);
+   MergeDiacritics(block, part_grid);
+   if (find_problems && diacritic_blobs != nullptr) {
+     const bool noise_removed = DetectAndRemoveNoise(
+         pre_overlap, grid_box, block, part_grid, diacritic_blobs);
+     if (noise_removed) {
+       return PFR_NOISE;
+     }
+   }
+#ifndef GRAPHICS_DISABLED
+   if (textord_tabfind_show_strokewidths) {
+     textlines_win_ = MakeWindow(400, 400, "GoodTextline blobs");
+     part_grid->DisplayBoxes(textlines_win_);
+     diacritics_win_ = DisplayDiacritics("Diacritics", 0, 0, block);
+   }
+#endif
+   PartitionRemainingBlobs(pageseg_mode, part_grid);
+   part_grid->SplitOverlappingPartitions(big_parts);
+   EasyMerges(part_grid);
+   do {
+     changed = part_grid->GridSmoothNeighbours(BTFT_CHAIN, nontext_map_,
+                                               grid_box, rerotation);
+   } while (changed);
+   do {
+     changed = part_grid->GridSmoothNeighbours(BTFT_NEIGHBOURS, nontext_map_,
+                                               grid_box, rerotation);
+   } while (changed);
+   // Now eliminate strong stuff in a sea of the opposite.
+   do {
+     changed = part_grid->GridSmoothNeighbours(BTFT_STRONG_CHAIN, nontext_map_,
+                                               grid_box, rerotation);
+   } while (changed);
+#ifndef GRAPHICS_DISABLED
+   if (textord_tabfind_show_strokewidths) {
+     smoothed_win_ = MakeWindow(800, 400, "Smoothed blobs");
+     part_grid->DisplayBoxes(smoothed_win_);
+   }
+#endif
+   return PFR_OK;
+ }
+
+ // Detects noise as a significant growth in total partition overlap from
+ // pre_overlap to the current value. When the growth is large enough, all
+ // non-leader partitions are deleted from part_grid, and every unowned
+ // diacritic blob in block->noise_blobs that lies near a partition of the
+ // overlap (noise) grid is moved to diacritic_blobs.
+ // Returns true if the page was judged noisy and this clean-up was done.
+ bool StrokeWidth::DetectAndRemoveNoise(int pre_overlap, const TBOX& grid_box,
+                                        TO_BLOCK* block,
+                                        ColPartitionGrid* part_grid,
+                                        BLOBNBOX_LIST* diacritic_blobs) {
+   ColPartitionGrid* noise_grid = nullptr;
+   int post_overlap = part_grid->ComputeTotalOverlap(&noise_grid);
+   if (pre_overlap == 0) {
+     pre_overlap = 1;  // Keeps the growth test below meaningful.
+   }
+   BLOBNBOX_IT diacritic_it(diacritic_blobs);
+   if (noise_grid == nullptr) {
+     return false;
+   }
+   const bool noisy =
+       post_overlap > pre_overlap * kNoiseOverlapGrowthFactor &&
+       post_overlap > grid_box.area() * kNoiseOverlapAreaFactor;
+   if (noisy) {
+     // This is noisy enough to fix.
+#ifndef GRAPHICS_DISABLED
+     if (textord_tabfind_show_strokewidths) {
+       ScrollView* noise_win = MakeWindow(1000, 500, "Noise Areas");
+       noise_grid->DisplayBoxes(noise_win);
+     }
+#endif
+     part_grid->DeleteNonLeaderParts();
+     BLOBNBOX_IT blob_it(&block->noise_blobs);
+     ColPartitionGridSearch rsearch(noise_grid);
+     for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+       BLOBNBOX* blob = blob_it.data();
+       blob->ClearNeighbours();
+       // Only unowned diacritics are noise candidates.
+       if (blob->IsDiacritic() && blob->owner() == nullptr) {
+         TBOX search_box(blob->bounding_box());
+         search_box.pad(gridsize(), gridsize());
+         rsearch.StartRectSearch(search_box);
+         if (rsearch.NextRectSearch() != nullptr) {
+           // Consider blob as possible noise.
+           blob->set_owns_cblob(true);
+           blob->compute_bounding_box();
+           diacritic_it.add_after_then_move(blob_it.extract());
+         }
+       }
+     }
+   }
+   // The noise grid is discarded whether or not it was used.
+   noise_grid->DeleteParts();
+   delete noise_grid;
+   return noisy;
+ }
+
+ // Helper for building vertical text chains. Returns blob's neighbour in
+ // direction dir if that neighbour exists, has no owner, is not uniquely
+ // horizontal, and has blob as its own neighbour in the opposite direction.
+ // Returns nullptr otherwise.
+ static BLOBNBOX* MutualUnusedVNeighbour(const BLOBNBOX* blob,
+                                         BlobNeighbourDir dir) {
+   BLOBNBOX* candidate = blob->neighbour(dir);
+   const bool usable = candidate != nullptr &&
+                       candidate->owner() == nullptr &&
+                       !candidate->UniquelyHorizontal();
+   if (!usable) {
+     return nullptr;
+   }
+   const bool mutual = candidate->neighbour(DirOtherWay(dir)) == blob;
+   return mutual ? candidate : nullptr;
+ }
+
+ // Finds vertical chains of text-like blobs and puts each chain in a new
+ // ColPartition, which is handed to CompletePartition with part_grid.
+ void StrokeWidth::FindVerticalTextChains(ColPartitionGrid* part_grid) {
+   // A PageSegMode that forces vertical textlines with the current rotation.
+   const bool unrotated = rerotation_.y() == 0.0f;
+   PageSegMode pageseg_mode =
+       unrotated ? PSM_SINGLE_BLOCK_VERT_TEXT : PSM_SINGLE_COLUMN;
+   BlobGridSearch gsearch(this);
+   BLOBNBOX* bbox;
+   for (gsearch.StartFullSearch();
+        (bbox = gsearch.NextFullSearch()) != nullptr;) {
+     // Only start from boxes that have no horizontal hope and have not yet
+     // been included in a chain.
+     if (bbox->owner() != nullptr || !bbox->UniquelyVertical()) {
+       continue;
+     }
+     // A chain is only started if there is a usable neighbour above.
+     BLOBNBOX* blob = MutualUnusedVNeighbour(bbox, BND_ABOVE);
+     if (blob == nullptr) {
+       continue;
+     }
+     // Put all the linked blobs into a ColPartition: the seed, then the
+     // chain upwards, then the chain downwards.
+     ColPartition* part = new ColPartition(BRT_VERT_TEXT, ICOORD(0, 1));
+     part->AddBox(bbox);
+     for (; blob != nullptr; blob = MutualUnusedVNeighbour(blob, BND_ABOVE)) {
+       part->AddBox(blob);
+     }
+     for (blob = MutualUnusedVNeighbour(bbox, BND_BELOW); blob != nullptr;
+          blob = MutualUnusedVNeighbour(blob, BND_BELOW)) {
+       part->AddBox(blob);
+     }
+     CompletePartition(pageseg_mode, part, part_grid);
+   }
+ }
+
+ // Helper for building horizontal text chains. Returns blob's neighbour in
+ // direction dir if that neighbour exists, has no owner, is not uniquely
+ // vertical, and has blob as its own neighbour in the opposite direction.
+ // Returns nullptr otherwise.
+ static BLOBNBOX* MutualUnusedHNeighbour(const BLOBNBOX* blob,
+                                         BlobNeighbourDir dir) {
+   BLOBNBOX* candidate = blob->neighbour(dir);
+   const bool usable = candidate != nullptr &&
+                       candidate->owner() == nullptr &&
+                       !candidate->UniquelyVertical();
+   if (!usable) {
+     return nullptr;
+   }
+   const bool mutual = candidate->neighbour(DirOtherWay(dir)) == blob;
+   return mutual ? candidate : nullptr;
+ }
+
+ // Finds horizontal chains of text-like blobs and puts each chain in a new
+ // ColPartition, which is handed to CompletePartition with part_grid.
+ // A chain is seeded by an unowned, uniquely horizontal blob that has a
+ // mutual, unused horizontal neighbour on its right; it is then extended
+ // rightwards and leftwards through mutual unused horizontal neighbours.
+ void StrokeWidth::FindHorizontalTextChains(ColPartitionGrid* part_grid) {
+   // A PageSegMode that forces horizontal textlines with the current rotation.
+   PageSegMode pageseg_mode =
+       rerotation_.y() == 0.0f ? PSM_SINGLE_COLUMN : PSM_SINGLE_BLOCK_VERT_TEXT;
+   BlobGridSearch gsearch(this);
+   BLOBNBOX* bbox;
+   gsearch.StartFullSearch();
+   while ((bbox = gsearch.NextFullSearch()) != nullptr) {
+     // Only process boxes that have no vertical hope and have not yet
+     // been included in a chain.
+     BLOBNBOX* blob;
+     if (bbox->owner() == nullptr && bbox->UniquelyHorizontal() &&
+         (blob = MutualUnusedHNeighbour(bbox, BND_RIGHT)) != nullptr) {
+       // Put all the linked blobs into a ColPartition.
+       ColPartition* part = new ColPartition(BRT_TEXT, ICOORD(0, 1));
+       part->AddBox(bbox);
+       // Extend the chain to the right of the seed.
+       while (blob != nullptr) {
+         part->AddBox(blob);
+         blob = MutualUnusedHNeighbour(blob, BND_RIGHT);
+       }
+       // Extend the chain to the left of the seed. This must use the
+       // horizontal helper throughout: the vertical helper would reject
+       // uniquely horizontal blobs and accept uniquely vertical ones.
+       blob = MutualUnusedHNeighbour(bbox, BND_LEFT);
+       while (blob != nullptr) {
+         part->AddBox(blob);
+         blob = MutualUnusedHNeighbour(blob, BND_LEFT);
+       }
+       CompletePartition(pageseg_mode, part, part_grid);
+     }
+   }
+ }
+
+ // Finds diacritics and saves their base character in the blob (via DiacriticBlob).
+ // The objective is to move all diacritics to the noise_blobs list, so
+ // they don't mess up early textline finding/merging, or force splits
+ // on textlines that overlap a bit. Blobs that become diacritics must be
+ // either part of no ColPartition (nullptr owner) or in a small partition (fewer than 3 boxes) in
+ // which ALL the blobs are diacritics, in which case the partition is
+ // exploded (deleted) back to its blobs.
+ void StrokeWidth::TestDiacritics(ColPartitionGrid* part_grid, TO_BLOCK* block) {
+ BlobGrid small_grid(gridsize(), bleft(), tright()); // Secondary grid holding both the noise blobs and the regular blobs.
+ small_grid.InsertBlobList(&block->noise_blobs);
+ small_grid.InsertBlobList(&block->blobs);
+ int medium_diacritics = 0; // Diacritics found among block->blobs.
+ int small_diacritics = 0; // Diacritics found among block->noise_blobs.
+ BLOBNBOX_IT small_it(&block->noise_blobs);
+ for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
+ BLOBNBOX* blob = small_it.data();
+ if (blob->owner() == nullptr && !blob->IsDiacritic() &&
+ DiacriticBlob(&small_grid, blob)) {
+ ++small_diacritics;
+ }
+ }
+ BLOBNBOX_IT blob_it(&block->blobs);
+ for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+ BLOBNBOX* blob = blob_it.data();
+ if (blob->IsDiacritic()) {
+ small_it.add_to_end(blob_it.extract());
+ continue; // Already a diacritic.
+ }
+ ColPartition* part = blob->owner();
+ if (part == nullptr && DiacriticBlob(&small_grid, blob)) {
+ ++medium_diacritics;
+ RemoveBBox(blob);
+ small_it.add_to_end(blob_it.extract());
+ } else if (part != nullptr && !part->block_owned() &&
+ part->boxes_count() < 3) {
+ // We allow blobs in small partitions to become diacritics if ALL the
+ // blobs in the partition qualify as we can then cleanly delete the
+ // partition, turn all the blobs in it to diacritics and they can be
+ // merged into the base character partition more easily than merging
+ // the partitions.
+ BLOBNBOX_C_IT box_it(part->boxes());
+ for (box_it.mark_cycle_pt(); !box_it.cycled_list() &&
+ DiacriticBlob(&small_grid, box_it.data());
+ box_it.forward()); // Empty body: stops early at the first box that is not a diacritic.
+ if (box_it.cycled_list()) {
+ // They are all good.
+ while (!box_it.empty()) {
+ // Liberate the blob from its partition so it can be treated
+ // as a diacritic and merged explicitly with the base part.
+ // The blob is really owned by the block. The partition "owner"
+ // is nulled to allow the blob to get merged with its base character
+ // partition.
+ BLOBNBOX* box = box_it.extract();
+ box->set_owner(nullptr);
+ box_it.forward();
+ ++medium_diacritics;
+ // We remove the blob from the grid so it isn't found by subsequent
+ // searches where we might not want to include diacritics.
+ RemoveBBox(box);
+ }
+ // We only move the one blob to the small list here, but the others
+ // all get moved by the test at the top of the loop.
+ small_it.add_to_end(blob_it.extract());
+ part_grid->RemoveBBox(part);
+ delete part;
+ }
+ } else if (AlignedBlob::WithinTestRegion(2, blob->bounding_box().left(),
+ blob->bounding_box().bottom())) {
+ tprintf("Blob not available to be a diacritic at:");
+ blob->bounding_box().print();
+ }
+ }
+ if (textord_tabfind_show_strokewidths) {
+ tprintf("Found %d small diacritics, %d medium\n",
+ small_diacritics, medium_diacritics);
+ }
+ }
+
+// Searches this grid for an appropriately close and sized neighbour of the
+// given [small] blob. If such a blob is found, the diacritic base is saved
+// in the blob and true is returned.
+// The small_grid is a secondary grid that contains the small/noise objects
+// that are not in this grid, but may be useful for determining a connection
+// between blob and its potential base character. (See DiacriticXGapFilled.)
+ bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob) {
+ // Blobs of an unmergeable region type, or typed as vertical text, are
+ // rejected immediately.
+ if (BLOBNBOX::UnMergeableType(blob->region_type()) ||
+ blob->region_type() == BRT_VERT_TEXT)
+ return false;
+ TBOX small_box(blob->bounding_box());
+ bool debug = AlignedBlob::WithinTestRegion(2, small_box.left(),
+ small_box.bottom());
+ if (debug) {
+ tprintf("Testing blob for diacriticness at:");
+ small_box.print();
+ }
+ // NOTE(review): x, y, grid_x and grid_y are not read again anywhere in this
+ // function. They look like leftovers; confirm GridCoords has no required
+ // side effect before removing them.
+ int x = (small_box.left() + small_box.right()) / 2;
+ int y = (small_box.bottom() + small_box.top()) / 2;
+ int grid_x, grid_y;
+ GridCoords(x, y, &grid_x, &grid_y);
+ int height = small_box.height();
+ // Setup a rectangle search to find its nearest base-character neighbour.
+ // We keep 2 different best candidates:
+ // best_x_overlap is a category of base characters that have an overlap in x
+ // (like a acute) in which we look for the least y-gap, computed using the
+ // projection to favor base characters in the same textline.
+ // best_y_overlap is a category of base characters that have no x overlap,
+ // (nominally a y-overlap is preferred but not essential) in which we
+ // look for the least weighted sum of x-gap and y-gap, with x-gap getting
+ // a lower weight to catch quotes at the end of a textline.
+ // NOTE that x-gap and y-gap are measured from the nearest side of the base
+ // character to the FARTHEST side of the diacritic to allow small diacritics
+ // to be a reasonable distance away, but not big diacritics.
+ BLOBNBOX* best_x_overlap = nullptr;
+ BLOBNBOX* best_y_overlap = nullptr;
+ int best_total_dist = 0;
+ int best_y_gap = 0;
+ TBOX best_xbox;
+ // TODO(rays) the search box could be setup using the projection as a guide.
+ // The search area is the blob's own box padded by fractions of the gridsize.
+ TBOX search_box(small_box);
+ int x_pad = IntCastRounded(gridsize() * kDiacriticXPadRatio);
+ int y_pad = IntCastRounded(gridsize() * kDiacriticYPadRatio);
+ search_box.pad(x_pad, y_pad);
+ BlobGridSearch rsearch(this);
+ rsearch.SetUniqueMode(true);
+ // Neighbours shorter than min_height are rejected as base characters below.
+ int min_height = height * kMinDiacriticSizeRatio;
+ rsearch.StartRectSearch(search_box);
+ BLOBNBOX* neighbour;
+ while ((neighbour = rsearch.NextRectSearch()) != nullptr) {
+ // Skip unmergeable neighbours, the blob itself, and any neighbour that
+ // has the same owner as the blob.
+ if (BLOBNBOX::UnMergeableType(neighbour->region_type()) ||
+ neighbour == blob || neighbour->owner() == blob->owner())
+ continue;
+ TBOX nbox = neighbour->bounding_box();
+ if (neighbour->owner() == nullptr || neighbour->owner()->IsVerticalType() ||
+ (neighbour->flow() != BTFT_CHAIN &&
+ neighbour->flow() != BTFT_STRONG_CHAIN)) {
+ if (debug) {
+ tprintf("Neighbour not strong enough:");
+ nbox.print();
+ }
+ continue; // Diacritics must be attached to strong text.
+ }
+ if (nbox.height() < min_height) {
+ if (debug) {
+ tprintf("Neighbour not big enough:");
+ nbox.print();
+ }
+ continue; // Too small to be the base character.
+ }
+ int x_gap = small_box.x_gap(nbox);
+ int y_gap = small_box.y_gap(nbox);
+ int total_distance = projection_->DistanceOfBoxFromBox(small_box, nbox,
+ true, denorm_,
+ debug);
+ if (debug) tprintf("xgap=%d, y=%d, total dist=%d\n",
+ x_gap, y_gap, total_distance);
+ // The distance limit scales with the median height of the neighbour's
+ // owning partition.
+ if (total_distance >
+ neighbour->owner()->median_height() * kMaxDiacriticDistanceRatio) {
+ if (debug) {
+ tprintf("Neighbour with median size %d too far away:",
+ neighbour->owner()->median_height());
+ neighbour->bounding_box().print();
+ }
+ continue; // Diacritics must not be too distant.
+ }
+ if (x_gap <= 0) {
+ // x-overlap candidate: recompute the neighbour's bounds restricted to a
+ // window one blob-width either side of the blob, then compare the y-gap
+ // to that reduced box against the best so far.
+ if (debug) {
+ tprintf("Computing reduced box for :");
+ nbox.print();
+ }
+ int left = small_box.left() - small_box.width();
+ int right = small_box.right() + small_box.width();
+ nbox = neighbour->BoundsWithinLimits(left, right);
+ y_gap = small_box.y_gap(nbox);
+ if (best_x_overlap == nullptr || y_gap < best_y_gap) {
+ best_x_overlap = neighbour;
+ best_xbox = nbox;
+ best_y_gap = y_gap;
+ if (debug) {
+ tprintf("New best:");
+ nbox.print();
+ }
+ } else if (debug) {
+ tprintf("Shrunken box doesn't win:");
+ nbox.print();
+ }
+ } else if (blob->ConfirmNoTabViolation(*neighbour)) {
+ // No x overlap: candidate is ranked by total_distance, and only if
+ // joining it to the blob passes ConfirmNoTabViolation.
+ if (best_y_overlap == nullptr || total_distance < best_total_dist) {
+ if (debug) {
+ tprintf("New best y overlap:");
+ nbox.print();
+ }
+ best_y_overlap = neighbour;
+ best_total_dist = total_distance;
+ } else if (debug) {
+ tprintf("New y overlap box doesn't win:");
+ nbox.print();
+ }
+ } else if (debug) {
+ tprintf("Neighbour wrong side of a tab:");
+ nbox.print();
+ }
+ }
+ // The x-overlap candidate wins if there is no y-overlap candidate, or if
+ // best_xbox.major_y_overlap() holds against the y-overlap candidate's box.
+ if (best_x_overlap != nullptr &&
+ (best_y_overlap == nullptr ||
+ best_xbox.major_y_overlap(best_y_overlap->bounding_box()))) {
+ blob->set_diacritic_box(best_xbox);
+ blob->set_base_char_blob(best_x_overlap);
+ if (debug) {
+ tprintf("DiacriticBlob OK! (x-overlap:");
+ small_box.print();
+ best_xbox.print();
+ }
+ return true;
+ }
+ // Otherwise the y-overlap candidate is accepted only if the x gap to it is
+ // filled (see DiacriticXGapFilled) and NoNoiseInBetween passes.
+ if (best_y_overlap != nullptr &&
+ DiacriticXGapFilled(small_grid, small_box,
+ best_y_overlap->bounding_box()) &&
+ NoNoiseInBetween(small_box, best_y_overlap->bounding_box())) {
+ blob->set_diacritic_box(best_y_overlap->bounding_box());
+ blob->set_base_char_blob(best_y_overlap);
+ if (debug) {
+ tprintf("DiacriticBlob OK! (y-overlap:");
+ small_box.print();
+ best_y_overlap->bounding_box().print();
+ }
+ return true;
+ }
+ if (debug) {
+ tprintf("DiacriticBlob fails:");
+ small_box.print();
+ tprintf("Best x+y gap = %d, y = %d\n", best_total_dist, best_y_gap);
+ if (best_y_overlap != nullptr) {
+ // Debug only: both tests are evaluated again here purely to print them.
+ tprintf("XGapFilled=%d, NoiseBetween=%d\n",
+ DiacriticXGapFilled(small_grid, small_box,
+ best_y_overlap->bounding_box()),
+ NoNoiseInBetween(small_box, best_y_overlap->bounding_box()));
+ }
+ }
+ return false;
+ }
+
+// Returns true if there is no gap between the base char and the diacritic
+// bigger than a fraction of the height of the base char:
+// Eg: line end.....'
+// The quote is a long way from the end of the line, yet it needs to be a
+// diacritic. To determine that the quote is not part of an image, or
+// a different text block, we check for other marks in the gap between
+// the base char and the diacritic.
+// '<--Diacritic
+// |---------|
+// | |<-toobig-gap->
+// | Base |<ok gap>
+// |---------| x<-----Dot occupying gap
+// The grid is const really.
+ bool StrokeWidth::DiacriticXGapFilled(BlobGrid* grid,
+                                       const TBOX& diacritic_box,
+                                       const TBOX& base_box) {
+   // Largest x gap tolerated without an intervening blob; it is a fraction
+   // of the base character height.
+   const int gap_limit = IntCastRounded(base_box.height() *
+                                        kMaxDiacriticGapToBaseCharHeight);
+   // x-extent known to be occupied so far. Starts as the base character and
+   // is widened by each blob found in the gap.
+   TBOX covered_box(base_box);
+   for (;;) {
+     const int remaining_gap = diacritic_box.x_gap(covered_box);
+     if (remaining_gap <= gap_limit)
+       return true;  // The gap was filled.
+     // Probe a strip of width gap_limit adjoining the covered box on the
+     // side that faces the diacritic.
+     TBOX probe_box(covered_box);
+     if (diacritic_box.left() > probe_box.right()) {
+       // The diacritic is to the right.
+       probe_box.set_left(probe_box.right());
+       probe_box.set_right(probe_box.left() + gap_limit);
+     } else {
+       // The diacritic is to the left.
+       probe_box.set_right(probe_box.left());
+       probe_box.set_left(probe_box.left() - gap_limit);
+     }
+     BlobGridSearch gap_search(grid);
+     gap_search.StartRectSearch(probe_box);
+     BLOBNBOX* filler = nullptr;
+     while ((filler = gap_search.NextRectSearch()) != nullptr) {
+       const TBOX& filler_box = filler->bounding_box();
+       if (filler_box.x_gap(diacritic_box) >= remaining_gap)
+         continue;  // Gets us no closer to the diacritic.
+       // Widen the covered extent to take in the first useful blob and
+       // restart the outer loop from there.
+       if (filler_box.left() < covered_box.left())
+         covered_box.set_left(filler_box.left());
+       if (filler_box.right() > covered_box.right())
+         covered_box.set_right(filler_box.right());
+       break;
+     }
+     if (filler == nullptr)
+       return false;  // Found a big gap.
+   }
+ }
+
+// Merges diacritics with the ColPartition of the base character blob.
+ void StrokeWidth::MergeDiacritics(TO_BLOCK* block,
+                                   ColPartitionGrid* part_grid) {
+   // Walk the block's noise_blobs list looking for blobs that recorded a
+   // base character.
+   BLOBNBOX_IT noise_it(&block->noise_blobs);
+   for (noise_it.mark_cycle_pt(); !noise_it.cycled_list();
+        noise_it.forward()) {
+     BLOBNBOX* diacritic = noise_it.data();
+     auto* base_blob = diacritic->base_char_blob();
+     if (base_blob == nullptr)
+       continue;
+     ColPartition* base_part = base_blob->owner();
+     // The base character must be owned by a partition that is not block
+     // owned (i.e. not on the big_parts list), and the diacritic itself must
+     // still be unowned.
+     const bool mergeable = base_part != nullptr &&
+                            !base_part->block_owned() &&
+                            diacritic->owner() == nullptr &&
+                            diacritic->IsDiacritic();
+     if (mergeable) {
+       // Adding a box may change the partition's bounding box, so take it
+       // out of the grid first and put it back afterwards.
+       part_grid->RemoveBBox(base_part);
+       base_part->AddBox(diacritic);
+       diacritic->set_region_type(base_part->blob_type());
+       diacritic->set_flow(base_part->flow());
+       diacritic->set_owner(base_part);
+       part_grid->InsertBBox(true, true, base_part);
+     }
+     // Clear every base char pointer before any blobs get deleted.
+     diacritic->set_base_char_blob(nullptr);
+   }
+ }
+
+// Any blobs on the large_blobs list of block that are still unowned by a
+// ColPartition, are probably drop-cap or vertically touching so the blobs
+// are removed to the big_parts list and treated separately.
+ void StrokeWidth::RemoveLargeUnusedBlobs(TO_BLOCK* block,
+                                          ColPartitionGrid* part_grid,
+                                          ColPartition_LIST* big_parts) {
+   // part_grid is not used by this function.
+   BLOBNBOX_IT blob_it(&block->large_blobs);
+   for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+     BLOBNBOX* large_blob = blob_it.data();
+     if (large_blob->owner() != nullptr)
+       continue;  // Already belongs to a partition.
+     // Genuine characters should have been put in partitions by now, so an
+     // unowned large blob (drop cap, vertically touching characters) goes
+     // to the big parts list instead.
+     ColPartition::MakeBigPartition(large_blob, big_parts);
+   }
+ }
+
+// All remaining unused blobs are put in individual ColPartitions.
+ void StrokeWidth::PartitionRemainingBlobs(PageSegMode pageseg_mode,
+                                           ColPartitionGrid* part_grid) {
+   BlobGridSearch full_search(this);
+   // Grid cell of the previously visited blob; -1 means "none yet".
+   int last_cell_x = -1;
+   int last_cell_y = -1;
+   // Unowned blobs collected from the cell currently being visited.
+   BLOBNBOX_CLIST pending_blobs;
+   BLOBNBOX_C_IT pending_it(&pending_blobs);
+   // Stays true only while every blob seen in the current cell is unowned
+   // and has flow BTFT_NONTEXT.
+   bool all_noise = true;
+   full_search.StartFullSearch();
+   for (BLOBNBOX* blob = full_search.NextFullSearch(); blob != nullptr;
+        blob = full_search.NextFullSearch()) {
+     const int cell_x = full_search.GridX();
+     const int cell_y = full_search.GridY();
+     if (cell_x != last_cell_x || cell_y != last_cell_y) {
+       // Moved to a different cell: turn the previous cell's blobs into
+       // partitions and start collecting afresh.
+       MakePartitionsFromCellList(pageseg_mode, all_noise, part_grid,
+                                  &pending_blobs);
+       pending_it.set_to_list(&pending_blobs);
+       last_cell_x = cell_x;
+       last_cell_y = cell_y;
+       all_noise = true;
+     }
+     const bool unowned = blob->owner() == nullptr;
+     if (unowned)
+       pending_it.add_to_end(blob);
+     if (!unowned || blob->flow() != BTFT_NONTEXT)
+       all_noise = false;
+   }
+   // Flush whatever was collected from the final cell.
+   MakePartitionsFromCellList(pageseg_mode, all_noise, part_grid,
+                              &pending_blobs);
+ }
+
+// If combine, put all blobs in the cell_list into a single partition, otherwise
+// put each one into its own partition.
+ void StrokeWidth::MakePartitionsFromCellList(PageSegMode pageseg_mode,
+                                              bool combine,
+                                              ColPartitionGrid* part_grid,
+                                              BLOBNBOX_CLIST* cell_list) {
+   if (cell_list->empty())
+     return;
+   BLOBNBOX_C_IT blob_it(cell_list);
+   if (!combine) {
+     // Every blob gets a partition of its own.
+     while (!blob_it.empty()) {
+       BLOBNBOX* blob = blob_it.extract();
+       ColPartition* single_part =
+           new ColPartition(blob->region_type(), ICOORD(0, 1));
+       single_part->set_flow(blob->flow());
+       single_part->AddBox(blob);
+       CompletePartition(pageseg_mode, single_part, part_grid);
+       blob_it.forward();
+     }
+     return;
+   }
+   // All blobs share one partition, which takes its region type and flow
+   // from the first blob in the list.
+   BLOBNBOX* first_blob = blob_it.extract();
+   ColPartition* merged_part =
+       new ColPartition(first_blob->region_type(), ICOORD(0, 1));
+   merged_part->AddBox(first_blob);
+   merged_part->set_flow(first_blob->flow());
+   blob_it.forward();
+   while (!blob_it.empty()) {
+     merged_part->AddBox(blob_it.extract());
+     blob_it.forward();
+   }
+   CompletePartition(pageseg_mode, merged_part, part_grid);
+ }
+
+// Helper function to finish setting up a ColPartition and insert into
+// part_grid.
+ void StrokeWidth::CompletePartition(PageSegMode pageseg_mode,
+                                     ColPartition* part,
+                                     ColPartitionGrid* part_grid) {
+   part->ComputeLimits();
+   const TBOX part_box = part->bounding_box();
+   const bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(),
+                                                    part_box.bottom());
+   int value = projection_->EvaluateColPartition(*part, denorm_, debug);
+   // When the page segmentation mode restricts us to one text direction and
+   // the projection value has the opposite sign, replace it: 0 for a
+   // partition holding a single box, otherwise +/-2 with the sign flipped.
+   if (value > 0 && FindingVerticalOnly(pageseg_mode)) {
+     if (part->boxes_count() == 1) {
+       value = 0;
+     } else {
+       value = -2;
+     }
+   } else if (value < 0 && FindingHorizontalOnly(pageseg_mode)) {
+     if (part->boxes_count() == 1) {
+       value = 0;
+     } else {
+       value = 2;
+     }
+   }
+   part->SetRegionAndFlowTypesFromProjectionValue(value);
+   part->ClaimBoxes();
+   part_grid->InsertBBox(true, true, part);
+ }
+
+ // Merge partitions where the merge appears harmless.
+ // The search box for each partition comes from OrientationSearchBox and
+ // every candidate merge must be approved by ConfirmEasyMerge.
+ void StrokeWidth::EasyMerges(ColPartitionGrid* part_grid) {
+   part_grid->Merges(
+       [this](ColPartition* part, TBOX* box) {
+         return OrientationSearchBox(part, box);
+       },
+       [this](const ColPartition* p1, const ColPartition* p2) {
+         return ConfirmEasyMerge(p1, p2);
+       });
+ }
+
+// Compute a search box based on the orientation of the partition.
+// Returns true if a suitable box can be calculated.
+// Callback for EasyMerges.
+ bool StrokeWidth::OrientationSearchBox(ColPartition* part, TBOX* box) {
+   if (!part->IsVerticalType()) {
+     // Not vertical: grow the box sideways by its own height at each end.
+     box->set_left(box->left() - box->height());
+     box->set_right(box->right() + box->height());
+     return true;
+   }
+   // Vertical: grow the box up and down by its own width.
+   box->set_top(box->top() + box->width());
+   box->set_bottom(box->bottom() - box->width());
+   return true;
+ }
+
+// Merge confirmation callback for EasyMerges.
+ bool StrokeWidth::ConfirmEasyMerge(const ColPartition* p1,
+ const ColPartition* p2) {
+ // Both partitions must exist and be non-empty.
+ ASSERT_HOST(p1 != nullptr && p2 != nullptr);
+ ASSERT_HOST(!p1->IsEmpty() && !p2->IsEmpty());
+ if ((p1->flow() == BTFT_NONTEXT && p2->flow() >= BTFT_CHAIN) ||
+ (p1->flow() >= BTFT_CHAIN && p2->flow() == BTFT_NONTEXT))
+ return false; // Don't merge confirmed image with text.
+ // Either partition vertical and no positive HCoreOverlap: the merge is
+ // allowed only if at least one partition is a singleton AND the bounding
+ // boxes have a major overlap.
+ if ((p1->IsVerticalType() || p2->IsVerticalType()) &&
+ p1->HCoreOverlap(*p2) <= 0 &&
+ ((!p1->IsSingleton() &&
+ !p2->IsSingleton()) ||
+ !p1->bounding_box().major_overlap(p2->bounding_box())))
+ return false; // Overlap must be in the text line.
+ // Either partition horizontal and no positive VCoreOverlap: the merge is
+ // allowed only if at least one partition is a singleton AND either the
+ // bounding boxes have a major overlap or OKDiacriticMerge passes in one of
+ // the two directions.
+ if ((p1->IsHorizontalType() || p2->IsHorizontalType()) &&
+ p1->VCoreOverlap(*p2) <= 0 &&
+ ((!p1->IsSingleton() &&
+ !p2->IsSingleton()) ||
+ (!p1->bounding_box().major_overlap(p2->bounding_box()) &&
+ !p1->OKDiacriticMerge(*p2, false) &&
+ !p2->OKDiacriticMerge(*p1, false))))
+ return false; // Overlap must be in the text line.
+ if (!p1->ConfirmNoTabViolation(*p2))
+ return false;
+ // When neither flow is above BTFT_NONTEXT the noise check is skipped.
+ if (p1->flow() <= BTFT_NONTEXT && p2->flow() <= BTFT_NONTEXT)
+ return true;
+ return NoNoiseInBetween(p1->bounding_box(), p2->bounding_box());
+ }
+
+// Returns true if there is no significant noise in between the boxes.
+ bool StrokeWidth::NoNoiseInBetween(const TBOX& box1, const TBOX& box2) const {
+   // Delegates to ImageFind, which tests the region between the boxes against
+   // nontext_map_ using the grid bounding box and rerotation.
+   const bool blank_between = ImageFind::BlankImageInBetween(
+       box1, box2, grid_box_, rerotation_, nontext_map_);
+   return blank_between;
+ }
+
+#ifndef GRAPHICS_DISABLED
+
+/** Displays the blobs colored according to the number of good neighbours
+ * and the vertical/horizontal flow.
+ */
+ ScrollView* StrokeWidth::DisplayGoodBlobs(const char* window_name,
+                                           int x, int y) {
+   auto window = MakeWindow(x, y, window_name);
+   // Outlines only: no fill brush.
+   window->Brush(ScrollView::NONE);
+
+   // Draw one rectangle per blob in the grid.
+   BlobGridSearch blob_search(this);
+   blob_search.StartFullSearch();
+   for (BLOBNBOX* blob = blob_search.NextFullSearch(); blob != nullptr;
+        blob = blob_search.NextFullSearch()) {
+     const TBOX& blob_box = blob->bounding_box();
+     const int goodness = blob->GoodTextBlob();
+     // A uniquely vertical/horizontal blob overrides the stored region type;
+     // the horizontal test is applied last so it wins if both hold.
+     BlobRegionType shown_type = blob->region_type();
+     if (blob->UniquelyVertical())
+       shown_type = BRT_VERT_TEXT;
+     if (blob->UniquelyHorizontal())
+       shown_type = BRT_TEXT;
+     // Blobs with no flow yet are shown with one derived from goodness.
+     BlobTextFlowType shown_flow = blob->flow();
+     if (shown_flow == BTFT_NONE) {
+       switch (goodness) {
+         case 0:
+           shown_flow = BTFT_NEIGHBOURS;
+           break;
+         case 1:
+           shown_flow = BTFT_CHAIN;
+           break;
+         default:
+           shown_flow = BTFT_STRONG_CHAIN;
+           break;
+       }
+     }
+     window->Pen(BLOBNBOX::TextlineColor(shown_type, shown_flow));
+     window->Rectangle(blob_box.left(), blob_box.bottom(),
+                       blob_box.right(), blob_box.top());
+   }
+   window->Update();
+   return window;
+ }
+
+ // Draws a vertical line through the horizontal centre of the blob, running
+ // from the higher of the blob top and base_char_top() down to the lower of
+ // the blob bottom and base_char_bottom().
+ static void DrawDiacriticJoiner(const BLOBNBOX* blob, ScrollView* window) {
+   const TBOX& bounds(blob->bounding_box());
+   const int box_top = static_cast<int>(bounds.top());
+   const int box_bottom = static_cast<int>(bounds.bottom());
+   const int line_top = std::max(box_top, blob->base_char_top());
+   const int line_bottom = std::min(box_bottom, blob->base_char_bottom());
+   const int mid_x = (bounds.left() + bounds.right()) / 2;
+   window->Line(mid_x, line_top, mid_x, line_bottom);
+ }
+
+// Displays blobs colored according to whether or not they are diacritics.
+ ScrollView* StrokeWidth::DisplayDiacritics(const char* window_name,
+                                            int x, int y, TO_BLOCK* block) {
+   auto window = MakeWindow(x, y, window_name);
+   // Outlines only: no fill brush.
+   window->Brush(ScrollView::NONE);
+
+   // Draws every blob on the list. Diacritics are green with a joiner line;
+   // other blobs use their own box colour if use_box_color, else white.
+   auto draw_list = [&](BLOBNBOX_LIST* blob_list, bool use_box_color) {
+     BLOBNBOX_IT blob_it(blob_list);
+     for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
+          blob_it.forward()) {
+       BLOBNBOX* blob = blob_it.data();
+       if (blob->IsDiacritic()) {
+         window->Pen(ScrollView::GREEN);
+         DrawDiacriticJoiner(blob, window);
+       } else if (use_box_color) {
+         window->Pen(blob->BoxColor());
+       } else {
+         window->Pen(ScrollView::WHITE);
+       }
+       const TBOX& blob_box = blob->bounding_box();
+       window->Rectangle(blob_box.left(), blob_box.bottom(),
+                         blob_box.right(), blob_box.top());
+     }
+   };
+   draw_list(&block->blobs, true);
+   draw_list(&block->noise_blobs, false);
+   window->Update();
+   return window;
+ }
+
+#endif // !GRAPHICS_DISABLED
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/strokewidth.h b/tesseract/src/textord/strokewidth.h
new file mode 100644
index 00000000..81b07c55
--- /dev/null
+++ b/tesseract/src/textord/strokewidth.h
@@ -0,0 +1,355 @@
+///////////////////////////////////////////////////////////////////////
+// File: strokewidth.h
+// Description: Subclass of BBGrid to find uniformity of strokewidth.
+// Author: Ray Smith
+// Created: Mon Mar 31 16:17:01 PST 2008
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_STROKEWIDTH_H_
+#define TESSERACT_TEXTORD_STROKEWIDTH_H_
+
+ #include "blobbox.h" // BlobNeighbourDir.
+#include "blobgrid.h" // Base class.
+#include "colpartitiongrid.h"
+#include "textlineprojection.h"
+
+class DENORM;
+class ScrollView;
+class TO_BLOCK;
+
+namespace tesseract {
+
+class ColPartition_LIST;
+class TabFind;
+class TextlineProjection;
+
+// Misc enums to clarify bool arguments for direction-controlling args.
+enum LeftOrRight {
+ LR_LEFT,
+ LR_RIGHT
+};
+
+// Return value from FindInitialPartitions indicates detection of severe
+// skew or noise.
+enum PartitionFindResult {
+ PFR_OK, // Everything is OK.
+ PFR_SKEW, // Skew was detected and rotated.
+ PFR_NOISE // Noise was detected and removed.
+};
+
+/**
+ * The StrokeWidth class holds all the normal and large blobs.
+ * It is used to find good large blobs and move them to the normal blobs
+ * by virtue of having a reasonable strokewidth compatible neighbour.
+ */
+class StrokeWidth : public BlobGrid {
+ public:
+ StrokeWidth(int gridsize, const ICOORD& bleft, const ICOORD& tright);
+ ~StrokeWidth() override;
+
+ // Sets the neighbours member of the medium-sized blobs in the block.
+ // Searches on 4 sides of each blob for similar-sized, similar-strokewidth
+ // blobs and sets pointers to the good neighbours.
+ void SetNeighboursOnMediumBlobs(TO_BLOCK* block);
+
+ // Sets the neighbour/textline writing direction members of the medium
+ // and large blobs with optional repair of broken CJK characters first.
+ // Repair of broken CJK is needed here because broken CJK characters
+ // can fool the textline direction detection algorithm.
+ void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode,
+ bool cjk_merge,
+ TO_BLOCK* input_block);
+
+ // To save computation, the process of generating partitions is broken
+ // into the following 4 steps:
+ // TestVerticalTextDirection
+ // CorrectForRotation (used only if a rotation is to be applied)
+ // FindLeaderPartitions
+ // GradeBlobsIntoPartitions.
+ // These functions are all required, in sequence, except for
+ // CorrectForRotation, which is not needed if no rotation is applied.
+
+ // Types all the blobs as vertical or horizontal text or unknown and
+ // returns true if the majority are vertical.
+ // If the blobs are rotated, it is necessary to call CorrectForRotation
+ // after rotating everything, otherwise the work done here will be enough.
+ // If osd_blobs is not null, a list of blobs from the dominant textline
+ // direction are returned for use in orientation and script detection.
+ // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio.
+ bool TestVerticalTextDirection(double find_vertical_text_ratio,
+ TO_BLOCK* block,
+ BLOBNBOX_CLIST* osd_blobs);
+
+ // Corrects the data structures for the given rotation.
+ void CorrectForRotation(const FCOORD& rerotation,
+ ColPartitionGrid* part_grid);
+
+ // Finds leader partitions and inserts them into the given grid.
+ void FindLeaderPartitions(TO_BLOCK* block,
+ ColPartitionGrid* part_grid);
+
+ // Finds those blobs that look like bits of vertical lines and marks them as
+ // noise, as they would otherwise screw up layout analysis.
+ void RemoveLineResidue(ColPartition_LIST* big_part_list);
+
+ // Types all the blobs as vertical text or horizontal text or unknown and
+ // puts them into initial ColPartitions in the supplied part_grid.
+ // rerotation determines how to get back to the image coordinates from the
+ // blob coordinates (since they may have been rotated for vertical text).
+ // block is the single block for the whole page or rectangle to be OCRed.
+ // nontext_pix (full-size), is a binary mask used to prevent merges across
+ // photo/text boundaries. It is not kept beyond this function.
+ // denorm provides a mapping back to the image from the current blob
+ // coordinate space.
+ // projection provides a measure of textline density over the image and
+ // provides functions to assist with diacritic detection. It should be a
+ // pointer to a new TextlineProjection, and will be setup here.
+ // part_grid is the output grid of textline partitions.
+ // Large blobs that cause overlap are put in separate partitions and added
+ // to the big_parts list.
+ void GradeBlobsIntoPartitions(PageSegMode pageseg_mode,
+ const FCOORD& rerotation, TO_BLOCK* block,
+ Pix* nontext_pix, const DENORM* denorm,
+ bool cjk_script, TextlineProjection* projection,
+ BLOBNBOX_LIST* diacritic_blobs,
+ ColPartitionGrid* part_grid,
+ ColPartition_LIST* big_parts);
+
+ // Handles a click event in a display window.
+ void HandleClick(int x, int y) override;
+
+ private:
+ // Computes the noise_density_ by summing the number of elements in a
+ // neighbourhood of each grid cell.
+ void ComputeNoiseDensity(TO_BLOCK* block, TabFind* line_grid);
+
+ // Detects and marks leader dots/dashes.
+ // Leaders are horizontal chains of small or noise blobs that look
+ // monospace according to ColPartition::MarkAsLeaderIfMonospaced().
+ // Detected leaders become the only occupants of the block->small_blobs list.
+ // Non-leader small blobs get moved to the blobs list.
+ // Non-leader noise blobs remain singletons in the noise list.
+ // All small and noise blobs in high density regions are marked BTFT_NONTEXT.
+ // block is the single block for the whole page or rectangle to be OCRed.
+ // leader_parts is the output.
+ void FindLeadersAndMarkNoise(TO_BLOCK* block,
+ ColPartition_LIST* leader_parts);
+
+ /** Inserts the block blobs (normal and large) into this grid.
+ * Blobs remain owned by the block. */
+ void InsertBlobs(TO_BLOCK* block);
+
+ // Fix broken CJK characters, using the fake joined blobs mechanism.
+ // Blobs are really merged, ie the master takes all the outlines and the
+ // others are deleted.
+ // Returns true if sufficient blobs are merged that it may be worth running
+ // again, due to a better estimate of character size.
+ bool FixBrokenCJK(TO_BLOCK* block);
+
+ // Collect blobs that overlap or are within max_dist of the input bbox.
+ // Return them in the list of blobs and expand the bbox to be the union
+ // of all the boxes. not_this is excluded from the search, as are blobs
+ // that cause the merged box to exceed max_size in either dimension.
+ void AccumulateOverlaps(const BLOBNBOX* not_this, bool debug,
+ int max_size, int max_dist,
+ TBOX* bbox, BLOBNBOX_CLIST* blobs);
+
+ // For each blob in this grid, Finds the textline direction to be horizontal
+ // or vertical according to distance to neighbours and 1st and 2nd order
+ // neighbours. Non-text tends to end up without a definite direction.
+ // Result is setting of the neighbours and vert_possible/horz_possible
+ // flags in the BLOBNBOXes currently in this grid.
+ // This function is called more than once if page orientation is uncertain,
+ // so display_if_debugging is true on the final call to display the results.
+ void FindTextlineFlowDirection(PageSegMode pageseg_mode,
+ bool display_if_debugging);
+
+ // Sets the neighbours and good_stroke_neighbours members of the blob by
+ // searching close on all 4 sides.
+ // When finding leader dots/dashes, there is a slightly different rule for
+ // what makes a good neighbour.
+ // If activate_line_trap, then line-like objects are found and isolated.
+ void SetNeighbours(bool leaders, bool activate_line_trap, BLOBNBOX* blob);
+
+ // Sets the good_stroke_neighbours member of the blob if it has a
+ // GoodNeighbour on the given side.
+ // Also sets the neighbour in the blob, whether or not a good one is found.
+ // Return value is the number of neighbours in the line trap size range.
+ // Leaders get extra special lenient treatment.
+ int FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, BLOBNBOX* blob);
+
+ // Makes the blob to be only horizontal or vertical where evidence
+ // is clear based on gaps of 2nd order neighbours.
+ void SetNeighbourFlows(BLOBNBOX* blob);
+
+ // Nullify the neighbours in the wrong directions where the direction
+ // is clear-cut based on a distance margin. Good for isolating vertical
+ // text from neighbouring horizontal text.
+ void SimplifyObviousNeighbours(BLOBNBOX* blob);
+
+ // Smooths the vertical/horizontal type of the blob based on the
+ // 2nd-order neighbours. If desperate is true, then all blobs are
+ // changed. Otherwise, only ambiguous blobs are processed.
+ void SmoothNeighbourTypes(PageSegMode pageseg_mode, bool desperate,
+ BLOBNBOX* blob);
+
+ // Checks the left or right side of the given leader partition and sets the
+ // (opposite) leader_on_right or leader_on_left flags for blobs
+ // that are next to the given side of the given leader partition.
+ void MarkLeaderNeighbours(const ColPartition* part, LeftOrRight side);
+
+ // Partition creation. Accumulates vertical and horizontal text chains,
+ // puts the remaining blobs in as unknowns, and then merges/splits to
+ // minimize overlap and smoothes the types with neighbours and the color
+ // image if provided. rerotation is used to rotate the coordinate space
+ // back to the nontext_map_ image.
+ // If find_problems is true, detects possible noise pollution by the amount
+ // of partition overlap that is created by the diacritics. If excessive, the
+ // noise is separated out into diacritic blobs, and PFR_NOISE is returned.
+ // [TODO(rays): if the partition overlap is caused by heavy skew, deskews
+ // the components, saves the skew_angle and returns PFR_SKEW.] If the return
+ // is not PFR_OK, the job is incomplete, and FindInitialPartitions must be
+ // called again after cleaning up the partly done work.
+ PartitionFindResult FindInitialPartitions(PageSegMode pageseg_mode,
+ const FCOORD& rerotation,
+ bool find_problems, TO_BLOCK* block,
+ BLOBNBOX_LIST* diacritic_blobs,
+ ColPartitionGrid* part_grid,
+ ColPartition_LIST* big_parts,
+ FCOORD* skew_angle);
+ // Detects noise by a significant increase in partition overlap from
+ // pre_overlap to now, and removes noise from the union of all the overlapping
+ // partitions, placing the blobs in diacritic_blobs. Returns true if any noise
+ // was found and removed.
+ bool DetectAndRemoveNoise(int pre_overlap, const TBOX& grid_box,
+ TO_BLOCK* block, ColPartitionGrid* part_grid,
+ BLOBNBOX_LIST* diacritic_blobs);
+ // Finds vertical chains of text-like blobs and puts them in ColPartitions.
+ void FindVerticalTextChains(ColPartitionGrid* part_grid);
+ // Finds horizontal chains of text-like blobs and puts them in ColPartitions.
+ void FindHorizontalTextChains(ColPartitionGrid* part_grid);
+ // Finds diacritics and saves their base character in the blob.
+ void TestDiacritics(ColPartitionGrid* part_grid, TO_BLOCK* block);
+ // Searches this grid for an appropriately close and sized neighbour of the
+ // given [small] blob. If such a blob is found, the diacritic base is saved
+ // in the blob and true is returned.
+ // The small_grid is a secondary grid that contains the small/noise objects
+ // that are not in this grid, but may be useful for determining a connection
+ // between blob and its potential base character. (See DiacriticXGapFilled.)
+ bool DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob);
+ // Returns true if there is no gap between the base char and the diacritic
+ // bigger than a fraction of the height of the base char:
+ // Eg: line end.....'
+ // The quote is a long way from the end of the line, yet it needs to be a
+ // diacritic. To determine that the quote is not part of an image, or
+ // a different text block, we check for other marks in the gap between
+ // the base char and the diacritic.
+ // '<--Diacritic
+ // |---------|
+ // | |<-toobig-gap->
+ // | Base |<ok gap>
+ // |---------| x<-----Dot occupying gap
+ // The grid is const really.
+ bool DiacriticXGapFilled(BlobGrid* grid, const TBOX& diacritic_box,
+ const TBOX& base_box);
+ // Merges diacritics with the ColPartition of the base character blob.
+ void MergeDiacritics(TO_BLOCK* block, ColPartitionGrid* part_grid);
+ // Any blobs on the large_blobs list of block that are still unowned by a
+ // ColPartition, are probably drop-cap or vertically touching so the blobs
+ // are removed to the big_parts list and treated separately.
+ void RemoveLargeUnusedBlobs(TO_BLOCK* block,
+ ColPartitionGrid* part_grid,
+ ColPartition_LIST* big_parts);
+
+ // All remaining unused blobs are put in individual ColPartitions.
+ void PartitionRemainingBlobs(PageSegMode pageseg_mode,
+ ColPartitionGrid* part_grid);
+
+ // If combine, put all blobs in the cell_list into a single partition,
+ // otherwise put each one into its own partition.
+ void MakePartitionsFromCellList(PageSegMode pageseg_mode, bool combine,
+ ColPartitionGrid* part_grid,
+ BLOBNBOX_CLIST* cell_list);
+
+ // Helper function to finish setting up a ColPartition and insert into
+ // part_grid.
+ void CompletePartition(PageSegMode pageseg_mode, ColPartition* part,
+ ColPartitionGrid* part_grid);
+
+ // Helper returns true if we are looking only for vertical textlines,
+ // taking into account any rotation that has been done.
+ bool FindingVerticalOnly(PageSegMode pageseg_mode) const {
+ if (rerotation_.y() == 0.0f) {
+ return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
+ }
+ return !PSM_ORIENTATION_ENABLED(pageseg_mode) &&
+ pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT;
+ }
+ // Helper returns true if we are looking only for horizontal textlines,
+ // taking into account any rotation that has been done.
+ bool FindingHorizontalOnly(PageSegMode pageseg_mode) const {
+ if (rerotation_.y() == 0.0f) {
+ return !PSM_ORIENTATION_ENABLED(pageseg_mode) &&
+ pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT;
+ }
+ return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
+ }
+
+ // Merge partitions where the merge appears harmless.
+ void EasyMerges(ColPartitionGrid* part_grid);
+
+ // Compute a search box based on the orientation of the partition.
+ // Returns true if a suitable box can be calculated.
+ // Callback for EasyMerges.
+ bool OrientationSearchBox(ColPartition* part, TBOX* box);
+
+ // Merge confirmation callback for EasyMerges.
+ bool ConfirmEasyMerge(const ColPartition* p1, const ColPartition* p2);
+
+ // Returns true if there is no significant noise in between the boxes.
+ bool NoNoiseInBetween(const TBOX& box1, const TBOX& box2) const;
+
+ // Displays the blobs colored according to the number of good neighbours
+ // and the vertical/horizontal flow.
+ ScrollView* DisplayGoodBlobs(const char* window_name, int x, int y);
+
+ // Displays blobs colored according to whether or not they are diacritics.
+ ScrollView* DisplayDiacritics(const char* window_name,
+ int x, int y, TO_BLOCK* block);
+
+ private:
+ // Image map of photo/noise areas on the page. Borrowed pointer (not owned.)
+ Pix* nontext_map_;
+ // Textline projection map. Borrowed pointer.
+ TextlineProjection* projection_;
+ // DENORM used by projection_ to get back to image coords. Borrowed pointer.
+ const DENORM* denorm_;
+ // Bounding box of the grid.
+ TBOX grid_box_;
+ // Rerotation to get back to the original image.
+ FCOORD rerotation_;
+ // Windows for debug display.
+ ScrollView* leaders_win_;
+ ScrollView* initial_widths_win_;
+ ScrollView* widths_win_;
+ ScrollView* chains_win_;
+ ScrollView* diacritics_win_;
+ ScrollView* textlines_win_;
+ ScrollView* smoothed_win_;
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_STROKEWIDTH_H_
diff --git a/tesseract/src/textord/tabfind.cpp b/tesseract/src/textord/tabfind.cpp
new file mode 100644
index 00000000..c88421c1
--- /dev/null
+++ b/tesseract/src/textord/tabfind.cpp
@@ -0,0 +1,1438 @@
+///////////////////////////////////////////////////////////////////////
+// File: tabfind.cpp
+// Description: Subclass of BBGrid to find vertically aligned blobs.
+// Author: Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "tabfind.h"
+#include "alignedblob.h"
+#include "colpartitiongrid.h"
+#include "detlinefit.h"
+#include "host.h" // for NearlyEqual
+#include "linefind.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+// Multiple of box size to search for initial gaps.
+const int kTabRadiusFactor = 5;
+// Min and Max multiple of height to search vertically when extrapolating.
+// kMinVerticalSearch is also the step by which FindAllTabVectors grows the
+// search size while estimating the vertical direction.
+const int kMinVerticalSearch = 3;
+const int kMaxVerticalSearch = 12;
+// Search size multiple that FindAllTabVectors passes to FindTabVectors for
+// the TA_LEFT_RAGGED and TA_RIGHT_RAGGED alignments.
+const int kMaxRaggedSearch = 25;
+// Minimum number of lines in a column width to make it interesting.
+const int kMinLinesInColumn = 10;
+// Minimum width of a column to be interesting.
+const int kMinColumnWidth = 200;
+// Minimum fraction of total column lines for a column to be interesting.
+const double kMinFractionalLinesInColumn = 0.125;
+// Fraction of height used as alignment tolerance for aligned tabs.
+// NOTE(review): TestBoxForTabs multiplies this by resolution_, not by a
+// height, to get its alignment tolerance.
+const double kAlignedFraction = 0.03125;
+// Maximum gutter width (in absolute inch) that we care about
+const double kMaxGutterWidthAbsolute = 2.00;
+// Multiplier of gridsize for min gutter width of TT_MAYBE_RAGGED blobs.
+const int kRaggedGutterMultiple = 5;
+// Min aspect ratio of tall objects to be considered a separator line.
+// (These will be ignored in searching the gutter for obstructions.)
+const double kLineFragmentAspectRatio = 10.0;
+// Min number of points to accept after evaluation.
+const int kMinEvaluatedTabs = 3;
+// Up to 30 degrees is allowed for rotations of diacritic blobs.
+// Keep this value slightly larger than kCosSmallAngle in blobbox.cpp
+// so that the assert there never fails.
+const double kCosMaxSkewAngle = 0.866025;
+
+// Debug switches. The first gates the "VerticalLines" and "InitialTabs"
+// windows, the second gates the "FinalTabs" window.
+static BOOL_VAR(textord_tabfind_show_initialtabs, false, "Show tab candidates");
+static BOOL_VAR(textord_tabfind_show_finaltabs, false, "Show tab vectors");
+
+// Constructs a TabFind. gridsize, bleft and tright are forwarded to the
+// AlignedBlob base class. The vectors in vlines are added to vectors_, and
+// vertical_x/vertical_y are passed to SetVerticalSkewAndParallelize.
+// resolution is stored in resolution_.
+TabFind::TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright,
+ TabVector_LIST* vlines, int vertical_x, int vertical_y,
+ int resolution)
+ : AlignedBlob(gridsize, bleft, tright),
+ resolution_(resolution),
+ image_origin_(0, tright.y() - 1),
+ v_it_(&vectors_) {
+ // width_cb_ is cleared here and only bound to CommonWidth at the end,
+ // after the vectors have been added and the skew has been set.
+ width_cb_ = nullptr;
+ v_it_.add_list_after(vlines);
+ SetVerticalSkewAndParallelize(vertical_x, vertical_y);
+ using namespace std::placeholders; // for _1
+ width_cb_ = std::bind(&TabFind::CommonWidth, this, _1);
+}
+
+// Nothing to release explicitly; the members clean up after themselves.
+TabFind::~TabFind() = default;
+
+///////////////// PUBLIC functions (mostly used by TabVector). //////////////
+
+// Insert a list of blobs into the given grid (not necessarily this).
+// The blobs are only read from the source list; none are removed from it.
+// See InsertBlob for the h_spread and v_spread arguments.
+// It would seem to make more sense to swap this and grid, but this way
+// around allows grid to not be derived from TabFind, eg a ColPartitionGrid,
+// while the grid that provides the tab stops(this) has to be derived from
+// TabFind.
+void TabFind::InsertBlobsToGrid(bool h_spread, bool v_spread,
+                                BLOBNBOX_LIST* blobs,
+                                BBGrid<BLOBNBOX, BLOBNBOX_CLIST,
+                                       BLOBNBOX_C_IT>* grid) {
+  // The two tallies are only used for the debug report at the end.
+  int inserted = 0;
+  int rejected = 0;
+  BLOBNBOX_IT it(blobs);
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    // InsertBlob returns false for blobs it declines to put in the grid.
+    if (InsertBlob(h_spread, v_spread, it.data(), grid))
+      ++inserted;
+    else
+      ++rejected;
+  }
+  if (textord_debug_tabfind) {
+    tprintf("Inserted %d blobs into grid, %d rejected.\n",
+            inserted, rejected);
+  }
+}
+
+// Insert a single blob into the given grid (not necessarily this).
+// If h_spread, then all cells covered horizontally by the box are
+// used, otherwise, just the bottom-left. Similarly for v_spread.
+// A side effect is that the left and right rule edges of the blob are
+// set according to the tab vectors in this (not grid).
+bool TabFind::InsertBlob(bool h_spread, bool v_spread, BLOBNBOX* blob,
+                         BBGrid<BLOBNBOX, BLOBNBOX_CLIST,
+                                BLOBNBOX_C_IT>* grid) {
+  // The rule edges are refreshed for every blob, including those that are
+  // subsequently rejected.
+  TBOX bounds = blob->bounding_box();
+  blob->set_left_rule(LeftEdgeForBox(bounds, false, false));
+  blob->set_right_rule(RightEdgeForBox(bounds, false, false));
+  blob->set_left_crossing_rule(LeftEdgeForBox(bounds, true, false));
+  blob->set_right_crossing_rule(RightEdgeForBox(bounds, true, false));
+  // Only blobs that are not joined to their predecessor go into the grid.
+  if (!blob->joined_to_prev()) {
+    grid->InsertBBox(h_spread, v_spread, blob);
+    return true;
+  }
+  return false;
+}
+
+// Calls SetBlobRuleEdges for all the blobs in the given block.
+void TabFind::SetBlockRuleEdges(TO_BLOCK* block) {
+  // Processed in this order: main, small, noise, then large blobs.
+  BLOBNBOX_LIST* const blob_lists[] = {&block->blobs, &block->small_blobs,
+                                       &block->noise_blobs,
+                                       &block->large_blobs};
+  for (BLOBNBOX_LIST* list : blob_lists)
+    SetBlobRuleEdges(list);
+}
+
+// Sets the left and right rule and crossing_rules for the blobs in the given
+// list by finding the next outermost tabvectors for each blob.
+void TabFind::SetBlobRuleEdges(BLOBNBOX_LIST* blobs) {
+  BLOBNBOX_IT it(blobs);
+  it.mark_cycle_pt();
+  while (!it.cycled_list()) {
+    BLOBNBOX* current = it.data();
+    TBOX bounds = current->bounding_box();
+    // Plain rules use the box edges; crossing rules pass crossing=true.
+    current->set_left_rule(LeftEdgeForBox(bounds, false, false));
+    current->set_right_rule(RightEdgeForBox(bounds, false, false));
+    current->set_left_crossing_rule(LeftEdgeForBox(bounds, true, false));
+    current->set_right_crossing_rule(RightEdgeForBox(bounds, true, false));
+    it.forward();
+  }
+}
+
+// Returns the gutter width of the given TabVector between the given y limits.
+// Also returns x-shift to be added to the vector to clear any intersecting
+// blobs. The shift is deducted from the returned gutter.
+// If ignore_unmergeables is true, then blobs of UnMergeableType are
+// ignored as if they don't exist. (Used for text on image.)
+// max_gutter_width is used as the maximum width worth searching for in case
+// there is nothing near the TabVector.
+int TabFind::GutterWidth(int bottom_y, int top_y, const TabVector& v,
+ bool ignore_unmergeables, int max_gutter_width,
+ int* required_shift) {
+ // The side search direction is right-to-left when v is a left tab.
+ bool right_to_left = v.IsLeftTab();
+ int bottom_x = v.XAtY(bottom_y);
+ int top_x = v.XAtY(top_y);
+ // Start from the larger x of the two vector ends when searching
+ // right-to-left, and from the smaller x otherwise.
+ int start_x = right_to_left ? std::max(top_x, bottom_x) : std::min(top_x, bottom_x);
+ BlobGridSearch sidesearch(this);
+ sidesearch.StartSideSearch(start_x, bottom_y, top_y);
+ // min_gap stays at max_gutter_width if no blob gives a smaller positive gap.
+ int min_gap = max_gutter_width;
+ *required_shift = 0;
+ BLOBNBOX* blob = nullptr;
+ while ((blob = sidesearch.NextSideSearch(right_to_left)) != nullptr) {
+ const TBOX& box = blob->bounding_box();
+ if (box.bottom() >= top_y || box.top() <= bottom_y)
+ continue; // Doesn't overlap enough.
+ if (box.height() >= gridsize() * 2 &&
+ box.height() > box.width() * kLineFragmentAspectRatio) {
+ // Skip likely separator line residue.
+ continue;
+ }
+ if (ignore_unmergeables && BLOBNBOX::UnMergeableType(blob->region_type()))
+ continue; // Skip non-text if required.
+ int mid_y = (box.bottom() + box.top()) / 2;
+ // We use the x at the mid-y so that the required_shift guarantees
+ // to clear all the blobs on the tab-stop. If we use the min/max
+ // of x at top/bottom of the blob, then exactness would be required,
+ // which is not a good thing.
+ int tab_x = v.XAtY(mid_y);
+ int gap;
+ // A negative gap means the blob reaches past the tab vector. In that
+ // case *required_shift keeps the most extreme offset seen so far:
+ // the smallest (box.left() - tab_x) for a left tab, the largest
+ // (box.right() - tab_x) otherwise.
+ if (right_to_left) {
+ gap = tab_x - box.right();
+ if (gap < 0 && box.left() - tab_x < *required_shift)
+ *required_shift = box.left() - tab_x;
+ } else {
+ gap = box.left() - tab_x;
+ if (gap < 0 && box.right() - tab_x > *required_shift)
+ *required_shift = box.right() - tab_x;
+ }
+ // Only strictly positive gaps can reduce the gutter width.
+ if (gap > 0 && gap < min_gap)
+ min_gap = gap;
+ }
+ // Result may be negative, in which case, this is a really bad tabstop.
+ return min_gap - abs(*required_shift);
+}
+
+// Find the gutter width and distance to inner neighbour for the given blob.
+// Outputs: *gutter_width is capped at max_gutter, and *neighbour_gap is the
+// distance from the internal side of bbox to the nearest inner neighbour or
+// crossing rule edge. left selects which side of bbox faces the gutter.
+void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height,
+ int max_gutter, bool left,
+ BLOBNBOX* bbox, int* gutter_width,
+ int* neighbour_gap) {
+ // NOTE(review): mean_height is not used anywhere in this function body.
+ const TBOX& box = bbox->bounding_box();
+ // The gutter and internal sides of the box.
+ int gutter_x = left ? box.left() : box.right();
+ int internal_x = left ? box.right() : box.left();
+ // On ragged edges, the gutter side of the box is away from the tabstop.
+ int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
+ *gutter_width = max_gutter;
+ // If the box is away from the tabstop, we need to increase
+ // the allowed gutter width.
+ if (tab_gap > 0)
+ *gutter_width += tab_gap;
+ bool debug = WithinTestRegion(2, box.left(), box.bottom());
+ if (debug)
+ tprintf("Looking in gutter\n");
+ // Find the nearest blob on the outside of the column.
+ BLOBNBOX* gutter_bbox = AdjacentBlob(bbox, left,
+ bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
+ *gutter_width, box.top(), box.bottom());
+ if (gutter_bbox != nullptr) {
+ // Measure the gutter from the tab stop to the near side of that blob.
+ const TBOX& gutter_box = gutter_bbox->bounding_box();
+ *gutter_width = left ? tab_x - gutter_box.right()
+ : gutter_box.left() - tab_x;
+ }
+ if (*gutter_width >= max_gutter) {
+ // If there is no box because a tab was in the way, get the tab coord.
+ // A 1-wide probe box is placed max_gutter away from tab_x and the
+ // crossing rule edge on its tab_x side is looked up.
+ TBOX gutter_box(box);
+ if (left) {
+ gutter_box.set_left(tab_x - max_gutter - 1);
+ gutter_box.set_right(tab_x - max_gutter);
+ int tab_gutter = RightEdgeForBox(gutter_box, true, false);
+ if (tab_gutter < tab_x - 1)
+ *gutter_width = tab_x - tab_gutter;
+ } else {
+ gutter_box.set_left(tab_x + max_gutter);
+ gutter_box.set_right(tab_x + max_gutter + 1);
+ int tab_gutter = LeftEdgeForBox(gutter_box, true, false);
+ if (tab_gutter > tab_x + 1)
+ *gutter_width = tab_gutter - tab_x;
+ }
+ }
+ // The reported gutter width never exceeds max_gutter.
+ if (*gutter_width > max_gutter)
+ *gutter_width = max_gutter;
+ // Now look for a neighbour on the inside.
+ if (debug)
+ tprintf("Looking for neighbour\n");
+ BLOBNBOX* neighbour = AdjacentBlob(bbox, !left,
+ bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
+ *gutter_width, box.top(), box.bottom());
+ // Default to the crossing rule edge on the internal side of the box.
+ int neighbour_edge = left ? RightEdgeForBox(box, true, false)
+ : LeftEdgeForBox(box, true, false);
+ if (neighbour != nullptr) {
+ const TBOX& n_box = neighbour->bounding_box();
+ if (debug) {
+ tprintf("Found neighbour:");
+ n_box.print();
+ }
+ // Use the neighbour's near edge if it is closer than the rule edge.
+ if (left && n_box.left() < neighbour_edge)
+ neighbour_edge = n_box.left();
+ else if (!left && n_box.right() > neighbour_edge)
+ neighbour_edge = n_box.right();
+ }
+ *neighbour_gap = left ? neighbour_edge - internal_x
+ : internal_x - neighbour_edge;
+}
+
+// Return the x-coord that corresponds to the right edge for the given
+// box. If there is a rule line to the right that vertically overlaps it,
+// then return the x-coord of the rule line, otherwise return the right
+// edge of the page. For details see RightTabForBox below.
+int TabFind::RightEdgeForBox(const TBOX& box, bool crossing, bool extended) {
+  TabVector* tab = RightTabForBox(box, crossing, extended);
+  if (tab == nullptr)
+    return tright_.x();  // No tab vector found: use the x of tright_.
+  // Otherwise evaluate the vector at the vertical middle of the box.
+  const int mid_y = (box.top() + box.bottom()) / 2;
+  return tab->XAtY(mid_y);
+}
+// As RightEdgeForBox, but finds the left Edge instead.
+int TabFind::LeftEdgeForBox(const TBOX& box, bool crossing, bool extended) {
+  TabVector* tab = LeftTabForBox(box, crossing, extended);
+  if (tab == nullptr)
+    return bleft_.x();  // No tab vector found: use the x of bleft_.
+  // Otherwise evaluate the vector at the vertical middle of the box.
+  const int mid_y = (box.top() + box.bottom()) / 2;
+  return tab->XAtY(mid_y);
+}
+
+// This comment documents how this function works.
+// For its purpose and arguments, see the comment in tabfind.h.
+// TabVectors are stored sorted by perpendicular distance of middle from
+// the global mean vertical vector. Since the individual vectors can have
+// differing directions, their XAtY for a given y is not necessarily in the
+// right order. Therefore the search has to be run with a margin.
+// The middle of a vector that passes through (x,y) cannot be higher than
+// halfway from y to the top, or lower than halfway from y to the bottom
+// of the coordinate range; therefore, the search margin is the range of
+// sort keys between these halfway points. Any vector with a sort key greater
+// than the upper margin must be to the right of x at y, and likewise any
+// vector with a sort key less than the lower margin must pass to the left
+// of x at y.
+TabVector* TabFind::RightTabForBox(const TBOX& box, bool crossing,
+ bool extended) {
+ // Uses the member iterator v_it_, which is left wherever this search stops.
+ if (v_it_.empty())
+ return nullptr;
+ int top_y = box.top();
+ int bottom_y = box.bottom();
+ int mid_y = (top_y + bottom_y) / 2;
+ // With crossing set, the reference x is the box centre instead of its
+ // right edge.
+ int right = crossing ? (box.left() + box.right()) / 2 : box.right();
+ int min_key, max_key;
+ SetupTabSearch(right, mid_y, &min_key, &max_key);
+ // Position the iterator at the first TabVector with sort_key >= min_key.
+ while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key)
+ v_it_.backward();
+ while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key)
+ v_it_.forward();
+ // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
+ TabVector* best_v = nullptr;
+ int best_x = -1;
+ int key_limit = -1;
+ do {
+ TabVector* v = v_it_.data();
+ int x = v->XAtY(mid_y);
+ // With extended set, ExtendedOverlap is also accepted as an overlap.
+ if (x >= right &&
+ (v->VOverlap(top_y, bottom_y) > 0 ||
+ (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
+ if (best_v == nullptr || x < best_x) {
+ best_v = v;
+ best_x = x;
+ // We can guarantee that no better vector can be found if the
+ // sort key exceeds that of the best by max_key - min_key.
+ key_limit = v->sort_key() + max_key - min_key;
+ }
+ }
+ // Break when the search is done to avoid wrapping the iterator and
+ // thereby potentially slowing the next search.
+ if (v_it_.at_last() ||
+ (best_v != nullptr && v->sort_key() > key_limit))
+ break; // Prevent restarting list for next call.
+ v_it_.forward();
+ } while (!v_it_.at_first());
+ // nullptr if no vector qualified.
+ return best_v;
+}
+
+// As RightTabForBox, but finds the left TabVector instead.
+TabVector* TabFind::LeftTabForBox(const TBOX& box, bool crossing,
+ bool extended) {
+ // Uses the member iterator v_it_, which is left wherever this search stops.
+ if (v_it_.empty())
+ return nullptr;
+ int top_y = box.top();
+ int bottom_y = box.bottom();
+ int mid_y = (top_y + bottom_y) / 2;
+ // With crossing set, the reference x is the box centre instead of its
+ // left edge.
+ int left = crossing ? (box.left() + box.right()) / 2 : box.left();
+ int min_key, max_key;
+ SetupTabSearch(left, mid_y, &min_key, &max_key);
+ // Position the iterator at the last TabVector with sort_key <= max_key.
+ while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key)
+ v_it_.forward();
+ while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
+ v_it_.backward();
+ }
+ // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
+ TabVector* best_v = nullptr;
+ int best_x = -1;
+ int key_limit = -1;
+ do {
+ TabVector* v = v_it_.data();
+ int x = v->XAtY(mid_y);
+ // With extended set, ExtendedOverlap is also accepted as an overlap.
+ if (x <= left &&
+ (v->VOverlap(top_y, bottom_y) > 0 ||
+ (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
+ if (best_v == nullptr || x > best_x) {
+ best_v = v;
+ best_x = x;
+ // We can guarantee that no better vector can be found if the
+ // sort key is less than that of the best by max_key - min_key.
+ key_limit = v->sort_key() - (max_key - min_key);
+ }
+ }
+ // Break when the search is done to avoid wrapping the iterator and
+ // thereby potentially slowing the next search.
+ if (v_it_.at_first() ||
+ (best_v != nullptr && v->sort_key() < key_limit))
+ break; // Prevent restarting list for next call.
+ v_it_.backward();
+ } while (!v_it_.at_last());
+ // nullptr if no vector qualified.
+ return best_v;
+}
+
+// Return true if the given width is close to one of the common
+// widths in column_widths_.
+bool TabFind::CommonWidth(int width) {
+  // Widths in column_widths_ are compared after dividing the query width by
+  // kColumnWidthFactor.
+  const int scaled = width / kColumnWidthFactor;
+  ICOORDELT_IT range_it(&column_widths_);
+  for (range_it.mark_cycle_pt(); !range_it.cycled_list();
+       range_it.forward()) {
+    ICOORDELT* range = range_it.data();
+    // Each entry is an [x, y] range; accept with a slack of 1 at each end.
+    const bool below = scaled < range->x() - 1;
+    const bool above = scaled > range->y() + 1;
+    if (!below && !above)
+      return true;
+  }
+  return false;
+}
+
+// Return true if the sizes are more than a
+// factor of 2 different.
+bool TabFind::DifferentSizes(int size1, int size2) {
+  // True when either size is more than double the other.
+  if (size1 > size2 * 2)
+    return true;
+  return size2 > size1 * 2;
+}
+
+// Return true if the sizes are more than a
+// factor of 5 different.
+bool TabFind::VeryDifferentSizes(int size1, int size2) {
+  // True when either size is more than five times the other.
+  if (size1 > size2 * 5)
+    return true;
+  return size2 > size1 * 5;
+}
+
+///////////////// PROTECTED functions (used by ColumnFinder). //////////////
+
+// Top-level function to find TabVectors in an input page block.
+// Returns false if the detected skew angle is impossible.
+// Applies the detected skew angle to deskew the tabs, blobs and part_grid.
+bool TabFind::FindTabVectors(TabVector_LIST* hlines,
+ BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
+ int min_gutter_width,
+ double tabfind_aligned_gap_fraction,
+ ColPartitionGrid* part_grid,
+ FCOORD* deskew, FCOORD* reskew) {
+ // Stage 1: find the initial tab vectors. The returned window may be
+ // nullptr and is handed on to ComputeColumnWidths.
+ ScrollView* tab_win = FindInitialTabVectors(image_blobs, min_gutter_width,
+ tabfind_aligned_gap_fraction,
+ block);
+ ComputeColumnWidths(tab_win, part_grid);
+ // Stage 2: merge, sort and clean up the vectors.
+ TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this);
+ SortVectors();
+ CleanupTabs();
+ // Stage 3: deskew. On failure nothing below is run and part_grid is left
+ // as it was.
+ if (!Deskew(hlines, image_blobs, block, deskew, reskew))
+ return false; // Skew angle is too large.
+ part_grid->Deskew(*deskew);
+ ApplyTabConstraints();
+ #ifndef GRAPHICS_DISABLED
+ // Optional debug display, gated by textord_tabfind_show_finaltabs.
+ if (textord_tabfind_show_finaltabs) {
+ tab_win = MakeWindow(640, 50, "FinalTabs");
+ DisplayBoxes(tab_win);
+ DisplayTabs("FinalTabs", tab_win);
+ tab_win = DisplayTabVectors(tab_win);
+ }
+ #endif // !GRAPHICS_DISABLED
+ return true;
+}
+
+// Top-level function to not find TabVectors in an input page block,
+// but setup for single column mode.
+void TabFind::DontFindTabVectors(BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
+                                 FCOORD* deskew, FCOORD* reskew) {
+  // Image blobs go in without spreading; text blobs are spread horizontally.
+  InsertBlobsToGrid(false, false, image_blobs, this);
+  InsertBlobsToGrid(true, false, &block->blobs, this);
+  // No skew is estimated here, so both outputs are set to (1, 0).
+  auto set_unit_x = [](FCOORD* rotation) {
+    rotation->set_x(1.0f);
+    rotation->set_y(0.0f);
+  };
+  set_unit_x(deskew);
+  set_unit_x(reskew);
+}
+
+// Cleans up the lists of blobs in the block ready for use by TabFind.
+// Large blobs that look like text are moved to the main blobs list.
+// Main blobs that are superseded by the image blobs are deleted.
+void TabFind::TidyBlobs(TO_BLOCK* block) {
+ BLOBNBOX_IT large_it = &block->large_blobs;
+ BLOBNBOX_IT blob_it = &block->blobs;
+ // Number of large blobs moved, reported in the debug output only.
+ int b_count = 0;
+ for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
+ BLOBNBOX* large_blob = large_it.data();
+ // Large blobs that have an owner are moved to the end of the main list.
+ if (large_blob->owner() != nullptr) {
+ blob_it.add_to_end(large_it.extract());
+ ++b_count;
+ }
+ }
+ if (textord_debug_tabfind) {
+ tprintf("Moved %d large blobs to normal list\n",
+ b_count);
+ #ifndef GRAPHICS_DISABLED
+ // Debug window showing the graded and noise blobs of the block.
+ ScrollView* rej_win = MakeWindow(500, 300, "Image blobs");
+ block->plot_graded_blobs(rej_win);
+ block->plot_noise_blobs(rej_win);
+ rej_win->Update();
+ #endif // !GRAPHICS_DISABLED
+ }
+ block->DeleteUnownedNoise();
+}
+
+// Helper function to setup search limits for *TabForBox.
+void TabFind::SetupTabSearch(int x, int y, int* min_key, int* max_key) {
+  // Sort keys are taken at x, halfway from y to the top (tright_.y()) and
+  // halfway from y to the bottom (bleft_.y()) of the coordinate range.
+  const int upper_mid_y = (y + tright_.y()) / 2;
+  const int lower_mid_y = (y + bleft_.y()) / 2;
+  const int upper_key = TabVector::SortKey(vertical_skew_, x, upper_mid_y);
+  const int lower_key = TabVector::SortKey(vertical_skew_, x, lower_mid_y);
+  // Report the two keys in ascending order.
+  if (upper_key < lower_key) {
+    *min_key = upper_key;
+    *max_key = lower_key;
+  } else {
+    *min_key = lower_key;
+    *max_key = upper_key;
+  }
+}
+
+#ifndef GRAPHICS_DISABLED
+
+ScrollView* TabFind::DisplayTabVectors(ScrollView* tab_win) {
+  // Draw each entry of vectors_ on the window, then refresh it once.
+  TabVector_IT vec_it(&vectors_);
+  vec_it.mark_cycle_pt();
+  while (!vec_it.cycled_list()) {
+    vec_it.data()->Display(tab_win);
+    vec_it.forward();
+  }
+  tab_win->Update();
+  return tab_win;
+}
+
+#endif
+
+// PRIVATE CODE.
+//
+// First part of FindTabVectors, which may be used twice if the text
+// is mostly of vertical alignment.
+ScrollView* TabFind::FindInitialTabVectors(BLOBNBOX_LIST* image_blobs,
+ int min_gutter_width,
+ double tabfind_aligned_gap_fraction,
+ TO_BLOCK* block) {
+#ifndef GRAPHICS_DISABLED
+ // Debug display of the vectors already present before any tab finding.
+ if (textord_tabfind_show_initialtabs) {
+ ScrollView* line_win = MakeWindow(0, 0, "VerticalLines");
+ line_win = DisplayTabVectors(line_win);
+ }
+#endif
+ // Prepare the grid.
+ // image_blobs may be nullptr, in which case only the block's blobs are used.
+ if (image_blobs != nullptr)
+ InsertBlobsToGrid(true, false, image_blobs, this);
+ InsertBlobsToGrid(true, false, &block->blobs, this);
+ // Find the candidate tab boxes, then build the tab vectors from them.
+ ScrollView* initial_win = FindTabBoxes(min_gutter_width,
+ tabfind_aligned_gap_fraction);
+ FindAllTabVectors(min_gutter_width);
+
+ TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this);
+ SortVectors();
+ EvaluateTabs();
+#ifndef GRAPHICS_DISABLED
+ if (textord_tabfind_show_initialtabs && initial_win != nullptr)
+ initial_win = DisplayTabVectors(initial_win);
+#endif
+ MarkVerticalText();
+ // Window returned by FindTabBoxes; nullptr unless the debug display is on.
+ return initial_win;
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Helper displays all the boxes in the given vector on the given window.
+static void DisplayBoxVector(const GenericVector<BLOBNBOX*>& boxes,
+                             ScrollView* win) {
+  const int count = boxes.size();
+  for (int index = 0; index < count; ++index) {
+    BLOBNBOX* blob = boxes[index];
+    TBOX bounds = blob->bounding_box();
+    // Each rectangle is drawn in the colour reported by the blob itself.
+    win->Pen(blob->BoxColor());
+    win->Rectangle(bounds.left(), bounds.bottom(),
+                   bounds.right(), bounds.top());
+  }
+  // Refresh once after all rectangles have been drawn.
+  win->Update();
+}
+
+#endif // !GRAPHICS_DISABLED
+
+// For each box in the grid, decide whether it is a candidate tab-stop,
+// and if so add it to the left/right tab boxes.
+ScrollView* TabFind::FindTabBoxes(int min_gutter_width,
+ double tabfind_aligned_gap_fraction) {
+ // Start from empty candidate lists on every call.
+ left_tab_boxes_.clear();
+ right_tab_boxes_.clear();
+ // For every bbox in the grid, determine whether it uses a tab on an edge.
+ GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this);
+ gsearch.StartFullSearch();
+ BLOBNBOX* bbox;
+ while ((bbox = gsearch.NextFullSearch()) != nullptr) {
+ // TestBoxForTabs sets the left/right tab types on bbox as a side effect.
+ if (TestBoxForTabs(bbox, min_gutter_width, tabfind_aligned_gap_fraction)) {
+ // If it is any kind of tab, insert it into the vectors.
+ // A box can end up in both lists.
+ if (bbox->left_tab_type() != TT_NONE)
+ left_tab_boxes_.push_back(bbox);
+ if (bbox->right_tab_type() != TT_NONE)
+ right_tab_boxes_.push_back(bbox);
+ }
+ }
+ // Sort left tabs by left and right by right to see the outermost one first
+ // on a ragged tab.
+ left_tab_boxes_.sort(SortByBoxLeft<BLOBNBOX>);
+ right_tab_boxes_.sort(SortRightToLeft<BLOBNBOX>);
+ // Stays nullptr unless the debug display below creates a window.
+ ScrollView* tab_win = nullptr;
+ #ifndef GRAPHICS_DISABLED
+ if (textord_tabfind_show_initialtabs) {
+ tab_win = MakeWindow(0, 100, "InitialTabs");
+ tab_win->Pen(ScrollView::BLUE);
+ tab_win->Brush(ScrollView::NONE);
+ // Display the left and right tab boxes.
+ DisplayBoxVector(left_tab_boxes_, tab_win);
+ DisplayBoxVector(right_tab_boxes_, tab_win);
+ tab_win = DisplayTabs("Tabs", tab_win);
+ }
+ #endif // !GRAPHICS_DISABLED
+ return tab_win;
+}
+
+// Decides whether the left and/or right edge of bbox is a tab candidate by
+// examining its neighbours in a radial search of the grid.
+// Sets the left and right tab types of bbox to TT_MAYBE_ALIGNED,
+// TT_MAYBE_RAGGED or TT_NONE, and returns true if either is not TT_NONE.
+// min_gutter_width is a lower bound on both the aligned and the ragged gutter
+// sizes; tabfind_aligned_gap_fraction scales the blob height to give the
+// aligned gutter size.
+bool TabFind::TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width,
+ double tabfind_aligned_gap_fraction) {
+ GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> radsearch(this);
+ TBOX box = bbox->bounding_box();
+ // If there are separator lines, get the column edges.
+ int left_column_edge = bbox->left_rule();
+ int right_column_edge = bbox->right_rule();
+ // The edges of the bounding box of the blob being processed.
+ int left_x = box.left();
+ int right_x = box.right();
+ int top_y = box.top();
+ int bottom_y = box.bottom();
+ int height = box.height();
+ bool debug = WithinTestRegion(3, left_x, top_y);
+ if (debug) {
+ tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n",
+ left_x, top_y, right_x, bottom_y,
+ left_column_edge, right_column_edge);
+ }
+ // Compute a search radius based on a multiple of the height.
+ // The radius is in grid cells, rounded up.
+ int radius = (height * kTabRadiusFactor + gridsize_ - 1) / gridsize_;
+ radsearch.StartRadSearch((left_x + right_x)/2, (top_y + bottom_y)/2, radius);
+ // In Vertical Page mode, once we have an estimate of the vertical line
+ // spacing, the minimum amount of gutter space before a possible tab is
+ // increased under the assumption that column partition is always larger
+ // than line spacing.
+ int min_spacing =
+ static_cast<int>(height * tabfind_aligned_gap_fraction);
+ if (min_gutter_width > min_spacing)
+ min_spacing = min_gutter_width;
+ int min_ragged_gutter = kRaggedGutterMultiple * gridsize();
+ if (min_gutter_width > min_ragged_gutter)
+ min_ragged_gutter = min_gutter_width;
+ // A neighbour reaching inside these x limits intrudes on the gutter.
+ // target_right is the limit on the left side of the box and target_left
+ // the limit on its right side.
+ int target_right = left_x - min_spacing;
+ int target_left = right_x + min_spacing;
+ // We will be evaluating whether the left edge could be a left tab, and
+ // whether the right edge could be a right tab.
+ // A box can be a tab if its bool is_(left/right)_tab remains true, meaning
+ // that no blobs have been found in the gutter during the radial search.
+ // A box can also be a tab if there are objects in the gutter only above
+ // or only below, and there are aligned objects on the opposite side, but
+ // not too many unaligned objects. The maybe_(left/right)_tab_up counts
+ // aligned objects above and negatively counts unaligned objects above,
+ // and is set to -INT32_MAX if a gutter object is found above.
+ // The other 3 maybe ints work similarly for the other sides.
+ // These conditions are very strict, to minimize false positives, and really
+ // only aligned tabs and outermost ragged tab blobs will qualify, so we
+ // also have maybe_ragged_left/right with less stringent rules.
+ // A blob that is maybe_ragged_left/right will be further qualified later,
+ // using the min_ragged_gutter.
+ bool is_left_tab = true;
+ bool is_right_tab = true;
+ bool maybe_ragged_left = true;
+ bool maybe_ragged_right = true;
+ int maybe_left_tab_up = 0;
+ int maybe_right_tab_up = 0;
+ int maybe_left_tab_down = 0;
+ int maybe_right_tab_down = 0;
+ // A leader on either side rules out every kind of tab on that side.
+ if (bbox->leader_on_left()) {
+ is_left_tab = false;
+ maybe_ragged_left = false;
+ maybe_left_tab_up = -INT32_MAX;
+ maybe_left_tab_down = -INT32_MAX;
+ }
+ if (bbox->leader_on_right()) {
+ is_right_tab = false;
+ maybe_ragged_right = false;
+ maybe_right_tab_up = -INT32_MAX;
+ maybe_right_tab_down = -INT32_MAX;
+ }
+ int alignment_tolerance = static_cast<int>(resolution_ * kAlignedFraction);
+ BLOBNBOX* neighbour = nullptr;
+ while ((neighbour = radsearch.NextRadSearch()) != nullptr) {
+ if (neighbour == bbox)
+ continue;
+ TBOX nbox = neighbour->bounding_box();
+ int n_left = nbox.left();
+ int n_right = nbox.right();
+ if (debug)
+ tprintf("Neighbour at (%d,%d)->(%d,%d)\n",
+ n_left, nbox.bottom(), n_right, nbox.top());
+ // If the neighbouring blob is the wrong side of a separator line, then it
+ // "doesn't exist" as far as we are concerned.
+ if (n_right > right_column_edge || n_left < left_column_edge ||
+ left_x < neighbour->left_rule() || right_x > neighbour->right_rule())
+ continue; // Separator line in the way.
+ int n_mid_x = (n_left + n_right) / 2;
+ int n_mid_y = (nbox.top() + nbox.bottom()) / 2;
+ // Left edge: the neighbour is either in the gutter, aligned with the
+ // edge, or overlapping it without being aligned.
+ if (n_mid_x <= left_x && n_right >= target_right) {
+ if (debug)
+ tprintf("Not a left tab\n");
+ is_left_tab = false;
+ if (n_mid_y < top_y)
+ maybe_left_tab_down = -INT32_MAX;
+ if (n_mid_y > bottom_y)
+ maybe_left_tab_up = -INT32_MAX;
+ } else if (NearlyEqual(left_x, n_left, alignment_tolerance)) {
+ if (debug)
+ tprintf("Maybe a left tab\n");
+ if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX)
+ ++maybe_left_tab_up;
+ if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX)
+ ++maybe_left_tab_down;
+ } else if (n_left < left_x && n_right >= left_x) {
+ // Overlaps but not aligned so negative points on a maybe.
+ if (debug)
+ tprintf("Maybe Not a left tab\n");
+ if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX)
+ --maybe_left_tab_up;
+ if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX)
+ --maybe_left_tab_down;
+ }
+ // A vertically overlapping neighbour in the left gutter rules out a
+ // ragged left tab.
+ if (n_left < left_x && nbox.y_overlap(box) && n_right >= target_right) {
+ maybe_ragged_left = false;
+ if (debug)
+ tprintf("Not a ragged left\n");
+ }
+ // Right edge: mirror image of the left-edge tests above.
+ if (n_mid_x >= right_x && n_left <= target_left) {
+ if (debug)
+ tprintf("Not a right tab\n");
+ is_right_tab = false;
+ if (n_mid_y < top_y)
+ maybe_right_tab_down = -INT32_MAX;
+ if (n_mid_y > bottom_y)
+ maybe_right_tab_up = -INT32_MAX;
+ } else if (NearlyEqual(right_x, n_right, alignment_tolerance)) {
+ if (debug)
+ tprintf("Maybe a right tab\n");
+ if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX)
+ ++maybe_right_tab_up;
+ if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX)
+ ++maybe_right_tab_down;
+ } else if (n_right > right_x && n_left <= right_x) {
+ // Overlaps but not aligned so negative points on a maybe.
+ if (debug)
+ tprintf("Maybe Not a right tab\n");
+ if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX)
+ --maybe_right_tab_up;
+ if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX)
+ --maybe_right_tab_down;
+ }
+ if (n_right > right_x && nbox.y_overlap(box) && n_left <= target_left) {
+ maybe_ragged_right = false;
+ if (debug)
+ tprintf("Not a ragged right\n");
+ }
+ // Stop early once all four maybe counters have been ruled out.
+ if (maybe_left_tab_down == -INT32_MAX && maybe_left_tab_up == -INT32_MAX &&
+ maybe_right_tab_down == -INT32_MAX && maybe_right_tab_up == -INT32_MAX)
+ break;
+ }
+ // Aligned takes priority over ragged. A ragged candidate must also pass
+ // the ConfirmRagged* check with min_ragged_gutter.
+ if (is_left_tab || maybe_left_tab_up > 1 || maybe_left_tab_down > 1) {
+ bbox->set_left_tab_type(TT_MAYBE_ALIGNED);
+ } else if (maybe_ragged_left && ConfirmRaggedLeft(bbox, min_ragged_gutter)) {
+ bbox->set_left_tab_type(TT_MAYBE_RAGGED);
+ } else {
+ bbox->set_left_tab_type(TT_NONE);
+ }
+ if (is_right_tab || maybe_right_tab_up > 1 || maybe_right_tab_down > 1) {
+ bbox->set_right_tab_type(TT_MAYBE_ALIGNED);
+ } else if (maybe_ragged_right &&
+ ConfirmRaggedRight(bbox, min_ragged_gutter)) {
+ bbox->set_right_tab_type(TT_MAYBE_RAGGED);
+ } else {
+ bbox->set_right_tab_type(TT_NONE);
+ }
+ if (debug) {
+ tprintf("Left result = %s, Right result=%s\n",
+ bbox->left_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" :
+ (bbox->left_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"),
+ bbox->right_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" :
+ (bbox->right_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"));
+ }
+ return bbox->left_tab_type() != TT_NONE || bbox->right_tab_type() != TT_NONE;
+}
+
+// Returns true if there is nothing in the rectangle of width min_gutter to
+// the left of bbox.
+bool TabFind::ConfirmRaggedLeft(BLOBNBOX* bbox, int min_gutter) {
+  const TBOX& blob_box = bbox->bounding_box();
+  // Same vertical extent as the blob, covering the min_gutter units
+  // immediately to the left of its left edge.
+  TBOX gutter_box(blob_box);
+  const int edge = gutter_box.left();
+  gutter_box.set_right(edge);
+  gutter_box.set_left(edge - min_gutter);
+  return NothingYOverlapsInBox(gutter_box, blob_box);
+}
+
+// Returns true if there is nothing in the rectangle of width min_gutter to
+// the right of bbox.
+bool TabFind::ConfirmRaggedRight(BLOBNBOX* bbox, int min_gutter) {
+  const TBOX& blob_box = bbox->bounding_box();
+  // Same vertical extent as the blob, covering the min_gutter units
+  // immediately to the right of its right edge.
+  TBOX gutter_box(blob_box);
+  const int edge = gutter_box.right();
+  gutter_box.set_left(edge);
+  gutter_box.set_right(edge + min_gutter);
+  return NothingYOverlapsInBox(gutter_box, blob_box);
+}
+
+// Returns true if there is nothing in the given search_box that vertically
+// overlaps target_box other than target_box itself.
+bool TabFind::NothingYOverlapsInBox(const TBOX& search_box,
+                                    const TBOX& target_box) {
+  BlobGridSearch rect_search(this);
+  rect_search.StartRectSearch(search_box);
+  for (BLOBNBOX* candidate = rect_search.NextRectSearch();
+       candidate != nullptr; candidate = rect_search.NextRectSearch()) {
+    const TBOX& cand_box = candidate->bounding_box();
+    if (!cand_box.y_overlap(target_box))
+      continue;  // No vertical overlap with the target.
+    if (cand_box == target_box)
+      continue;  // A box equal to the target box does not count.
+    return false;
+  }
+  return true;
+}
+
+// Finds all the tab vectors in two phases: a trial phase that only estimates
+// the vertical direction, then the real search for aligned and ragged tabs.
+// The results are added to vectors_ and the estimated vertical direction is
+// passed to SetVerticalSkewAndParallelize.
+void TabFind::FindAllTabVectors(int min_gutter_width) {
+ // A list of vectors that will be created in estimating the skew.
+ TabVector_LIST dummy_vectors;
+ // An estimate of the vertical direction, revised as more lines are added.
+ int vertical_x = 0;
+ int vertical_y = 1;
+ // Find an estimate of the vertical direction by finding some tab vectors.
+ // Slowly up the search size until we get some vectors.
+ for (int search_size = kMinVerticalSearch; search_size < kMaxVerticalSearch;
+ search_size += kMinVerticalSearch) {
+ int vector_count = FindTabVectors(search_size, TA_LEFT_ALIGNED,
+ min_gutter_width,
+ &dummy_vectors,
+ &vertical_x, &vertical_y);
+ vector_count += FindTabVectors(search_size, TA_RIGHT_ALIGNED,
+ min_gutter_width,
+ &dummy_vectors,
+ &vertical_x, &vertical_y);
+ if (vector_count > 0)
+ break;
+ }
+ // Get rid of the test vectors and reset the types of the tabs.
+ // Only boxes marked TT_CONFIRMED are reset, back to TT_MAYBE_ALIGNED.
+ dummy_vectors.clear();
+ for (int i = 0; i < left_tab_boxes_.size(); ++i) {
+ BLOBNBOX* bbox = left_tab_boxes_[i];
+ if (bbox->left_tab_type() == TT_CONFIRMED)
+ bbox->set_left_tab_type(TT_MAYBE_ALIGNED);
+ }
+ for (int i = 0; i < right_tab_boxes_.size(); ++i) {
+ BLOBNBOX* bbox = right_tab_boxes_[i];
+ if (bbox->right_tab_type() == TT_CONFIRMED)
+ bbox->set_right_tab_type(TT_MAYBE_ALIGNED);
+ }
+ if (textord_debug_tabfind) {
+ tprintf("Beginning real tab search with vertical = %d,%d...\n",
+ vertical_x, vertical_y);
+ }
+ // Now do the real thing, but keep the vectors in the dummy_vectors list
+ // until they are all done, so we don't get the tab vectors confused with
+ // the rule line vectors.
+ // Aligned tabs use kMaxVerticalSearch, ragged tabs use kMaxRaggedSearch.
+ FindTabVectors(kMaxVerticalSearch, TA_LEFT_ALIGNED, min_gutter_width,
+ &dummy_vectors, &vertical_x, &vertical_y);
+ FindTabVectors(kMaxVerticalSearch, TA_RIGHT_ALIGNED, min_gutter_width,
+ &dummy_vectors, &vertical_x, &vertical_y);
+ FindTabVectors(kMaxRaggedSearch, TA_LEFT_RAGGED, min_gutter_width,
+ &dummy_vectors, &vertical_x, &vertical_y);
+ FindTabVectors(kMaxRaggedSearch, TA_RIGHT_RAGGED, min_gutter_width,
+ &dummy_vectors, &vertical_x, &vertical_y);
+ // Now add the vectors to the vectors_ list.
+ TabVector_IT v_it(&vectors_);
+ v_it.add_list_after(&dummy_vectors);
+ // Now use the summed (mean) vertical vector as the direction for everything.
+ SetVerticalSkewAndParallelize(vertical_x, vertical_y);
+}
+
+// Helper for FindAllTabVectors. Walks the left or right candidate tab boxes
+// (chosen by alignment) and, for each one still marked TT_MAYBE_ALIGNED on
+// that side, tries to build a tab vector through it.
+// Successful vectors are appended to *vectors; vertical_x and vertical_y are
+// forwarded to FindTabVector. Returns the number of vectors added.
+int TabFind::FindTabVectors(int search_size_multiple, TabAlignment alignment,
+                            int min_gutter_width, TabVector_LIST* vectors,
+                            int* vertical_x, int* vertical_y) {
+  const bool right_side =
+      alignment == TA_RIGHT_ALIGNED || alignment == TA_RIGHT_RAGGED;
+  const GenericVector<BLOBNBOX*>& candidates =
+      right_side ? right_tab_boxes_ : left_tab_boxes_;
+  TabVector_IT out_it(vectors);
+  int found = 0;
+  for (int i = 0; i < candidates.size(); ++i) {
+    BLOBNBOX* candidate = candidates[i];
+    // Only the tab type on the side being searched is relevant.
+    const auto tab_type = right_side ? candidate->right_tab_type()
+                                     : candidate->left_tab_type();
+    if (tab_type != TT_MAYBE_ALIGNED)
+      continue;
+    TabVector* vector =
+        FindTabVector(search_size_multiple, min_gutter_width, alignment,
+                      candidate, vertical_x, vertical_y);
+    if (vector == nullptr)
+      continue;
+    ++found;
+    out_it.add_to_end(vector);
+  }
+  return found;
+}
+
+// Looks for a tab vector of the given alignment running through bbox.
+// search_size_multiple scales the search relative to the height that is
+// used, which is the larger of the blob height and the grid size.
+// vertical_x and vertical_y supply the current vertical direction estimate
+// and are handed to FindVerticalAlignment to be updated (skew finding).
+// Returns nullptr if no decent tabstop can be found.
+TabVector* TabFind::FindTabVector(int search_size_multiple,
+                                  int min_gutter_width,
+                                  TabAlignment alignment,
+                                  BLOBNBOX* bbox,
+                                  int* vertical_x, int* vertical_y) {
+  const int blob_height = static_cast<int>(bbox->bounding_box().height());
+  const int search_height = std::max(blob_height, gridsize());
+  AlignedBlobParams align_params(*vertical_x, *vertical_y, search_height,
+                                 search_size_multiple, min_gutter_width,
+                                 resolution_, alignment);
+  // FindVerticalAlignment lives in the parent (AlignedBlob) class.
+  return FindVerticalAlignment(align_params, bbox, vertical_x, vertical_y);
+}
+
+// Stores (vertical_x, vertical_y) in vertical_skew_, refits every vector in
+// vectors_ to that direction, and then re-sorts the list.
+void TabFind::SetVerticalSkewAndParallelize(int vertical_x, int vertical_y) {
+  // ICOORD is 16 bit, so the vector is shrunk to fit.
+  vertical_skew_.set_with_shrink(vertical_x, vertical_y);
+  if (textord_debug_tabfind) {
+    tprintf("Vertical skew vector=(%d,%d)\n",
+            vertical_skew_.x(), vertical_skew_.y());
+  }
+  v_it_.set_to_list(&vectors_);
+  v_it_.mark_cycle_pt();
+  while (!v_it_.cycled_list()) {
+    v_it_.data()->Fit(vertical_skew_, true);
+    v_it_.forward();
+  }
+  // Refitting may have changed the directions, so the order may be stale.
+  SortVectors();
+}
+
+// Sort all the current vectors in vectors_ with TabVector::SortVectorsByKey,
+// then re-seat the internal iterator v_it_ on the re-ordered list.
+void TabFind::SortVectors() {
+ vectors_.sort(TabVector::SortVectorsByKey);
+ // The list has been re-ordered, so v_it_ must be reset onto it.
+ v_it_.set_to_list(&vectors_);
+}
+
+// Evaluates every non-separator vector in vectors_ and deletes those that
+// end up with fewer than kMinEvaluatedTabs boxes.
+void TabFind::EvaluateTabs() {
+  TabVector_IT tab_it(&vectors_);
+  for (tab_it.mark_cycle_pt(); !tab_it.cycled_list(); tab_it.forward()) {
+    TabVector* tab = tab_it.data();
+    if (tab->IsSeparator())
+      continue;
+    tab->Evaluate(vertical_skew_, this);
+    const bool too_few_boxes = tab->BoxCount() < kMinEvaluatedTabs;
+    if (!too_few_boxes) {
+      if (WithinTestRegion(3, tab->startpt().x(), tab->startpt().y()))
+        tab->Print("Evaluated tab");
+      continue;
+    }
+    if (textord_debug_tabfind > 2)
+      tab->Print("Too few boxes");
+    delete tab_it.extract();
+    // The list was modified, so the internal iterator has to be reset.
+    v_it_.set_to_list(&vectors_);
+  }
+}
+
+// Builds column_widths_, the list of common column widths, in three steps:
+// histogram the widths between the tab vectors either side of each
+// partition, peel the histogram peaks into column_widths_, then make a second
+// pass over the partitions to turn each recorded width into a range.
+// tab_win, if not null, has its pen set and is updated when graphics are
+// compiled in.
+void TabFind::ComputeColumnWidths(ScrollView* tab_win,
+                                  ColPartitionGrid* part_grid) {
+#ifndef GRAPHICS_DISABLED
+  if (tab_win != nullptr) {
+    tab_win->Pen(ScrollView::WHITE);
+  }
+#endif  // !GRAPHICS_DISABLED
+  // Pass 1: accumulate the column widths into a histogram whose buckets are
+  // kColumnWidthFactor pixels wide.
+  const int num_buckets = (tright_.x() - bleft_.x()) / kColumnWidthFactor;
+  STATS width_stats(0, num_buckets + 1);
+  ApplyPartitionsToColumnWidths(part_grid, &width_stats);
+#ifndef GRAPHICS_DISABLED
+  if (tab_win != nullptr)
+    tab_win->Update();
+#endif  // !GRAPHICS_DISABLED
+  if (textord_debug_tabfind > 1) {
+    width_stats.print();
+  }
+  // Convert the histogram into the list of column widths.
+  MakeColumnWidths(num_buckets, &width_stats);
+  // Pass 2: turn each column width into a range.
+  ApplyPartitionsToColumnWidths(part_grid, nullptr);
+}
+
+// Finds the column width of each ColPartition in part_grid, measured between
+// the tab vectors found to the left of its first blob and to the right of
+// its last blob, and then:
+// if col_widths is not null (pass1):
+// pair-up tab vectors with existing ColPartitions and accumulate widths.
+// else (pass2):
+// find the largest real partition width for each recorded column width,
+// to be used as the minimum acceptable width.
+// Widths are handled in units of kColumnWidthFactor pixels.
+void TabFind::ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid,
+ STATS* col_widths) {
+ // For every ColPartition in the part_grid, add partners to the tabvectors
+ // and accumulate the column widths.
+ ColPartitionGridSearch gsearch(part_grid);
+ gsearch.StartFullSearch();
+ ColPartition* part;
+ while ((part = gsearch.NextFullSearch()) != nullptr) {
+ BLOBNBOX_C_IT blob_it(part->boxes());
+ if (blob_it.empty())
+ continue;
+ // The first and last blobs in the partition's list are used as its left
+ // and right ends.
+ BLOBNBOX* left_blob = blob_it.data();
+ blob_it.move_to_last();
+ BLOBNBOX* right_blob = blob_it.data();
+ // Both lookups are made with crossing=true and extended=false.
+ TabVector* left_vector = LeftTabForBox(left_blob->bounding_box(),
+ true, false);
+ // A right tab on the left side cannot bound a column: skip the partition.
+ if (left_vector == nullptr || left_vector->IsRightTab())
+ continue;
+ TabVector* right_vector = RightTabForBox(right_blob->bounding_box(),
+ true, false);
+ // Likewise for a left tab on the right side.
+ if (right_vector == nullptr || right_vector->IsLeftTab())
+ continue;
+
+ // Column edges are the x positions of the vectors at the blob bottoms.
+ int line_left = left_vector->XAtY(left_blob->bounding_box().bottom());
+ int line_right = right_vector->XAtY(right_blob->bounding_box().bottom());
+ // Add to STATS of measurements if the width is significant.
+ int width = line_right - line_left;
+ if (col_widths != nullptr) {
+ // Pass 1: record the partnership, and histogram the width.
+ AddPartnerVector(left_blob, right_blob, left_vector, right_vector);
+ if (width >= kMinColumnWidth)
+ col_widths->add(width / kColumnWidthFactor, 1);
+ } else {
+ // Pass 2: in each column_widths_ entry, y() is the recorded column width
+ // and x() is raised to the largest partition width seen that does not
+ // exceed it. Only the first entry within 1 of this width is updated.
+ width /= kColumnWidthFactor;
+ ICOORDELT_IT it(&column_widths_);
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ ICOORDELT* w = it.data();
+ if (NearlyEqual<int>(width, w->y(), 1)) {
+ int true_width = part->bounding_box().width() / kColumnWidthFactor;
+ if (true_width <= w->y() && true_width > w->x())
+ w->set_x(true_width);
+ break;
+ }
+ }
+ }
+ }
+}
+
+// Helper makes the list of common column widths in column_widths_ from the
+// input col_widths. Destroys the content of col_widths by repeatedly
+// finding the mode and erasing the peak.
+// col_widths_size is the exclusive upper limit on the bucket index reached
+// when a peak is extended to the right.
+// Each accepted peak is appended as an ICOORDELT with x = 0 and y = the mode
+// bucket of the peak.
+void TabFind::MakeColumnWidths(int col_widths_size, STATS* col_widths) {
+ ICOORDELT_IT w_it(&column_widths_);
+ // Taken before any erasing, so fractions are of the original total.
+ int total_col_count = col_widths->get_total();
+ while (col_widths->get_total() > 0) {
+ int width = col_widths->mode();
+ int col_count = col_widths->pile_count(width);
+ // Erase the mode bucket itself.
+ col_widths->add(width, -col_count);
+ // Get the entire peak.
+ // Absorb and erase contiguous non-empty buckets to the left of the mode,
+ // stopping before bucket 0.
+ for (int left = width - 1; left > 0 &&
+ col_widths->pile_count(left) > 0;
+ --left) {
+ int new_count = col_widths->pile_count(left);
+ col_count += new_count;
+ col_widths->add(left, -new_count);
+ }
+ // Absorb and erase contiguous non-empty buckets to the right of the mode,
+ // stopping before bucket col_widths_size.
+ for (int right = width + 1; right < col_widths_size &&
+ col_widths->pile_count(right) > 0;
+ ++right) {
+ int new_count = col_widths->pile_count(right);
+ col_count += new_count;
+ col_widths->add(right, -new_count);
+ }
+ // Keep the peak only if it is big enough, both absolutely and as a
+ // fraction of all the lines that were measured.
+ if (col_count > kMinLinesInColumn &&
+ col_count > kMinFractionalLinesInColumn * total_col_count) {
+ auto* w = new ICOORDELT(0, width);
+ w_it.add_after_then_move(w);
+ if (textord_debug_tabfind)
+ tprintf("Column of width %d has %d = %.2f%% lines\n",
+ width * kColumnWidthFactor, col_count,
+ 100.0 * col_count / total_col_count);
+ }
+ }
+}
+
+// Visits every blob in the grid and sets the region type of each blob that
+// is UniquelyVertical() to BRT_VERT_TEXT. Blobs whose region type is below
+// BRT_UNKNOWN are left alone. Nothing is returned.
+void TabFind::MarkVerticalText() {
+  if (textord_debug_tabfind) {
+    tprintf("Checking for vertical lines\n");
+  }
+  BlobGridSearch full_search(this);
+  full_search.StartFullSearch();
+  for (BLOBNBOX* candidate = full_search.NextFullSearch();
+       candidate != nullptr; candidate = full_search.NextFullSearch()) {
+    if (candidate->region_type() >= BRT_UNKNOWN &&
+        candidate->UniquelyVertical()) {
+      candidate->set_region_type(BRT_VERT_TEXT);
+    }
+  }
+}
+
+// Walks the given list of tab vectors and, for every non-separator left tab
+// that has a single partner, records the x-gap between its start point and
+// the start point of the previous such tab's partner.
+// Returns the median of those gaps as an int, or 0 if fewer than
+// kMinLinesInColumn gaps were collected.
+int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) {
+  const int max_gap = static_cast<int>(kMaxGutterWidthAbsolute * resolution_);
+  STATS gap_stats(0, max_gap);
+  // Distances from each left tab to its partner; only used for debug output.
+  STATS width_stats(0, max_gap);
+  // x of the partner of the previous qualifying tab, or -1 before the first.
+  int prev_right = -1;
+  TabVector_IT line_it(lines);
+  for (line_it.mark_cycle_pt(); !line_it.cycled_list(); line_it.forward()) {
+    TabVector* left_tab = line_it.data();
+    TabVector* partner = left_tab->GetSinglePartner();
+    if (!left_tab->IsLeftTab() || left_tab->IsSeparator() ||
+        partner == nullptr) {
+      continue;
+    }
+    const int left_x = left_tab->startpt().x();
+    const int partner_x = partner->startpt().x();
+    width_stats.add(partner_x - left_x, 1);
+    if (prev_right > 0 && left_x > prev_right) {
+      gap_stats.add(left_x - prev_right, 1);
+    }
+    prev_right = partner_x;
+  }
+  if (textord_debug_tabfind) {
+    tprintf("TabGutter total %d median_gap %.2f median_hgt %.2f\n",
+            gap_stats.get_total(), gap_stats.median(), width_stats.median());
+  }
+  if (gap_stats.get_total() < kMinLinesInColumn)
+    return 0;
+  return static_cast<int>(gap_stats.median());
+}
+
+// Find the next adjacent (looking to the left or right) blob on this text
+// line, with the constraint that it must vertically significantly overlap
+// the [top_y, bottom_y] range.
+// If ignore_images is true, then blobs with region_type() < BRT_UNKNOWN are
+// treated as if they do not exist.
+// min_overlap_fraction is the fraction of the smaller of the two heights
+// that the vertical overlap must exceed; if it is non-zero, the neighbour
+// must also not be of DifferentSizes to the [bottom_y, top_y] range.
+// gap_limit is the largest horizontal gap allowed before the search gives up.
+// Returns the best neighbour found, or nullptr if there is none.
+BLOBNBOX* TabFind::AdjacentBlob(const BLOBNBOX* bbox,
+ bool look_left, bool ignore_images,
+ double min_overlap_fraction,
+ int gap_limit, int top_y, int bottom_y) {
+ GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> sidesearch(this);
+ const TBOX& box = bbox->bounding_box();
+ int left = box.left();
+ int right = box.right();
+ int mid_x = (left + right) / 2;
+ // Search sideways from the middle of the box over the given y range.
+ sidesearch.StartSideSearch(mid_x, bottom_y, top_y);
+ int best_gap = 0;
+ bool debug = WithinTestRegion(3, left, bottom_y);
+ BLOBNBOX* result = nullptr;
+ BLOBNBOX* neighbour = nullptr;
+ while ((neighbour = sidesearch.NextSideSearch(look_left)) != nullptr) {
+ if (debug) {
+ tprintf("Adjacent blob: considering box:");
+ neighbour->bounding_box().print();
+ }
+ // Skip the blob itself and, if requested, anything typed below
+ // BRT_UNKNOWN.
+ if (neighbour == bbox ||
+ (ignore_images && neighbour->region_type() < BRT_UNKNOWN))
+ continue;
+ const TBOX& nbox = neighbour->bounding_box();
+ int n_top_y = nbox.top();
+ int n_bottom_y = nbox.bottom();
+ int v_overlap = std::min(n_top_y, top_y) - std::max(n_bottom_y, bottom_y);
+ int height = top_y - bottom_y;
+ int n_height = n_top_y - n_bottom_y;
+ if (v_overlap > min_overlap_fraction * std::min(height, n_height) &&
+ (min_overlap_fraction == 0.0 || !DifferentSizes(height, n_height))) {
+ int n_left = nbox.left();
+ int n_right = nbox.right();
+ // Horizontal gap between the two boxes; not positive if they overlap
+ // or touch in x.
+ int h_gap = std::max(n_left, left) - std::min(n_right, right);
+ int n_mid_x = (n_left + n_right) / 2;
+ // The neighbour's middle must be strictly on the side being searched.
+ if (look_left == (n_mid_x < mid_x) && n_mid_x != mid_x) {
+ if (h_gap > gap_limit) {
+ // Hit a big gap before next tab so don't return anything.
+ if (debug)
+ tprintf("Giving up due to big gap = %d vs %d\n",
+ h_gap, gap_limit);
+ return result;
+ }
+ // The tab type tested is the one on the neighbour's side that faces
+ // bbox: its right tab when looking left, its left tab otherwise.
+ if (h_gap > 0 && (look_left ? neighbour->right_tab_type()
+ : neighbour->left_tab_type()) >= TT_CONFIRMED) {
+ // Hit a tab facing the wrong way. Stop in case we are crossing
+ // the column boundary.
+ if (debug)
+ tprintf("Collision with like tab of type %d at %d,%d\n",
+ look_left ? neighbour->right_tab_type()
+ : neighbour->left_tab_type(),
+ n_left, nbox.bottom());
+ return result;
+ }
+ // This is a good fit to the line. Continue with this
+ // neighbour as the bbox if the best gap.
+ if (result == nullptr || h_gap < best_gap) {
+ if (debug)
+ tprintf("Good result\n");
+ result = neighbour;
+ best_gap = h_gap;
+ } else {
+ // The new one is worse, so we probably already have the best result.
+ return result;
+ }
+ } else if (debug) {
+ tprintf("Wrong way\n");
+ }
+ } else if (debug) {
+ tprintf("Insufficient overlap\n");
+ }
+ }
+ // NOTE(review): this test uses box.top(), whereas the debug flag above was
+ // computed with bottom_y, so the two conditions can differ.
+ if (WithinTestRegion(3, left, box.top()))
+ tprintf("Giving up due to end of search\n");
+ return result; // Hit the edge and found nothing.
+}
+
+// Add a bi-directional partner relationship between the left
+// and the right. If one (or both) of the vectors is a separator,
+// extend a nearby extendable vector or create a new one of the
+// correct type, using the given left or right blob as a guide.
+// Newly created vectors are added to vectors_ in sorted order, and the
+// internal iterator v_it_ is moved back to the start of the list.
+void TabFind::AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob,
+ TabVector* left, TabVector* right) {
+ const TBOX& left_box = left_blob->bounding_box();
+ const TBOX& right_box = right_blob->bounding_box();
+ if (left->IsSeparator()) {
+ // Try to find a nearby left edge to extend.
+ // This lookup is made with extended=true.
+ TabVector* v = LeftTabForBox(left_box, true, true);
+ // Accept only a real left tab whose x at the box top is greater than the
+ // separator's x there.
+ if (v != nullptr && v != left && v->IsLeftTab() &&
+ v->XAtY(left_box.top()) > left->XAtY(left_box.top())) {
+ left = v; // Found a good replacement.
+ left->ExtendToBox(left_blob);
+ } else {
+ // Fake a vector.
+ left = new TabVector(*left, TA_LEFT_RAGGED, vertical_skew_, left_blob);
+ vectors_.add_sorted(TabVector::SortVectorsByKey, left);
+ v_it_.move_to_first();
+ }
+ }
+ if (right->IsSeparator()) {
+ // Try to find a nearby right edge to extend.
+ if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
+ tprintf("Box edge (%d,%d-%d)",
+ right_box.right(), right_box.bottom(), right_box.top());
+ right->Print(" looking for improvement for");
+ }
+ // This lookup is made with extended=true.
+ TabVector* v = RightTabForBox(right_box, true, true);
+ // Accept only a real right tab whose x at the box top is less than the
+ // separator's x there.
+ if (v != nullptr && v != right && v->IsRightTab() &&
+ v->XAtY(right_box.top()) < right->XAtY(right_box.top())) {
+ right = v; // Found a good replacement.
+ right->ExtendToBox(right_blob);
+ if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
+ right->Print("Extended vector");
+ }
+ } else {
+ // Fake a vector.
+ right = new TabVector(*right, TA_RIGHT_RAGGED, vertical_skew_,
+ right_blob);
+ vectors_.add_sorted(TabVector::SortVectorsByKey, right);
+ v_it_.move_to_first();
+ if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
+ right->Print("Created new vector");
+ }
+ }
+ }
+ // Record the partnership in both directions, using any replacements made.
+ left->AddPartner(right);
+ right->AddPartner(left);
+}
+
+// Moves every separator and every partnerless tab from vectors_ to
+// dead_vectors_. The vectors that stay are fitted and evaluated if needed.
+void TabFind::CleanupTabs() {
+  // TODO(rays) Before getting rid of separators and unused vectors, it
+  // would be useful to try moving ragged vectors outwards to see if this
+  // allows useful extension. Could be combined with checking ends of partners.
+  TabVector_IT live_it(&vectors_);
+  TabVector_IT dead_it(&dead_vectors_);
+  for (live_it.mark_cycle_pt(); !live_it.cycled_list(); live_it.forward()) {
+    TabVector* vector = live_it.data();
+    const bool keep = !vector->IsSeparator() && !vector->Partnerless();
+    if (keep) {
+      vector->FitAndEvaluateIfNeeded(vertical_skew_, this);
+      continue;
+    }
+    dead_it.add_after_then_move(live_it.extract());
+    // The list was modified, so the internal iterator has to be reset.
+    v_it_.set_to_list(&vectors_);
+  }
+}
+
+// Rotates the box of every blob in the given list by the given rotation.
+void TabFind::RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs) {
+  BLOBNBOX_IT blob_it(blobs);
+  blob_it.mark_cycle_pt();
+  while (!blob_it.cycled_list()) {
+    BLOBNBOX* blob = blob_it.data();
+    blob->rotate_box(rotation);
+    blob_it.forward();
+  }
+}
+
+// Computes the deskew/reskew rotations, applies the deskew to the blobs and
+// to the horizontal and dead vectors, and rebuilds the grid from the
+// deskewed blobs.
+// Returns false, before rotating anything, if the detected skew angle is
+// impossible, ie deskew->x() is below kCosMaxSkewAngle.
+bool TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
+                     TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew) {
+  ComputeDeskewVectors(deskew, reskew);
+  const bool skew_too_large = deskew->x() < kCosMaxSkewAngle;
+  if (skew_too_large) {
+    return false;
+  }
+  const FCOORD& rotation = *deskew;
+  BLOBNBOX_LIST* const blob_lists[] = {image_blobs, &block->blobs,
+                                       &block->small_blobs,
+                                       &block->noise_blobs};
+  for (BLOBNBOX_LIST* blob_list : blob_lists) {
+    RotateBlobList(rotation, blob_list);
+  }
+
+  // Only the horizontal and dead vectors are rotated explicitly. The live
+  // vertical vectors don't need rotating as they can just be refitted.
+  TabVector_IT hline_it(hlines);
+  for (hline_it.mark_cycle_pt(); !hline_it.cycled_list();
+       hline_it.forward()) {
+    hline_it.data()->Rotate(rotation);
+  }
+  TabVector_IT dead_it(&dead_vectors_);
+  for (dead_it.mark_cycle_pt(); !dead_it.cycled_list(); dead_it.forward()) {
+    dead_it.data()->Rotate(rotation);
+  }
+  SetVerticalSkewAndParallelize(0, 1);
+  // Re-initialize the grid to cover the rotated page box, and re-insert the
+  // image blobs and the main blobs.
+  TBOX rotated_grid_box(bleft_, tright_);
+  rotated_grid_box.rotate_large(rotation);
+  Init(gridsize(), rotated_grid_box.botleft(), rotated_grid_box.topright());
+  InsertBlobsToGrid(false, false, image_blobs, this);
+  InsertBlobsToGrid(true, false, &block->blobs, this);
+  return true;
+}
+
+// Flip the vertical and horizontal lines and rotate the grid ready
+// for working on the rotated image.
+// This also makes parameter adjustments for FindInitialTabVectors().
+// rotate is the rotation applied to the vectors and to the grid box.
+// rerotate is not used by this function.
+// On return horizontal_lines holds the rotated ex-vertical separators, and
+// *min_gutter_width has been raised to the median gutter width of the old
+// vertical tabs if that is larger.
+void TabFind::ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate,
+ TabVector_LIST* horizontal_lines,
+ int* min_gutter_width) {
+ // Rotate the horizontal and vertical vectors and swap them over.
+ // Only the separators are kept and rotated; other tabs are used
+ // to estimate the gutter width then thrown away.
+ TabVector_LIST ex_verticals;
+ TabVector_IT ex_v_it(&ex_verticals);
+ TabVector_LIST vlines;
+ TabVector_IT v_it(&vlines);
+ // Empty the vector list through v_it_: separators go (rotated) to
+ // ex_verticals, everything else to the local vlines list.
+ while (!v_it_.empty()) {
+ TabVector* v = v_it_.extract();
+ if (v->IsSeparator()) {
+ v->Rotate(rotate);
+ ex_v_it.add_after_then_move(v);
+ } else {
+ v_it.add_after_then_move(v);
+ }
+ v_it_.forward();
+ }
+
+ // Adjust the min gutter width for better tabbox selection
+ // in 2nd call to FindInitialTabVectors().
+ int median_gutter = FindMedianGutterWidth(&vlines);
+ if (median_gutter > *min_gutter_width)
+ *min_gutter_width = median_gutter;
+
+ // Rotate the horizontal lines and move them into the (now empty) vector
+ // list, then put the rotated ex-verticals into horizontal_lines.
+ TabVector_IT h_it(horizontal_lines);
+ for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
+ TabVector* h = h_it.data();
+ h->Rotate(rotate);
+ }
+ v_it_.add_list_after(horizontal_lines);
+ v_it_.move_to_first();
+ h_it.set_to_list(horizontal_lines);
+ h_it.add_list_after(&ex_verticals);
+
+ // Rebuild the grid to the new size.
+ TBOX grid_box(bleft(), tright());
+ grid_box.rotate_large(rotate);
+ Init(gridsize(), grid_box.botleft(), grid_box.topright());
+}
+
+// Deletes every non-separator tab vector reachable through v_it_, leaving
+// the separators in place, and then clears the grid ready to start again.
+void TabFind::Reset() {
+  v_it_.move_to_first();
+  v_it_.mark_cycle_pt();
+  while (!v_it_.cycled_list()) {
+    const bool is_separator = v_it_.data()->IsSeparator();
+    if (!is_separator) {
+      delete v_it_.extract();
+    }
+    v_it_.forward();
+  }
+  Clear();
+}
+
+// Reflects the separator tab vectors and the grid bounds in the y-axis.
+// Can only be called after Reset!
+void TabFind::ReflectInYAxis() {
+  TabVector_LIST reflected;
+  TabVector_IT reflected_it(&reflected);
+  v_it_.move_to_first();
+  // The TabVector list only contains vertical lines. Each is reflected and
+  // inserted before the previous one, which reverses the list so that it is
+  // still in sort_key order.
+  while (!v_it_.empty()) {
+    TabVector* vector = v_it_.extract();
+    v_it_.forward();
+    vector->ReflectInYAxis();
+    reflected_it.add_before_then_move(vector);
+  }
+  v_it_.add_list_after(&reflected);
+  v_it_.move_to_first();
+  // Re-initialize this grid with the reflected bounds: the new left is minus
+  // the old right, and the new right is minus the old left.
+  TBOX grid_box(bleft(), tright());
+  const int old_left = grid_box.left();
+  const int old_right = grid_box.right();
+  grid_box.set_left(-old_right);
+  grid_box.set_right(-old_left);
+  Init(gridsize(), grid_box.botleft(), grid_box.topright());
+}
+
+// Compute the rotation required to deskew, and its inverse rotation.
+// *deskew is set to (y, x) of vertical_skew_ scaled to unit length, and
+// *reskew to the same vector with its y component negated.
+// If vertical_skew_ has zero length, the identity rotation (1, 0) is used
+// instead of dividing by zero.
+void TabFind::ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew) {
+  // vertical_skew_ % vertical_skew_ is taken to be the squared length of the
+  // skew vector (presumably ICOORD's scalar product - confirm in points.h).
+  double length = vertical_skew_ % vertical_skew_;
+  length = sqrt(length);
+  if (length > 0.0) {
+    deskew->set_x(static_cast<float>(vertical_skew_.y() / length));
+    deskew->set_y(static_cast<float>(vertical_skew_.x() / length));
+  } else {
+    // Degenerate skew vector: dividing would give NaN/inf components, which
+    // would not fail a "less than" test on the result. Fall back to the
+    // rotation that corresponds to an exactly vertical (0, 1) skew.
+    deskew->set_x(1.0f);
+    deskew->set_y(0.0f);
+  }
+  reskew->set_x(deskew->x());
+  reskew->set_y(-deskew->y());
+}
+
+// Sets up and then applies constraints on the end positions of the
+// TabVectors so that, where possible, partners end at the same y coordinate.
+void TabFind::ApplyTabConstraints() {
+  TabVector_IT it(&vectors_);
+  // Pass 1: per-vector constraint setup.
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    it.data()->SetupConstraints();
+  }
+  // Pass 2: front-to-front partners. With the first and last partner, we
+  // want a common bottom and top, respectively, and for each change of
+  // partner, we want a common top of first with bottom of next.
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    it.data()->SetupPartnerConstraints();
+  }
+  // Pass 3: back-to-back pairs.
+  // TODO(rays) The back-to-back pairs should really be done like the
+  // front-to-front pairs, but there is no convenient way of producing the
+  // list of partners like there is with the front-to-front.
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    TabVector* right_tab = it.data();
+    if (right_tab->IsRightTab()) {
+      // Pair this right tab with every vertically overlapping left tab that
+      // comes later in the list, trying for a common top and bottom.
+      TabVector_IT later_it(it);
+      later_it.forward();
+      while (!later_it.at_first()) {
+        TabVector* left_tab = later_it.data();
+        if (left_tab->IsLeftTab() && right_tab->VOverlap(*left_tab)) {
+          right_tab->SetupPartnerConstraints(left_tab);
+        }
+        later_it.forward();
+      }
+    }
+  }
+  // Pass 4: actually apply the constraints to get common start/end points.
+  // Separators are left untouched.
+  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+    TabVector* vector = it.data();
+    if (vector->IsSeparator())
+      continue;
+    vector->ApplyConstraints();
+  }
+  // TODO(rays) Where constraint application fails, it would be good to try
+  // checking the ends to see if they really should be moved.
+}
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/tabfind.h b/tesseract/src/textord/tabfind.h
new file mode 100644
index 00000000..d16a533c
--- /dev/null
+++ b/tesseract/src/textord/tabfind.h
@@ -0,0 +1,384 @@
+///////////////////////////////////////////////////////////////////////
+// File: tabfind.h
+// Description: Subclass of BBGrid to find tabstops.
+// Author: Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_TABFIND_H_
+#define TESSERACT_TEXTORD_TABFIND_H_
+
+#include <functional> // for std::function
+#include "alignedblob.h"
+#include "tabvector.h"
+#include "linefind.h"
+
+class BLOBNBOX;
+class BLOBNBOX_LIST;
+class TO_BLOCK;
+class ScrollView;
+struct Pix;
+
+namespace tesseract {
+
+/** Callable that takes an int and returns a bool; see TabFind::WidthCB(). */
+using WidthCallback = std::function<bool(int)>;
+
+// Forward declarations of types that are defined elsewhere.
+struct AlignedBlobParams;
+class ColPartitionGrid;
+
+/**
+ * Pixel resolution of column width estimates: column widths are divided by
+ * this factor before they are accumulated and compared.
+ */
+const int kColumnWidthFactor = 20;
+
+/**
+ * The TabFind class contains code to find tab-stops and maintain the
+ * vectors_ list of tab vectors.
+ * Also provides an interface to find neighbouring blobs
+ * in the grid of BLOBNBOXes that is used by multiple subclasses.
+ * Searching is a complex operation because of the need to enforce
+ * rule/separator lines, and tabstop boundaries, (when available), so
+ * as the holder of the list of TabVectors this class provides the functions.
+ */
+class TESS_API TabFind : public AlignedBlob {
+ public:
+ TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright,
+ TabVector_LIST* vlines, int vertical_x, int vertical_y,
+ int resolution);
+ ~TabFind() override;
+
+ /**
+ * Insert a list of blobs into the given grid (not necessarily this).
+ * See InsertBlob for the other arguments.
+ * It would seem to make more sense to swap this and grid, but this way
+ * around allows grid to not be derived from TabFind, eg a ColPartitionGrid,
+ * while the grid that provides the tab stops(this) has to be derived from
+ * TabFind.
+ */
+ void InsertBlobsToGrid(bool h_spread, bool v_spread,
+ BLOBNBOX_LIST* blobs,
+ BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);
+
+ /**
+ * Insert a single blob into the given grid (not necessarily this).
+ * If h_spread, then all cells covered horizontally by the box are
+ * used, otherwise, just the bottom-left. Similarly for v_spread.
+ * A side effect is that the left and right rule edges of the blob are
+ * set according to the tab vectors in this (not grid).
+ */
+ bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX* blob,
+ BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);
+ // Calls SetBlobRuleEdges for all the blobs in the given block.
+ void SetBlockRuleEdges(TO_BLOCK* block);
+ // Sets the left and right rule and crossing_rules for the blobs in the given
+ // list by finding the next outermost tabvectors for each blob.
+ void SetBlobRuleEdges(BLOBNBOX_LIST* blobs);
+
+ // Returns the gutter width of the given TabVector between the given y limits.
+ // Also returns x-shift to be added to the vector to clear any intersecting
+ // blobs. The shift is deducted from the returned gutter.
+ // If ignore_unmergeables is true, then blobs of UnMergeableType are
+ // ignored as if they don't exist. (Used for text on image.)
+ // max_gutter_width is used as the maximum width worth searching for in case
+ // there is nothing near the TabVector.
+ int GutterWidth(int bottom_y, int top_y, const TabVector& v,
+ bool ignore_unmergeables, int max_gutter_width,
+ int* required_shift);
+ /**
+ * Find the gutter width and distance to inner neighbour for the given blob.
+ */
+ void GutterWidthAndNeighbourGap(int tab_x, int mean_height,
+ int max_gutter, bool left,
+ BLOBNBOX* bbox, int* gutter_width,
+ int* neighbour_gap);
+
+ /**
+ * Return the x-coord that corresponds to the right edge for the given
+ * box. If there is a rule line to the right that vertically overlaps it,
+ * then return the x-coord of the rule line, otherwise return the right
+ * edge of the page. For details see RightTabForBox below.
+ */
+ int RightEdgeForBox(const TBOX& box, bool crossing, bool extended);
+ /**
+ * As RightEdgeForBox, but finds the left Edge instead.
+ */
+ int LeftEdgeForBox(const TBOX& box, bool crossing, bool extended);
+
+ /**
+ * Return the TabVector that corresponds to the right edge for the given
+ * box. If there is a TabVector to the right that vertically overlaps it,
+ * then return it, otherwise return nullptr. Note that Right and Left refer
+ * to the position of the TabVector, not its type, ie RightTabForBox
+ * returns the nearest TabVector to the right of the box, regardless of
+ * its type.
+ * If a TabVector crosses right through the box (as opposed to grazing one
+ * edge or missing entirely), then crossing false will ignore such a line.
+ * Crossing true will return the line for BOTH left and right edges.
+ * If extended is true, then TabVectors are considered to extend to their
+ * extended_start/end_y, otherwise, just the startpt_ and endpt_.
+ * These functions make use of an internal iterator to the vectors_ list
+ * for speed when used repeatedly on neighbouring boxes. The caveat is
+ * that the iterator must be updated whenever the list is modified.
+ */
+ TabVector* RightTabForBox(const TBOX& box, bool crossing, bool extended);
+ /**
+ * As RightTabForBox, but finds the left TabVector instead.
+ */
+ TabVector* LeftTabForBox(const TBOX& box, bool crossing, bool extended);
+
+ /**
+ * Return true if the given width is close to one of the common
+ * widths in column_widths_.
+ */
+ bool CommonWidth(int width);
+ /**
+ * Return true if the sizes are more than a
+ * factor of 2 different.
+ */
+ static bool DifferentSizes(int size1, int size2);
+ /**
+ * Return true if the sizes are more than a
+ * factor of 5 different.
+ */
+ static bool VeryDifferentSizes(int size1, int size2);
+
+ /**
+ * Return a callback for testing CommonWidth.
+ */
+ WidthCallback WidthCB() {
+ return width_cb_;
+ }
+
+ /**
+ * Return the coords at which to draw the image backdrop.
+ */
+ const ICOORD& image_origin() const {
+ return image_origin_;
+ }
+
+ protected:
+ /**
+ * Accessors
+ */
+ TabVector_LIST* vectors() {
+ return &vectors_;
+ }
+ TabVector_LIST* dead_vectors() {
+ return &dead_vectors_;
+ }
+
+ /**
+ * Top-level function to find TabVectors in an input page block.
+ * Returns false if the detected skew angle is impossible.
+ * Applies the detected skew angle to deskew the tabs, blobs and part_grid.
+ * tabfind_aligned_gap_fraction should be the value of parameter
+ * textord_tabfind_aligned_gap_fraction
+ */
+ bool FindTabVectors(TabVector_LIST* hlines,
+ BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
+ int min_gutter_width, double tabfind_aligned_gap_fraction,
+ ColPartitionGrid* part_grid,
+ FCOORD* deskew, FCOORD* reskew);
+
+ // Top-level function to not find TabVectors in an input page block,
+ // but setup for single column mode.
+ void DontFindTabVectors(BLOBNBOX_LIST* image_blobs,
+ TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew);
+
+ // Cleans up the lists of blobs in the block ready for use by TabFind.
+ // Large blobs that look like text are moved to the main blobs list.
+ // Main blobs that are superseded by the image blobs are deleted.
+ void TidyBlobs(TO_BLOCK* block);
+
+ // Helper function to setup search limits for *TabForBox.
+ void SetupTabSearch(int x, int y, int* min_key, int* max_key);
+
+ /**
+ * Display the tab vectors found in this grid.
+ */
+ ScrollView* DisplayTabVectors(ScrollView* tab_win);
+
+ // First part of FindTabVectors, which may be used twice if the text
+ // is mostly of vertical alignment. If find_vertical_text flag is
+ // true, this finds vertical textlines in possibly rotated blob space.
+ // In other words, when the page has mostly vertical lines and is rotated,
+ // setting this to true will find horizontal lines on the page.
+ // tabfind_aligned_gap_fraction should be the value of parameter
+ // textord_tabfind_aligned_gap_fraction
+ ScrollView* FindInitialTabVectors(BLOBNBOX_LIST* image_blobs,
+ int min_gutter_width,
+ double tabfind_aligned_gap_fraction,
+ TO_BLOCK* block);
+
+ // Apply the given rotation to the given list of blobs.
+ static void RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs);
+
+ // Flip the vertical and horizontal lines and rotate the grid ready
+ // for working on the rotated image.
+ // The min_gutter_width will be adjusted to the median gutter width between
+ // vertical tabs to set a better threshold for tabboxes in the 2nd pass.
+ void ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate,
+ TabVector_LIST* horizontal_lines,
+ int* min_gutter_width);
+
+ // Clear the grid and get rid of the tab vectors, but not separators,
+ // ready to start again.
+ void Reset();
+
+ // Reflect the separator tab vectors and the grids in the y-axis.
+ // Can only be called after Reset!
+ void ReflectInYAxis();
+
+ private:
+ // For each box in the grid, decide whether it is a candidate tab-stop,
+ // and if so add it to the left and right tab boxes.
+ // tabfind_aligned_gap_fraction should be the value of parameter
+ // textord_tabfind_aligned_gap_fraction
+ ScrollView* FindTabBoxes(int min_gutter_width,
+ double tabfind_aligned_gap_fraction);
+
+ // Return true if this box looks like a candidate tab stop, and set
+ // the appropriate tab type(s) to TT_UNCONFIRMED.
+ // tabfind_aligned_gap_fraction should be the value of parameter
+ // textord_tabfind_aligned_gap_fraction
+ bool TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width,
+ double tabfind_aligned_gap_fraction);
+
+ // Returns true if there is nothing in the rectangle of width min_gutter to
+ // the left of bbox.
+ bool ConfirmRaggedLeft(BLOBNBOX* bbox, int min_gutter);
+ // Returns true if there is nothing in the rectangle of width min_gutter to
+ // the right of bbox.
+ bool ConfirmRaggedRight(BLOBNBOX* bbox, int min_gutter);
+ // Returns true if there is nothing in the given search_box that vertically
+ // overlaps target_box other than target_box itself.
+ bool NothingYOverlapsInBox(const TBOX& search_box, const TBOX& target_box);
+
+ // Fills the list of TabVector with the tabstops found in the grid,
+ // and estimates the logical vertical direction.
+ void FindAllTabVectors(int min_gutter_width);
+ // Helper for FindAllTabVectors finds the vectors of a particular type.
+ int FindTabVectors(int search_size_multiple,
+ TabAlignment alignment,
+ int min_gutter_width,
+ TabVector_LIST* vectors,
+ int* vertical_x, int* vertical_y);
+ // Finds a vector corresponding to a tabstop running through the
+ // given box of the given alignment type.
+ // search_size_multiple is a multiple of height used to control
+ // the size of the search.
+ // vertical_x and y are updated with an estimate of the real
+ // vertical direction. (skew finding.)
+ // Returns nullptr if no decent tabstop can be found.
+ TabVector* FindTabVector(int search_size_multiple, int min_gutter_width,
+ TabAlignment alignment,
+ BLOBNBOX* bbox,
+ int* vertical_x, int* vertical_y);
+
+ // Set the vertical_skew_ member from the given vector and refit
+ // all vectors parallel to the skew vector.
+ void SetVerticalSkewAndParallelize(int vertical_x, int vertical_y);
+
+ // Sort all the current vectors using the vertical_skew_ vector.
+ void SortVectors();
+
+ // Evaluate all the current tab vectors.
+ void EvaluateTabs();
+
+ // Trace textlines from one side to the other of each tab vector, saving
+ // the most frequent column widths found in a list so that a given width
+ // can be tested for being a common width with a simple callback function.
+ void ComputeColumnWidths(ScrollView* tab_win,
+ ColPartitionGrid* part_grid);
+
+ // Finds column width and:
+ // if col_widths is not null (pass1):
+ // pair-up tab vectors with existing ColPartitions and accumulate widths.
+ // else (pass2):
+ // find the largest real partition width for each recorded column width,
+ // to be used as the minimum acceptable width.
+ void ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid,
+ STATS* col_widths);
+
+ // Helper makes the list of common column widths in column_widths_ from the
+ // input col_widths. Destroys the content of col_widths by repeatedly
+ // finding the mode and erasing the peak.
+ void MakeColumnWidths(int col_widths_size, STATS* col_widths);
+
+ // Mark blobs as being in a vertical text line where that is the case.
+ void MarkVerticalText();
+
+ // Returns the median gutter width between pairs of matching tab vectors
+ // assuming they are sorted left-to-right. If there are too few data
+ // points (< kMinLinesInColumn), then 0 is returned.
+ int FindMedianGutterWidth(TabVector_LIST* tab_vectors);
+
+ // Find the next adjacent (to left or right) blob on this text line,
+ // with the constraint that it must vertically significantly overlap
+ // the [top_y, bottom_y] range.
+ // If ignore_images is true, then blobs with aligned_text() < 0 are treated
+ // as if they do not exist.
+ BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox,
+ bool look_left, bool ignore_images,
+ double min_overlap_fraction,
+ int gap_limit, int top_y, int bottom_y);
+
+ // Add a bi-directional partner relationship between the left
+ // and the right. If one (or both) of the vectors is a separator,
+ // extend a nearby extendable vector or create a new one of the
+ // correct type, using the given left or right blob as a guide.
+ void AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob,
+ TabVector* left, TabVector* right);
+
+ /**
+ * Remove separators and unused tabs from the main vectors_ list
+ * to the dead_vectors_ list.
+ */
+ void CleanupTabs();
+
+ /**
+ * Deskew the tab vectors and blobs, computing the rotation and resetting
+ * the stored vertical_skew_. The deskew inverse is returned in reskew.
+ * Returns false if the detected skew angle is impossible.
+ */
+ bool Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
+ TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew);
+
+ // Compute the rotation required to deskew, and its inverse rotation.
+ void ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew);
+
+ /**
+ * Compute and apply constraints to the end positions of TabVectors so
+ * that where possible partners end at the same y coordinate.
+ */
+ void ApplyTabConstraints();
+
+ protected:
+ ICOORD vertical_skew_; ///< Estimate of true vertical in this image.
+ int resolution_; ///< Of source image in pixels per inch.
+ private:
+ ICOORD image_origin_; ///< Top-left of image in deskewed coords
+ TabVector_LIST vectors_; ///< List of rule line and tabstops.
+ TabVector_IT v_it_; ///< Iterator for searching vectors_.
+ TabVector_LIST dead_vectors_; ///< Separators and unpartnered tab vectors.
+ // List of commonly occurring width ranges with x=min and y=max.
+ ICOORDELT_LIST column_widths_; ///< List of commonly occurring width ranges.
+ /** Callback to test an int for being a common width. */
+ WidthCallback width_cb_;
+ // Sets of bounding boxes that are candidate tab stops.
+ GenericVector<BLOBNBOX*> left_tab_boxes_;
+ GenericVector<BLOBNBOX*> right_tab_boxes_;
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_TABFIND_H_
diff --git a/tesseract/src/textord/tablefind.cpp b/tesseract/src/textord/tablefind.cpp
new file mode 100644
index 00000000..6326b858
--- /dev/null
+++ b/tesseract/src/textord/tablefind.cpp
@@ -0,0 +1,2088 @@
+///////////////////////////////////////////////////////////////////////
+// File: tablefind.cpp
+// Description: Helper classes to find tables from ColPartitions.
+// Author: Faisal Shafait (faisal.shafait@dfki.de)
+//
+// (C) Copyright 2009, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "tablefind.h"
+#include <algorithm>
+#include <cmath>
+
+#include "allheaders.h"
+
+#include "colpartitionset.h"
+#include "tablerecog.h"
+
+namespace tesseract {
+
+// These numbers are used to calculate the global median stats.
+// They just set an upper bound on the stats objects.
+// Maximum vertical spacing between neighbor partitions.
+const int kMaxVerticalSpacing = 500;
+// Maximum width of a blob in a partition.
+const int kMaxBlobWidth = 500;
+
+// Minimum whitespace size to split a partition (measured as a multiple
+// of a partition's median width).
+const double kSplitPartitionSize = 2.0;
+// To insert text, the partition must satisfy these size constraints
+// in AllowTextPartition(). The idea is to filter noise partitions
+// determined by the size compared to the global medians.
+// TODO(nbeato): Need to find good numbers again.
+const double kAllowTextHeight = 0.5;
+const double kAllowTextWidth = 0.6;
+const double kAllowTextArea = 0.8;
+// The same thing applies to blobs (to filter noise) in AllowBlob().
+// TODO(nbeato): These numbers are a shot in the dark...
+// height and width are 0.5 * gridsize() in colfind.cpp
+// area is a rough guess for the size of a period.
+const double kAllowBlobHeight = 0.3;
+const double kAllowBlobWidth = 0.4;
+const double kAllowBlobArea = 0.05;
+
+// Minimum number of components in a text partition. A partition having fewer
+// components than that is more likely a data partition and is a candidate
+// table cell.
+const int kMinBoxesInTextPartition = 10;
+
+// Maximum number of components that a data partition can have.
+const int kMaxBoxesInDataPartition = 20;
+
+// Maximum allowed gap in a text partition as a multiple of its median size.
+const double kMaxGapInTextPartition = 4.0;
+
+// Minimum value that the maximum gap in a text partition should have as a
+// factor of its median size.
+const double kMinMaxGapInTextPartition = 0.5;
+
+// The amount of overlap that is "normal" for adjacent blobs in a text
+// partition. This is used to calculate gap between overlapping blobs.
+const double kMaxBlobOverlapFactor = 4.0;
+
+// Maximum x-height a table partition can have as a multiple of global
+// median x-height.
+const double kMaxTableCellXheight = 2.0;
+
+// Maximum line spacing between a table column header and column contents
+// for merging the two (as a multiple of the partition's median_height).
+const int kMaxColumnHeaderDistance = 4;
+
+// Minimum ratio of num_table_partitions to num_text_partitions in a column
+// block for it to be called a table column.
+const double kTableColumnThreshold = 3.0;
+
+// Search for horizontal ruling lines within the vertical margin as a
+// multiple of grid size
+// const int kRulingVerticalMargin = 3;
+
+// Minimum overlap that a colpartition must have with a table region
+// to become part of that table.
+const double kMinOverlapWithTable = 0.6;
+
+// Maximum side space (distance from column boundary) that a typical
+// text-line in flowing text should have as a multiple of its x-height
+// (Median size).
+const int kSideSpaceMargin = 10;
+
+// Fraction of the peak of x-projection of a table region to set the
+// threshold for the x-projection histogram.
+const double kSmallTableProjectionThreshold = 0.35;
+const double kLargeTableProjectionThreshold = 0.45;
+// Minimum number of rows required to look for more rows in the projection.
+const int kLargeTableRowCount = 6;
+
+// Minimum number of rows in a table.
+const int kMinRowsInTable = 3;
+
+// The amount of padding (multiplied by global_median_xheight_ during use)
+// that is added vertically to the adjacent leader search during
+// ColPartition marking.
+const int kAdjacentLeaderSearchPadding = 2;
+
+// Used when filtering false positives. When finding the last line
+// of a paragraph (typically left-aligned), the previous line should have
+// its center to the right of the last line by this scaled amount.
+const double kParagraphEndingPreviousLineRatio = 1.3;
+
+// The maximum amount of whitespace allowed left of a paragraph ending.
+// Do not filter a ColPartition with more than this space left of it.
+const double kMaxParagraphEndingLeftSpaceMultiple = 3.0;
+
+// Used when filtering false positives. The last line of a paragraph
+// should be preceded by a line that is predominantly text. This is the
+// ratio of text to whitespace (to the right of the text) that is required
+// for the previous line to be considered text.
+const double kMinParagraphEndingTextToWhitespaceRatio = 3.0;
+
+// When counting table columns, this is the required gap between two columns
+// (it is multiplied by global_median_xheight_).
+const double kMaxXProjectionGapFactor = 2.0;
+
+// Used for similarity in partitions using stroke width. Values copied
+// from ColFind.cpp in Ray's CL.
+const double kStrokeWidthFractionalTolerance = 0.25;
+const double kStrokeWidthConstantTolerance = 2.0;
+
+#ifndef GRAPHICS_DISABLED
+static BOOL_VAR(textord_show_tables, false, "Show table regions (ScrollView)");
+static BOOL_VAR(textord_tablefind_show_mark, false,
+ "Debug table marking steps in detail (ScrollView)");
+static BOOL_VAR(textord_tablefind_show_stats, false,
+ "Show page stats used in table finding (ScrollView)");
+#endif // !GRAPHICS_DISABLED
+static BOOL_VAR(textord_tablefind_recognize_tables, false,
+ "Enables the table recognizer for table layout and filtering.");
+
+ELISTIZE(ColSegment)
+CLISTIZE(ColSegment)
+
+// Generic deleter: frees one heap-allocated object of any type T.
+// Instantiations of it are passed to BBGrid::ClearGridData() as the
+// per-element destructor callback.
+template <typename T>
+void DeleteObject(T *object) {
+  delete object;
+}
+
+// Builds a finder with every page statistic zeroed and the reading
+// direction defaulted to left-to-right.
+TableFinder::TableFinder()
+    : resolution_(0),
+      global_median_xheight_(0),
+      global_median_blob_width_(0),
+      global_median_ledding_(0),
+      left_to_right_language_(true) {}
+
+// Frees the grid contents that this class allocated itself.
+TableFinder::~TableFinder() {
+  // Grids holding ColPartitions created by this class.
+  clean_part_grid_.ClearGridData(&DeleteObject<ColPartition>);
+  leader_and_ruling_grid_.ClearGridData(&DeleteObject<ColPartition>);
+  fragmented_text_grid_.ClearGridData(&DeleteObject<ColPartition>);
+  // Grids holding ColSegments created by this class.
+  col_seg_grid_.ClearGridData(&DeleteObject<ColSegment>);
+  table_grid_.ClearGridData(&DeleteObject<ColSegment>);
+}
+
+// Stores the caller's reading-direction flag in left_to_right_language_
+// (the constructor defaults it to true).
+void TableFinder::set_left_to_right_language(bool order) {
+  left_to_right_language_ = order;
+}
+
+// Gives all five internal grids the same cell size and the same page
+// bounds (bottom_left .. top_right).
+void TableFinder::Init(int grid_size, const ICOORD& bottom_left,
+                       const ICOORD& top_right) {
+  // Grids of ColPartitions.
+  clean_part_grid_.Init(grid_size, bottom_left, top_right);
+  leader_and_ruling_grid_.Init(grid_size, bottom_left, top_right);
+  fragmented_text_grid_.Init(grid_size, bottom_left, top_right);
+  // Grids of ColSegments.
+  col_seg_grid_.Init(grid_size, bottom_left, top_right);
+  table_grid_.Init(grid_size, bottom_left, top_right);
+}
+
+// Copy cleaned partitions from the given grid to clean_part_grid_ and
+// insert leaders and ruling lines into the leader_and_ruling_grid_.
+void TableFinder::InsertCleanPartitions(ColPartitionGrid* grid,
+ TO_BLOCK* block) { // block is not referenced in this function body.
+ // Calculate stats. This lets us filter partitions in AllowTextPartition()
+ // and filter blobs in AllowBlob().
+ SetGlobalSpacings(grid);
+
+ // Iterate the ColPartitions in the grid.
+ ColPartitionGridSearch gsearch(grid);
+ gsearch.SetUniqueMode(true);
+ gsearch.StartFullSearch();
+ ColPartition* part = nullptr;
+ while ((part = gsearch.NextFullSearch()) != nullptr) {
+ // Reject noise partitions and partitions whose bounding box has no area.
+ if (part->blob_type() == BRT_NOISE || part->bounding_box().area() <= 0)
+ continue;
+ ColPartition* clean_part = part->ShallowCopy(); // Handed to an Insert*Partition() call below.
+ ColPartition* leader_part = nullptr; // Created lazily on the first leader blob.
+ if (part->IsLineType()) {
+ InsertRulingPartition(clean_part);
+ continue;
+ }
+ // Insert all non-text partitions into clean_part_grid_ as they are.
+ if (!part->IsTextType()) {
+ InsertImagePartition(clean_part);
+ continue;
+ }
+ // Insert text colpartitions after removing noisy components from them.
+ // The leaders are split into a separate grid.
+ BLOBNBOX_CLIST* part_boxes = part->boxes();
+ BLOBNBOX_C_IT pit(part_boxes);
+ for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) {
+ BLOBNBOX *pblob = pit.data();
+ // Bad blobs... happens in UNLV set.
+ // news.3G1, page 17 (around x=6)
+ if (!AllowBlob(*pblob))
+ continue;
+ if (pblob->flow() == BTFT_LEADER) {
+ if (leader_part == nullptr) {
+ leader_part = part->ShallowCopy();
+ leader_part->set_flow(BTFT_LEADER);
+ }
+ leader_part->AddBox(pblob);
+ } else if (pblob->region_type() != BRT_NOISE) {
+ clean_part->AddBox(pblob);
+ }
+ }
+ clean_part->ComputeLimits();
+ ColPartition* fragmented = clean_part->CopyButDontOwnBlobs(); // Copy taken before clean_part may be deleted.
+ InsertTextPartition(clean_part); // Inserts into clean_part_grid_ or deletes clean_part.
+ SplitAndInsertFragmentedTextPartition(fragmented);
+ if (leader_part != nullptr) {
+ // TODO(nbeato): Note that ComputeLimits does not update the column
+ // information. So the leader may appear to span more columns than it
+ // really does later on when IsInSameColumnAs gets called to test
+ // for adjacent leaders.
+ leader_part->ComputeLimits();
+ InsertLeaderPartition(leader_part);
+ }
+ }
+
+ // Make the partition partners better for upper and lower neighbors.
+ clean_part_grid_.FindPartitionPartners();
+ clean_part_grid_.RefinePartitionPartners(false);
+}
+
+// High level function to perform table detection on the internal grids.
+void TableFinder::LocateTables(ColPartitionGrid* grid,
+ ColPartitionSet** all_columns,
+ WidthCallback width_cb,
+ const FCOORD& reskew) { // reskew is not referenced in this function body.
+ // initialize spacing, neighbors, and columns
+ InitializePartitions(all_columns);
+
+#ifndef GRAPHICS_DISABLED
+ if (textord_show_tables) {
+ ScrollView* table_win = MakeWindow(0, 300, "Column Partitions & Neighbors");
+ DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
+ DisplayColPartitions(table_win, &leader_and_ruling_grid_,
+ ScrollView::AQUAMARINE);
+ DisplayColPartitionConnections(table_win, &clean_part_grid_,
+ ScrollView::ORANGE);
+
+ table_win = MakeWindow(100, 300, "Fragmented Text");
+ DisplayColPartitions(table_win, &fragmented_text_grid_, ScrollView::BLUE);
+ }
+#endif // !GRAPHICS_DISABLED
+
+ // mark, filter, and smooth candidate table partitions
+ MarkTablePartitions();
+
+ // Make single-column blocks from the all_columns partitions. col_segments
+ // are moved to a grid later, which takes ownership of them.
+ ColSegment_LIST column_blocks;
+ GetColumnBlocks(all_columns, &column_blocks);
+ // Set the ratio of candidate table partitions in each column
+ SetColumnsType(&column_blocks);
+
+ // Move column segments to col_seg_grid_
+ MoveColSegmentsToGrid(&column_blocks, &col_seg_grid_);
+
+ // Detect split in column layout that might have occurred due to the
+ // presence of a table. In such a case, merge the corresponding columns.
+ GridMergeColumnBlocks();
+
+ // Group horizontally overlapping table partitions into table columns.
+ // table_columns created here get deleted at the end of this method.
+ ColSegment_LIST table_columns;
+ GetTableColumns(&table_columns);
+
+ // Within each column, mark the range table regions occupy based on the
+ // table columns detected. table_regions are moved to a grid later, which
+ // takes ownership of them.
+ ColSegment_LIST table_regions;
+ GetTableRegions(&table_columns, &table_regions);
+
+#ifndef GRAPHICS_DISABLED
+ if (textord_tablefind_show_mark) {
+ ScrollView* table_win = MakeWindow(1200, 300, "Table Columns and Regions");
+ DisplayColSegments(table_win, &table_columns, ScrollView::DARK_TURQUOISE);
+ DisplayColSegments(table_win, &table_regions, ScrollView::YELLOW);
+ }
+#endif // !GRAPHICS_DISABLED
+
+ // Merge table regions across columns for tables spanning multiple
+ // columns
+ MoveColSegmentsToGrid(&table_regions, &table_grid_);
+ GridMergeTableRegions();
+
+ // Adjust table boundaries by including nearby horizontal lines and left
+ // out column headers
+ AdjustTableBoundaries();
+ GridMergeTableRegions();
+
+ if (textord_tablefind_recognize_tables) {
+ // Remove false alarms consisting of a single column
+ DeleteSingleColumnTables();
+
+#ifndef GRAPHICS_DISABLED
+ if (textord_show_tables) {
+ ScrollView* table_win = MakeWindow(1200, 300, "Detected Table Locations");
+ DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
+ DisplayColSegments(table_win, &table_columns, ScrollView::KHAKI);
+ table_grid_.DisplayBoxes(table_win);
+ }
+#endif // !GRAPHICS_DISABLED
+
+ // Find table grid structure and reject tables that are malformed.
+ // Recognition runs twice, with a merge of the table regions in between.
+ RecognizeTables();
+ GridMergeTableRegions();
+ RecognizeTables();
+
+#ifndef GRAPHICS_DISABLED
+ if (textord_show_tables) {
+ ScrollView* table_win = MakeWindow(1400, 600, "Recognized Tables");
+ DisplayColPartitions(table_win, &clean_part_grid_,
+ ScrollView::BLUE, ScrollView::BLUE);
+ table_grid_.DisplayBoxes(table_win);
+ }
+#endif // !GRAPHICS_DISABLED
+ } else {
+ // Remove false alarms consisting of a single column
+ // TODO(nbeato): verify this is a NOP after structured table rejection.
+ // Right now it isn't. If the recognize function is doing what it is
+ // supposed to do, this function is obsolete.
+ DeleteSingleColumnTables();
+
+#ifndef GRAPHICS_DISABLED
+ if (textord_show_tables) {
+ ScrollView* table_win = MakeWindow(1500, 300, "Detected Tables");
+ DisplayColPartitions(table_win, &clean_part_grid_,
+ ScrollView::BLUE, ScrollView::BLUE);
+ table_grid_.DisplayBoxes(table_win);
+ }
+#endif // !GRAPHICS_DISABLED
+ }
+
+ // Merge all colpartitions in table regions to make them a single
+ // colpartition and revert types of isolated table cells not
+ // assigned to any table to their original types.
+ MakeTableBlocks(grid, all_columns, width_cb);
+}
+// All grids have the same dimensions. The clean_part_grid_ sizes are set from
+// the part_grid_ that is passed to InsertCleanPartitions, which was the same as
+// the grid that is the base of ColumnFinder. The accessors below just forward
+// to clean_part_grid_ instead of storing duplicate copies of the dimensions.
+int TableFinder::gridsize() const {
+ return clean_part_grid_.gridsize();
+}
+int TableFinder::gridwidth() const {
+ return clean_part_grid_.gridwidth();
+}
+int TableFinder::gridheight() const {
+ return clean_part_grid_.gridheight();
+}
+const ICOORD& TableFinder::bleft() const {
+ return clean_part_grid_.bleft();
+}
+const ICOORD& TableFinder::tright() const {
+ return clean_part_grid_.tright();
+}
+
+// Consumes part: it is stored in clean_part_grid_ when it passes the
+// AllowTextPartition() filter and deleted otherwise. part must not be null.
+void TableFinder::InsertTextPartition(ColPartition* part) {
+  ASSERT_HOST(part != nullptr);
+  if (!AllowTextPartition(*part)) {
+    delete part;
+    return;
+  }
+  clean_part_grid_.InsertBBox(true, true, part);
+}
+// Consumes part: it is stored in fragmented_text_grid_ when it passes the
+// AllowTextPartition() filter and deleted otherwise. part must not be null.
+void TableFinder::InsertFragmentedTextPartition(ColPartition* part) {
+  ASSERT_HOST(part != nullptr);
+  if (!AllowTextPartition(*part)) {
+    delete part;
+    return;
+  }
+  fragmented_text_grid_.InsertBBox(true, true, part);
+}
+// Consumes part: a non-empty leader partition whose bounding box has
+// positive area goes into leader_and_ruling_grid_; anything else is deleted.
+// part must not be null.
+void TableFinder::InsertLeaderPartition(ColPartition* part) {
+  ASSERT_HOST(part != nullptr);
+  const bool worth_keeping =
+      !part->IsEmpty() && part->bounding_box().area() > 0;
+  if (!worth_keeping) {
+    delete part;
+    return;
+  }
+  leader_and_ruling_grid_.InsertBBox(true, true, part);
+}
+// Stores a ruling-line partition in leader_and_ruling_grid_ without any
+// filtering. part must not be null.
+void TableFinder::InsertRulingPartition(ColPartition* part) {
+  // Same null precondition that InsertTextPartition() and
+  // InsertLeaderPartition() enforce, so a bad pointer fails here rather
+  // than somewhere inside the grid code.
+  ASSERT_HOST(part != nullptr);
+  leader_and_ruling_grid_.InsertBBox(true, true, part);
+}
+// Stores a non-text partition in clean_part_grid_ without any filtering.
+// part must not be null.
+void TableFinder::InsertImagePartition(ColPartition* part) {
+  // Same null precondition that InsertTextPartition() and
+  // InsertLeaderPartition() enforce, so a bad pointer fails here rather
+  // than somewhere inside the grid code.
+  ASSERT_HOST(part != nullptr);
+  // NOTE: If images are placed into a different grid in the future,
+  // the function SetPartitionSpacings needs to be updated. It should
+  // be the only thing that cares about image partitions.
+  clean_part_grid_.InsertBBox(true, true, part);
+}
+
+// Splits a partition into its "words". The splits happen
+// at locations with wide inter-blob spacing. This is useful
+// because it allows the table recognizer to "cut through" the
+// text lines on the page. The assumption is that a table
+// will have several lines with similar overlapping whitespace
+// whereas text will not have this type of property.
+// Note: The code assumes that blobs are sorted by the left side x!
+// This will not work (as well) if the blobs are sorted by center/right.
+void TableFinder::SplitAndInsertFragmentedTextPartition(ColPartition* part) {
+ ASSERT_HOST(part != nullptr);
+ // Bye bye empty partitions!
+ if (part->boxes()->empty()) {
+ delete part;
+ return;
+ }
+
+ // The AllowBlob function prevents this.
+ ASSERT_HOST(part->median_width() > 0);
+ const double kThreshold = part->median_width() * kSplitPartitionSize; // Minimum gap that counts as a split.
+
+ ColPartition* right_part = part; // The piece that still has to be scanned.
+ bool found_split = true;
+ while (found_split) {
+ found_split = false;
+ BLOBNBOX_C_IT box_it(right_part->boxes()); // Fresh iterator over the remaining piece.
+ // Blobs are sorted left side first. If blobs overlap,
+ // the previous blob may have a "more right" right side.
+ // Account for this by always keeping the largest "right"
+ // so far. INT32_MIN means that no blob has been seen yet.
+ int previous_right = INT32_MIN;
+
+ // Look for the next split in the partition.
+ for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
+ const TBOX& box = box_it.data()->bounding_box();
+ if (previous_right != INT32_MIN &&
+ box.left() - previous_right > kThreshold) {
+ // We have a split position. Split the partition in two pieces.
+ // Insert the left piece in the grid and keep processing the right.
+ int mid_x = (box.left() + previous_right) / 2;
+ ColPartition* left_part = right_part;
+ right_part = left_part->SplitAt(mid_x); // NOTE(review): assumes SplitAt() never returns nullptr here - confirm.
+
+ InsertFragmentedTextPartition(left_part);
+ found_split = true;
+ break;
+ }
+
+ // The right side of the previous blobs.
+ previous_right = std::max(previous_right, static_cast<int>(box.right()));
+ }
+ }
+ // When a split is not found, the right part is minimized
+ // as much as possible, so process it.
+ InsertFragmentedTextPartition(right_part);
+}
+
+// Size filter for text partitions: the partition's median blob height and
+// width, and its bounding-box area per blob, must each exceed a fraction
+// (kAllowText*) of the page-wide medians.
+// For multi-blob partitions the area test nearly always passes; it mainly
+// weeds out noise made of one isolated blob.
+bool TableFinder::AllowTextPartition(const ColPartition& part) const {
+  const double min_height = global_median_xheight_ * kAllowTextHeight;
+  const double min_width = global_median_blob_width_ * kAllowTextWidth;
+  const int page_median_area =
+      global_median_xheight_ * global_median_blob_width_;
+  const double min_area_per_blob = page_median_area * kAllowTextArea;
+  // All tests are strict "greater than" so that a value of 0 never passes.
+  if (!(part.median_height() > min_height))
+    return false;
+  if (!(part.median_width() > min_width))
+    return false;
+  return part.bounding_box().area() > min_area_per_blob * part.boxes_count();
+}
+
+// Size filter for single blobs: height, width and area of the blob's
+// bounding box must each exceed a fraction (kAllowBlob*) of the page-wide
+// medians. The fractions are kept small because leaders, commas, and
+// periods are important in tables.
+bool TableFinder::AllowBlob(const BLOBNBOX& blob) const {
+  const double min_height = global_median_xheight_ * kAllowBlobHeight;
+  const double min_width = global_median_blob_width_ * kAllowBlobWidth;
+  const int page_median_area =
+      global_median_xheight_ * global_median_blob_width_;
+  const double min_area = page_median_area * kAllowBlobArea;
+  const TBOX& bounds = blob.bounding_box();
+  // All tests are strict "greater than" so that a value of 0 never passes.
+  if (!(bounds.height() > min_height))
+    return false;
+  if (!(bounds.width() > min_width))
+    return false;
+  return bounds.area() > min_area;
+}
+
+// TODO(nbeato): The grid that makes the window doesn't seem to matter.
+// The only downside is that window messages will be caught by
+// clean_part_grid_ instead of a useful object. This is a temporary solution
+// for the debug windows created by the TableFinder.
+#ifndef GRAPHICS_DISABLED
+ScrollView* TableFinder::MakeWindow(int x, int y, const char* window_name) {
+ return clean_part_grid_.MakeWindow(x, y, window_name); // Forwards all arguments unchanged.
+}
+#endif // !GRAPHICS_DISABLED
+
+// Builds single-column blocks from the per-grid-row column sets in
+// all_columns and accumulates them in column_blocks. Rows whose entry in
+// all_columns is null are skipped.
+void TableFinder::GetColumnBlocks(ColPartitionSet** all_columns,
+                                  ColSegment_LIST* column_blocks) {
+  for (int row = 0; row < gridheight(); ++row) {
+    ColPartitionSet* row_columns = all_columns[row];
+    if (row_columns == nullptr)
+      continue;
+    // Vertical extent covered by this grid row.
+    const int y_start = row * gridsize();
+    const int y_end = (row + 1) * gridsize();
+    ColSegment_LIST row_blocks;
+    row_columns->GetColumnBoxes(y_start, y_end, &row_blocks);
+    // Fold this row's boxes into column_blocks where they line up with
+    // blocks that are already there.
+    GroupColumnBlocks(&row_blocks, column_blocks);
+  }
+}
+
+// Merge column segments into the current list if they are well aligned.
+void TableFinder::GroupColumnBlocks(ColSegment_LIST* new_blocks,
+ ColSegment_LIST* column_blocks) {
+ ColSegment_IT src_it(new_blocks);
+ ColSegment_IT dest_it(column_blocks);
+ // Iterate through the source list; every source segment is extracted from it.
+ for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
+ ColSegment* src_seg = src_it.data();
+ const TBOX& src_box = src_seg->bounding_box();
+ bool match_found = false;
+ // Iterate through the destination list to find a matching column block,
+ // i.e. the first one for which ConsecutiveBoxes() is true.
+ for (dest_it.mark_cycle_pt(); !dest_it.cycled_list(); dest_it.forward()) {
+ ColSegment* dest_seg = dest_it.data();
+ TBOX dest_box = dest_seg->bounding_box();
+ if (ConsecutiveBoxes(src_box, dest_box)) {
+ // If matching block is found, insert the current block into it
+ // and delete the source block.
+ dest_seg->InsertBox(src_box);
+ match_found = true;
+ delete src_it.extract();
+ break;
+ }
+ }
+ // If no match is found, move the source block itself into column_blocks.
+ if (!match_found) {
+ dest_it.add_after_then_move(src_it.extract());
+ }
+ }
+}
+
+// Returns true when the two boxes are immediate neighbors along the vertical
+// direction: their left edges and their right edges must each differ by
+// less than 20, and the top of one box must lie within 5 of the bottom of
+// the other.
+bool TableFinder::ConsecutiveBoxes(const TBOX &b1, const TBOX &b2) {
+  const int x_tolerance = 20;
+  const int y_tolerance = 5;
+  const bool left_edges_match = abs(b1.left() - b2.left()) < x_tolerance;
+  const bool right_edges_match = abs(b1.right() - b2.right()) < x_tolerance;
+  const bool b1_top_meets_b2_bottom =
+      abs(b1.top() - b2.bottom()) < y_tolerance;
+  const bool b2_top_meets_b1_bottom =
+      abs(b2.top() - b1.bottom()) < y_tolerance;
+  return left_edges_match && right_edges_match &&
+         (b1_top_meets_b2_bottom || b2_top_meets_b1_bottom);
+}
+
+// Prepares the partitions in clean_part_grid_ for the detection code, in
+// three steps: neighbors, then per-partition spacings, then global spacings.
+void TableFinder::InitializePartitions(ColPartitionSet** all_columns) {
+  // Step 1: neighbor links.
+  FindNeighbors();
+  // Step 2: spacings of each partition, measured against all_columns.
+  SetPartitionSpacings(&clean_part_grid_, all_columns);
+  // Step 3: page-wide spacing statistics.
+  SetGlobalSpacings(&clean_part_grid_);
+}
+
+// Set left, right and top, bottom spacings of each colpartition.
+// Left/right spacing is the distance from the partition to the edge of the
+// column that contains it, reduced to the distance to an image partition
+// found by the side searches when that is smaller. Above/below spacing is
+// the bottom-to-bottom distance to the singleton partner on that side, or
+// INT32_MAX when there is no such partner.
+// all_columns is indexed by grid row. A row whose entry is null contributes
+// no column-based left/right spacing.
+void TableFinder::SetPartitionSpacings(ColPartitionGrid* grid,
+                                       ColPartitionSet** all_columns) {
+  // Iterate the ColPartitions in the grid.
+  ColPartitionGridSearch gsearch(grid);
+  gsearch.StartFullSearch();
+  ColPartition* part = nullptr;
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    ColPartitionSet* columns = all_columns[gsearch.GridY()];
+    TBOX box = part->bounding_box();
+    int y = part->MidY();
+    // GetColumnBlocks() treats null entries in all_columns as possible, so
+    // do not dereference a missing column set here either.
+    ColPartition* left_column =
+        columns != nullptr ? columns->ColumnContaining(box.left(), y) : nullptr;
+    ColPartition* right_column =
+        columns != nullptr ? columns->ColumnContaining(box.right(), y)
+                           : nullptr;
+    // set distance from left column as space to the left
+    if (left_column) {
+      int left_space = std::max(0, box.left() - left_column->LeftAtY(y));
+      part->set_space_to_left(left_space);
+    }
+    // set distance from right column as space to the right
+    if (right_column) {
+      int right_space = std::max(0, right_column->RightAtY(y) - box.right());
+      part->set_space_to_right(right_space);
+    }
+
+    // Look for images that may be closer.
+    // NOTE: used to be part_grid_, might cause issues now
+    ColPartitionGridSearch hsearch(grid);
+    hsearch.StartSideSearch(box.left(), box.bottom(), box.top());
+    ColPartition* neighbor = nullptr;
+    while ((neighbor = hsearch.NextSideSearch(true)) != nullptr) {
+      if (neighbor->type() == PT_PULLOUT_IMAGE ||
+          neighbor->type() == PT_FLOWING_IMAGE ||
+          neighbor->type() == PT_HEADING_IMAGE) {
+        int right = neighbor->bounding_box().right();
+        // Only images that end before this partition starts count.
+        if (right < box.left()) {
+          int space = std::min(box.left() - right, part->space_to_left());
+          part->set_space_to_left(space);
+        }
+      }
+    }
+    hsearch.StartSideSearch(box.left(), box.bottom(), box.top());
+    neighbor = nullptr;
+    while ((neighbor = hsearch.NextSideSearch(false)) != nullptr) {
+      if (neighbor->type() == PT_PULLOUT_IMAGE ||
+          neighbor->type() == PT_FLOWING_IMAGE ||
+          neighbor->type() == PT_HEADING_IMAGE) {
+        int left = neighbor->bounding_box().left();
+        // Only images that start after this partition ends count.
+        if (left > box.right()) {
+          int space = std::min(left - box.right(), part->space_to_right());
+          part->set_space_to_right(space);
+        }
+      }
+    }
+
+    ColPartition* upper_part = part->SingletonPartner(true);
+    if (upper_part) {
+      int space =
+          std::max(0, static_cast<int>(upper_part->bounding_box().bottom() -
+                                       part->bounding_box().bottom()));
+      part->set_space_above(space);
+    } else {
+      // TODO(nbeato): What constitutes a good value?
+      // 0 is the default value when not set, explicitly noting it needs to
+      // be something else.
+      part->set_space_above(INT32_MAX);
+    }
+
+    ColPartition* lower_part = part->SingletonPartner(false);
+    if (lower_part) {
+      int space =
+          std::max(0, static_cast<int>(part->bounding_box().bottom() -
+                                       lower_part->bounding_box().bottom()));
+      part->set_space_below(space);
+    } else {
+      // TODO(nbeato): What constitutes a good value?
+      // 0 is the default value when not set, explicitly noting it needs to
+      // be something else.
+      part->set_space_below(INT32_MAX);
+    }
+  }
+}
+
+// Find the closest partitions above and below |part| in clean_part_grid_
+// and record both the neighbors and the spacing to them on |part|.
+// Only partitions within kMaxVerticalSpacing of |part| (clipped to the grid
+// bounds) that have a major x-overlap with it are considered. Spacing is
+// measured between median bottoms and defaults to kMaxVerticalSpacing when
+// no neighbor is found on a side.
+void TableFinder::SetVerticalSpacing(ColPartition* part) {
+  const TBOX part_box = part->bounding_box();
+
+  // Grow the partition box vertically to obtain the search window.
+  TBOX search_box = part_box;
+  search_box.set_top(std::min(part_box.top() + kMaxVerticalSpacing,
+                              static_cast<int>(tright().y())));
+  search_box.set_bottom(std::max(part_box.bottom() - kMaxVerticalSpacing,
+                                 static_cast<int>(bleft().y())));
+
+  int closest_above = kMaxVerticalSpacing;
+  int closest_below = kMaxVerticalSpacing;
+  ColPartition* neighbor_above = nullptr;
+  ColPartition* neighbor_below = nullptr;
+
+  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+      window_search(&clean_part_grid_);
+  window_search.StartRectSearch(search_box);
+  for (ColPartition* candidate = window_search.NextRectSearch();
+       candidate != nullptr; candidate = window_search.NextRectSearch()) {
+    if (candidate == part)
+      continue;
+    const TBOX& candidate_box = candidate->bounding_box();
+    if (!candidate_box.major_x_overlap(part_box))
+      continue;
+
+    const int gap = abs(part->median_bottom() - candidate->median_bottom());
+    const bool lies_below = candidate_box.top() < part_box.bottom();
+    const bool lies_above = part_box.top() < candidate_box.bottom();
+    if (lies_below && gap < closest_below) {
+      closest_below = gap;
+      neighbor_below = candidate;
+    } else if (lies_above && gap < closest_above) {
+      closest_above = gap;
+      neighbor_above = candidate;
+    }
+  }
+
+  part->set_space_above(closest_above);
+  part->set_space_below(closest_below);
+  part->set_nearest_neighbor_above(neighbor_above);
+  part->set_nearest_neighbor_below(neighbor_below);
+}
+
+// Compute grid-wide medians of blob height (used as x-height), blob width
+// and vertical spacing ("ledding") over the text partitions of |grid|, and
+// store them through the set_global_median_* setters.
+void TableFinder::SetGlobalSpacings(ColPartitionGrid* grid) {
+  STATS height_hist(0, kMaxVerticalSpacing + 1);
+  STATS width_hist(0, kMaxBlobWidth + 1);
+  STATS ledding_hist(0, kMaxVerticalSpacing + 1);
+
+  // Visit every partition of the grid exactly once.
+  ColPartitionGridSearch full_search(grid);
+  full_search.SetUniqueMode(true);
+  full_search.StartFullSearch();
+  for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+       part = full_search.NextFullSearch()) {
+    if (!part->IsTextType())
+      continue;
+
+    // TODO(nbeato): HACK. The partition medians are equal to the partition
+    // length here because ComputeLimits has not been run on the partitions.
+    // Calling part->ComputeLimits() at this point makes
+    // SmoothPartitionPartners die in an assert after table finding, so it
+    // has to happen somewhere outside of TableFinder. Until then the
+    // per-blob loop below stands in for:
+    //   xheight_stats.add(part->median_height(), part->boxes_count());
+    //   width_stats.add(part->median_width(), part->boxes_count());
+    BLOBNBOX_C_IT blob_it(part->boxes());
+    for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+      const TBOX& blob_box = blob_it.data()->bounding_box();
+      height_hist.add(blob_box.height(), 1);
+      width_hist.add(blob_box.width(), 1);
+    }
+
+    ledding_hist.add(part->space_above(), 1);
+    ledding_hist.add(part->space_below(), 1);
+  }
+
+  // The estimates are the rounded medians of the collected histograms.
+  set_global_median_xheight(static_cast<int>(height_hist.median() + 0.5));
+  set_global_median_blob_width(static_cast<int>(width_hist.median() + 0.5));
+  set_global_median_ledding(static_cast<int>(ledding_hist.median() + 0.5));
+#ifndef GRAPHICS_DISABLED
+  if (textord_tablefind_show_stats) {
+    ScrollView* histogram_win =
+        MakeWindow(500, 10, "X-height (R), X-width (G), and ledding (B)");
+    height_hist.plot(histogram_win, 10, 200, 2, 15, ScrollView::RED);
+    width_hist.plot(histogram_win, 10, 200, 2, 15, ScrollView::GREEN);
+    ledding_hist.plot(histogram_win, 10, 200, 2, 15, ScrollView::BLUE);
+  }
+#endif  // !GRAPHICS_DISABLED
+}
+
+// Setter for global_median_xheight_.
+void TableFinder::set_global_median_xheight(int xheight) {
+  this->global_median_xheight_ = xheight;
+}
+// Setter for global_median_blob_width_.
+void TableFinder::set_global_median_blob_width(int width) {
+  this->global_median_blob_width_ = width;
+}
+// Setter for global_median_ledding_.
+void TableFinder::set_global_median_ledding(int ledding) {
+  this->global_median_ledding_ = ledding;
+}
+
+// For every partition in clean_part_grid_, record its singleton partner
+// above and below (when one exists) as its nearest neighbor in that
+// direction. A neighbor is left untouched when there is no singleton partner.
+void TableFinder::FindNeighbors() {
+  ColPartitionGridSearch full_search(&clean_part_grid_);
+  full_search.StartFullSearch();
+  for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+       part = full_search.NextFullSearch()) {
+    // TODO(nbeato): Rename this function, meaning is different now.
+    // It used to call SetVerticalSpacing(part); it now finds the nearest
+    // neighbors its own way through the partner lists.
+    if (ColPartition* partner_above = part->SingletonPartner(true)) {
+      part->set_nearest_neighbor_above(partner_above);
+    }
+    if (ColPartition* partner_below = part->SingletonPartner(false)) {
+      part->set_nearest_neighbor_below(partner_below);
+    }
+  }
+}
+
+// High level interface. Works on clean_part_grid_, which is expected to be
+// unmarked on entry. The pipeline is:
+//   1. mark candidates from local information,
+//   2. filter false alarms,
+//   3. smooth runs of table partitions,
+//   4. filter false alarms again.
+// When graphics are enabled, the intermediate results are shown in debug
+// windows under control of textord_tablefind_show_mark (and, for the final
+// result only, textord_show_tables).
+void TableFinder::MarkTablePartitions() {
+#ifndef GRAPHICS_DISABLED
+  // Opens a window at (x, 300) and draws the current state of both grids.
+  auto show_partitions = [this](int x, const char* title) {
+    ScrollView* table_win = MakeWindow(x, 300, title);
+    DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE);
+    DisplayColPartitions(table_win, &leader_and_ruling_grid_,
+                         ScrollView::AQUAMARINE);
+  };
+#endif
+
+  MarkPartitionsUsingLocalInformation();
+#ifndef GRAPHICS_DISABLED
+  if (textord_tablefind_show_mark) {
+    show_partitions(300, "Initial Table Partitions");
+  }
+#endif
+
+  FilterFalseAlarms();
+#ifndef GRAPHICS_DISABLED
+  if (textord_tablefind_show_mark) {
+    show_partitions(600, "Filtered Table Partitions");
+  }
+#endif
+
+  SmoothTablePartitionRuns();
+#ifndef GRAPHICS_DISABLED
+  if (textord_tablefind_show_mark) {
+    show_partitions(900, "Smoothed Table Partitions");
+  }
+#endif
+
+  FilterFalseAlarms();
+#ifndef GRAPHICS_DISABLED
+  if (textord_tablefind_show_mark || textord_show_tables) {
+    show_partitions(900, "Final Table Partitions");
+  }
+#endif
+}
+
+// These types of partitions are marked as table partitions:
+//  1- Partitions that have at least one large gap between words
+//  2- Partitions that consist of only one word (no significant gap
+//     between components)
+//  3- Partitions that vertically overlap with other partitions within the
+//     same column.
+//  4- Partitions with leaders before/after them.
+// Only cases 1, 2 and 4 are tested directly in this function, through
+// HasWideOrNoInterWordGap and HasLeaderAdjacent.
+void TableFinder::MarkPartitionsUsingLocalInformation() {
+  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+      full_search(&clean_part_grid_);
+  full_search.StartFullSearch();
+  for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+       part = full_search.NextFullSearch()) {
+    // Candidates are text partitions whose median height does not exceed
+    // kMaxTableCellXheight times the global median x-height.
+    if (!part->IsTextType() ||
+        part->median_height() > kMaxTableCellXheight * global_median_xheight_) {
+      continue;
+    }
+    // A large inter-word gap, no significant gap, or an adjacent leader
+    // makes the partition a table candidate.
+    // Known sources of false alarms:
+    //  - last line of a paragraph (fixed)
+    //  - single word section headings
+    //  - page headers and footers
+    //  - numbered equations
+    //  - line drawing regions
+    // TODO(faisal): detect and fix above-mentioned cases
+    const bool looks_tabular =
+        HasWideOrNoInterWordGap(part) || HasLeaderAdjacent(*part);
+    if (looks_tabular) {
+      part->set_table_type();
+    }
+  }
+}
+
+// Check if the partition has at least one large gap between words or no
+// significant gap at all.
+// Returns true when |part| looks like a table cell:
+//  - it is both narrow and has few blobs (e.g. a single word), or
+//  - some inter-blob gap exceeds kMaxGapInTextPartition * median height, or
+//  - no gap could be measured at all, or
+//  - the largest gap is below kMinMaxGapInTextPartition * median height.
+// Returns false when no large gap exists and the partition is too wide or
+// has too many blobs to be a data cell, or when its largest gap lies
+// between the two limits (ordinary text).
+// |part| must be a text partition (asserted).
+bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const {
+ // Should only get text partitions.
+ ASSERT_HOST(part->IsTextType());
+ // Blob access
+ BLOBNBOX_CLIST* part_boxes = part->boxes();
+ BLOBNBOX_C_IT it(part_boxes);
+ // Check if this is a relatively small partition (such as a single word).
+ // Both the width test and the blob-count test must hold.
+ if (part->bounding_box().width() <
+ kMinBoxesInTextPartition * part->median_height() &&
+ part_boxes->length() < kMinBoxesInTextPartition)
+ return true;
+
+ // Variables used to compute inter-blob spacing.
+ // -1 in previous_x1 means "no previous blob seen yet".
+ int current_x0 = -1;
+ int current_x1 = -1;
+ int previous_x1 = -1;
+ // Stores the maximum gap detected; stays -1 if no gap was ever recorded.
+ int largest_partition_gap_found = -1;
+ // Text partition gap limits. If this is text (and not a table),
+ // there should be at least one gap larger than min_gap and no gap
+ // larger than max_gap.
+ const double max_gap = kMaxGapInTextPartition * part->median_height();
+ const double min_gap = kMinMaxGapInTextPartition * part->median_height();
+
+ // NOTE(review): the gap computation presumably relies on the blob list
+ // being ordered left to right - confirm against how boxes() is built.
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ BLOBNBOX* blob = it.data();
+ current_x0 = blob->bounding_box().left();
+ current_x1 = blob->bounding_box().right();
+ if (previous_x1 != -1) {
+ int gap = current_x0 - previous_x1;
+
+ // TODO(nbeato): Boxes may overlap? Huh?
+ // For example, mag.3B 8003_033.3B.tif in UNLV data. The titles/authors
+ // on the top right of the page are filtered out with this line.
+ // Note 2: Iterating over blobs in a partition, so we are looking for
+ // spacing between the words.
+ if (gap < 0) {
+ // More likely case, the blobs slightly overlap. This can happen
+ // with diacritics (accents) or broken alphabet symbols (characters).
+ // Merge boxes together by taking max of right sides. The continue
+ // skips the unconditional previous_x1 update at the loop bottom.
+ if (-gap < part->median_height() * kMaxBlobOverlapFactor) {
+ previous_x1 = std::max(previous_x1, current_x1);
+ continue;
+ }
+ // Extreme case, blobs overlap significantly in the same partition...
+ // This should not happen often (if at all), but it does.
+ // TODO(nbeato): investigate cases when this happens.
+ else {
+ // The behavior before was to completely ignore this case.
+ // Nothing is done here: the negative gap falls through to the
+ // checks below, where it can neither exceed max_gap nor replace
+ // largest_partition_gap_found.
+ }
+ }
+
+ // If a large enough gap is found, mark it as a table cell (return true)
+ if (gap > max_gap)
+ return true;
+ if (gap > largest_partition_gap_found)
+ largest_partition_gap_found = gap;
+ }
+ previous_x1 = current_x1;
+ }
+ // Since no large gap was found, return false if the partition is too
+ // long to be a data cell (either too wide or too many blobs).
+ if (part->bounding_box().width() >
+ kMaxBoxesInDataPartition * part->median_height() ||
+ part_boxes->length() > kMaxBoxesInDataPartition)
+ return false;
+
+ // A partition may be a single blob. In this case, it's an isolated symbol
+ // or non-text (such as a ruling or image).
+ // Detect these as table partitions? Shouldn't this be case by case?
+ // The behavior before was to ignore this, leaving
+ // largest_partition_gap_found < 0 and implicitly returning true from the
+ // final comparison. Just making it explicit.
+ if (largest_partition_gap_found == -1)
+ return true;
+
+ // return true if the maximum gap found is smaller than the minimum allowed
+ // max_gap in a text partition. This indicates that there is no significant
+ // space in the partition, hence it is likely a single word.
+ return largest_partition_gap_found < min_gap;
+}
+
+// One criterion for possible tables is that a table may have leaders
+// between data cells. An aggressive way to find such tables is to
+// explicitly mark partitions that have adjacent leaders.
+// This includes overlapping leaders, but not leaders in a different page
+// column. Likely false positives are lists such as a table of contents;
+// if those show up, this search may need to become less aggressive.
+// Returns true if |part| is itself a leader or has a leader beside it in
+// leader_and_ruling_grid_.
+bool TableFinder::HasLeaderAdjacent(const ColPartition& part) {
+  if (part.flow() == BTFT_LEADER)
+    return true;
+
+  // Pad the vertical search range by a multiple of the median x-height so
+  // that leaders whose alignment is slightly off are still found.
+  const TBOX& part_box = part.bounding_box();
+  const int padding = kAdjacentLeaderSearchPadding * global_median_xheight_;
+  const int range_top = part_box.top() + padding;
+  const int range_bottom = part_box.bottom() - padding;
+
+  ColPartitionGridSearch side_search(&leader_and_ruling_grid_);
+  // First search from the right edge going right-to-left, then from the
+  // left edge going left-to-right.
+  const bool search_directions[] = {true, false};
+  for (const bool right_to_left : search_directions) {
+    const int start_x = right_to_left ? part_box.right() : part_box.left();
+    side_search.StartSideSearch(start_x, range_bottom, range_top);
+    for (ColPartition* candidate = side_search.NextSideSearch(right_to_left);
+         candidate != nullptr;
+         candidate = side_search.NextSideSearch(right_to_left)) {
+      // The grid also holds rulings; only real leaders count.
+      if (candidate->flow() != BTFT_LEADER)
+        continue;
+      // This should not happen, they are in different grids.
+      ASSERT_HOST(&part != candidate);
+      // Stop this direction once the search leaves the partition's page
+      // column, otherwise we would be spreading across columns.
+      if (!part.IsInSameColumnAs(*candidate))
+        break;
+      // A significant vertical overlap makes the leader adjacent.
+      if (candidate->VSignificantCoreOverlap(part))
+        return true;
+    }
+  }
+  // No leaders are adjacent to the given partition.
+  return false;
+}
+
+// Filter individual text partitions that were marked as table partitions
+// but are likely false alarms. Two filters are run, in this order:
+// paragraph endings, then page header and footer.
+// Small section headings are not handled by any call made here.
+void TableFinder::FilterFalseAlarms() {
+ FilterParagraphEndings();
+ FilterHeaderAndFooter();
+ // TODO(nbeato): Fully justified text as non-table?
+}
+
+// Clear the table mark from partitions in clean_part_grid_ that look like
+// the last line of a paragraph. A table partition is unmarked only if it
+// passes every test below; failing any single test leaves it marked.
+void TableFinder::FilterParagraphEndings() {
+ // Detect last line of paragraph
+ // Iterate the ColPartitions in the grid.
+ ColPartitionGridSearch gsearch(&clean_part_grid_);
+ gsearch.StartFullSearch();
+ ColPartition* part = nullptr;
+ while ((part = gsearch.NextFullSearch()) != nullptr) {
+ if (part->type() != PT_TABLE)
+ continue; // Consider only table partitions
+
+ // Paragraph ending should have flowing text above it.
+ ColPartition* upper_part = part->nearest_neighbor_above();
+ if (!upper_part)
+ continue;
+ if (upper_part->type() != PT_FLOWING_TEXT)
+ continue;
+ // The line above must be at least twice as wide as this one.
+ if (upper_part->bounding_box().width() <
+ 2 * part->bounding_box().width())
+ continue;
+ // Check if it's the last line of a paragraph.
+ // In most cases, a paragraph ending should be left-aligned to text line
+ // above it. Sometimes, it could be a 2 line paragraph, in which case
+ // the line above it is indented.
+ // To account for that, check if the partition center is to
+ // the left of the one above it.
+ int mid = (part->bounding_box().left() + part->bounding_box().right()) / 2;
+ int upper_mid = (upper_part->bounding_box().left() +
+ upper_part->bounding_box().right()) / 2;
+ int current_spacing = 0; // spacing of the current line to margin
+ int upper_spacing = 0; // spacing of the previous line to the margin
+ if (left_to_right_language_) {
+ // Left to right languages, use mid - left to figure out the distance
+ // the middle is from the left margin. The margin is taken as the
+ // leftmost edge of the two lines.
+ int left = std::min(part->bounding_box().left(),
+ upper_part->bounding_box().left());
+ current_spacing = mid - left;
+ upper_spacing = upper_mid - left;
+ } else {
+ // Right to left languages, use right - mid to figure out the distance
+ // the middle is from the right margin. The margin is taken as the
+ // rightmost edge of the two lines.
+ int right = std::max(part->bounding_box().right(),
+ upper_part->bounding_box().right());
+ current_spacing = right - mid;
+ upper_spacing = right - upper_mid;
+ }
+ // Keep the table mark unless this line's center is sufficiently closer
+ // to the margin than the center of the line above.
+ if (current_spacing * kParagraphEndingPreviousLineRatio > upper_spacing)
+ continue;
+
+ // Paragraphs should have similar fonts.
+ if (!part->MatchingSizes(*upper_part) ||
+ !part->MatchingStrokeWidth(*upper_part, kStrokeWidthFractionalTolerance,
+ kStrokeWidthConstantTolerance)) {
+ continue;
+ }
+
+ // The last line of a paragraph should be left aligned.
+ // TODO(nbeato): This would be untrue if the text was right aligned.
+ // How often is that?
+ if (part->space_to_left() >
+ kMaxParagraphEndingLeftSpaceMultiple * part->median_height())
+ continue;
+ // The line above it should be right aligned (assuming justified format).
+ // Since we can't assume justified text, we compare whitespace to text.
+ // The above line should have majority spanning text (or the current
+ // line could have fit on the previous line). So compare
+ // whitespace to text.
+ if (upper_part->bounding_box().width() <
+ kMinParagraphEndingTextToWhitespaceRatio * upper_part->space_to_right())
+ continue;
+
+ // Ledding above the line should be less than ledding below, and no more
+ // than twice the global median ledding.
+ if (part->space_above() >= part->space_below() ||
+ part->space_above() > 2 * global_median_ledding_)
+ continue;
+
+ // Every paragraph-ending check passed, so it is probably text.
+ part->clear_table_type();
+ }
+}
+
+// Treat the text partition with the largest top coordinate as the page
+// header and the one with the smallest bottom coordinate as the page footer,
+// and clear the table mark on both. They may be the same partition.
+void TableFinder::FilterHeaderAndFooter() {
+  ColPartition* topmost = nullptr;
+  ColPartition* bottommost = nullptr;
+  int highest_top = INT32_MIN;
+  int lowest_bottom = INT32_MAX;
+
+  ColPartitionGridSearch full_search(&clean_part_grid_);
+  full_search.StartFullSearch();
+  for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+       part = full_search.NextFullSearch()) {
+    // Only text partitions can be a header or a footer.
+    if (!part->IsTextType())
+      continue;
+    const int part_top = part->bounding_box().top();
+    const int part_bottom = part->bounding_box().bottom();
+    if (part_top > highest_top) {
+      highest_top = part_top;
+      topmost = part;
+    }
+    if (part_bottom < lowest_bottom) {
+      lowest_bottom = part_bottom;
+      bottommost = part;
+    }
+  }
+
+  if (topmost != nullptr) {
+    topmost->clear_table_type();
+  }
+  if (bottommost != nullptr) {
+    bottommost->clear_table_type();
+  }
+}
+
+// Smooth the table marking along vertical runs of partitions, in two passes
+// over clean_part_grid_:
+//  Pass 1: mark a partition as table if both its nearest neighbors (above
+//          and below) are table partitions.
+//  Pass 2: unmark a table partition if both its nearest neighbors exist and
+//          neither is a table partition.
+// TODO(faisal): This is too aggressive at the moment. The method needs to
+// consider spacing and alignment as well. Detection of false alarm table cells
+// should also be done as part of it.
+void TableFinder::SmoothTablePartitionRuns() {
+  ColPartitionGridSearch full_search(&clean_part_grid_);
+
+  // Pass 1: fill holes in runs of table partitions.
+  full_search.StartFullSearch();
+  for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+       part = full_search.NextFullSearch()) {
+    // Skip types at or beyond PT_TABLE in the enum, and unknown partitions.
+    if (part->type() >= PT_TABLE || part->type() == PT_UNKNOWN)
+      continue;
+    ColPartition* above = part->nearest_neighbor_above();
+    ColPartition* below = part->nearest_neighbor_below();
+    if (above != nullptr && below != nullptr &&
+        above->type() == PT_TABLE && below->type() == PT_TABLE) {
+      part->set_table_type();
+    }
+  }
+
+  // Pass 2: do the opposite. A table partition can't be by itself.
+  full_search.StartFullSearch();
+  for (ColPartition* part = full_search.NextFullSearch(); part != nullptr;
+       part = full_search.NextFullSearch()) {
+    // Consider only table partitions.
+    if (part->type() != PT_TABLE)
+      continue;
+    ColPartition* above = part->nearest_neighbor_above();
+    ColPartition* below = part->nearest_neighbor_below();
+    const bool non_table_above = above != nullptr && above->type() != PT_TABLE;
+    const bool non_table_below = below != nullptr && below->type() != PT_TABLE;
+    if (non_table_above && non_table_below) {
+      part->clear_table_type();
+    }
+  }
+}
+
+// Classify every column segment in |column_blocks| from the number of table
+// and flowing-text partitions of clean_part_grid_ found inside its bounding
+// box. Segments containing neither kind of partition are removed from the
+// list and deleted.
+void TableFinder::SetColumnsType(ColSegment_LIST* column_blocks) {
+  ColSegment_IT segment_it(column_blocks);
+  for (segment_it.mark_cycle_pt(); !segment_it.cycled_list();
+       segment_it.forward()) {
+    ColSegment* segment = segment_it.data();
+    TBOX segment_box = segment->bounding_box();
+
+    // Count the table and text partitions inside the segment, each once.
+    int table_count = 0;
+    int text_count = 0;
+    GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+        box_search(&clean_part_grid_);
+    box_search.SetUniqueMode(true);
+    box_search.StartRectSearch(segment_box);
+    for (ColPartition* part = box_search.NextRectSearch(); part != nullptr;
+         part = box_search.NextRectSearch()) {
+      switch (part->type()) {
+        case PT_TABLE:
+          ++table_count;
+          break;
+        case PT_FLOWING_TEXT:
+          ++text_count;
+          break;
+        default:
+          break;
+      }
+    }
+
+    // A column block without any text or table partition is not needed
+    // for table detection.
+    if (table_count == 0 && text_count == 0) {
+      delete segment_it.extract();
+      continue;
+    }
+    segment->set_num_table_cells(table_count);
+    segment->set_num_text_cells(text_count);
+    // The column type follows from the ratio of table to text cells.
+    segment->set_type();
+  }
+}
+
+// Transfer every column segment out of |segments| and into |col_seg_grid|.
+// Each segment is extracted from the list before it is inserted in the
+// grid, so the list is empty afterwards.
+void TableFinder::MoveColSegmentsToGrid(ColSegment_LIST *segments,
+                                        ColSegmentGrid *col_seg_grid) {
+  ColSegment_IT segment_it(segments);
+  for (segment_it.mark_cycle_pt(); !segment_it.cycled_list();
+       segment_it.forward()) {
+    col_seg_grid->InsertBBox(true, true, segment_it.extract());
+  }
+}
+
+// Merge column blocks if a split is detected due to the presence of a
+// table. A text block is considered split if it has multiple
+// neighboring blocks above/below it, and at least one of the
+// neighboring blocks is of table type (has a high density of table
+// partitions). In this case neighboring blocks in the direction
+// (above/below) of the table block are merged with the text block.
+
+// Comment: This method does not handle split due to a full page table
+// since table columns in this case do not have a text column on which
+// split decision can be based.
+//
+// Works in place on col_seg_grid_: absorbed neighbors are removed from the
+// grid and deleted, and a grown text block is re-inserted with its new box.
+void TableFinder::GridMergeColumnBlocks() {
+ // Vertical padding added to a block's box when searching for neighbors.
+ int margin = gridsize();
+
+ // Iterate the Column Blocks in the grid.
+ GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
+ gsearch(&col_seg_grid_);
+ gsearch.StartFullSearch();
+ ColSegment* seg;
+ while ((seg = gsearch.NextFullSearch()) != nullptr) {
+ if (seg->type() != COL_TEXT)
+ continue; // only consider text blocks for split detection
+ bool neighbor_found = false;
+ bool modified = false; // Modified at least once
+ // keep expanding current box as long as neighboring table columns
+ // are found above or below it.
+ do {
+ // Copy of the current box; it is recomputed on every pass of the
+ // do-loop so that growth from the previous pass is picked up.
+ TBOX box = seg->bounding_box();
+ // slightly expand the search region vertically, clipped to the grid
+ int top_range = std::min(box.top() + margin, static_cast<int>(tright().y()));
+ int bottom_range = std::max(box.bottom() - margin, static_cast<int>(bleft().y()));
+ box.set_top(top_range);
+ box.set_bottom(bottom_range);
+ neighbor_found = false;
+ GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
+ rectsearch(&col_seg_grid_);
+ rectsearch.StartRectSearch(box);
+ ColSegment* neighbor = nullptr;
+ while ((neighbor = rectsearch.NextRectSearch()) != nullptr) {
+ if (neighbor == seg)
+ continue;
+ const TBOX& neighbor_box = neighbor->bounding_box();
+ // If the neighbor box significantly overlaps with the current
+ // box (due to the expansion of the current box in the
+ // previous iteration of this loop), remove the neighbor box
+ // and expand the current box to include it.
+ // This case does not set neighbor_found, so on its own it does not
+ // trigger another pass of the do-loop.
+ if (neighbor_box.overlap_fraction(box) >= 0.9) {
+ seg->InsertBox(neighbor_box);
+ modified = true;
+ // Remove the neighbor from the grid, then resync the outer search
+ // because the grid contents changed under it, and only then free
+ // the neighbor.
+ rectsearch.RemoveBBox();
+ gsearch.RepositionIterator();
+ delete neighbor;
+ continue;
+ }
+ // Only expand if the neighbor box is of table type
+ if (neighbor->type() != COL_TABLE)
+ continue;
+ // Insert the neighbor box into the current column block
+ if (neighbor_box.major_x_overlap(box) &&
+ !box.contains(neighbor_box)) {
+ seg->InsertBox(neighbor_box);
+ neighbor_found = true;
+ modified = true;
+ rectsearch.RemoveBBox();
+ gsearch.RepositionIterator();
+ delete neighbor;
+ }
+ }
+ } while (neighbor_found);
+ if (modified) {
+ // Because the box has changed, it has to be removed first.
+ gsearch.RemoveBBox();
+ col_seg_grid_.InsertBBox(true, true, seg);
+ gsearch.RepositionIterator();
+ }
+ }
+}
+
+// Group horizontally overlapping table partitions into table columns and
+// append one ColSegment per column to |table_columns|. A column is only
+// produced when a seed partition gains at least one table partition below it.
+// TODO(faisal): This is too aggressive at the moment. The method should
+// consider more attributes to group table partitions together. Some common
+// errors are:
+//  1- page number is merged with a table column above it even
+//     if there is a large vertical gap between them.
+//  2- column headers go on to catch one of the columns arbitrarily
+//  3- an isolated noise blob near page top or bottom merges with the table
+//     column below/above it
+//  4- cells from two vertically adjacent tables merge together to make a
+//     single column resulting in merging of the two tables
+void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) {
+  ColSegment_IT column_it(table_columns);
+  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+      full_search(&clean_part_grid_);
+  full_search.StartFullSearch();
+  for (ColPartition* seed = full_search.NextFullSearch(); seed != nullptr;
+       seed = full_search.NextFullSearch()) {
+    // A partition belongs to at most one column, and only table partitions
+    // can seed one.
+    if (seed->inside_table_column() || seed->type() != PT_TABLE)
+      continue;
+
+    const TBOX& seed_box = seed->bounding_box();
+    auto* column = new ColSegment();
+    column->InsertBox(seed_box);
+    // Claim the seed before searching, so the search below skips it.
+    seed->set_inside_table_column(true);
+
+    // Walk downwards from the seed to collect the cells below it.
+    // Note: a full search will always process things above it first, so
+    // this should be starting at the highest cell and working its way down.
+    GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+        down_search(&clean_part_grid_);
+    down_search.StartVerticalSearch(seed_box.left(), seed_box.right(),
+                                    seed_box.bottom());
+    bool column_grew = false;
+    for (ColPartition* below = down_search.NextVerticalSearch(true);
+         below != nullptr; below = down_search.NextVerticalSearch(true)) {
+      // Skip partitions already owned by a column; horizontal lines should
+      // not break the flow either.
+      if (below->inside_table_column() || below->IsHorizontalLine())
+        continue;
+      // A non-table partition ends the current table column.
+      if (below->type() != PT_TABLE)
+        break;
+      column->InsertBox(below->bounding_box());
+      below->set_inside_table_column(true);
+      column_grew = true;
+    }
+
+    if (!column_grew) {
+      // A lone cell is not a column: release the seed and drop the segment.
+      seed->set_inside_table_column(false);
+      delete column;
+      continue;
+    }
+    column_it.add_after_then_move(column);
+  }
+}
+
+// Mark regions in a column that are x-bounded by the column boundaries and
+// y-bounded by the table columns' projection on the y-axis as table regions.
+// For each page column block in col_seg_grid_, every box of |table_columns|
+// is intersected with the block and projected onto the y-axis; each maximal
+// run of covered rows becomes one ColSegment appended to |table_regions|.
+// Runs that touch the first or the last row of the projection are reported
+// as well, and projection indices are clipped to the projection buffer.
+void TableFinder::GetTableRegions(ColSegment_LIST* table_columns,
+                                  ColSegment_LIST* table_regions) {
+  ColSegment_IT cit(table_columns);
+  ColSegment_IT rit(table_regions);
+  // Iterate through column blocks
+  GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
+      gsearch(&col_seg_grid_);
+  gsearch.StartFullSearch();
+  ColSegment* part;
+  const int page_bottom = bleft().y();
+  const int page_height = tright().y() - page_bottom;
+  ASSERT_HOST(page_height > 0);
+  // create a bool array to hold projection on y-axis; entry i stands for
+  // the row at y-coordinate i + page_bottom.
+  bool* table_region = new bool[page_height];
+  while ((part = gsearch.NextFullSearch()) != nullptr) {
+    const TBOX& part_box = part->bounding_box();
+    // reset the projection array
+    for (int i = 0; i < page_height; i++) {
+      table_region[i] = false;
+    }
+    // iterate through all table columns to find regions in the current
+    // page column block
+    cit.move_to_first();
+    for (cit.mark_cycle_pt(); !cit.cycled_list(); cit.forward()) {
+      TBOX col_box = cit.data()->bounding_box();
+      // find intersection region of table column and page column
+      TBOX intersection_box = col_box.intersection(part_box);
+      // project table column on the y-axis, never writing outside the
+      // projection array even if a box sticks out of the grid bounds.
+      const int first_row =
+          std::max(intersection_box.bottom() - page_bottom, 0);
+      const int end_row =
+          std::min(intersection_box.top() - page_bottom, page_height);
+      for (int i = first_row; i < end_row; i++) {
+        table_region[i] = true;
+      }
+    }
+    // go through the y-axis projection to find runs of table
+    // regions. Each run makes one table region. Index page_height acts as
+    // a virtual empty row so that a run reaching the last row gets closed.
+    bool in_run = false;
+    int run_start = 0;
+    for (int i = 0; i <= page_height; i++) {
+      const bool is_table_row = i < page_height && table_region[i];
+      if (is_table_row && !in_run) {
+        // detect start of a table region (also when it starts at row 0)
+        run_start = i;
+        in_run = true;
+      } else if (!is_table_row && in_run) {
+        // detect end of a table region
+        in_run = false;
+        // set x-limits of table regions to page column width
+        TBOX current_table_box;
+        current_table_box.set_left(part_box.left());
+        current_table_box.set_right(part_box.right());
+        current_table_box.set_bottom(run_start + page_bottom);
+        current_table_box.set_top(i + page_bottom);
+        if (!current_table_box.null_box()) {
+          auto* seg = new ColSegment();
+          seg->InsertBox(current_table_box);
+          rit.add_after_then_move(seg);
+        }
+      }
+    }
+  }
+  delete[] table_region;
+}
+
+// Merge table regions corresponding to tables spanning multiple columns if
+// there is a colpartition (horizontal ruling line or normal text) that
+// touches both regions.
+// Works in place on table_grid_: absorbed regions are removed from the grid
+// and deleted, and a grown region is re-inserted with its new box.
+// TODO(faisal): A rare error occurs if there are two horizontally adjacent
+// tables with aligned ruling lines. In this case, line finder returns a
+// single line and hence the tables get merged together
+void TableFinder::GridMergeTableRegions() {
+ // Iterate the table regions in the grid.
+ GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
+ gsearch(&table_grid_);
+ gsearch.StartFullSearch();
+ ColSegment* seg = nullptr;
+ while ((seg = gsearch.NextFullSearch()) != nullptr) {
+ bool neighbor_found = false;
+ bool modified = false; // Modified at least once
+ // Repeat as long as a pass merged a neighbor through BelongToOneTable,
+ // since the grown region may now reach further regions.
+ do {
+ // Start a rectangle search x-bounded by the image and y by the table
+ // NOTE(review): box is bound by reference; if bounding_box() returns a
+ // reference to seg's own box, box reflects growth from InsertBox within
+ // this pass, while search_region is a copy fixed here - confirm.
+ const TBOX& box = seg->bounding_box();
+ TBOX search_region(box);
+ search_region.set_left(bleft().x());
+ search_region.set_right(tright().x());
+ neighbor_found = false;
+ GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
+ rectsearch(&table_grid_);
+ rectsearch.StartRectSearch(search_region);
+ ColSegment* neighbor = nullptr;
+ while ((neighbor = rectsearch.NextRectSearch()) != nullptr) {
+ if (neighbor == seg)
+ continue;
+ const TBOX& neighbor_box = neighbor->bounding_box();
+ // Check if a neighbor box has a large overlap with the table
+ // region. This may happen as a result of merging two table
+ // regions in the previous iteration.
+ // This case does not set neighbor_found, so on its own it does not
+ // trigger another pass of the do-loop.
+ if (neighbor_box.overlap_fraction(box) >= 0.9) {
+ seg->InsertBox(neighbor_box);
+ // Remove the neighbor from the grid, then resync the outer search
+ // because the grid contents changed under it, and only then free
+ // the neighbor.
+ rectsearch.RemoveBBox();
+ gsearch.RepositionIterator();
+ delete neighbor;
+ modified = true;
+ continue;
+ }
+ // Check if two table regions belong together based on a common
+ // horizontal ruling line
+ if (BelongToOneTable(box, neighbor_box)) {
+ seg->InsertBox(neighbor_box);
+ neighbor_found = true;
+ modified = true;
+ rectsearch.RemoveBBox();
+ gsearch.RepositionIterator();
+ delete neighbor;
+ }
+ }
+ } while (neighbor_found);
+ if (modified) {
+ // Because the box has changed, it has to be removed first.
+ gsearch.RemoveBBox();
+ table_grid_.InsertBBox(true, true, seg);
+ gsearch.RepositionIterator();
+ }
+ }
+}
+
+// Decide whether two table regions belong to one table. They do when the
+// boxes overlap, or when some non-image partition of clean_part_grid_
+// (a horizontal ruling line or any other colpartition) overlaps both boxes.
+bool TableFinder::BelongToOneTable(const TBOX &box1, const TBOX &box2) {
+  // The obvious case. Overlapping boxes should already have been merged,
+  // so this is unlikely to fire, but it is kept in case things change.
+  if (box1.overlap(box2))
+    return true;
+
+  // Look for a partition spanning both regions inside their union.
+  const TBOX union_box = box1.bounding_union(box2);
+  GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+      union_search(&clean_part_grid_);
+  union_search.StartRectSearch(union_box);
+  for (ColPartition* part = union_search.NextRectSearch(); part != nullptr;
+       part = union_search.NextRectSearch()) {
+    const TBOX& part_box = part->bounding_box();
+    const bool spans_both = part_box.overlap(box1) && part_box.overlap(box2);
+    if (spans_both && !part->IsImageType()) {
+      return true;
+    }
+  }
+  return false;
+}
+
+// Adjust table boundaries by:
+// - building a tight bounding box around all ColPartitions contained in it.
+// - expanding table boundaries to include all colpartitions that overlap the
+// table by more than half of their area
+// - expanding table boundaries to include nearby horizontal rule lines
+// - expanding table vertically to include left out column headers
+// Every table in table_grid_ is replaced by a new ColSegment holding the
+// grown box; tables whose grown box is null are dropped.
+// TODO(faisal): Expansion of table boundaries is quite aggressive. It usually
+// makes following errors:
+// 1- horizontal lines consisting of underlines are included in the table if
+// they are close enough
+// 2- horizontal lines originating from noise tend to get merged with a table
+// near the top of the page
+// 3- the criteria for including horizontal lines is very generous. Many times
+// horizontal lines separating headers and footers get merged with a
+// single-column table in a multi-column page thereby including text
+// from the neighboring column inside the table
+// 4- the criteria for including left out column headers also tends to
+// occasionally include text-lines above the tables, typically from
+// table caption
+void TableFinder::AdjustTableBoundaries() {
+ // Iterate the table regions in the grid
+ // adjusted_tables temporarily holds the grown tables until the search over
+ // table_grid_ has finished.
+ ColSegment_CLIST adjusted_tables;
+ ColSegment_C_IT it(&adjusted_tables);
+ ColSegmentGridSearch gsearch(&table_grid_);
+ gsearch.StartFullSearch();
+ ColSegment* table = nullptr;
+ while ((table = gsearch.NextFullSearch()) != nullptr) {
+ const TBOX& table_box = table->bounding_box();
+ // Start from the current box; GrowTableBox only ever enlarges grown_box.
+ TBOX grown_box = table_box;
+ GrowTableBox(table_box, &grown_box);
+ // To prevent a table from expanding again, do not insert the
+ // modified box back to the grid. Instead move it to a list
+ // and remove it from the grid. The list is moved later back to the grid.
+ if (!grown_box.null_box()) {
+ auto* col = new ColSegment();
+ col->InsertBox(grown_box);
+ it.add_after_then_move(col);
+ }
+ // The original segment is always removed and freed, whether or not a
+ // replacement was created. table_box refers into *table, so it must not
+ // be used after this point.
+ gsearch.RemoveBBox();
+ delete table;
+ }
+ // clear table grid to move final tables in it
+ // TODO(nbeato): table_grid_ should already be empty. The above loop
+ // removed everything. Maybe just assert it is empty?
+ table_grid_.Clear();
+ it.move_to_first();
+ // move back final tables to table_grid_
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ ColSegment* seg = it.extract();
+ table_grid_.InsertBBox(true, true, seg);
+ }
+}
+
+// Grows *result_box from table_box in three passes: partitions that
+// significantly overlap the table, horizontal lines that belong to it, and
+// column headers left out above it.
+void TableFinder::GrowTableBox(const TBOX& table_box, TBOX* result_box) {
+  // TODO(nbeato): The growing code is a bit excessive right now, so the
+  // search range is deliberately NOT padded by a vertical margin of
+  // kRulingVerticalMargin * gridsize() (clipped to bleft()/tright()).
+  // Without that padding, the partitions considered need to have some
+  // overlap with the table or be special cases. The padding could be added
+  // again once a check is put in place to make sure that growing tables
+  // don't stomp on a lot of non-table partitions.
+  const TBOX search_range = table_box;
+
+  GrowTableToIncludePartials(table_box, search_range, result_box);
+  GrowTableToIncludeLines(table_box, search_range, result_box);
+  IncludeLeftOutColumnHeaders(result_box);
+}
+
+// Expands *result_box to cover every non-image partition found inside
+// search_range whose overlap fraction with table_box exceeds
+// kMinOverlapWithTable.
+void TableFinder::GrowTableToIncludePartials(const TBOX& table_box,
+                                             const TBOX& search_range,
+                                             TBOX* result_box) {
+  // Rulings are kept in a different grid from the text, so both grids are
+  // scanned for partitions that are not entirely within the new box.
+  ColPartitionGrid* const grids[] = {&fragmented_text_grid_,
+                                     &leader_and_ruling_grid_};
+  for (ColPartitionGrid* grid : grids) {
+    ColPartitionGridSearch overlap_search(grid);
+    overlap_search.StartRectSearch(search_range);
+    for (ColPartition* candidate = overlap_search.NextRectSearch();
+         candidate != nullptr; candidate = overlap_search.NextRectSearch()) {
+      // Only text and table types may extend the table.
+      if (candidate->IsImageType()) {
+        continue;
+      }
+      // Absorb the partition when enough of it is covered by the table.
+      const TBOX& candidate_box = candidate->bounding_box();
+      if (candidate_box.overlap_fraction(table_box) > kMinOverlapWithTable) {
+        *result_box = result_box->bounding_union(candidate_box);
+      }
+    }
+  }
+}
+
+// Expands *result_box to the extents of the lines in search_range that
+// HLineBelongsToTable() accepts for table_box.
+void TableFinder::GrowTableToIncludeLines(const TBOX& table_box,
+                                          const TBOX& search_range,
+                                          TBOX* result_box) {
+  ColPartitionGridSearch line_search(&leader_and_ruling_grid_);
+  line_search.SetUniqueMode(true);
+  line_search.StartRectSearch(search_range);
+  for (ColPartition* line = line_search.NextRectSearch(); line != nullptr;
+       line = line_search.NextRectSearch()) {
+    // TODO(nbeato) This should also do vertical, but column
+    // boundaries are breaking things. This function needs to be
+    // updated to allow vertical lines as well.
+    if (!line->IsLineType()) {
+      continue;
+    }
+    const TBOX& line_box = line->bounding_box();
+    // A line already inside the result cannot grow it, so skip the more
+    // expensive membership test in that case. Otherwise a partially
+    // overlapping horizontal line is included only if the extra
+    // ColPartitions pulled in by the expansion have large side spacing
+    // w.r.t. the columns containing them.
+    if (!result_box->contains(line_box) &&
+        HLineBelongsToTable(*line, table_box)) {
+      *result_box = result_box->bounding_union(line_box);
+    }
+    // TODO(nbeato): Vertical
+  }
+}
+
+// Checks whether a horizontal line belongs to the table by looking at the
+// side spacing of the extra ColPartitions that would be included in the
+// table if it were expanded up to the line.
+bool TableFinder::HLineBelongsToTable(const ColPartition& part,
+                                      const TBOX& table_box) {
+  // Only horizontal lines that mostly share the table's x-extent qualify.
+  if (!part.IsHorizontalLine()) {
+    return false;
+  }
+  const TBOX& line_box = part.bounding_box();
+  if (!line_box.major_x_overlap(table_box)) {
+    return false;
+  }
+  // TODO(nbeato): The top-most horizontal line usually originates from noise
+  // and used to be rejected here with a part.nearest_neighbor_above() test.
+  // That test is disabled because the ruling grid doesn't have neighbors
+  // solved.
+
+  // In the "unioned table" box (the table extents expanded by the line),
+  // count how many partitions have significant padding to the left and to
+  // the right. If more than half of the partitions newly covered by the
+  // unioned table have significant spacing on one side, the line belongs to
+  // the table.
+  const TBOX unioned_box = line_box.bounding_union(table_box);
+  const int space_threshold = kSideSpaceMargin * part.median_height();
+  int candidate_count = 0;
+  int wide_right_count = 0;
+  int wide_left_count = 0;
+  // Rulings are in a different grid, so both grids are searched for the
+  // rulings, text, and table partitions introduced by the new box.
+  ColPartitionGrid* const grids[] = {&clean_part_grid_,
+                                     &leader_and_ruling_grid_};
+  for (ColPartitionGrid* grid : grids) {
+    ColPartitionGridSearch extra_search(grid);
+    extra_search.SetUniqueMode(true);
+    extra_search.StartRectSearch(unioned_box);
+    for (ColPartition* extra = extra_search.NextRectSearch();
+         extra != nullptr; extra = extra_search.NextRectSearch()) {
+      // Partitions already in the table are not "extra".
+      const TBOX& extra_box = extra->bounding_box();
+      if (extra_box.overlap_fraction(table_box) > kMinOverlapWithTable) {
+        continue;
+      }
+      // Non-text partitions do not contribute.
+      if (extra->IsImageType()) {
+        continue;
+      }
+      ++candidate_count;
+      // A table cell or a line is a strong hint, so it scores on both sides
+      // without looking at its spacing.
+      const bool strong_hint =
+          extra->type() == PT_TABLE || extra->IsLineType();
+      if (strong_hint || extra->space_to_right() > space_threshold) {
+        ++wide_right_count;
+      }
+      if (strong_hint || extra->space_to_left() > space_threshold) {
+        ++wide_left_count;
+      }
+    }
+  }
+  const int half_of_candidates = candidate_count / 2;
+  return wide_right_count > half_of_candidates ||
+         wide_left_count > half_of_candidates;
+}
+
+// Look for isolated column headers above the given table box and
+// include them in the table
+// Only the top of *table_box is ever modified; it is raised to the top of
+// each PT_TABLE or line partition accepted by the search.
+void TableFinder::IncludeLeftOutColumnHeaders(TBOX* table_box) {
+ // Start a search above the current table to look for column headers
+ ColPartitionGridSearch vsearch(&clean_part_grid_);
+ vsearch.StartVerticalSearch(table_box->left(), table_box->right(),
+ table_box->top());
+ ColPartition* neighbor = nullptr;
+ // The first plain text partition seen since the last table/line partition,
+ // or nullptr if none has been seen yet.
+ ColPartition* previous_neighbor = nullptr;
+ // NOTE(review): the 'false' argument presumably selects an upward search;
+ // confirm against GridSearch::NextVerticalSearch.
+ while ((neighbor = vsearch.NextVerticalSearch(false)) != nullptr) {
+ // Max distance to find a table heading.
+ const int max_distance = kMaxColumnHeaderDistance *
+ neighbor->median_height();
+ // Re-read on every iteration because the top may have been raised below.
+ int table_top = table_box->top();
+ const TBOX& box = neighbor->bounding_box();
+ // Do not continue if the next box is way above
+ if (box.bottom() - table_top > max_distance)
+ break;
+ // Unconditionally include partitions of type TABLE or LINE
+ // TODO(faisal): add some reasonable conditions here
+ if (neighbor->type() == PT_TABLE || neighbor->IsLineType()) {
+ table_box->set_top(box.top());
+ previous_neighbor = nullptr;
+ continue;
+ }
+ // If there are two text partitions, one above the other, without a table
+ // cell on their left or right side, consider them a barrier and quit
+ if (previous_neighbor == nullptr) {
+ previous_neighbor = neighbor;
+ } else {
+ // previous_neighbor is intentionally left unchanged here: later text
+ // partitions keep being compared against the first one remembered.
+ const TBOX& previous_box = previous_neighbor->bounding_box();
+ if (!box.major_y_overlap(previous_box))
+ break;
+ }
+ }
+}
+
+// Remove false alarms consisting of a single column based on their
+// projection on the x-axis. Projection of a real table on the x-axis
+// should have at least one zero-valley larger than the global median
+// x-height of the page.
+// Tables in table_grid_ that fail GapInXProjection() are removed from the
+// grid and deleted.
+void TableFinder::DeleteSingleColumnTables() {
+  const int page_left = bleft().x();
+  const int page_right = tright().x();
+  const int page_width = page_right - page_left;
+  ASSERT_HOST(page_width > 0);
+  // Integer array holding the projection on the x-axis; entry k counts the
+  // blobs covering page column page_left + k.
+  int* table_xprojection = new int[page_width];
+  // Iterate through all tables in the table grid
+  ColSegmentGridSearch table_search(&table_grid_);
+  table_search.StartFullSearch();
+  ColSegment* table;
+  while ((table = table_search.NextFullSearch()) != nullptr) {
+    TBOX table_box = table->bounding_box();
+    // reset the projection array
+    std::fill(table_xprojection, table_xprojection + page_width, 0);
+    // Start a rect search on table_box
+    ColPartitionGridSearch rectsearch(&clean_part_grid_);
+    rectsearch.SetUniqueMode(true);
+    rectsearch.StartRectSearch(table_box);
+    ColPartition* part;
+    while ((part = rectsearch.NextRectSearch()) != nullptr) {
+      if (!part->IsTextType())
+        continue;  // Do not consider non-text partitions
+      if (part->flow() == BTFT_LEADER)
+        continue;  // Assume leaders are in tables
+      TBOX part_box = part->bounding_box();
+      // Do not consider partitions partially covered by the table
+      if (part_box.overlap_fraction(table_box) < kMinOverlapWithTable)
+        continue;
+      BLOBNBOX_CLIST* part_boxes = part->boxes();
+      BLOBNBOX_C_IT pit(part_boxes);
+
+      // Make sure overlapping blobs don't artificially inflate the number
+      // of rows in the table. This happens frequently with things such as
+      // decimals and split characters. Do this by assuming the column
+      // partition is sorted mostly left to right and just clip
+      // bounding boxes by the previous box's extent.
+      int next_position_to_write = 0;
+
+      for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) {
+        BLOBNBOX* pblob = pit.data();
+        // ignore blob height for the purpose of projection since we
+        // are only interested in finding valleys
+        int xstart = pblob->bounding_box().left();
+        const int xend = pblob->bounding_box().right();
+
+        xstart = std::max(xstart, next_position_to_write);
+        // Clamp the written range to the page so that a blob sticking out
+        // past the grid limits can never index outside table_xprojection.
+        const int write_start = std::max(xstart, page_left);
+        const int write_end = std::min(xend, page_right);
+        for (int i = write_start; i < write_end; i++)
+          table_xprojection[i - page_left]++;
+        next_position_to_write = xend;
+      }
+    }
+    // Find largest valley between two reasonable peaks in the table
+    if (!GapInXProjection(table_xprojection, page_width)) {
+      table_search.RemoveBBox();
+      delete table;
+    }
+  }
+  delete[] table_xprojection;
+}
+
+// Returns true if the horizontal projection contains at least one gap (a
+// run of below-threshold columns between two above-threshold columns) that
+// is wider than kMaxXProjectionGapFactor * global_median_xheight_.
+// Note: xprojection is overwritten in place with its thresholded 0/1 form.
+bool TableFinder::GapInXProjection(int* xprojection, int length) {
+  // The histogram peak is the maximum number of horizontally overlapping
+  // colpartitions, which is taken as the number of rows in the table.
+  int peak_value = 0;
+  for (int pos = 0; pos < length; ++pos) {
+    if (peak_value < xprojection[pos]) {
+      peak_value = xprojection[pos];
+    }
+  }
+  if (peak_value < kMinRowsInTable) {
+    return false;
+  }
+  // Large tables use a different threshold factor from small ones.
+  double projection_threshold;
+  if (peak_value >= kLargeTableRowCount) {
+    projection_threshold = kLargeTableProjectionThreshold * peak_value;
+  } else {
+    projection_threshold = kSmallTableProjectionThreshold * peak_value;
+  }
+  // Binarize the histogram against the threshold.
+  for (int pos = 0; pos < length; ++pos) {
+    if (xprojection[pos] >= projection_threshold) {
+      xprojection[pos] = 1;
+    } else {
+      xprojection[pos] = 0;
+    }
+  }
+  // Find the longest run of zeros that is bounded by ones on both sides.
+  int largest_gap = 0;
+  int gap_start = -1;  // -1 while no bounded run of zeros is open.
+  for (int pos = 1; pos < length; ++pos) {
+    const bool prev_set = xprojection[pos - 1] != 0;
+    const bool cur_set = xprojection[pos] != 0;
+    if (prev_set && !cur_set) {
+      // A one followed by a zero opens a run.
+      gap_start = pos;
+    } else if (!prev_set && cur_set && gap_start != -1) {
+      // A zero followed by a one closes the open run.
+      const int gap = pos - gap_start;
+      if (largest_gap < gap) {
+        largest_gap = gap;
+      }
+      gap_start = -1;
+    }
+  }
+  return largest_gap > kMaxXProjectionGapFactor * global_median_xheight_;
+}
+
+// Given the location of a table "guess", try to overlay a cellular
+// grid in the location, adjusting the boundaries.
+// Each candidate in table_grid_ is passed to a TableRecognizer. Candidates
+// it accepts get the recognized bounding box and are re-inserted into
+// table_grid_; the others are deleted.
+// TODO(nbeato): Falsely introduces:
+// -headers/footers (not any worse, too much overlap destroys cells)
+// -page numbers (not worse, included because maximize margins)
+// -equations (nicely fit into a cellular grid, but more sparsely)
+// -figures (random text box, also sparse)
+// -small left-aligned text areas with overlapping positioned whitespace
+// (rejected before)
+// Overall, this just needs some more work.
+void TableFinder::RecognizeTables() {
+ // Debug window; stays nullptr unless graphics are enabled and
+ // textord_show_tables is set.
+ ScrollView* table_win = nullptr;
+#ifndef GRAPHICS_DISABLED
+ if (textord_show_tables) {
+ table_win = MakeWindow(0, 0, "Table Structure");
+ DisplayColPartitions(table_win, &fragmented_text_grid_,
+ ScrollView::BLUE, ScrollView::LIGHT_BLUE);
+ // table_grid_.DisplayBoxes(table_win);
+ }
+#endif
+
+ // Configure the recognizer with the line and text grids and with size
+ // limits derived from the page-wide median x-height and the grid height.
+ TableRecognizer recognizer;
+ recognizer.Init();
+ recognizer.set_line_grid(&leader_and_ruling_grid_);
+ recognizer.set_text_grid(&fragmented_text_grid_);
+ recognizer.set_max_text_height(global_median_xheight_ * 2.0);
+ recognizer.set_min_height(1.5 * gridheight());
+ // Loop over all of the tables and try to fit them.
+ // Store the good tables here.
+ ColSegment_CLIST good_tables;
+ ColSegment_C_IT good_it(&good_tables);
+
+ ColSegmentGridSearch gsearch(&table_grid_);
+ gsearch.StartFullSearch();
+ ColSegment* found_table = nullptr;
+ while ((found_table = gsearch.NextFullSearch()) != nullptr) {
+ // Every candidate is taken out of the grid, whether it is kept or not.
+ gsearch.RemoveBBox();
+
+ // The goal is to make the tables persistent in a list.
+ // When that happens, this will move into the search loop.
+ const TBOX& found_box = found_table->bounding_box();
+ StructuredTable* table_structure = recognizer.RecognizeTable(found_box);
+
+ // Process a table. Good tables are inserted into the grid again later on
+ // We can't change boxes in the grid while it is running a search.
+ if (table_structure != nullptr) {
+#ifndef GRAPHICS_DISABLED
+ if (textord_show_tables) {
+ table_structure->Display(table_win, ScrollView::LIME_GREEN);
+ }
+#endif
+ // Keep only the recognized bounding box; the structure itself is freed.
+ found_table->set_bounding_box(table_structure->bounding_box());
+ delete table_structure;
+ good_it.add_after_then_move(found_table);
+ } else {
+ delete found_table;
+ }
+ }
+ // TODO(nbeato): MERGE!! There is awesome info now available for merging.
+
+ // At this point, the grid is empty. We can safely insert the good tables
+ // back into grid.
+ for (good_it.mark_cycle_pt(); !good_it.cycled_list(); good_it.forward())
+ table_grid_.InsertBBox(true, true, good_it.extract());
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Draws the bounding box of every column segment in the list as an unfilled
+// rectangle of the given color, then refreshes the window.
+void TableFinder::DisplayColSegments(ScrollView* win,
+                                     ColSegment_LIST *segments,
+                                     ScrollView::Color color) {
+  win->Pen(color);
+  win->Brush(ScrollView::NONE);
+  ColSegment_IT seg_it(segments);
+  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
+    const TBOX& outline = seg_it.data()->bounding_box();
+    win->Rectangle(outline.left(), outline.bottom(),
+                   outline.right(), outline.top());
+  }
+  win->UpdateWindow();
+}
+
+// Outlines every ColPartition of the grid on an existing window, using
+// table_color for partitions of type PT_TABLE and default_color for all
+// others.
+// Note: This method is only for debug purpose during development and
+// would not be part of checked in code
+void TableFinder::DisplayColPartitions(ScrollView* win,
+                                       ColPartitionGrid* grid,
+                                       ScrollView::Color default_color,
+                                       ScrollView::Color table_color) {
+  ColPartitionGridSearch part_search(grid);
+  part_search.StartFullSearch();
+  for (ColPartition* part = part_search.NextFullSearch(); part != nullptr;
+       part = part_search.NextFullSearch()) {
+    const ScrollView::Color outline_color =
+        (part->type() == PT_TABLE) ? table_color : default_color;
+    const TBOX& outline = part->bounding_box();
+    win->Brush(ScrollView::NONE);
+    win->Pen(outline_color);
+    win->Rectangle(outline.left(), outline.bottom(),
+                   outline.right(), outline.top());
+  }
+  win->UpdateWindow();
+}
+
+// Convenience overload: displays the colpartitions with table partitions
+// drawn in ScrollView::YELLOW and everything else in default_color.
+void TableFinder::DisplayColPartitions(ScrollView* win,
+ ColPartitionGrid* grid,
+ ScrollView::Color default_color) {
+ DisplayColPartitions(win, grid, default_color, ScrollView::YELLOW);
+}
+
+// For every ColPartition in the grid, draws a line from the center of its
+// bounding box to the center of its nearest neighbor above and of its
+// nearest neighbor below (when they exist), then refreshes the window.
+void TableFinder::DisplayColPartitionConnections(
+    ScrollView* win,
+    ColPartitionGrid* grid,
+    ScrollView::Color color) {
+  // Draws one line between the centers of the two boxes.
+  auto connect_centers = [win, color](const TBOX& from_box,
+                                      const TBOX& to_box) {
+    const int from_x = (from_box.left() + from_box.right()) / 2;
+    const int from_y = (from_box.top() + from_box.bottom()) / 2;
+    const int to_x = (to_box.left() + to_box.right()) / 2;
+    const int to_y = (to_box.top() + to_box.bottom()) / 2;
+    win->Brush(ScrollView::NONE);
+    win->Pen(color);
+    win->Line(from_x, from_y, to_x, to_y);
+  };
+
+  ColPartitionGridSearch part_search(grid);
+  part_search.StartFullSearch();
+  for (ColPartition* part = part_search.NextFullSearch(); part != nullptr;
+       part = part_search.NextFullSearch()) {
+    const TBOX& part_box = part->bounding_box();
+    if (ColPartition* above = part->nearest_neighbor_above()) {
+      connect_centers(part_box, above->bounding_box());
+    }
+    if (ColPartition* below = part->nearest_neighbor_below()) {
+      connect_centers(part_box, below->bounding_box());
+    }
+  }
+  win->UpdateWindow();
+}
+
+#endif
+
+// Merge all colpartitions in table regions to make them a single
+// colpartition and revert types of isolated table cells not
+// assigned to any table to their original types.
+// grid is searched and modified in place; all_columns is indexed by grid y
+// coordinate; width_cb is forwarded to ColPartition::Absorb.
+void TableFinder::MakeTableBlocks(ColPartitionGrid* grid,
+ ColPartitionSet** all_columns,
+ WidthCallback width_cb) {
+ // Since we have table blocks already, remove table tags from all
+ // colpartitions
+ GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+ gsearch(grid);
+ gsearch.StartFullSearch();
+ ColPartition* part = nullptr;
+
+ while ((part = gsearch.NextFullSearch()) != nullptr) {
+ if (part->type() == PT_TABLE) {
+ part->clear_table_type();
+ }
+ }
+ // Now make a single colpartition out of each table block and remove
+ // all colpartitions contained within a table
+ GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT>
+ table_search(&table_grid_);
+ table_search.StartFullSearch();
+ ColSegment* table;
+ while ((table = table_search.NextFullSearch()) != nullptr) {
+ const TBOX& table_box = table->bounding_box();
+ // Start a rect search on table_box
+ GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT>
+ rectsearch(grid);
+ rectsearch.StartRectSearch(table_box);
+ // Note: this 'part' shadows the function-level 'part' declared above.
+ ColPartition* part;
+ // The first qualifying partition found; later ones are absorbed into it.
+ ColPartition* table_partition = nullptr;
+ while ((part = rectsearch.NextRectSearch()) != nullptr) {
+ // Do not consider image partitions
+ if (!part->IsTextType())
+ continue;
+ TBOX part_box = part->bounding_box();
+ // Include partition in the table if more than half of it
+ // is covered by the table
+ if (part_box.overlap_fraction(table_box) > kMinOverlapWithTable) {
+ // Take the partition out of the grid before merging it.
+ rectsearch.RemoveBBox();
+ if (table_partition) {
+ // NOTE(review): Absorb presumably takes ownership of 'part'; confirm
+ // against ColPartition::Absorb.
+ table_partition->Absorb(part, width_cb);
+ } else {
+ table_partition = part;
+ }
+ }
+ }
+ // Insert table colpartition back to part_grid_
+ if (table_partition) {
+ // To match the columns used when transforming to blocks, the new table
+ // partition must have its first and last column set at the grid y that
+ // corresponds to its bottom.
+ // Note: this 'table_box' shadows the loop-level one and is the box of the
+ // merged partition, not of the ColSegment.
+ const TBOX& table_box = table_partition->bounding_box();
+ int grid_x, grid_y;
+ grid->GridCoords(table_box.left(), table_box.bottom(), &grid_x, &grid_y);
+ table_partition->SetPartitionType(resolution_, all_columns[grid_y]);
+ table_partition->set_table_type();
+ table_partition->set_blob_type(BRT_TEXT);
+ table_partition->set_flow(BTFT_CHAIN);
+ table_partition->SetBlobTypes();
+ grid->InsertBBox(true, true, table_partition);
+ }
+ }
+}
+
+//////// ColSegment code
+////////
+// Creates a segment with zero table and text cell counts and type
+// COL_UNKNOWN. bounding_box_ is default-constructed.
+ColSegment::ColSegment()
+ : ELIST_LINK(),
+ num_table_cells_(0),
+ num_text_cells_(0),
+ type_(COL_UNKNOWN) {
+}
+
+// Provides a color for BBGrid to draw the rectangle. The color is selected
+// by the segment's current type_.
+ScrollView::Color ColSegment::BoxColor() const {
+  // One entry per ColSegType value, in enum order. The table is sized by
+  // COL_COUNT so that it stays in step with the enum that indexes it.
+  static const ScrollView::Color kBoxColors[COL_COUNT] = {
+    ScrollView::YELLOW,   // COL_UNKNOWN
+    ScrollView::BLUE,     // COL_TEXT
+    ScrollView::YELLOW,   // COL_TABLE
+    ScrollView::MAGENTA,  // COL_MIXED
+  };
+  return kBoxColors[type_];
+}
+
+// Insert a box into this column segment
+// The segment's bounding box becomes the bounding union of its current box
+// and other.
+void ColSegment::InsertBox(const TBOX& other) {
+ bounding_box_ = bounding_box_.bounding_union(other);
+}
+
+// Classifies the segment from its cell counts:
+//   COL_TABLE when table cells exceed kTableColumnThreshold * text cells,
+//   COL_TEXT  when (otherwise) text cells outnumber table cells,
+//   COL_MIXED in every remaining case.
+void ColSegment::set_type() {
+  if (num_table_cells_ > kTableColumnThreshold * num_text_cells_) {
+    type_ = COL_TABLE;
+    return;
+  }
+  type_ = (num_text_cells_ > num_table_cells_) ? COL_TEXT : COL_MIXED;
+}
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/tablefind.h b/tesseract/src/textord/tablefind.h
new file mode 100644
index 00000000..dc6ff932
--- /dev/null
+++ b/tesseract/src/textord/tablefind.h
@@ -0,0 +1,427 @@
+///////////////////////////////////////////////////////////////////////
+// File: tablefind.h
+// Description: Helper classes to find tables from ColPartitions.
+// Author: Faisal Shafait (faisal.shafait@dfki.de)
+//
+// (C) Copyright 2009, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_TABLEFIND_H_
+#define TESSERACT_TEXTORD_TABLEFIND_H_
+
+#include "colpartitiongrid.h"
+#include "elst.h"
+#include "rect.h"
+
+namespace tesseract {
+
+// Possible types for a column segment.
+// The values are used as array indices (see ColSegment::BoxColor), so their
+// order matters.
+enum ColSegType {
+ // Initial type of a newly constructed ColSegment.
+ COL_UNKNOWN,
+ // Text cells outnumber table cells (see ColSegment::set_type).
+ COL_TEXT,
+ // Table cells dominate the segment (see ColSegment::set_type).
+ COL_TABLE,
+ // Neither of the above applies.
+ COL_MIXED,
+ // Number of segment types above; must remain the last enumerator.
+ COL_COUNT
+};
+
+class ColPartitionSet;
+
+// ColSegment holds rectangular blocks that represent segmentation of a page
+// into regions containing single column text/table.
+class ColSegment;
+ELISTIZEH(ColSegment)
+CLISTIZEH(ColSegment)
+
+class ColSegment : public ELIST_LINK {
+ public:
+  ColSegment();
+  ~ColSegment() = default;
+
+  // Read access to the rectangle covered by this segment.
+  const TBOX& bounding_box() const { return bounding_box_; }
+
+  // Mutators for the individual edges of the bounding box.
+  void set_top(int y) { bounding_box_.set_top(y); }
+  void set_bottom(int y) { bounding_box_.set_bottom(y); }
+  void set_left(int x) { bounding_box_.set_left(x); }
+  void set_right(int x) { bounding_box_.set_right(x); }
+
+  // Replaces the whole bounding box.
+  void set_bounding_box(const TBOX& other) { bounding_box_ = other; }
+
+  // Number of table colpartitions covered by the bounding_box_.
+  int get_num_table_cells() const { return num_table_cells_; }
+  void set_num_table_cells(int n) { num_table_cells_ = n; }
+
+  // Number of text colpartitions covered by the bounding_box_.
+  int get_num_text_cells() const { return num_text_cells_; }
+  void set_num_text_cells(int n) { num_text_cells_ = n; }
+
+  // Current classification of the segment.
+  ColSegType type() const { return type_; }
+
+  // Sets the type of the block based on the ratio of table to text
+  // colpartitions covered by it.
+  void set_type();
+
+  // Provides a color for BBGrid to draw the rectangle.
+  ScrollView::Color BoxColor() const;
+
+  // Grows bounding_box_ to also cover the given rectangle.
+  void InsertBox(const TBOX& other);
+
+ private:
+  TBOX bounding_box_;    // Rectangle covered by the segment.
+  int num_table_cells_;  // Table colpartitions inside bounding_box_.
+  int num_text_cells_;   // Text colpartitions inside bounding_box_.
+  ColSegType type_;      // Classification assigned by set_type().
+};
+
+// Aliases for a BBGrid that holds ColSegments and for the GridSearch that
+// iterates over such a grid.
+using ColSegmentGrid = BBGrid<ColSegment,
+ ColSegment_CLIST,
+ ColSegment_C_IT>;
+using ColSegmentGridSearch = GridSearch<ColSegment,
+ ColSegment_CLIST,
+ ColSegment_C_IT>;
+
+// TableFinder is a utility class to find a set of tables given a set of
+// ColPartitions and Columns. The TableFinder will mark candidate ColPartitions
+// based on research in "Table Detection in Heterogeneous Documents".
+// Usage flow is as follows:
+// TableFinder finder;
+// finder.InsertCleanPartitions(/* grid info */)
+// finder.LocateTables(/* ColPartitions and Columns */);
+// finder.Update TODO(nbeato)
+class TESS_API TableFinder {
+ public:
+ // Constructor is simple initializations
+ TableFinder();
+ ~TableFinder();
+
+ // Set the resolution of the connected components in ppi.
+ void set_resolution(int resolution) {
+ resolution_ = resolution;
+ }
+ // Change the reading order. Initially it is left to right.
+ void set_left_to_right_language(bool order);
+
+ // Initialize
+ void Init(int grid_size, const ICOORD& bottom_left, const ICOORD& top_right);
+
+ // Copy cleaned partitions from ColumnFinder's part_grid_ to this
+ // clean_part_grid_ and insert dot-like noise into period_grid_.
+ // It resizes the grids in this object to the dimensions of grid.
+ void InsertCleanPartitions(ColPartitionGrid* grid, TO_BLOCK* block);
+
+ // High level function to perform table detection
+ // Finds tables and updates the grid object with new partitions for the
+ // tables. The columns and width callbacks are used to merge tables.
+ // The reskew argument is only used to write the tables to the out.png
+ // if that feature is enabled.
+ void LocateTables(ColPartitionGrid* grid,
+ ColPartitionSet** columns,
+ WidthCallback width_cb,
+ const FCOORD& reskew);
+
+ protected:
+ // Access for the grid dimensions.
+ // The results will not be correct until InsertCleanPartitions
+ // has been called. The values are taken from the grid passed as an argument
+ // to that function.
+ int gridsize() const;
+ int gridwidth() const;
+ int gridheight() const;
+ const ICOORD& bleft() const;
+ const ICOORD& tright() const;
+
+ // Makes a window for debugging, see BBGrid
+ ScrollView* MakeWindow(int x, int y, const char* window_name);
+
+ //////// Functions to insert objects from the grid into the table finder.
+ //////// In all cases, ownership is transferred to the table finder.
+ // Inserts text into the table finder.
+ void InsertTextPartition(ColPartition* part);
+ void InsertFragmentedTextPartition(ColPartition* part);
+ void InsertLeaderPartition(ColPartition* part);
+ void InsertRulingPartition(ColPartition* part);
+ void InsertImagePartition(ColPartition* part);
+ void SplitAndInsertFragmentedTextPartition(ColPartition* part);
+ bool AllowTextPartition(const ColPartition& part) const;
+ bool AllowBlob(const BLOBNBOX& blob) const;
+
+ //////// Functions that manipulate ColPartitions in the part_grid_ /////
+ //////// to find tables.
+ ////////
+
+ // Utility function to move segments to col_seg_grid
+ // Note: Move includes ownership,
+ // so segments will be owned by col_seg_grid
+ void MoveColSegmentsToGrid(ColSegment_LIST* segments,
+ ColSegmentGrid* col_seg_grid);
+
+ //////// Set up code to run during table detection to correctly
+ //////// initialize variables on column partitions that are used later.
+ ////////
+
+ // Initialize the grid and partitions
+ void InitializePartitions(ColPartitionSet** all_columns);
+
+ // Set left, right and top, bottom spacings of each colpartition.
+ // Left/right spacings are w.r.t the column boundaries
+ // Top/bottom spacings are w.r.t. previous and next colpartitions
+ static void SetPartitionSpacings(ColPartitionGrid* grid,
+ ColPartitionSet** all_columns);
+
+ // Set spacing and closest neighbors above and below a given colpartition.
+ void SetVerticalSpacing(ColPartition* part);
+
+ // Set global spacing estimates. This function is dependent on the
+ // partition spacings. So make sure SetPartitionSpacings is called
+ // on the same grid before this.
+ void SetGlobalSpacings(ColPartitionGrid* grid);
+ // Access to the global median xheight. The xheight is the height
+ // of a lowercase 'x' character on the page. This can be viewed as the
+ // average height of a lowercase letter in a textline. As a result
+ // it is used to make assumptions about spacing between words and
+ // table cells.
+ void set_global_median_xheight(int xheight);
+ // Access to the global median blob width. The width is useful
+ // when deciding if a partition is noise.
+ void set_global_median_blob_width(int width);
+ // Access to the global median leading ("ledding"). The leading is the distance between
+ // two adjacent text lines. This value can be used to get a rough estimate
+ // for the amount of space between two lines of text. As a result, it
+ // is used to calculate appropriate spacing between adjacent rows of text.
+ void set_global_median_ledding(int ledding);
+
+ // Updates the nearest neighbors for each ColPartition in clean_part_grid_.
+ // The neighbors are most likely SingletonPartner calls after the neighbors
+ // are assigned. This is here until it is decided to remove the
+ // nearest_neighbor code in ColPartition
+ void FindNeighbors();
+
+ //////// Functions to mark candidate column partitions as tables.
+ //////// Tables are marked as described in
+ //////// Table Detection in Heterogeneous Documents (2010, Shafait & Smith)
+ ////////
+
+ // High level function to mark partitions as table rows/cells.
+ // When this function is done, the column partitions in clean_part_grid_
+ // should mostly be marked as tables.
+ void MarkTablePartitions();
+ // Marks partitions given a local view of a single partition
+ void MarkPartitionsUsingLocalInformation();
+ /////// Heuristics for local marking
+ // Check if the partition has at least one large gap between words or no
+ // significant gap at all
+ // TODO(nbeato): Make const, prevented because blobnbox array access
+ bool HasWideOrNoInterWordGap(ColPartition* part) const;
+ // Checks if a partition is adjacent to leaders on the page
+ bool HasLeaderAdjacent(const ColPartition& part);
+ // Filter individual text partitions marked as table partitions
+ // consisting of paragraph endings, small section headings, and
+ // headers and footers.
+ void FilterFalseAlarms();
+ void FilterParagraphEndings();
+ void FilterHeaderAndFooter();
+ // Mark all ColPartitions as table cells that have a table cell above
+ // and below them
+ void SmoothTablePartitionRuns();
+
+ //////// Functions to create bounding boxes (ColSegment) objects for
+ //////// the columns on the page. The columns are not necessarily
+ //////// vertical lines, meaning if tab stops strongly suggest that
+ //////// a column changes horizontal position, as in the case below,
+ //////// The ColSegment objects will respect that after processing.
+ ////////
+ //////// _____________
+ //////// Ex. | | |
+ //////// |_____|______| 5 boxes: 2 on this line
+ //////// | | | | 3 on this line
+ //////// |___|____|___|
+ ////////
+
+ // Get Column segments from best_columns_
+ void GetColumnBlocks(ColPartitionSet** columns,
+ ColSegment_LIST *col_segments);
+
+ // Group Column segments into consecutive single column regions.
+ void GroupColumnBlocks(ColSegment_LIST *current_segments,
+ ColSegment_LIST *col_segments);
+
+ // Check if two boxes are consecutive within the same column
+ bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2);
+
+ // Set the ratio of candidate table partitions in each column
+ void SetColumnsType(ColSegment_LIST* col_segments);
+
+ // Merge Column Blocks that were split due to the presence of a table
+ void GridMergeColumnBlocks();
+
+ //////// Functions to turn marked ColPartitions into candidate tables
+ //////// using a modified T-Recs++ algorithm described in
+ //////// Applying The T-Recs Table Recognition System
+ //////// To The Business Letter Domain (2001, Kieninger & Dengel)
+ ////////
+
+ // Merge partition cells into table columns
+ // Differs from paper by just looking at marked table partitions
+ // instead of similarity metric.
+ // Modified section 4.1 of paper.
+ void GetTableColumns(ColSegment_LIST *table_columns);
+
+ // Finds regions within a column that potentially contain a table.
+ // Ie, the table columns from GetTableColumns are turned into boxes
+ // that span the entire page column (using ColumnBlocks found in
+ // earlier functions) in the x direction and the min/max extent of
+ // overlapping table columns in the y direction.
+ // Section 4.2 of paper.
+ void GetTableRegions(ColSegment_LIST *table_columns,
+ ColSegment_LIST *table_regions);
+
+
+ //////// Functions to "patch up" found tables
+ ////////
+
+ // Merge table regions corresponding to tables spanning multiple columns
+ void GridMergeTableRegions();
+ bool BelongToOneTable(const TBOX &box1, const TBOX &box2);
+
+ // Adjust table boundaries by building a tight bounding box around all
+ // ColPartitions contained in it.
+ void AdjustTableBoundaries();
+
+ // Grows a table to include partitions that are partially covered
+ // by the table. This includes lines and text. It does not include
+ // noise or images.
+ // On entry, result_box is the minimum size of the result. The results of the
+ // function will union the actual result with result_box.
+ void GrowTableBox(const TBOX& table_box, TBOX* result_box);
+ // Grow a table by increasing the size of the box to include
+ // partitions with significant overlap with the table.
+ void GrowTableToIncludePartials(const TBOX& table_box,
+ const TBOX& search_range,
+ TBOX* result_box);
+ // Grow a table by expanding to the extents of significantly
+ // overlapping lines.
+ void GrowTableToIncludeLines(const TBOX& table_box, const TBOX& search_range,
+ TBOX* result_box);
+ // Checks whether the horizontal line belongs to the table by looking at the
+ // side spacing of extra ColPartitions that will be included in the table
+ // due to expansion
+ bool HLineBelongsToTable(const ColPartition& part, const TBOX& table_box);
+
+ // Look for isolated column headers above the given table box and
+ // include them in the table
+ void IncludeLeftOutColumnHeaders(TBOX* table_box);
+
+ // Remove false alarms consisting of a single column
+ void DeleteSingleColumnTables();
+
+ // Return true if at least one gap larger than the global x-height
+ // exists in the horizontal projection
+ bool GapInXProjection(int* xprojection, int length);
+
+ //////// Recognize the tables.
+ ////////
+ // This function will run the table recognizer and try to find better
+ // bounding boxes. The structures of the tables never leave this function
+ // right now. It just tries to prune and merge tables based on info it
+ // has available.
+ void RecognizeTables();
+
+ //////// Debugging functions. Render different structures to GUI
+ //////// for visual debugging / intuition.
+ ////////
+
+ // Displays Colpartitions marked as table row. Overlays them on top of
+ // part_grid_.
+ void DisplayColSegments(ScrollView* win, ColSegment_LIST *cols,
+ ScrollView::Color color);
+
+ // Displays the colpartitions using a new coloring on an existing window.
+ // Note: This method is only for debug purpose during development and
+ // would not be part of checked in code
+ void DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid,
+ ScrollView::Color text_color,
+ ScrollView::Color table_color);
+ void DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid,
+ ScrollView::Color default_color);
+ void DisplayColPartitionConnections(ScrollView* win,
+ ColPartitionGrid* grid,
+ ScrollView::Color default_color);
+
+ // Merge all colpartitions in table regions to make them a single
+ // colpartition and revert types of isolated table cells not
+ // assigned to any table to their original types.
+ void MakeTableBlocks(ColPartitionGrid* grid,
+ ColPartitionSet** columns,
+ WidthCallback width_cb);
+
+ /////////////////////////////////////////////////
+ // Useful objects used during table find process.
+ /////////////////////////////////////////////////
+ // Resolution of the connected components in ppi.
+ int resolution_;
+ // Estimate of median x-height over the page
+ int global_median_xheight_;
+ // Estimate of the median blob width on the page
+ int global_median_blob_width_;
+ // Estimate of median leading on the page
+ int global_median_ledding_;
+ // Grid to hold cleaned colpartitions after removing all
+ // colpartitions that consist of only noise blobs, and removing
+ // noise blobs from remaining colpartitions.
+ ColPartitionGrid clean_part_grid_;
+ // Grid contains the leaders and ruling lines.
+ ColPartitionGrid leader_and_ruling_grid_;
+ // Grid contains the broken down column partitions. It can be thought
+ // of as a "word" grid. However, it usually doesn't break apart text lines.
+ // It does break apart table data (most of the time).
+ ColPartitionGrid fragmented_text_grid_;
+ // Grid of page column blocks
+ ColSegmentGrid col_seg_grid_;
+ // Grid of detected tables
+ ColSegmentGrid table_grid_;
+ // The reading order of text. Defaults to true, for languages such as English.
+ bool left_to_right_language_;
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_TABLEFIND_H_
diff --git a/tesseract/src/textord/tablerecog.cpp b/tesseract/src/textord/tablerecog.cpp
new file mode 100644
index 00000000..af565891
--- /dev/null
+++ b/tesseract/src/textord/tablerecog.cpp
@@ -0,0 +1,1067 @@
+///////////////////////////////////////////////////////////////////////
+// File: tablerecog.cpp
+// Description: Helper class to help structure table areas. Given a bounding
+// box from TableFinder, the TableRecognizer should give a
+// StructuredTable (maybe a list in the future) of "good" tables
+// in that area.
+// Author: Nicholas Beato
+// Created: Friday, Aug. 20, 2010
+//
+// (C) Copyright 2009, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "tablerecog.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+// Horizontal gap required between ColPartitions in two columns of a
+// non-lined table, expressed as a multiple of the partition's median width.
+const double kHorizontalSpacing = 0.30;
+// Vertical gap required between ColPartitions in two rows of a non-lined
+// table, expressed as a multiple of the partition height. The value is
+// negative: FindWhitespacedRows adds half of (height * factor) to each side
+// of a partition, so a negative factor shrinks the partition vertically.
+const double kVerticalSpacing = -0.2;
+// Number of cells that a grid line may intersect.
+// See FindCellSplitLocations for explanation.
+const int kCellSplitRowThreshold = 0;
+const int kCellSplitColumnThreshold = 0;
+// Number of lines required for a "lined table". Currently a guess.
+const int kLinedTableMinVerticalLines = 3;
+const int kLinedTableMinHorizontalLines = 3;
+// Number of columns required, as a fraction of the most columns found.
+// None of these are tweaked at all.
+const double kRequiredColumns = 0.7;
+// Tolerance used when comparing margins of potential tables.
+const double kMarginFactor = 1.1;
+// The first and last row should be consistent cell height.
+// This factor is the first and last row cell height max.
+const double kMaxRowSize = 2.5;
+// Number of filled columns required to form a strong table row.
+// For small tables, this is an absolute number.
+const double kGoodRowNumberOfColumnsSmall[] = { 2, 2, 2, 2, 2, 3, 3 };
+// Largest valid index into kGoodRowNumberOfColumnsSmall (element count - 1).
+const int kGoodRowNumberOfColumnsSmallSize =
+    sizeof(kGoodRowNumberOfColumnsSmall) /
+        sizeof(kGoodRowNumberOfColumnsSmall[0]) - 1;
+// For large tables, it is a relative number
+const double kGoodRowNumberOfColumnsLarge = 0.7;
+// Fraction of a cell's area that ColPartitions must cover for the cell to
+// be considered "filled".
+const double kMinFilledArea = 0.35;
+
+////////
+//////// StructuredTable Class
+////////
+
+// Creates an empty table: no grids attached, not lined, all margins and
+// medians zero, and the text-height limit set to INT32_MAX.
+StructuredTable::StructuredTable()
+    : text_grid_(nullptr), line_grid_(nullptr), is_lined_(false),
+      space_above_(0), space_below_(0), space_left_(0), space_right_(0),
+      median_cell_height_(0), median_cell_width_(0),
+      max_text_height_(INT32_MAX) {}
+
+// Currently does nothing.
+void StructuredTable::Init() {}
+
+// Stores the grid that is searched for text partitions.
+void StructuredTable::set_text_grid(ColPartitionGrid* text_grid) {
+  text_grid_ = text_grid;
+}
+
+// Stores the grid that is searched for ruling lines.
+void StructuredTable::set_line_grid(ColPartitionGrid* line_grid) {
+  line_grid_ = line_grid;
+}
+
+// Stores the height above which FindWhitespacedRows ignores a partition.
+void StructuredTable::set_max_text_height(int height) {
+  max_text_height_ = height;
+}
+// True when the last FindLinedStructure call verified a lined table.
+bool StructuredTable::is_lined() const {
+  return is_lined_;
+}
+
+// N row boundaries delimit N - 1 rows; no boundaries means no rows.
+int StructuredTable::row_count() const {
+  if (cell_y_.size() == 0)
+    return 0;
+  return cell_y_.size() - 1;
+}
+
+// N column boundaries delimit N - 1 columns; no boundaries means no columns.
+int StructuredTable::column_count() const {
+  if (cell_x_.size() == 0)
+    return 0;
+  return cell_x_.size() - 1;
+}
+
+// Total number of cells in the grid (rows times columns).
+int StructuredTable::cell_count() const {
+  const int rows = row_count();
+  const int columns = column_count();
+  return rows * columns;
+}
+// Replaces the box in which the table structure is searched for.
+void StructuredTable::set_bounding_box(const TBOX& box) {
+  bounding_box_ = box;
+}
+
+// Current bounding box of the table.
+const TBOX& StructuredTable::bounding_box() const {
+  return bounding_box_;
+}
+
+// Median cell height as computed by CalculateStats.
+int StructuredTable::median_cell_height() {
+  return median_cell_height_;
+}
+
+// Median cell width as computed by CalculateStats.
+int StructuredTable::median_cell_width() {
+  return median_cell_width_;
+}
+
+// Distance between the two boundaries that delimit the given row.
+int StructuredTable::row_height(int row) const {
+  ASSERT_HOST(0 <= row && row < row_count());
+  const int lower = cell_y_[row];
+  const int upper = cell_y_[row + 1];
+  return upper - lower;
+}
+
+// Distance between the two boundaries that delimit the given column.
+int StructuredTable::column_width(int column) const {
+  ASSERT_HOST(0 <= column && column < column_count());
+  const int near_side = cell_x_[column];
+  const int far_side = cell_x_[column + 1];
+  return far_side - near_side;
+}
+
+// Margin above the table as computed by CalculateMargins.
+int StructuredTable::space_above() const {
+  return space_above_;
+}
+
+// Margin below the table as computed by CalculateMargins.
+int StructuredTable::space_below() const {
+  return space_below_;
+}
+
+// Builds the cell grid of a lined table from the ruling lines found inside
+// bounding_box_.
+// At this point, we know that the lines are contained
+// by the box (by FindLinesBoundingBox).
+// So try to find the cell structure and make sure it works out.
+// The assumption is that all lines span the table. If this
+// assumption fails, the VerifyLinedTable method will
+// abort the lined table. The TableRecognizer will fall
+// back on FindWhitespacedStructure.
+// Returns true (and sets is_lined_) only when enough distinct lines exist
+// and VerifyLinedTableCells accepts the resulting grid.
+bool StructuredTable::FindLinedStructure() {
+  ClearStructure();
+
+  // Search for all of the lines in the current box.
+  // Update the cellular structure with the exact lines.
+  ColPartitionGridSearch box_search(line_grid_);
+  box_search.SetUniqueMode(true);
+  box_search.StartRectSearch(bounding_box_);
+  ColPartition* line = nullptr;
+
+  while ((line = box_search.NextRectSearch()) != nullptr) {
+    if (line->IsHorizontalLine())
+      cell_y_.push_back(line->MidY());
+    if (line->IsVerticalLine())
+      cell_x_.push_back(line->MidX());
+  }
+
+  // HasSignificantLines should guarantee cells.
+  // Because that code is a different class, just gracefully
+  // return false. This could be an assert.
+  if (cell_x_.size() < 3 || cell_y_.size() < 3)
+    return false;
+
+  cell_x_.sort();
+  cell_y_.sort();
+
+  // Remove duplicates that may have occurred due to split lines.
+  cell_x_.compact_sorted();
+  cell_y_.compact_sorted();
+
+  // The border should be the extents of line boxes, not middle.
+  cell_x_[0] = bounding_box_.left();
+  cell_x_[cell_x_.size() - 1] = bounding_box_.right();
+  cell_y_[0] = bounding_box_.bottom();
+  cell_y_[cell_y_.size() - 1] = bounding_box_.top();
+
+  // Remove duplicates that may have occurred due to moving the borders.
+  cell_x_.compact_sorted();
+  cell_y_.compact_sorted();
+
+  // The size check above ran before de-duplication. Split lines that share
+  // a midpoint can collapse a list to a single boundary, which would trip
+  // the size assertion in VerifyLinedTableCells. Reject such a degenerate
+  // grid gracefully instead of aborting.
+  if (cell_x_.size() < 2 || cell_y_.size() < 2)
+    return false;
+
+  CalculateMargins();
+  CalculateStats();
+  is_lined_ = VerifyLinedTableCells();
+  return is_lined_;
+}
+
+// Finds the cellular structure inside the current bounding box using only
+// whitespace. On success the bounding box is snapped to the outermost cell
+// boundaries, nearby lines are absorbed, and margins/stats are refreshed.
+// Returns false if the structure does not pass VerifyWhitespacedTable.
+bool StructuredTable::FindWhitespacedStructure() {
+  ClearStructure();
+  FindWhitespacedColumns();
+  FindWhitespacedRows();
+
+  if (!VerifyWhitespacedTable())
+    return false;
+
+  const int last_x = cell_x_.size() - 1;
+  const int last_y = cell_y_.size() - 1;
+  bounding_box_.set_left(cell_x_[0]);
+  bounding_box_.set_right(cell_x_[last_x]);
+  bounding_box_.set_bottom(cell_y_[0]);
+  bounding_box_.set_top(cell_y_[last_y]);
+  AbsorbNearbyLines();
+  CalculateMargins();
+  CalculateStats();
+  return true;
+}
+
+// Tests if a partition fits inside the table structure.
+// A partition intersects a grid line only when the line lies strictly
+// between its two sides; a line that the partition "just" touches does not
+// count. This is mainly because the assumption throughout the code is that
+// "0" distance is a very very small space.
+bool StructuredTable::DoesPartitionFit(const ColPartition& part) const {
+  const TBOX& box = part.bounding_box();
+  // Any column boundary strictly inside the box horizontally?
+  for (int col = 0; col < cell_x_.size(); ++col) {
+    const int x = cell_x_[col];
+    if (box.left() < x && x < box.right())
+      return false;
+  }
+  // Any row boundary strictly inside the box vertically?
+  for (int row = 0; row < cell_y_.size(); ++row) {
+    const int y = cell_y_[row];
+    if (box.bottom() < y && y < box.top())
+      return false;
+  }
+  return true;
+}
+
+// Counts the filled cells of the whole table.
+int StructuredTable::CountFilledCells() {
+  const int last_row = row_count() - 1;
+  const int last_column = column_count() - 1;
+  return CountFilledCells(0, last_row, 0, last_column);
+}
+
+// Counts the filled cells of a single row.
+int StructuredTable::CountFilledCellsInRow(int row) {
+  const int last_column = column_count() - 1;
+  return CountFilledCells(row, row, 0, last_column);
+}
+
+// Counts the filled cells of a single column.
+int StructuredTable::CountFilledCellsInColumn(int column) {
+  const int last_row = row_count() - 1;
+  return CountFilledCells(0, last_row, column, column);
+}
+// Counts the cells in the inclusive range [row_start, row_end] x
+// [column_start, column_end] that contain at least one text partition
+// according to CountPartitions.
+int StructuredTable::CountFilledCells(int row_start, int row_end,
+                                      int column_start, int column_end) {
+  ASSERT_HOST(0 <= row_start && row_start <= row_end && row_end < row_count());
+  ASSERT_HOST(0 <= column_start && column_start <= column_end &&
+              column_end < column_count());
+  int filled = 0;
+  TBOX cell_box;
+  for (int r = row_start; r <= row_end; ++r) {
+    // The vertical extent is shared by every cell in this row.
+    cell_box.set_bottom(cell_y_[r]);
+    cell_box.set_top(cell_y_[r + 1]);
+    for (int c = column_start; c <= column_end; ++c) {
+      cell_box.set_left(cell_x_[c]);
+      cell_box.set_right(cell_x_[c + 1]);
+      const bool has_text = CountPartitions(cell_box) > 0;
+      if (has_text)
+        ++filled;
+    }
+  }
+  return filled;
+}
+
+// Returns true if at least one cell in the row has a filled fraction of at
+// least kMinFilledArea. This can filter out large whitespace caused by
+// growing tables too far and page numbers.
+bool StructuredTable::VerifyRowFilled(int row) {
+  const int columns = column_count();
+  int column = 0;
+  while (column < columns) {
+    if (CalculateCellFilledPercentage(row, column) >= kMinFilledArea)
+      return true;
+    ++column;
+  }
+  return false;
+}
+
+// Finds the fraction of a cell's area that is covered by text partitions.
+// Assume ColPartitions do not overlap for simplicity (even though they do);
+// because of that the sum may exceed the cell area, so the result is capped
+// at 1.0. A cell with zero area is reported as fully filled.
+double StructuredTable::CalculateCellFilledPercentage(int row, int column) {
+  // The cell spans boundaries [row, row + 1] and [column, column + 1], so
+  // both indices must be strictly below the row/column counts; allowing
+  // equality would read one element past the end of cell_y_ / cell_x_.
+  ASSERT_HOST(0 <= row && row < row_count());
+  ASSERT_HOST(0 <= column && column < column_count());
+  const TBOX kCellBox(cell_x_[column], cell_y_[row],
+                      cell_x_[column + 1], cell_y_[row + 1]);
+  ASSERT_HOST(!kCellBox.null_box());
+
+  ColPartitionGridSearch gsearch(text_grid_);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartRectSearch(kCellBox);
+  double area_covered = 0;
+  ColPartition* text = nullptr;
+  while ((text = gsearch.NextRectSearch()) != nullptr) {
+    // Only the part of a text partition that lies inside the cell counts.
+    if (text->IsTextType())
+      area_covered += text->bounding_box().intersection(kCellBox).area();
+  }
+  const int32_t current_area = kCellBox.area();
+  // Avoid dividing by zero for a degenerate cell.
+  if (current_area == 0) {
+    return 1.0;
+  }
+  return std::min(1.0, area_covered / current_area);
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Draws the table in the given window with the given pen color: the
+// bounding box outline, then one line per column and row boundary spanning
+// the bounding box.
+void StructuredTable::Display(ScrollView* window, ScrollView::Color color) {
+  window->Brush(ScrollView::NONE);
+  window->Pen(color);
+  window->Rectangle(bounding_box_.left(), bounding_box_.bottom(),
+                    bounding_box_.right(), bounding_box_.top());
+  // Vertical strokes at the column boundaries.
+  for (int col = 0; col < cell_x_.size(); ++col) {
+    const int x = cell_x_[col];
+    window->Line(x, bounding_box_.bottom(), x, bounding_box_.top());
+  }
+  // Horizontal strokes at the row boundaries.
+  for (int row = 0; row < cell_y_.size(); ++row) {
+    const int y = cell_y_[row];
+    window->Line(bounding_box_.left(), y, bounding_box_.right(), y);
+  }
+  window->UpdateWindow();
+}
+
+#endif
+
+// Resets everything derived from a structure search: the cell boundaries,
+// the lined flag, the four margins and the median cell sizes. The grids,
+// the bounding box and the text-height limit are left untouched.
+void StructuredTable::ClearStructure() {
+  cell_x_.clear();
+  cell_y_.clear();
+  is_lined_ = false;
+  space_above_ = space_below_ = 0;
+  space_left_ = space_right_ = 0;
+  median_cell_height_ = median_cell_width_ = 0;
+}
+
+// When a table has lines, the lines should not intersect any partitions.
+// Returns true only if no row boundary and no column boundary cuts through
+// a text partition.
+bool StructuredTable::VerifyLinedTableCells() {
+  // Function only called when lines exist.
+  ASSERT_HOST(cell_y_.size() >= 2 && cell_x_.size() >= 2);
+  // Row boundaries first.
+  for (int row = 0; row < cell_y_.size(); ++row) {
+    const int crossings = CountHorizontalIntersections(cell_y_[row]);
+    if (crossings > 0)
+      return false;
+  }
+  // Then column boundaries.
+  for (int col = 0; col < cell_x_.size(); ++col) {
+    const int crossings = CountVerticalIntersections(cell_x_[col]);
+    if (crossings > 0)
+      return false;
+  }
+  return true;
+}
+
+// TODO(nbeato): Could be much better than this.
+// Examples:
+//   - Calculate the percentage of filled cells.
+//   - Calculate the average number of ColPartitions per cell.
+//   - Calculate the number of cells per row with partitions.
+//   - Check if ColPartitions in adjacent cells are similar.
+//   - Check that all columns are at least a certain width.
+//   - etc.
+// Accepts the structure only if it is at least 2x3 or 3x2 cells.
+bool StructuredTable::VerifyWhitespacedTable() {
+  if (row_count() < 2)
+    return false;
+  if (column_count() < 2)
+    return false;
+  return cell_count() >= 6;
+}
+
+// Finds vertical splits (column boundaries) between the text partitions of
+// text_grid_ that lie in bounding_box_. The candidate positions are the
+// left/right sides of the partitions, each padded outwards by half of
+// kHorizontalSpacing times the partition's median width. The boundaries
+// themselves are computed by FindCellSplitLocations and stored in cell_x_.
+// cell_x_ is left untouched when the box contains no text partition.
+void StructuredTable::FindWhitespacedColumns() {
+  // Padded extents of all text partitions found in the box.
+  GenericVector<int> left_sides;
+  GenericVector<int> right_sides;
+
+  ColPartitionGridSearch gsearch(text_grid_);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartRectSearch(bounding_box_);
+  for (ColPartition* text = gsearch.NextRectSearch(); text != nullptr;
+       text = gsearch.NextRectSearch()) {
+    if (!text->IsTextType())
+      continue;
+
+    ASSERT_HOST(text->bounding_box().left() < text->bounding_box().right());
+    const int spacing = static_cast<int>(text->median_width() *
+                                         kHorizontalSpacing / 2.0 + 0.5);
+    const int padded_left = text->bounding_box().left() - spacing;
+    const int padded_right = text->bounding_box().right() + spacing;
+    left_sides.push_back(padded_left);
+    right_sides.push_back(padded_right);
+  }
+  // Nothing to split; the code below relies on non-empty lists.
+  if (left_sides.size() == 0 || right_sides.size() == 0)
+    return;
+
+  // Since data may be inserted in grid order, we sort the left/right sides.
+  left_sides.sort();
+  right_sides.sort();
+
+  // In the "merged list" we expect a left side, followed by either more
+  // left sides or a right side, ending with a right side. Splits are the
+  // "valleys" of that list. To force gap sizes or allow overlap, change the
+  // spacing above. To let lines "slice" partitions as long as it is
+  // infrequent, change the threshold passed below.
+  FindCellSplitLocations(left_sides, right_sides, kCellSplitColumnThreshold,
+                         &cell_x_);
+}
+
+// Finds horizontal splits (row boundaries) between the text partitions of
+// text_grid_ that lie in bounding_box_. The candidate positions are the
+// bottom/top sides of the partitions, adjusted by half of kVerticalSpacing
+// times the partition height. Because textlines can be skewed and close due
+// to warping, this tones the height of the partitions down a little bit.
+// The boundaries are computed by FindCellSplitLocations and stored in
+// cell_y_; the outermost two are then reset to the true text extents.
+// cell_y_ is left untouched when no usable partition is found.
+void StructuredTable::FindWhitespacedRows() {
+  // Adjusted extents of the usable text partitions found in the box.
+  GenericVector<int> bottom_sides;
+  GenericVector<int> top_sides;
+  // True (unadjusted) extents of all text, kept so that the outer
+  // bottom/top lines do not intersect text.
+  int min_bottom = INT32_MAX;
+  int max_top = INT32_MIN;
+
+  ColPartitionGridSearch gsearch(text_grid_);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartRectSearch(bounding_box_);
+  for (ColPartition* text = gsearch.NextRectSearch(); text != nullptr;
+       text = gsearch.NextRectSearch()) {
+    if (!text->IsTextType())
+      continue;
+
+    ASSERT_HOST(text->bounding_box().bottom() < text->bounding_box().top());
+    min_bottom = std::min(min_bottom, static_cast<int>(text->bounding_box().bottom()));
+    max_top = std::max(max_top, static_cast<int>(text->bounding_box().top()));
+
+    // Ignore "tall" text partitions, as these are usually false positive
+    // vertical text or multiple lines pulled together.
+    if (text->bounding_box().height() > max_text_height_)
+      continue;
+
+    const int spacing = static_cast<int>(text->bounding_box().height() *
+                                         kVerticalSpacing / 2.0 + 0.5);
+    const int bottom = text->bounding_box().bottom() - spacing;
+    const int top = text->bounding_box().top() + spacing;
+    // The adjustment can be negative and may invert the extent. This should
+    // probably cause a warning or failure; it has not been checked whether
+    // it actually happens.
+    if (bottom < top) {
+      bottom_sides.push_back(bottom);
+      top_sides.push_back(top);
+    }
+  }
+  // Nothing to split; the code below relies on non-empty lists.
+  if (bottom_sides.size() == 0 || top_sides.size() == 0)
+    return;
+
+  // Since data may be inserted in grid order, we sort the bottom/top sides.
+  bottom_sides.sort();
+  top_sides.sort();
+
+  // In the "merged list" we expect a bottom side, followed by either more
+  // bottom sides or a top side, ending with a top side. Splits are the
+  // "valleys" of that list. To force gap sizes or allow overlap, change the
+  // spacing above. To let lines "slice" partitions as long as it is
+  // infrequent, change the threshold passed below.
+  FindCellSplitLocations(bottom_sides, top_sides, kCellSplitRowThreshold,
+                         &cell_y_);
+
+  // Recover the min/max correctly since it was shifted.
+  const int last = cell_y_.size() - 1;
+  cell_y_[0] = min_bottom;
+  cell_y_[last] = max_top;
+}
+
+// Recomputes the four margins around the table. Each margin starts at
+// INT32_MAX and is then lowered by UpdateMargins, first against the text
+// grid and then against the line grid.
+void StructuredTable::CalculateMargins() {
+  space_above_ = space_below_ = INT32_MAX;
+  space_right_ = space_left_ = INT32_MAX;
+  UpdateMargins(text_grid_);
+  UpdateMargins(line_grid_);
+}
+// Finds the nearest partition in grid to each of the four table borders and
+// lowers the corresponding margin if that partition is closer than the
+// margin recorded so far.
+void StructuredTable::UpdateMargins(ColPartitionGrid* grid) {
+  space_below_ = std::min(
+      space_below_, FindVerticalMargin(grid, bounding_box_.bottom(), true));
+  space_above_ = std::min(
+      space_above_, FindVerticalMargin(grid, bounding_box_.top(), false));
+  space_left_ = std::min(
+      space_left_, FindHorizontalMargin(grid, bounding_box_.left(), true));
+  space_right_ = std::min(
+      space_right_, FindHorizontalMargin(grid, bounding_box_.right(), false));
+}
+// Searches grid vertically from border, within the horizontal span of the
+// bounding box, and returns the non-negative distance from border to the
+// first text partition or horizontal line found. When decrease is true the
+// distance is measured from the partition's top up to border, otherwise
+// from border up to the partition's bottom. Returns INT32_MAX if no
+// qualifying partition is found.
+int StructuredTable::FindVerticalMargin(ColPartitionGrid* grid, int border,
+                                        bool decrease) const {
+  ColPartitionGridSearch gsearch(grid);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(),
+                              border);
+  for (ColPartition* part = gsearch.NextVerticalSearch(decrease);
+       part != nullptr; part = gsearch.NextVerticalSearch(decrease)) {
+    const bool relevant = part->IsTextType() || part->IsHorizontalLine();
+    if (!relevant)
+      continue;
+    int distance;
+    if (decrease)
+      distance = border - part->bounding_box().top();
+    else
+      distance = part->bounding_box().bottom() - border;
+    // A negative distance means the partition reaches past the border.
+    if (distance >= 0)
+      return distance;
+  }
+  return INT32_MAX;
+}
+// Searches grid sideways from border, within the vertical span of the
+// bounding box, and returns the non-negative distance from border to the
+// first text partition or vertical line found. When decrease is true the
+// distance is measured from the partition's right side to border, otherwise
+// from border to the partition's left side. Returns INT32_MAX if no
+// qualifying partition is found.
+int StructuredTable::FindHorizontalMargin(ColPartitionGrid* grid, int border,
+                                          bool decrease) const {
+  ColPartitionGridSearch gsearch(grid);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartSideSearch(border, bounding_box_.bottom(), bounding_box_.top());
+  for (ColPartition* part = gsearch.NextSideSearch(decrease);
+       part != nullptr; part = gsearch.NextSideSearch(decrease)) {
+    const bool relevant = part->IsTextType() || part->IsVerticalLine();
+    if (!relevant)
+      continue;
+    int distance;
+    if (decrease)
+      distance = border - part->bounding_box().right();
+    else
+      distance = part->bounding_box().left() - border;
+    // A negative distance means the partition reaches past the border.
+    if (distance >= 0)
+      return distance;
+  }
+  return INT32_MAX;
+}
+
+// Computes median_cell_height_ and median_cell_width_ from the current cell
+// boundaries. Each row height is added with the column count as the second
+// argument of STATS::add, and each column width with the row count
+// (presumably a sample weight, so every cell counts once).
+void StructuredTable::CalculateStats() {
+  const int kMaxCellHeight = 1000;
+  const int kMaxCellWidth = 1000;
+  const int rows = row_count();
+  const int columns = column_count();
+
+  STATS height_stats(0, kMaxCellHeight + 1);
+  STATS width_stats(0, kMaxCellWidth + 1);
+
+  for (int row = 0; row < rows; ++row)
+    height_stats.add(row_height(row), columns);
+  for (int col = 0; col < columns; ++col)
+    width_stats.add(column_width(col), rows);
+
+  // Round the medians to the nearest integer.
+  median_cell_height_ = static_cast<int>(height_stats.median() + 0.5);
+  median_cell_width_ = static_cast<int>(width_stats.median() + 0.5);
+}
+
+// Looks for horizontal lines just above and just below the current bounding
+// box and grows the box to include them if no text lies in between. This is
+// necessary because the margins are calculated relative to the closest
+// line/text. If the line isn't absorbed, the margin will be the distance to
+// the line.
+void StructuredTable::AbsorbNearbyLines() {
+  ColPartitionGridSearch gsearch(line_grid_);
+  gsearch.SetUniqueMode(true);
+
+  // Is the closest line above good? Loop multiple times for tables with
+  // multi-line (sometimes 2) borders. Limit the number of lines by
+  // making sure they stay within a table cell or so.
+  gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(),
+                              bounding_box_.top());
+  for (ColPartition* line = gsearch.NextVerticalSearch(false);
+       line != nullptr; line = gsearch.NextVerticalSearch(false)) {
+    if (!line->IsHorizontalLine())
+      break;
+    // Strip between the current top of the table and the candidate line.
+    TBOX text_search(bounding_box_.left(), bounding_box_.top() + 1,
+                     bounding_box_.right(), line->MidY());
+    if (text_search.height() > median_cell_height_ * 2 ||
+        CountPartitions(text_search) > 0)
+      break;
+    bounding_box_.set_top(line->MidY());
+  }
+
+  // As above, is the closest line below good?
+  gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(),
+                              bounding_box_.bottom());
+  for (ColPartition* line = gsearch.NextVerticalSearch(true);
+       line != nullptr; line = gsearch.NextVerticalSearch(true)) {
+    if (!line->IsHorizontalLine())
+      break;
+    // Strip between the candidate line and the current bottom of the table.
+    TBOX text_search(bounding_box_.left(), line->MidY(),
+                     bounding_box_.right(), bounding_box_.bottom() - 1);
+    if (text_search.height() > median_cell_height_ * 2 ||
+        CountPartitions(text_search) > 0)
+      break;
+    bounding_box_.set_bottom(line->MidY());
+  }
+  // TODO(nbeato): vertical lines
+}
+
+
+// Finds all "valleys" in the coverage described by two sorted lists: the
+// mins and the maxes of partitions (either left and right sides, or bottom
+// and top sides).
+// The algorithm works as follows:
+//   While there are mins to process, take the smaller of the next min and
+//   the next max.
+//   If it is a min, increment the "hill" counter (one more partition is
+//   stacked at this position); otherwise decrement it.
+//   When the counter drops to max_merged or below, remember that position.
+//   When it next rises above max_merged, emit the midpoint between the
+//   remembered position and the current min as a split location.
+// The first and last locations are the first min and the last max.
+// On return, locations holds the split positions; it is empty if the input
+// lists are empty.
+// NOTE: This function assumes the lists are sorted!
+void StructuredTable::FindCellSplitLocations(const GenericVector<int>& min_list,
+                                             const GenericVector<int>& max_list,
+                                             int max_merged,
+                                             GenericVector<int>* locations) {
+  locations->clear();
+  ASSERT_HOST(min_list.size() == max_list.size());
+  if (min_list.size() == 0)
+    return;
+  ASSERT_HOST(min_list.get(0) < max_list.get(0));
+  ASSERT_HOST(min_list.get(min_list.size() - 1) <
+              max_list.get(max_list.size() - 1));
+
+  locations->push_back(min_list.get(0));
+  int stacked_partitions = 0;
+  // INT32_MAX means "no valley start is currently pending".
+  int last_cross_position = INT32_MAX;
+  int min_index = 0;
+  int max_index = 0;
+  // max_index will expire after min_index.
+  // However, we can't "increase" the hill size if min_index expired.
+  // So finish processing when min_index expires.
+  while (min_index < min_list.size()) {
+    if (!(min_list[min_index] < max_list[max_index])) {
+      // A partition ends here: decrease the hill count.
+      --stacked_partitions;
+      if (last_cross_position == INT32_MAX &&
+          stacked_partitions <= max_merged) {
+        last_cross_position = max_list[max_index];
+      }
+      ++max_index;
+      continue;
+    }
+    // A partition starts here: increase the hill count.
+    ++stacked_partitions;
+    if (last_cross_position != INT32_MAX &&
+        stacked_partitions > max_merged) {
+      const int mid = (last_cross_position + min_list[min_index]) / 2;
+      locations->push_back(mid);
+      last_cross_position = INT32_MAX;
+    }
+    ++min_index;
+  }
+  locations->push_back(max_list.get(max_list.size() - 1));
+}
+
+// Counts the number of text partitions in the table
+// box that intersect the given x value, i.e. whose left side is strictly
+// less than x and whose right side is strictly greater.
+int StructuredTable::CountVerticalIntersections(int x) {
+  // Make a small box to keep the search time down.
+  const int kGridSize = text_grid_->gridsize();
+  TBOX vertical_box = bounding_box_;
+  vertical_box.set_left(x - kGridSize);
+  vertical_box.set_right(x + kGridSize);
+
+  ColPartitionGridSearch gsearch(text_grid_);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartRectSearch(vertical_box);
+  int crossings = 0;
+  for (ColPartition* text = gsearch.NextRectSearch(); text != nullptr;
+       text = gsearch.NextRectSearch()) {
+    if (!text->IsTextType())
+      continue;
+    const TBOX& box = text->bounding_box();
+    const bool crosses = box.left() < x && x < box.right();
+    if (crosses)
+      ++crossings;
+  }
+  return crossings;
+}
+
+// Counts the number of text partitions in the table
+// box that intersect the given y value, i.e. whose bottom is strictly
+// less than y and whose top is strictly greater.
+int StructuredTable::CountHorizontalIntersections(int y) {
+  // Make a small box to keep the search time down.
+  const int kGridSize = text_grid_->gridsize();
+  TBOX horizontal_box = bounding_box_;
+  horizontal_box.set_bottom(y - kGridSize);
+  horizontal_box.set_top(y + kGridSize);
+
+  ColPartitionGridSearch gsearch(text_grid_);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartRectSearch(horizontal_box);
+  int crossings = 0;
+  for (ColPartition* text = gsearch.NextRectSearch(); text != nullptr;
+       text = gsearch.NextRectSearch()) {
+    if (!text->IsTextType())
+      continue;
+    const TBOX& box = text->bounding_box();
+    const bool crosses = box.bottom() < y && y < box.top();
+    if (crosses)
+      ++crossings;
+  }
+  return crossings;
+}
+
+// Counts how many text partitions the rectangle search of text_grid_
+// returns for this box.
+// This is used to count partitions in cells, as that can indicate
+// how "strong" a potential table row/column (or even full table) actually is.
+int StructuredTable::CountPartitions(const TBOX& box) {
+  ColPartitionGridSearch gsearch(text_grid_);
+  gsearch.SetUniqueMode(true);
+  gsearch.StartRectSearch(box);
+  int text_partitions = 0;
+  for (ColPartition* part = gsearch.NextRectSearch(); part != nullptr;
+       part = gsearch.NextRectSearch()) {
+    if (!part->IsTextType())
+      continue;
+    ++text_partitions;
+  }
+  return text_partitions;
+}
+
+////////
+//////// TableRecognizer Class
+////////
+
+TableRecognizer::TableRecognizer() // Starts with no grids and no size limits.
+ : text_grid_(nullptr), // Supplied later through set_text_grid().
+ line_grid_(nullptr), // Supplied later through set_line_grid().
+ min_height_(0),
+ min_width_(0),
+ max_text_height_(INT32_MAX) { // INT32_MAX: no text is filtered by height.
+}
+
+TableRecognizer::~TableRecognizer() { // Empty body: nothing is released here.
+}
+
+void TableRecognizer::Init() { // Currently a no-op.
+}
+
+void TableRecognizer::set_text_grid(ColPartitionGrid* text_grid) {
+ text_grid_ = text_grid; // The pointer is stored as-is; no copy is made.
+}
+void TableRecognizer::set_line_grid(ColPartitionGrid* line_grid) {
+ line_grid_ = line_grid; // The pointer is stored as-is; no copy is made.
+}
+void TableRecognizer::set_min_height(int height) {
+ min_height_ = height; // RecognizeWhitespacedTable skips boxes shorter than this.
+}
+void TableRecognizer::set_min_width(int width) {
+ min_width_ = width; // NOTE(review): no reader of min_width_ is visible nearby.
+}
+void TableRecognizer::set_max_text_height(int height) {
+ max_text_height_ = height; // Passed on to each table; taller text is skipped.
+}
+
+StructuredTable* TableRecognizer::RecognizeTable(const TBOX& guess) {
+ auto* table = new StructuredTable(); // Returned to the caller on success.
+ table->Init();
+ table->set_text_grid(text_grid_);
+ table->set_line_grid(line_grid_);
+ table->set_max_text_height(max_text_height_);
+
+ // Try to solve this simple case, a table with *both*
+ // vertical and horizontal lines.
+ if (RecognizeLinedTable(guess, table))
+ return table;
+
+ // Fallback to whitespace if that failed.
+ // TODO(nbeato): Break this apart to take advantage of horizontal
+ // lines or vertical lines when present.
+ if (RecognizeWhitespacedTable(guess, table))
+ return table;
+
+ // No table found: free the candidate and report failure with nullptr.
+ delete table;
+ return nullptr;
+}
+
+bool TableRecognizer::RecognizeLinedTable(const TBOX& guess_box,
+ StructuredTable* table) {
+ if (!HasSignificantLines(guess_box)) // Too few ruling lines in the guess.
+ return false;
+ TBOX line_bound = guess_box; // In/out: becomes the extent of the lines.
+ if (!FindLinesBoundingBox(&line_bound))
+ return false;
+ table->set_bounding_box(line_bound);
+ return table->FindLinedStructure(); // True only if the table accepts it.
+}
+
+// Quick implementation. Just count the number of lines in the box.
+// A better implementation would count intersections and look for connected
+// components. It could even go as far as finding similar length lines.
+// To account for these possible issues, the VerifyLinedTableCells function
+// will reject lined tables that cause intersections with text on the page.
+// TODO(nbeato): look for "better" lines
+bool TableRecognizer::HasSignificantLines(const TBOX& guess) {
+ ColPartitionGridSearch box_search(line_grid_);
+ box_search.SetUniqueMode(true);
+ box_search.StartRectSearch(guess);
+ ColPartition* line = nullptr;
+ int vertical_count = 0;
+ int horizontal_count = 0;
+
+ while ((line = box_search.NextRectSearch()) != nullptr) {
+ if (line->IsHorizontalLine())
+ ++horizontal_count;
+ if (line->IsVerticalLine())
+ ++vertical_count;
+ }
+
+ return vertical_count >= kLinedTableMinVerticalLines && // Both minimums
+ horizontal_count >= kLinedTableMinHorizontalLines; // must be met.
+}
+
+// Given a bounding box with a bunch of horizontal / vertical lines,
+// we just find the extents of all of these lines iteratively.
+// The box is assumed to end up at least as large as guess. This
+// could possibly be a bad assumption.
+// It is guaranteed to halt in at most O(n * gridarea) where n
+// is the number of lines.
+// The assumption is that growing the box iteratively will add lines
+// several times, but eventually we'll find the extents.
+//
+// For tables, the approach is a bit aggressive, a single line (which could be
+// noise or a column ruling) can destroy the table inside.
+//
+// TODO(nbeato): This is a quick first implementation.
+// A better implementation would actually look for consistency
+// in extents of the lines and find the extents using lines
+// that clearly describe the table. This would allow the
+// lines to "vote" for height/width. An approach like
+// this would solve issues with page layout rulings.
+// I haven't looked for these issues yet, so I can't even
+// say they happen confidently.
+bool TableRecognizer::FindLinesBoundingBox(TBOX* bounding_box) {
+ // The first iteration will tell us if there are lines
+ // present and shrink the box to a minimal iterative size.
+ if (!FindLinesBoundingBoxIteration(bounding_box))
+ return false;
+
+ // Keep growing until the area of the table stabilizes.
+ // The box can only get bigger, increasing area.
+ bool changed = true;
+ while (changed) {
+ changed = false;
+ int old_area = bounding_box->area();
+ bool check = FindLinesBoundingBoxIteration(bounding_box);
+ // At this point, the function will return true.
+ ASSERT_HOST(check);
+ ASSERT_HOST(bounding_box->area() >= old_area);
+ changed = (bounding_box->area() > old_area); // Repeat while still growing.
+ }
+
+ return true;
+}
+
+bool TableRecognizer::FindLinesBoundingBoxIteration(TBOX* bounding_box) {
+ // Search for all of the lines in the current box, keeping track of extents.
+ ColPartitionGridSearch box_search(line_grid_);
+ box_search.SetUniqueMode(true);
+ box_search.StartRectSearch(*bounding_box);
+ ColPartition* line = nullptr;
+ bool first_line = true; // Stays true until a line partition is seen.
+
+ while ((line = box_search.NextRectSearch()) != nullptr) {
+ if (line->IsLineType()) {
+ if (first_line) {
+ // The first line replaces the box, so the box can shrink here.
+ *bounding_box = line->bounding_box();
+ first_line = false;
+ } else {
+ *bounding_box += line->bounding_box(); // Later lines extend the box.
+ }
+ }
+ }
+ return !first_line; // True if at least one line partition was found.
+}
+
+// The goal of this function is to move the table boundaries around and find
+// a table that maximizes the whitespace around the table while maximizing
+// the cellular structure. As a result, it gets confused by headers, footers,
+// and merged columns (text that crosses columns). There is a tolerance
+// that allows a few partitions to count towards potential cell merges.
+// It's the max_merged parameter to FindCellSplitLocations.
+// It can work, but it needs some false positive removal on boundaries.
+// For now, the grid structure must not intersect any partitions.
+// Also, small tolerance is added to the horizontal lines for tightly packed
+// tables. The tolerance is added by adjusting the bounding boxes of the
+// partitions (in FindHorizontalPartitions). The current implementation
+// only adjusts the vertical extents of the table.
+//
+// Also note. This was hacked at a lot. It could probably use some
+// more hacking at to find a good set of border conditions and then a
+// nice clean up.
+bool TableRecognizer::RecognizeWhitespacedTable(const TBOX& guess_box,
+ StructuredTable* table) {
+ TBOX best_box = guess_box; // Best borders known.
+ int best_below = 0; // Margin size below best table.
+ int best_above = 0; // Margin size above best table.
+ TBOX adjusted = guess_box; // The search box.
+
+ // We assume that the guess box is somewhat accurate, so we don't allow
+ // the adjusted border to pass half of the guessed area. This prevents
+ // "negative" tables from forming.
+ const int kMidGuessY = (guess_box.bottom() + guess_box.top()) / 2;
+ // Keeps track of the most columns in an accepted table. The resulting table
+ // may be less than the max, but we don't want to stray too far.
+ int best_cols = 0;
+ // Make sure we find a good border.
+ bool found_good_border = false;
+
+ // Find the bottom of the table by trying a few different locations. For
+ // each location, the top, left, and right are fixed. We start the search
+ // in a smaller table to favor best_cols getting a good estimate sooner.
+ int last_bottom = INT32_MAX;
+ int bottom = NextHorizontalSplit(guess_box.left(), guess_box.right(),
+ kMidGuessY - min_height_ / 2, true);
+ int top = NextHorizontalSplit(guess_box.left(), guess_box.right(),
+ kMidGuessY + min_height_ / 2, false);
+ adjusted.set_top(top);
+
+ // Headers/footers can be spaced far from everything.
+ // Make sure that the space below is greater than the space above
+ // the lowest row.
+ int previous_below = 0;
+ const int kMaxChances = 10;
+ int chances = kMaxChances;
+ while (bottom != last_bottom) { // Stops when the split no longer moves.
+ adjusted.set_bottom(bottom);
+
+ if (adjusted.height() >= min_height_) {
+ // Try to fit the grid on the current box. We give it a chance
+ // if the number of columns didn't significantly drop.
+ table->set_bounding_box(adjusted);
+ if (table->FindWhitespacedStructure() &&
+ table->column_count() >= best_cols * kRequiredColumns) {
+ if (false && IsWeakTableRow(table, 0)) {
+ // Currently buggy, but was looking promising so disabled.
+ --chances;
+ } else {
+ // We favor 2 things,
+ // 1- Adding rows that have partitioned data.
+ // 2- Better margins (to find header/footer).
+ // For better tables, we just look for multiple cells in the
+ // bottom row with data in them.
+ // For margins, the space below the last row should
+ // be better than a table with the last row removed.
+ chances = kMaxChances;
+ double max_row_height = kMaxRowSize * table->median_cell_height();
+ if ((table->space_below() * kMarginFactor >= best_below &&
+ table->space_below() >= previous_below) ||
+ (table->CountFilledCellsInRow(0) > 1 &&
+ table->row_height(0) < max_row_height)) {
+ best_box.set_bottom(bottom);
+ best_below = table->space_below();
+ best_cols = std::max(table->column_count(), best_cols);
+ found_good_border = true;
+ }
+ }
+ previous_below = table->space_below();
+ } else {
+ --chances; // No acceptable structure at this bottom; use up a chance.
+ }
+ }
+ if (chances <= 0)
+ break;
+
+ last_bottom = bottom;
+ bottom = NextHorizontalSplit(guess_box.left(), guess_box.right(),
+ last_bottom, true);
+ }
+ if (!found_good_border)
+ return false;
+
+ // TODO(nbeato) comments: follow modified code above... put it in a function!
+ found_good_border = false;
+ int last_top = INT32_MIN;
+ top = NextHorizontalSplit(guess_box.left(), guess_box.right(),
+ kMidGuessY + min_height_ / 2, false);
+ int previous_above = 0;
+ chances = kMaxChances;
+
+ adjusted.set_bottom(best_box.bottom()); // Bottom is now fixed at the best one.
+ while (last_top != top) { // Stops when the split no longer moves.
+ adjusted.set_top(top);
+ if (adjusted.height() >= min_height_) {
+ table->set_bounding_box(adjusted);
+ if (table->FindWhitespacedStructure() &&
+ table->column_count() >= best_cols * kRequiredColumns) {
+ int last_row = table->row_count() - 1;
+ if (false && IsWeakTableRow(table, last_row)) {
+ // Currently buggy, but was looking promising so disabled.
+ --chances;
+ } else {
+ chances = kMaxChances;
+ double max_row_height = kMaxRowSize * table->median_cell_height();
+ if ((table->space_above() * kMarginFactor >= best_above &&
+ table->space_above() >= previous_above) ||
+ (table->CountFilledCellsInRow(last_row) > 1 &&
+ table->row_height(last_row) < max_row_height)) {
+ best_box.set_top(top);
+ best_above = table->space_above();
+ best_cols = std::max(table->column_count(), best_cols);
+ found_good_border = true;
+ }
+ }
+ previous_above = table->space_above();
+ } else {
+ --chances; // No acceptable structure at this top; use up a chance.
+ }
+ }
+ if (chances <= 0)
+ break;
+
+ last_top = top;
+ top = NextHorizontalSplit(guess_box.left(), guess_box.right(),
+ last_top, false);
+ }
+
+ if (!found_good_border)
+ return false;
+
+ // If we get here, this shouldn't happen. It can be an assert, but
+ // I haven't tested it enough to make it crash things.
+ if (best_box.null_box())
+ return false;
+
+ // Given the best locations, fit the box to those locations.
+ table->set_bounding_box(best_box);
+ return table->FindWhitespacedStructure();
+}
+
+// Finds the closest value to y that can safely cause a horizontal
+// split in the partitions.
+// This function has been buggy and not as reliable as I would've
+// liked. I suggest finding all of the splits using the
+// FindCellSplitLocations once and then just keeping the results
+// of that function cached somewhere.
+int TableRecognizer::NextHorizontalSplit(int left, int right, int y,
+ bool top_to_bottom) {
+ ColPartitionGridSearch gsearch(text_grid_);
+ gsearch.SetUniqueMode(true);
+ gsearch.StartVerticalSearch(left, right, y);
+ ColPartition* text = nullptr;
+ int last_y = y; // Running candidate; returned unchanged if nothing matches.
+ while ((text = gsearch.NextVerticalSearch(top_to_bottom)) != nullptr) {
+ if (!text->IsTextType() || !text->IsHorizontalType())
+ continue; // Only horizontal text partitions are considered.
+ if (text->bounding_box().height() > max_text_height_)
+ continue; // Text taller than the limit is ignored.
+
+ const TBOX& text_box = text->bounding_box();
+ if (top_to_bottom && (last_y >= y || last_y <= text_box.top())) {
+ last_y = std::min(last_y, static_cast<int>(text_box.bottom()));
+ continue;
+ }
+ if (!top_to_bottom && (last_y <= y || last_y >= text_box.bottom())) {
+ last_y = std::max(last_y, static_cast<int>(text_box.top()));
+ continue;
+ }
+
+ return last_y; // Neither branch above extended last_y for this partition.
+ }
+ // If none is found, we at least want to preserve the min/max,
+ // which defines the overlap of y with the last partition in the grid.
+ return last_y;
+}
+
+// Code is buggy right now. It is disabled in the calling function.
+// It seems that the row that is passed in is sometimes not correct
+// (like a phantom row is introduced). There's something going
+// on in the cell_y_ data member before this is called... not certain.
+bool TableRecognizer::IsWeakTableRow(StructuredTable* table, int row) {
+ if (!table->VerifyRowFilled(row))
+ return false; // A row that fails VerifyRowFilled is never reported weak.
+
+ double threshold = 0.0;
+ if (table->column_count() > kGoodRowNumberOfColumnsSmallSize)
+ threshold = table->column_count() * kGoodRowNumberOfColumnsLarge;
+ else
+ threshold = kGoodRowNumberOfColumnsSmall[table->column_count()];
+
+ return table->CountFilledCellsInRow(row) < threshold; // Too few filled cells.
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/tablerecog.h b/tesseract/src/textord/tablerecog.h
new file mode 100644
index 00000000..eb8f0543
--- /dev/null
+++ b/tesseract/src/textord/tablerecog.h
@@ -0,0 +1,378 @@
+///////////////////////////////////////////////////////////////////////
+// File: tablerecog.h
+// Description: Functions to detect structure of tables.
+// Author: Nicholas Beato
+// Created: Aug 17, 2010
+//
+// (C) Copyright 2010, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TABLERECOG_H_
+#define TABLERECOG_H_
+
+#include "colpartitiongrid.h"
+#include "genericvector.h"
+
+namespace tesseract {
+
+// There are 2 classes in this file. They have 2 different purposes.
+// - StructuredTable contains the methods to find the structure given
+// a specific bounding box and grow that structure.
+// - TableRecognizer contains the methods to adjust the possible positions
+// of a table without worrying about structure.
+//
+// To use these classes, the assumption is that the TableFinder will
+// have a guess of the location of a table (or possibly over/undersegmented
+// tables). The TableRecognizer is responsible for finding the table boundaries
+// at a high level. The StructuredTable class is responsible for determining
+// the structure of the table and trying to maximize its bounds while retaining
+// the structure.
+// (The latter part is not implemented yet, but that was the goal).
+//
+// While on the boundary discussion, keep in mind that this is a first pass.
+// There should eventually be some things like internal structure checks,
+// and, more importantly, surrounding text flow checks.
+//
+
+// Usage:
+// The StructuredTable class contains methods to query a potential table.
+// It has functions to find structure, count rows, find ColPartitions that
+// intersect gridlines, etc. It is not meant to blindly find a table. It
+// is meant to start with a known table location and enhance it.
+// Usage:
+// ColPartitionGrid text_grid, line_grid; // init
+// TBOX table_box; // known location of the table
+//
+// StructuredTable table;
+// table.Init(); // construction code
+// table.set_text_grid(/* text */); // These 2 grids can be the same!
+// table.set_line_grid(/* lines */);
+// table.set_max_text_height(10); // Filter vertical and tall text.
+// // IMPORTANT! The table needs to be told where it is!
+// table.set_bounding_box(table_box); // Set initial table location.
+// if (table.FindWhitespacedStructure()) {
+// // process table
+// table.column_count(); // number of columns
+// table.row_count(); // number of rows
+// table.cell_count(); // number of cells
+// table.bounding_box(); // updated bounding box
+// // etc.
+// }
+//
+class TESS_API StructuredTable {
+ public:
+ StructuredTable();
+ ~StructuredTable() = default;
+
+ // Initialization code. Must be called after the constructor.
+ void Init();
+
+ // Sets the grids used by the table. These can be changed between
+ // calls to Recognize. They are treated as read-only data.
+ void set_text_grid(ColPartitionGrid* text);
+ void set_line_grid(ColPartitionGrid* lines);
+ // Filters text partitions that are ridiculously tall to prevent
+ // merging rows.
+ void set_max_text_height(int height);
+
+ // Basic accessors. Some are treated as attributes despite having indirect
+ // representation.
+ bool is_lined() const;
+ int row_count() const;
+ int column_count() const;
+ int cell_count() const;
+ void set_bounding_box(const TBOX& box);
+ const TBOX& bounding_box() const;
+ int median_cell_height();
+ int median_cell_width();
+ int row_height(int row) const;
+ int column_width(int column) const;
+ int space_above() const;
+ int space_below() const;
+
+ // Given enough horizontal and vertical lines in a region, create this table
+ // based on the structure given by the lines. Return true if it worked out.
+ // Code assumes the lines exist. It is the caller's responsibility to check
+ // for lines and find an appropriate bounding box.
+ bool FindLinedStructure();
+
+ // The main subroutine for finding generic table structure. The function
+ // finds the grid structure in the given box. Returns true if a good grid
+ // exists, implying that "this" table is valid.
+ bool FindWhitespacedStructure();
+
+ ////////
+ //////// Functions to query table info.
+ ////////
+
+ // Returns true if inserting part into the table does not cause any
+ // cell merges.
+ bool DoesPartitionFit(const ColPartition& part) const;
+ // Checks if a sub-table has multiple data cells filled.
+ int CountFilledCells();
+ int CountFilledCellsInRow(int row);
+ int CountFilledCellsInColumn(int column);
+ int CountFilledCells(int row_start, int row_end,
+ int column_start, int column_end);
+
+ // Makes sure that at least one cell in a row has substantial area filled.
+ // This can filter out large whitespace caused by growing tables too far
+ // and page numbers.
+ // (currently bugged for some reason).
+ bool VerifyRowFilled(int row);
+ // Finds the filled area in a cell.
+ double CalculateCellFilledPercentage(int row, int column);
+
+ // Debug display, draws the table in the given color. If the table is not
+ // valid, the table and "best" grid lines are still drawn in the given color.
+ void Display(ScrollView* window, ScrollView::Color color);
+
+ protected:
+ // Clear the structure information.
+ void ClearStructure();
+
+ ////////
+ //////// Lined tables
+ ////////
+
+ // Verifies the lines do not intersect partitions. This happens when
+ // the lines are in column boundaries and extend the full page. As a result,
+ // the grid lines go through column text. The condition is detectable.
+ bool VerifyLinedTableCells();
+
+ ////////
+ //////// Tables with whitespace
+ ////////
+
+ // This is the function to change if you want to filter resulting tables
+ // better. Right now it just checks for a minimum cell count and such.
+ // You could add things like maximum number of ColPartitions per cell or
+ // similar.
+ bool VerifyWhitespacedTable();
+ // Find the columns of a table using whitespace.
+ void FindWhitespacedColumns();
+ // Find the rows of a table using whitespace.
+ void FindWhitespacedRows();
+
+ ////////
+ //////// Functions to provide information about the table.
+ ////////
+
+ // Calculates the whitespace around the table using the table boundary and
+ // the supplied grids (set_text_grid and set_line_grid).
+ void CalculateMargins();
+ // Update the table margins with the supplied grid. This is
+ // only called by calculate margins to use multiple grid sources.
+ void UpdateMargins(ColPartitionGrid* grid);
+ int FindVerticalMargin(ColPartitionGrid* grid, int start_x,
+ bool decrease) const;
+ int FindHorizontalMargin(ColPartitionGrid* grid, int start_y,
+ bool decrease) const;
+ // Calculates stats on the table, namely the median cell height and width.
+ void CalculateStats();
+
+ ////////
+ //////// Functions to try to "fix" some table errors.
+ ////////
+
+ // Given a whitespaced table, this looks for bordering lines that might
+ // be page layout boxes around the table. It is necessary to get the margins
+ // correct on the table. If the lines are not joined, the margins will be
+ // the distance to the line, which is not right.
+ void AbsorbNearbyLines();
+
+ // Nice utility function for finding partition gaps. You feed it a sorted
+ // list of all of the mins/maxes of the partitions in the table, and it gives
+ // you the gaps (middle). This works for both vertical and horizontal
+ // gaps.
+ //
+ // If you want to allow slight overlap in the division and the partitions,
+ // just scale down the partitions before inserting them in the list.
+ // Likewise, you can force at least some space between partitions.
+ // This trick is how the horizontal partitions are done (since the page
+ // skew could make it hard to find splits in the text).
+ //
+ // As a result, "0 distance" between closest partitions causes a gap.
+ // This is not a programmatic assumption. It is intentional and simplifies
+ // things.
+ //
+ // "max_merged" indicates both the minimum number of stacked partitions
+ // to cause a cell (add 1 to it), and the maximum number of partitions that
+ // a grid line can intersect. For example, if max_merged is 0, then lines
+ // are inserted wherever space exists between partitions. If it is 2,
+ // lines may intersect 2 partitions at most, but you also need at least
+ // 2 partitions to generate a line.
+ static void FindCellSplitLocations(const GenericVector<int>& min_list,
+ const GenericVector<int>& max_list,
+ int max_merged,
+ GenericVector<int>* locations);
+
+ ////////
+ //////// Utility function for table queries
+ ////////
+
+ // Counts the number of ColPartitions that intersect vertical cell
+ // division at this x value. Used by VerifyLinedTableCells.
+ int CountVerticalIntersections(int x);
+ int CountHorizontalIntersections(int y);
+
+ // Counts how many text partitions are in this box.
+ int CountPartitions(const TBOX& box);
+
+ ////////
+ //////// Data members.
+ ////////
+
+ // Input data, used as read only data to make decisions.
+ ColPartitionGrid* text_grid_; // Text ColPartitions
+ ColPartitionGrid* line_grid_; // Line ColPartitions
+ // Table structure.
+ // bounding box is a convenient external representation.
+ // cell_x_ and cell_y_ indicate the grid lines.
+ TBOX bounding_box_; // Bounding box
+ GenericVector<int> cell_x_; // Locations of vertical divisions (sorted)
+ GenericVector<int> cell_y_; // Locations of horizontal divisions (sorted)
+ bool is_lined_; // Is the table backed up by a line structure
+ // Table margins, set via CalculateMargins
+ int space_above_;
+ int space_below_;
+ int space_left_;
+ int space_right_;
+ int median_cell_height_;
+ int median_cell_width_;
+ // Filters, used to prevent awkward partitions from destroying structure.
+ int max_text_height_;
+};
+
+class TESS_API TableRecognizer {
+ public:
+ TableRecognizer();
+ ~TableRecognizer();
+
+ // Initialization code. Must be called after the constructor.
+ void Init();
+
+ ////////
+ //////// Pre-recognize methods to initialize table constraints.
+ ////////
+
+ // Sets the grids used by the table. These can be changed between
+ // calls to Recognize. They are treated as read-only data.
+ void set_text_grid(ColPartitionGrid* text);
+ void set_line_grid(ColPartitionGrid* lines);
+ // Sets some additional constraints on the table.
+ void set_min_height(int height);
+ void set_min_width(int width);
+ // Filters text partitions that are ridiculously tall to prevent
+ // merging rows. Note that "filters" refers to allowing horizontal
+ // cells to slice through them on the premise that they were
+ // merged text rows during previous layout.
+ void set_max_text_height(int height);
+
+ // Given a guess location, the RecognizeTable function will try to find a
+ // structured grid in the area. On success, it will return a new
+ // StructuredTable (and assumes you will delete it). Otherwise,
+ // nullptr is returned.
+ //
+ // Keep in mind, this may "overgrow" or "undergrow" the size of guess.
+ // Ideally, there is either a one-to-one correspondence between
+ // the guess and table or no table at all. This is not the best of
+ // assumptions right now, but was made to try to keep things simple in
+ // the first pass.
+ //
+ // If a line structure is available on the page in the given region,
+ // the table will use the linear structure as it is.
+ // Otherwise, it will try to maximize the whitespace around it while keeping
+ // a grid structure. This is somewhat working.
+ //
+ // Since the combination of adjustments can get high, effort was
+ // originally made to keep the number of adjustments linear in the number
+ // of partitions. The underlying structure finding code used to be
+ // much more complex. I don't know how necessary this constraint is anymore.
+ // The evaluation of a possible table is kept within O(nlogn) in the size of
+ // the table (where size is the number of partitions in the table).
+ // As a result, the algorithm is capable of O(n^2 log n). Depending
+ // on the grid search size, it may be higher.
+ //
+ // Last note: it is possible to just try all partition boundaries at a high
+ // level O(n^4) and do a verification scheme (at least O(nlogn)). If there
+ // are 200 partitions on a page, this could be too costly. Effort could go
+ // into pruning the search, but I opted for something quicker. I'm confident
+ // that the independent adjustments can get similar results and keep the
+ // complexity down. However, the other approach could work without using
+ // TableFinder at all if it is fast enough. It comes down to properly
+ // deciding what is a table. The code currently relies on TableFinder's
+ // guess to the location of a table for that.
+ StructuredTable* RecognizeTable(const TBOX& guess_box);
+
+ protected:
+ ////////
+ //////// Lined tables
+ ////////
+
+ // Returns true if the given box has a lined table within it. The
+ // table argument will be updated with the table if the table exists.
+ bool RecognizeLinedTable(const TBOX& guess_box, StructuredTable* table);
+ // Returns true if the given box has a large number of horizontal and
+ // vertical lines present. If so, we assume the extent of these lines
+ // uniquely defines a table and find that table via RecognizeLinedTable.
+ bool HasSignificantLines(const TBOX& guess);
+
+ // Given enough horizontal and vertical lines in a region, find a bounding
+ // box that encloses all of them (as well as newly introduced lines).
+ // The bounding box is the smallest box that encloses the lines in guess
+ // without having any lines sticking out of it.
+ // bounding_box is an in/out parameter.
+ // On input, it is the extents of the box to search.
+ // On output, it is the resulting bounding box.
+ bool FindLinesBoundingBox(TBOX* bounding_box);
+ // Iteration in above search.
+ // bounding_box is an in/out parameter.
+ // On input, it is the extents of the box to search.
+ // On output, it is the resulting bounding box.
+ bool FindLinesBoundingBoxIteration(TBOX* bounding_box);
+
+ ////////
+ //////// Generic "whitespaced" tables
+ ////////
+
+ // Returns true if the given box has a whitespaced table within it. The
+ // table argument will be updated if the table exists. Also note
+ // that this method will fail if the guess_box center is not
+ // mostly within the table.
+ bool RecognizeWhitespacedTable(const TBOX& guess_box, StructuredTable* table);
+
+ // Finds the location of a horizontal split relative to y.
+ // This function is mostly unused now. If RecognizeWhitespacedTable
+ // changes much, it can be removed. Note, it isn't really as reliable
+ // as I thought. I went with alternatives for most of the other uses.
+ int NextHorizontalSplit(int left, int right, int y, bool top_to_bottom);
+
+ // Indicates that a table row is weak. This means that it has
+ // many missing data cells or very large cell heights compared
+ // to the rest of the table.
+ static bool IsWeakTableRow(StructuredTable* table, int row);
+
+ // Input data, used as read only data to make decisions.
+ ColPartitionGrid* text_grid_; // Text ColPartitions
+ ColPartitionGrid* line_grid_; // Line ColPartitions
+ // Table constraints, a "good" table must satisfy these.
+ int min_height_;
+ int min_width_;
+ // Filters, used to prevent awkward partitions from destroying structure.
+ int max_text_height_; // Horizontal lines may intersect taller text.
+};
+
+} // namespace tesseract
+
+#endif /* TABLERECOG_H_ */
diff --git a/tesseract/src/textord/tabvector.cpp b/tesseract/src/textord/tabvector.cpp
new file mode 100644
index 00000000..95e75a38
--- /dev/null
+++ b/tesseract/src/textord/tabvector.cpp
@@ -0,0 +1,982 @@
+///////////////////////////////////////////////////////////////////////
+// File: tabvector.cpp
+// Description: Class to hold a near-vertical vector representing a tab-stop.
+// Author: Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "tabvector.h"
+#include "blobbox.h"
+#include "colfind.h"
+#include "colpartitionset.h"
+#include "detlinefit.h"
+#include "statistc.h"
+
+#include <algorithm>
+
+namespace tesseract {
+
+// Multiple of mean box height used as the max gutter for evaluation search.
+const int kGutterMultiple = 4;
+// Multiple of neighbour gap that we expect the gutter gap to be at minimum.
+const int kGutterToNeighbourRatio = 3; // Also the max gutter multiple if ragged.
+// Pixel distance for tab vectors to be considered the same.
+const int kSimilarVectorDist = 10;
+// Pixel distance for ragged tab vectors to be considered the same if there
+// is nothing in the overlap box
+const int kSimilarRaggedDist = 50;
+// Max multiple of box size (sqrt of area) to fill in between good blobs.
+const int kMaxFillinMultiple = 11;
+// Min fraction of median gutter size to allow a gutter on a good tab blob.
+const double kMinGutterFraction = 0.5;
+// Multiple of 1/n (n = box count) as a minimum gutter, in mean box heights.
+const double kLineCountReciprocal = 4.0;
+// Constant add-on (in mean box heights) to minimum gutter for aligned tabs.
+const double kMinAlignedGutter = 0.25;
+// Constant add-on (in mean box heights) to minimum gutter for ragged tabs.
+const double kMinRaggedGutter = 1.5;
+
+double_VAR(textord_tabvector_vertical_gap_fraction, 0.5,
+ "max fraction of mean blob width allowed for vertical gaps in vertical text");
+
+double_VAR(textord_tabvector_vertical_box_ratio, 0.5,
+ "Fraction of box matches required to declare a line vertical");
+
+ELISTIZE(TabConstraint)
+
+// Make a new one-element constraint list for the top (is_top) or bottom end
+void TabConstraint::CreateConstraint(TabVector* vector, bool is_top) {
+ // of the given TabVector, and install that list on the vector.
+ auto* new_list = new TabConstraint_LIST;
+ TabConstraint_IT list_it(new_list);
+ list_it.add_to_end(new TabConstraint(vector, is_top));
+ if (!is_top)
+ vector->set_bottom_constraints(new_list);
+ else
+ vector->set_top_constraints(new_list);
+}
+
+// Returns true if two distinct constraint lists share a non-empty y range.
+bool TabConstraint::CompatibleConstraints(TabConstraint_LIST* list1,
+ TabConstraint_LIST* list2) {
+ if (list1 == list2) {
+ return false; // A list is never merged with itself.
+ }
+ // Start from the widest possible range and intersect with both lists.
+ int lo = -INT32_MAX;
+ int hi = INT32_MAX;
+ if (textord_debug_tabfind > 3) tprintf("Testing constraint compatibility\n");
+ GetConstraints(list1, &lo, &hi);
+ GetConstraints(list2, &lo, &hi);
+ if (textord_debug_tabfind > 3) tprintf("Resulting range = [%d,%d]\n", lo, hi);
+ return lo <= hi;
+}
+
+// Merge the lists of constraints and update the TabVector pointers.
+// Every vector constrained by list2 is re-pointed at list1; list2 is deleted.
+void TabConstraint::MergeConstraints(TabConstraint_LIST* list1,
+ TabConstraint_LIST* list2) {
+ if (list1 == list2)
+ return; // Nothing to merge, and list2 must not be deleted.
+ TabConstraint_IT it(list2);
+ if (textord_debug_tabfind > 3)
+ tprintf("Merging constraints\n");
+ // The vectors of all constraints on list2 are now going to be on list1.
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ TabConstraint* constraint = it.data();
+ if (textord_debug_tabfind> 3)
+ constraint->vector_->Print("Merge");
+ if (constraint->is_top_)
+ constraint->vector_->set_top_constraints(list1);
+ else
+ constraint->vector_->set_bottom_constraints(list1);
+ }
+ it = list1; // Switch the iterator over to list1.
+ it.add_list_before(list2); // Presumably moves list2's elements into list1.
+ delete list2;
+}
+
+// Collapse the constrained range to its midpoint and move the top or bottom
+// of every constrained vector there. Clears and deletes the constraint list.
+void TabConstraint::ApplyConstraints(TabConstraint_LIST* constraints) {
+ int lo = -INT32_MAX;
+ int hi = INT32_MAX;
+ GetConstraints(constraints, &lo, &hi);
+ const int mid_y = (lo + hi) / 2;
+ TabConstraint_IT c_it(constraints);
+ for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
+ TabConstraint* item = c_it.data();
+ TabVector* target = item->vector_;
+ if (!item->is_top_) {
+ target->SetYStart(mid_y);
+ target->set_bottom_constraints(nullptr);
+ } else {
+ target->SetYEnd(mid_y);
+ target->set_top_constraints(nullptr);
+ }
+ }
+ delete constraints;
+}
+
+TabConstraint::TabConstraint(TabVector* vector, bool is_top)
+ : vector_(vector), is_top_(is_top) {
+ if (!is_top) { // Bottom end: from the extended minimum up to the start.
+ y_min_ = vector->extended_ymin();
+ y_max_ = vector->startpt().y();
+ } else { // Top end: from the current end up to the extended maximum.
+ y_min_ = vector->endpt().y();
+ y_max_ = vector->extended_ymax();
+ }
+}
+
+// Narrow [*y_min,*y_max] by each constraint: max of the mins, min of the maxes.
+void TabConstraint::GetConstraints(TabConstraint_LIST* constraints,
+ int* y_min, int* y_max) {
+ TabConstraint_IT c_it(constraints);
+ for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
+ TabConstraint* item = c_it.data();
+ if (textord_debug_tabfind > 3) {
+ tprintf("Constraint is [%d,%d]", item->y_min_, item->y_max_);
+ item->vector_->Print(" for");
+ }
+ if (item->y_min_ > *y_min) *y_min = item->y_min_;
+ if (item->y_max_ < *y_max) *y_max = item->y_max_;
+ }
+}
+
+ELIST2IZE(TabVector)
+CLISTIZE(TabVector)
+
+// The constructor is private. See the bottom of the file...
+
+
+// Public factory to build a TabVector of the given alignment type from a
+// list of boxes. Returns nullptr (and frees the vector) if the fit fails.
+// The input vertical vector is used in fitting. For a non-ragged result,
+// the fitted direction (end - start), weighted by the box count, is added
+// to the *vertical_x, *vertical_y accumulators.
+// The extended_start_y and extended_end_y are the maximum possible
+// extension to the line segment that can be used to align with others.
+// The input CLIST of BLOBNBOX good_points is consumed and taken over.
+TabVector* TabVector::FitVector(TabAlignment alignment, ICOORD vertical,
+ int extended_start_y, int extended_end_y,
+ BLOBNBOX_CLIST* good_points,
+ int* vertical_x, int* vertical_y) {
+ auto* result = new TabVector(extended_start_y, extended_end_y,
+ alignment, good_points);
+ if (!result->Fit(vertical, false)) {
+ delete result;
+ return nullptr;
+ }
+ if (result->IsRagged()) return result;
+ // Only non-ragged vectors contribute to the running vertical estimate.
+ vertical = result->endpt_ - result->startpt_;
+ const int box_weight = result->BoxCount();
+ *vertical_x += vertical.x() * box_weight;
+ *vertical_y += vertical.y() * box_weight;
+ return result;
+}
+
+// Build a TabVector holding a single blob: the initial extent is the height
+// of the blob on its left or right edge, the sort key is computed with
+// vertical_skew, and the extended bounds are taken from src.
+TabVector::TabVector(const TabVector& src, TabAlignment alignment,
+ const ICOORD& vertical_skew, BLOBNBOX* blob)
+ : extended_ymin_(src.extended_ymin_), extended_ymax_(src.extended_ymax_),
+ needs_refit_(true), needs_evaluation_(true),
+ alignment_(alignment) {
+ BLOBNBOX_C_IT box_it(&boxes_);
+ box_it.add_to_end(blob);
+ TBOX blob_box = blob->bounding_box();
+ if (!IsLeftTab()) {
+ startpt_ = blob_box.botright();
+ endpt_ = blob_box.topright();
+ } else {
+ startpt_ = blob_box.botleft();
+ endpt_ = blob_box.topleft();
+ }
+ const int mid_x = (startpt_.x() + endpt_.x()) / 2;
+ const int mid_y = (startpt_.y() + endpt_.y()) / 2;
+ sort_key_ = SortKey(vertical_skew, mid_x, mid_y);
+ if (textord_debug_tabfind > 3)
+ Print("Constructed a new tab vector:");
+}
+
+// Makes a newly allocated TabVector carrying only the basic geometry of this
+// one: startpt, endpt, alignment, the extended y range and the
+// intersects_other_lines flag.
+// Does not copy things such as partners, boxes, or constraints.
+// Useful when only vector information is needed, e.g. in table detection.
+TabVector* TabVector::ShallowCopy() const {
+ auto* clone = new TabVector();
+ clone->alignment_ = alignment_;
+ clone->startpt_ = startpt_;
+ clone->endpt_ = endpt_;
+ clone->extended_ymin_ = extended_ymin_;
+ clone->extended_ymax_ = extended_ymax_;
+ clone->intersects_other_lines_ = intersects_other_lines_;
+ return clone;
+}
+
+// Extend this vector to include the supplied blob if it doesn't
+// already have it. The blob is inserted in order of box top.
+void TabVector::ExtendToBox(BLOBNBOX* new_blob) {
+ TBOX new_box = new_blob->bounding_box();
+ BLOBNBOX_C_IT it(&boxes_);
+ if (!it.empty()) {
+ BLOBNBOX* blob = it.data();
+ TBOX box = blob->bounding_box();
+ while (blob != new_blob && !it.at_last() && box.top() <= new_box.top()) {
+ it.forward();
+ blob = it.data();
+ box = blob->bounding_box();
+ }
+ if (blob == new_blob) // Tested after the loop so the last element counts.
+ return; // We have it already.
+ if (box.top() >= new_box.top()) {
+ it.add_before_stay_put(new_blob);
+ needs_refit_ = true;
+ return;
+ }
+ }
+ needs_refit_ = true;
+ it.add_after_stay_put(new_blob);
+}
+
+// Move the start point to the given y, taking its x from XAtY(start_y).
+void TabVector::SetYStart(int start_y) {
+ // XAtY(start_y) is evaluated before the point is overwritten.
+ startpt_ = ICOORD(XAtY(start_y), start_y);
+}
+// Move the end point to the given y, taking its x from XAtY(end_y).
+void TabVector::SetYEnd(int end_y) {
+ // XAtY(end_y) is evaluated before the point is overwritten.
+ endpt_ = ICOORD(XAtY(end_y), end_y);
+}
+
+// Rotate both ends; swap them if the larger of |dx|,|dy| came out negative.
+void TabVector::Rotate(const FCOORD& rotation) {
+ startpt_.rotate(rotation);
+ endpt_.rotate(rotation);
+ const int dx = endpt_.x() - startpt_.x();
+ const int dy = endpt_.y() - startpt_.y();
+ const bool points_down = dy < 0 && abs(dy) > abs(dx);
+ const bool points_left = dx < 0 && abs(dx) > abs(dy);
+ if (points_down || points_left) {
+ const ICOORD old_start = startpt_;
+ startpt_ = endpt_;
+ endpt_ = old_start;
+ }
+}
+
+// Setup the initial constraints, being the limits of
+// the vector and the extended ends. Each end gets its own one-element list.
+void TabVector::SetupConstraints() {
+ TabConstraint::CreateConstraint(this, false); // Bottom end.
+ TabConstraint::CreateConstraint(this, true); // Top end.
+}
+
+// Setup the constraints between the partners of this TabVector.
+void TabVector::SetupPartnerConstraints() {
+ // With the first and last partner, we want a common bottom and top,
+ // respectively, and for each change of partner, we want a common
+ // top of first with bottom of next.
+ TabVector_C_IT it(&partners_);
+ TabVector* prev_partner = nullptr;
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ TabVector* partner = it.data();
+ if (partner->top_constraints_ == nullptr ||
+ partner->bottom_constraints_ == nullptr) {
+ partner->Print("Impossible: has no constraints");
+ Print("This vector has it as a partner");
+ continue; // Skipped partners do not become prev_partner.
+ }
+ if (prev_partner == nullptr) {
+ // This is the first partner, so common bottom.
+ if (TabConstraint::CompatibleConstraints(bottom_constraints_,
+ partner->bottom_constraints_))
+ TabConstraint::MergeConstraints(bottom_constraints_,
+ partner->bottom_constraints_);
+ } else {
+ // We need prev top to be common with partner bottom.
+ if (TabConstraint::CompatibleConstraints(prev_partner->top_constraints_,
+ partner->bottom_constraints_))
+ TabConstraint::MergeConstraints(prev_partner->top_constraints_,
+ partner->bottom_constraints_);
+ }
+ prev_partner = partner;
+ if (it.at_last()) {
+ // This is the last partner, so common top.
+ if (TabConstraint::CompatibleConstraints(top_constraints_,
+ partner->top_constraints_))
+ TabConstraint::MergeConstraints(top_constraints_,
+ partner->top_constraints_);
+ }
+ }
+}
+
+// Merge the compatible bottom, then top, constraints of this and partner.
+void TabVector::SetupPartnerConstraints(TabVector* partner) {
+ if (TabConstraint::CompatibleConstraints(bottom_constraints_,
+ partner->bottom_constraints_))
+ TabConstraint::MergeConstraints(bottom_constraints_,
+ partner->bottom_constraints_);
+ if (TabConstraint::CompatibleConstraints(top_constraints_, // Read after merge.
+ partner->top_constraints_))
+ TabConstraint::MergeConstraints(top_constraints_,
+ partner->top_constraints_);
+} // NOTE(review): no null checks on the lists, unlike the no-arg overload.
+
+// Use the constraints to modify the top and bottom.
+void TabVector::ApplyConstraints() {
+ if (top_constraints_ != nullptr)
+ TabConstraint::ApplyConstraints(top_constraints_); // Nulls and deletes it.
+ if (bottom_constraints_ != nullptr) // Re-read: the call above may clear it.
+ TabConstraint::ApplyConstraints(bottom_constraints_);
+}
+
+// Merge close tab vectors of the same side that overlap.
+void TabVector::MergeSimilarTabVectors(const ICOORD& vertical,
+ TabVector_LIST* vectors,
+ BlobGrid* grid) {
+ TabVector_IT it1(vectors);
+ for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
+ TabVector* v1 = it1.data();
+ TabVector_IT it2(it1);
+ for (it2.forward(); !it2.at_first(); it2.forward()) { // Vectors after v1.
+ TabVector* v2 = it2.data();
+ if (v2->SimilarTo(vertical, *v1, grid)) {
+ // Merge into the forward one, in case the combined vector now
+ // overlaps one in between.
+ if (textord_debug_tabfind) {
+ v2->Print("Merging");
+ v1->Print("by deleting");
+ }
+ v2->MergeWith(vertical, it1.extract()); // v1 is deleted by MergeWith.
+ if (textord_debug_tabfind) {
+ v2->Print("Producing");
+ }
+ ICOORD merged_vector = v2->endpt();
+ merged_vector -= v2->startpt();
+ if (textord_debug_tabfind && abs(merged_vector.x()) > 100) {
+ v2->Print("Garbage result of merge?");
+ }
+ break; // v1 no longer exists, so stop comparing against it.
+ }
+ }
+ }
+}
+
+// Return true if this vector is the same side, overlaps, and close
+// enough to the other to be merged. grid may be nullptr to skip the blob test.
+bool TabVector::SimilarTo(const ICOORD& vertical,
+ const TabVector& other, BlobGrid* grid) const {
+ if ((IsRightTab() && other.IsRightTab()) ||
+ (IsLeftTab() && other.IsLeftTab())) {
+ // If they don't overlap, at least in extensions, then there is no chance.
+ if (ExtendedOverlap(other.extended_ymax_, other.extended_ymin_) < 0)
+ return false;
+ // A fast approximation to the scale factor of the sort_key_.
+ int v_scale = abs(vertical.y());
+ if (v_scale == 0)
+ v_scale = 1;
+ // If they are close enough, then OK.
+ if (sort_key_ + kSimilarVectorDist * v_scale >= other.sort_key_ &&
+ sort_key_ - kSimilarVectorDist * v_scale <= other.sort_key_)
+ return true;
+ // Ragged tabs get a bigger threshold.
+ if (!IsRagged() || !other.IsRagged() ||
+ sort_key_ + kSimilarRaggedDist * v_scale < other.sort_key_ ||
+ sort_key_ - kSimilarRaggedDist * v_scale > other.sort_key_)
+ return false;
+ if (grid == nullptr) {
+ // There is nothing else to test!
+ return true;
+ }
+ // If there is nothing in the rectangle between the vector that is going to
+ // move, and the place it is moving to, then they can be merged.
+ // Setup a vertical search for any blob.
+ const TabVector* mover = (IsRightTab() &&
+ sort_key_ < other.sort_key_) ? this : &other;
+ int top_y = mover->endpt_.y();
+ int bottom_y = mover->startpt_.y();
+ int left = std::min(mover->XAtY(top_y), mover->XAtY(bottom_y));
+ int right = std::max(mover->XAtY(top_y), mover->XAtY(bottom_y));
+ int shift = abs(sort_key_ - other.sort_key_) / v_scale;
+ if (IsRightTab()) {
+ right += shift;
+ } else {
+ left -= shift;
+ }
+
+ GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(grid);
+ vsearch.StartVerticalSearch(left, right, top_y);
+ BLOBNBOX* blob;
+ while ((blob = vsearch.NextVerticalSearch(true)) != nullptr) {
+ const TBOX& box = blob->bounding_box();
+ if (box.top() > bottom_y) // NOTE(review): looks inverted if top_y > bottom_y
+ return true; // Nothing found. (expected box.top() < bottom_y?) - confirm.
+ if (box.bottom() < top_y) // NOTE(review): likewise, expected > top_y?
+ continue; // Doesn't overlap.
+ int left_at_box = XAtY(box.bottom());
+ int right_at_box = left_at_box;
+ if (IsRightTab())
+ right_at_box += shift;
+ else
+ left_at_box -= shift;
+ if (std::min(right_at_box, static_cast<int>(box.right())) > std::max(left_at_box, static_cast<int>(box.left())))
+ return false; // The blob overlaps the strip the vector would move over.
+ }
+ return true; // Nothing found.
+ }
+ return false; // Different sides (or neither left nor right) never merge.
+}
+
+// Eat the other TabVector into this and delete it.
+void TabVector::MergeWith(const ICOORD& vertical, TabVector* other) {
+ extended_ymin_ = std::min(extended_ymin_, other->extended_ymin_);
+ extended_ymax_ = std::max(extended_ymax_, other->extended_ymax_);
+ if (other->IsRagged()) {
+ alignment_ = other->alignment_; // A ragged other makes the result ragged.
+ }
+ // Merge sort the two lists of boxes.
+ BLOBNBOX_C_IT it1(&boxes_);
+ BLOBNBOX_C_IT it2(&other->boxes_);
+ while (!it2.empty()) {
+ BLOBNBOX* bbox2 = it2.extract();
+ it2.forward();
+ TBOX box2 = bbox2->bounding_box();
+ BLOBNBOX* bbox1 = it1.data(); // NOTE(review): assumes boxes_ is not empty.
+ TBOX box1 = bbox1->bounding_box();
+ while (box1.bottom() < box2.bottom() && !it1.at_last()) {
+ it1.forward();
+ bbox1 = it1.data();
+ box1 = bbox1->bounding_box();
+ }
+ if (box1.bottom() < box2.bottom()) {
+ it1.add_to_end(bbox2);
+ } else if (bbox1 != bbox2) { // Skip a blob that both vectors share.
+ it1.add_before_stay_put(bbox2);
+ }
+ }
+ Fit(vertical, true);
+ other->Delete(this); // Re-points other's partners at this, then deletes it.
+}
+
+// Appends partner to the list of partner TabVectors.
+// Partners must be added in order of increasing y coordinate of the text line
+// that makes them partners, so a repeat of the current last partner is
+// dropped. Nothing is added if either vector is a separator.
+void TabVector::AddPartner(TabVector* partner) {
+ const bool involves_separator = IsSeparator() || partner->IsSeparator();
+ if (involves_separator) return;
+ TabVector_C_IT tail_it(&partners_);
+ if (!tail_it.empty()) {
+ tail_it.move_to_last();
+ const bool already_last = tail_it.data() == partner;
+ if (already_last) return;
+ }
+ tail_it.add_after_then_move(partner);
+}
+
+// Return true if other appears anywhere in this vector's partner list.
+// Pointers are compared, not vector contents.
+bool TabVector::IsAPartner(const TabVector* other) {
+ TabVector_C_IT p_it(&partners_);
+ bool found = false;
+ for (p_it.mark_cycle_pt(); !found && !p_it.cycled_list(); p_it.forward())
+ found = (p_it.data() == other);
+ return found;
+}
+
+// These names must be synced with the TabAlignment enum in tabvector.h.
+static const char* const kAlignmentNames[] = {
+ "Left Aligned", // TA_LEFT_ALIGNED
+ "Left Ragged", // TA_LEFT_RAGGED
+ "Center", // TA_CENTER_JUSTIFIED
+ "Right Aligned", // TA_RIGHT_ALIGNED
+ "Right Ragged", // TA_RIGHT_RAGGED
+ "Separator" // TA_SEPARATOR
+};
+
+// Logs one line: prefix, alignment name, end points, mean width, score,
+// sort key, and the number of boxes and partners.
+void TabVector::Print(const char* prefix) {
+ const char* kind = kAlignmentNames[alignment_];
+ tprintf("%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d,"
+ " partners=%d\n",
+ prefix, kind, startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y(),
+ mean_width_, percent_score_, sort_key_, boxes_.length(), partners_.length());
+}
+
+// Calls Print(prefix), then logs one line per box in the vector giving the
+// corners of its bounding box.
+void TabVector::Debug(const char* prefix) {
+ Print(prefix);
+ BLOBNBOX_C_IT box_it(&boxes_);
+ for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
+ const TBOX& bounds = box_it.data()->bounding_box();
+ tprintf("Box at (%d,%d)->(%d,%d)\n",
+ bounds.left(), bounds.bottom(), bounds.right(), bounds.top());
+ }
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Draw this tabvector, its extensions and its score in the given window.
+void TabVector::Display(ScrollView* tab_win) {
+ // The pen colour encodes the alignment, unless printable debug is on.
+ if (textord_debug_printable) {
+ tab_win->Pen(ScrollView::BLUE);
+ } else {
+ switch (alignment_) {
+ case TA_LEFT_ALIGNED: tab_win->Pen(ScrollView::LIME_GREEN); break;
+ case TA_LEFT_RAGGED: tab_win->Pen(ScrollView::DARK_GREEN); break;
+ case TA_RIGHT_ALIGNED: tab_win->Pen(ScrollView::PINK); break;
+ case TA_RIGHT_RAGGED: tab_win->Pen(ScrollView::CORAL); break;
+ default: tab_win->Pen(ScrollView::WHITE); break;
+ }
+ }
+ tab_win->Line(startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y());
+ tab_win->Pen(ScrollView::GREY);
+ tab_win->Line(startpt_.x(), startpt_.y(), startpt_.x(), extended_ymin_);
+ tab_win->Line(endpt_.x(), extended_ymax_, endpt_.x(), endpt_.y());
+ char score_text[64];
+ snprintf(score_text, sizeof(score_text), "%d", percent_score_);
+ tab_win->TextAttributes("Times", 50, false, false, false);
+ tab_win->Text(startpt_.x(), startpt_.y(), score_text);
+}
+
+#endif
+
+// Refit the line and/or re-evaluate the vector if the dirty flags are set.
+// The refit runs first, as Fit may itself set the evaluation flag.
+void TabVector::FitAndEvaluateIfNeeded(const ICOORD& vertical,
+ TabFind* finder) {
+ if (needs_refit_) Fit(vertical, true);
+ // The flag is read here, after any refit, rather than cached up front.
+ if (needs_evaluation_) Evaluate(vertical, finder);
+}
+
+// Evaluate the vector in terms of coverage of its length by good-looking
+// box edges. A good looking box is one where its nearest neighbour on the
+// inside is no further than 1/kGutterToNeighbourRatio of the gutter width on
+// the outside of the putative column. Bad boxes are removed from the line.
+// A second pass then further filters boxes by requiring that the gutter
+// width be a minimum fraction of the median gutter along the line.
+void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) {
+ bool debug = false;
+ needs_evaluation_ = false;
+ int length = endpt_.y() - startpt_.y();
+ if (length == 0 || boxes_.empty()) {
+ percent_score_ = 0;
+ Print("Zero length in evaluate");
+ return;
+ }
+ // Compute the mean box height.
+ BLOBNBOX_C_IT it(&boxes_);
+ int mean_height = 0;
+ int height_count = 0;
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ BLOBNBOX* bbox = it.data();
+ const TBOX& box = bbox->bounding_box();
+ int height = box.height();
+ mean_height += height;
+ ++height_count;
+ }
+ if (height_count > 0) mean_height /= height_count;
+ int max_gutter = kGutterMultiple * mean_height;
+ if (IsRagged()) {
+ // Ragged edges face a tougher test in that the gap must always be within
+ // the height of the blob.
+ max_gutter = kGutterToNeighbourRatio * mean_height;
+ }
+
+ STATS gutters(0, max_gutter + 1);
+ // Evaluate the boxes for their goodness, calculating the coverage as we go.
+ // Remove boxes that are not good and shorten the list to the first and
+ // last good boxes.
+ int num_deleted_boxes = 0;
+ bool text_on_image = false;
+ int good_length = 0;
+ const TBOX* prev_good_box = nullptr;
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ BLOBNBOX* bbox = it.data();
+ const TBOX& box = bbox->bounding_box();
+ int mid_y = (box.top() + box.bottom()) / 2;
+ if (TabFind::WithinTestRegion(2, XAtY(box.bottom()), box.bottom())) {
+ if (!debug) {
+ tprintf("After already deleting %d boxes, ", num_deleted_boxes);
+ Print("Starting evaluation");
+ }
+ debug = true;
+ }
+ // A good box is one where the nearest neighbour on the inside is no
+ // further than 1/kGutterToNeighbourRatio of the gutter width on the outside
+ // (of the putative column).
+ bool left = IsLeftTab();
+ int tab_x = XAtY(mid_y);
+ int gutter_width;
+ int neighbour_gap;
+ finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left,
+ bbox, &gutter_width, &neighbour_gap);
+ if (debug) {
+ tprintf("Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n",
+ box.left(), box.bottom(), box.right(), box.top(),
+ gutter_width, neighbour_gap);
+ }
+ // Now we can make the test.
+ if (neighbour_gap * kGutterToNeighbourRatio <= gutter_width) {
+ // A good box contributes its height to the good_length.
+ good_length += box.top() - box.bottom();
+ gutters.add(gutter_width, 1);
+ // Two good boxes together contribute the gap between them
+ // to the good_length as well, as long as the gap is not
+ // too big.
+ if (prev_good_box != nullptr) {
+ int vertical_gap = box.bottom() - prev_good_box->top();
+ double size1 = sqrt(static_cast<double>(prev_good_box->area()));
+ double size2 = sqrt(static_cast<double>(box.area()));
+ if (vertical_gap < kMaxFillinMultiple * std::min(size1, size2))
+ good_length += vertical_gap;
+ if (debug) {
+ tprintf("Box and prev good, gap=%d, target %g, goodlength=%d\n",
+ vertical_gap, kMaxFillinMultiple * std::min(size1, size2),
+ good_length);
+ }
+ } else {
+ // Adjust the start to the first good box.
+ SetYStart(box.bottom());
+ }
+ prev_good_box = &box;
+ if (bbox->flow() == BTFT_TEXT_ON_IMAGE)
+ text_on_image = true;
+ } else {
+ // Get rid of boxes that are not good.
+ if (debug) {
+ tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n",
+ box.left(), box.bottom(), box.right(), box.top(),
+ gutter_width, neighbour_gap);
+ }
+ it.extract();
+ ++num_deleted_boxes;
+ }
+ }
+ if (debug) {
+ Print("Evaluating:");
+ }
+ // If there are any good boxes, do it again, except this time get rid of
+ // boxes that have a gutter that is a small fraction of the median gutter.
+ // This filters out ends that run into a coincidental gap in the text.
+ int search_top = endpt_.y();
+ int search_bottom = startpt_.y();
+ int median_gutter = IntCastRounded(gutters.median());
+ if (gutters.get_total() > 0) {
+ prev_good_box = nullptr;
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ BLOBNBOX* bbox = it.data();
+ const TBOX& box = bbox->bounding_box();
+ int mid_y = (box.top() + box.bottom()) / 2;
+ // A good box is one where the gutter width is at least some constant
+ // fraction of the median gutter width.
+ bool left = IsLeftTab();
+ int tab_x = XAtY(mid_y);
+ int max_gutter = kGutterMultiple * mean_height; // Shadows outer max_gutter.
+ if (IsRagged()) {
+ // Ragged edges face a tougher test in that the gap must always be
+ // within the height of the blob.
+ max_gutter = kGutterToNeighbourRatio * mean_height;
+ }
+ int gutter_width;
+ int neighbour_gap;
+ finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left,
+ bbox, &gutter_width, &neighbour_gap);
+ // Now we can make the test.
+ if (gutter_width >= median_gutter * kMinGutterFraction) {
+ if (prev_good_box == nullptr) {
+ // Adjust the start to the first good box.
+ SetYStart(box.bottom());
+ search_bottom = box.top();
+ }
+ prev_good_box = &box;
+ search_top = box.bottom();
+ } else {
+ // Get rid of boxes that are not good.
+ if (debug) {
+ tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n",
+ box.left(), box.bottom(), box.right(), box.top(),
+ gutter_width, median_gutter);
+ }
+ it.extract();
+ ++num_deleted_boxes;
+ }
+ }
+ }
+ // If there has been a good box, adjust the end.
+ if (prev_good_box != nullptr) {
+ SetYEnd(prev_good_box->top());
+ // Compute the percentage of the vector that is occupied by good boxes.
+ int length = endpt_.y() - startpt_.y(); // Shadows the outer length.
+ percent_score_ = 100 * good_length / length; // NOTE(review): no 0 guard.
+ if (num_deleted_boxes > 0) {
+ needs_refit_ = true;
+ FitAndEvaluateIfNeeded(vertical, finder); // May re-enter Evaluate.
+ if (boxes_.empty())
+ return;
+ }
+ // Test the gutter over the whole vector, instead of just at the boxes.
+ int required_shift;
+ if (search_bottom > search_top) {
+ search_bottom = startpt_.y();
+ search_top = endpt_.y();
+ }
+ double min_gutter_width = kLineCountReciprocal / boxes_.length();
+ min_gutter_width += IsRagged() ? kMinRaggedGutter : kMinAlignedGutter;
+ min_gutter_width *= mean_height;
+ int max_gutter_width = IntCastRounded(min_gutter_width) + 1;
+ if (median_gutter > max_gutter_width)
+ max_gutter_width = median_gutter;
+ int gutter_width = finder->GutterWidth(search_bottom, search_top, *this,
+ text_on_image, max_gutter_width,
+ &required_shift);
+ if (gutter_width < min_gutter_width) {
+ if (debug) {
+ tprintf("Rejecting bad tab Vector with %d gutter vs %g min\n",
+ gutter_width, min_gutter_width);
+ }
+ boxes_.shallow_clear();
+ percent_score_ = 0;
+ } else if (debug) {
+ tprintf("Final gutter %d, vs limit of %g, required shift = %d\n",
+ gutter_width, min_gutter_width, required_shift);
+ }
+ } else {
+ // There are no good boxes left, so score is 0.
+ percent_score_ = 0;
+ }
+
+ if (debug) {
+ Print("Evaluation complete:");
+ }
+}
+
+// (Re)Fit a line to the stored points. Returns false if the line
+// is degenerate. Although the TabVector code mostly doesn't care about the
+// direction of lines, XAtY would give silly results for a horizontal line.
+// The class is mostly aimed at use for vertical lines representing
+// horizontal tab stops.
+bool TabVector::Fit(ICOORD vertical, bool force_parallel) {
+ needs_refit_ = false;
+ if (boxes_.empty()) {
+ // Don't refit something with no boxes, as that only happens
+ // in Evaluate, and we don't want to end up with a zero vector.
+ if (!force_parallel)
+ return false;
+ // If we are forcing parallel, then we just need to set the sort_key_.
+ ICOORD midpt = startpt_;
+ midpt += endpt_;
+ midpt /= 2;
+ sort_key_ = SortKey(vertical, midpt.x(), midpt.y());
+ return startpt_.y() != endpt_.y();
+ }
+ if (!force_parallel && !IsRagged()) {
+ // Use a fitted line as the vertical.
+ DetLineFit linepoints;
+ BLOBNBOX_C_IT it(&boxes_);
+ // Fit a line to all the boxes in the list.
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ BLOBNBOX* bbox = it.data();
+ const TBOX& box = bbox->bounding_box();
+ int x1 = IsRightTab() ? box.right() : box.left();
+ ICOORD boxpt(x1, box.bottom());
+ linepoints.Add(boxpt);
+ if (it.at_last()) { // The last box also contributes its top corner.
+ ICOORD top_pt(x1, box.top());
+ linepoints.Add(top_pt);
+ }
+ }
+ linepoints.Fit(&startpt_, &endpt_);
+ if (startpt_.y() != endpt_.y()) { // Keep the input vertical if degenerate.
+ vertical = endpt_;
+ vertical -= startpt_;
+ }
+ }
+ int start_y = startpt_.y();
+ int end_y = endpt_.y();
+ sort_key_ = IsLeftTab() ? INT32_MAX : -INT32_MAX;
+ BLOBNBOX_C_IT it(&boxes_);
+ // Choose a line parallel to the vertical such that all boxes are on the
+ // correct side of it.
+ mean_width_ = 0;
+ int width_count = 0;
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ BLOBNBOX* bbox = it.data();
+ const TBOX& box = bbox->bounding_box();
+ mean_width_ += box.width();
+ ++width_count;
+ int x1 = IsRightTab() ? box.right() : box.left();
+ // Test both the bottom and the top, as one will be more extreme, depending
+ // on the direction of skew.
+ int bottom_y = box.bottom();
+ int top_y = box.top();
+ int key = SortKey(vertical, x1, bottom_y);
+ if (IsLeftTab() == (key < sort_key_)) { // Min key if left, else max key.
+ sort_key_ = key;
+ startpt_ = ICOORD(x1, bottom_y);
+ }
+ key = SortKey(vertical, x1, top_y);
+ if (IsLeftTab() == (key < sort_key_)) {
+ sort_key_ = key;
+ startpt_ = ICOORD(x1, top_y);
+ }
+ if (it.at_first())
+ start_y = bottom_y;
+ if (it.at_last())
+ end_y = top_y;
+ }
+ if (width_count > 0) {
+ mean_width_ = (mean_width_ + width_count - 1) / width_count; // Rounds up.
+ }
+ endpt_ = startpt_ + vertical;
+ needs_evaluation_ = true;
+ if (start_y != end_y) {
+ // Set the ends of the vector to fully include the first and last blobs.
+ startpt_.set_x(XAtY(vertical, sort_key_, start_y));
+ startpt_.set_y(start_y);
+ endpt_.set_x(XAtY(vertical, sort_key_, end_y));
+ endpt_.set_y(end_y);
+ return true;
+ }
+ return false; // Degenerate: first bottom and last top have the same y.
+}
+
+// Returns the only partner when the partner list is a singleton, else nullptr.
+TabVector* TabVector::GetSinglePartner() {
+ if (partners_.singleton()) {
+ TabVector_C_IT only_it(&partners_);
+ return only_it.data();
+ }
+ return nullptr;
+}
+
+// Return the partner of this TabVector if the vector qualifies as
+// being a vertical text line, otherwise nullptr.
+TabVector* TabVector::VerticalTextlinePartner() {
+ if (!partners_.singleton())
+ return nullptr;
+ TabVector_C_IT partner_it(&partners_);
+ TabVector* partner = partner_it.data();
+ BLOBNBOX_C_IT box_it1(&boxes_);
+ BLOBNBOX_C_IT box_it2(&partner->boxes_);
+ // Count how many boxes are also in the other list.
+ // At the same time, gather the mean width and median vertical gap.
+ if (textord_debug_tabfind > 1) {
+ Print("Testing for vertical text");
+ partner->Print(" partner");
+ }
+ int num_matched = 0;
+ int num_unmatched = 0;
+ int total_widths = 0;
+ int width = startpt().x() - partner->startpt().x();
+ if (width < 0)
+ width = -width;
+ STATS gaps(0, width * 2); // Gap histogram sized from the vectors' x distance.
+ BLOBNBOX* prev_bbox = nullptr;
+ box_it2.mark_cycle_pt();
+ for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) {
+ BLOBNBOX* bbox = box_it1.data();
+ TBOX box = bbox->bounding_box();
+ if (prev_bbox != nullptr) {
+ gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1);
+ }
+ while (!box_it2.cycled_list() && box_it2.data() != bbox && // Advance partner
+ box_it2.data()->bounding_box().bottom() < box.bottom()) { // list to bbox.
+ box_it2.forward();
+ }
+ if (!box_it2.cycled_list() && box_it2.data() == bbox &&
+ bbox->region_type() >= BRT_UNKNOWN &&
+ (prev_bbox == nullptr || prev_bbox->region_type() >= BRT_UNKNOWN))
+ ++num_matched;
+ else
+ ++num_unmatched;
+ total_widths += box.width();
+ prev_bbox = bbox;
+ }
+ if (num_unmatched + num_matched == 0) return nullptr; // No boxes at all.
+ double avg_width = total_widths * 1.0 / (num_unmatched + num_matched);
+ double max_gap = textord_tabvector_vertical_gap_fraction * avg_width;
+ int min_box_match = static_cast<int>((num_matched + num_unmatched) *
+ textord_tabvector_vertical_box_ratio);
+ bool is_vertical = (gaps.get_total() > 0 &&
+ num_matched >= min_box_match &&
+ gaps.median() <= max_gap);
+ if (textord_debug_tabfind > 1) {
+ tprintf("gaps=%d, matched=%d, unmatched=%d, min_match=%d "
+ "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n",
+ gaps.get_total(), num_matched, num_unmatched, min_box_match,
+ gaps.median(), avg_width, max_gap, is_vertical?"Yes":"No");
+ }
+ return (is_vertical) ? partner : nullptr;
+}
+
+// The constructor is private; FitVector calls it. Adds boxes to boxes_.
+TabVector::TabVector(int extended_ymin, int extended_ymax,
+ TabAlignment alignment, BLOBNBOX_CLIST* boxes)
+ : extended_ymin_(extended_ymin), extended_ymax_(extended_ymax),
+ sort_key_(0), percent_score_(0), mean_width_(0),
+ needs_refit_(true), needs_evaluation_(true), alignment_(alignment),
+ top_constraints_(nullptr), bottom_constraints_(nullptr) {
+ BLOBNBOX_C_IT it(&boxes_);
+ it.add_list_after(boxes); // Presumably moves the elements out of boxes.
+}
+
+// Delete this, but first, repoint all the partners to point to
+// replacement. If replacement is nullptr, then partner relationships
+// are removed. The object must not be used after this call.
+void TabVector::Delete(TabVector* replacement) {
+ TabVector_C_IT it(&partners_);
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ TabVector* partner = it.data();
+ TabVector_C_IT p_it(&partner->partners_);
+ // If partner already has replacement in its list, then make
+ // replacement null, and just remove this TabVector when we find it.
+ TabVector* partner_replacement = replacement;
+ for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
+ TabVector* p_partner = p_it.data();
+ if (p_partner == partner_replacement) {
+ partner_replacement = nullptr;
+ break;
+ }
+ }
+ // Remove all references to this, and replace with replacement if not nullptr.
+ for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
+ TabVector* p_partner = p_it.data();
+ if (p_partner == this) {
+ p_it.extract();
+ if (partner_replacement != nullptr)
+ p_it.add_before_stay_put(partner_replacement);
+ }
+ }
+ if (partner_replacement != nullptr) {
+ partner_replacement->AddPartner(partner); // Make the link two-way.
+ }
+ }
+ delete this;
+}
+
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/tabvector.h b/tesseract/src/textord/tabvector.h
new file mode 100644
index 00000000..ce7464b8
--- /dev/null
+++ b/tesseract/src/textord/tabvector.h
@@ -0,0 +1,429 @@
+///////////////////////////////////////////////////////////////////////
+// File: tabvector.h
+// Description: Class to hold a near-vertical vector representing a tab-stop.
+// Author: Ray Smith
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_TABVECTOR_H_
+#define TESSERACT_TEXTORD_TABVECTOR_H_
+
+#include "blobgrid.h"
+#include "clst.h"
+#include "elst.h"
+#include "elst2.h"
+#include "rect.h"
+#include "bbgrid.h"
+
+#include <algorithm>
+
+class BLOBNBOX;
+class ScrollView;
+
+namespace tesseract {
+
+
+extern double_VAR_H(textord_tabvector_vertical_gap_fraction, 0.5,
+ "Max fraction of mean blob width allowed for vertical gaps in vertical text");
+extern double_VAR_H(textord_tabvector_vertical_box_ratio, 0.5,
+ "Fraction of box matches required to declare a line vertical");
+
+// The alignment type that a tab vector represents.
+// Keep this enum synced with kAlignmentNames in tabvector.cpp.
+enum TabAlignment {
+ // Left tab stop, aligned (TabVector::IsLeftTab is true, IsRagged is false).
+ TA_LEFT_ALIGNED,
+ // Left tab stop, ragged (TabVector::IsLeftTab and IsRagged are both true).
+ TA_LEFT_RAGGED,
+ // Center aligned tab stop (TabVector::IsCenterTab is true).
+ TA_CENTER_JUSTIFIED,
+ // Right tab stop, aligned (TabVector::IsRightTab is true, IsRagged is false).
+ TA_RIGHT_ALIGNED,
+ // Right tab stop, ragged (TabVector::IsRightTab and IsRagged are both true).
+ TA_RIGHT_RAGGED,
+ // Separator (TabVector::IsSeparator is true).
+ TA_SEPARATOR,
+ // Number of alignment types above; must remain the last enumerator.
+ TA_COUNT
+};
+
+// Forward declarations. The classes use their own list types, so we
+// need to make the list types first.
+class TabFind;
+class TabVector;
+class TabConstraint;
+
+ELIST2IZEH(TabVector)
+CLISTIZEH(TabVector)
+ELISTIZEH(TabConstraint)
+
+// TabConstraint is a totally self-contained class to maintain
+// a list of [min,max] constraints, each referring to a TabVector.
+// The constraints are manipulated through static methods that act
+// on a list of constraints. The list itself is cooperatively owned
+// by the TabVectors of the constraints on the list and managed
+// by implicit reference counting via the elements of the list.
+class TabConstraint : public ELIST_LINK {
+ public:
+  // This empty constructor is here only so that the class can be ELISTIZED.
+  // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
+  // and eliminate CLASSNAME##_copier.
+  // The data members below carry default initializers, so an object made
+  // this way holds well-defined (null/zero) values rather than garbage.
+  TabConstraint() = default;
+
+  // Create a constraint for the top or bottom of this TabVector.
+  // is_top selects which end of the vector the constraint refers to.
+  static void CreateConstraint(TabVector* vector, bool is_top);
+
+  // Test to see if the constraints are compatible enough to merge.
+  static bool CompatibleConstraints(TabConstraint_LIST* list1,
+                                    TabConstraint_LIST* list2);
+
+  // Merge the lists of constraints and update the TabVector pointers.
+  // The second list is deleted.
+  static void MergeConstraints(TabConstraint_LIST* list1,
+                               TabConstraint_LIST* list2);
+
+  // Set all the tops and bottoms as appropriate to a mean of the
+  // constrained range. Delete all the constraints and list.
+  static void ApplyConstraints(TabConstraint_LIST* constraints);
+
+ private:
+  // Constraints are built through CreateConstraint, not directly.
+  TabConstraint(TabVector* vector, bool is_top);
+
+  // Get the max of the mins and the min of the maxes.
+  static void GetConstraints(TabConstraint_LIST* constraints,
+                             int* y_min, int* y_max);
+
+  // The TabVector this constraint applies to.
+  TabVector* vector_ = nullptr;
+  // If true then we refer to the top of the vector_.
+  bool is_top_ = false;
+  // The allowed range of this vector_.
+  int y_min_ = 0;
+  int y_max_ = 0;
+};
+
+// Class to hold information about a single vector
+// that represents a tab stop or a rule line.
+class TabVector : public ELIST2_LINK {
+ public:
+ // TODO(rays) fix this in elst.h line 1076, where it should use the
+ // copy constructor instead of operator=.
+ TabVector() = default;
+ ~TabVector() = default;
+
+ // Public factory to build a TabVector from a list of boxes.
+ // The TabVector will be of the given alignment type.
+ // The input vertical vector is used in fitting, and the output
+ // vertical_x, vertical_y have the resulting line vector added to them
+ // if the alignment is not ragged.
+ // The extended_start_y and extended_end_y are the maximum possible
+ // extension to the line segment that can be used to align with others.
+ // The input CLIST of BLOBNBOX good_points is consumed and taken over.
+ static TabVector* FitVector(TabAlignment alignment, ICOORD vertical,
+ int extended_start_y, int extended_end_y,
+ BLOBNBOX_CLIST* good_points,
+ int* vertical_x, int* vertical_y);
+
+ // Build a ragged TabVector by copying another's direction, shifting it
+ // to match the given blob, and making its initial extent the height
+ // of the blob, but its extended bounds from the bounds of the original.
+ TabVector(const TabVector& src, TabAlignment alignment,
+ const ICOORD& vertical_skew, BLOBNBOX* blob);
+
+ // Copies basic attributes of a tab vector for simple operations.
+ // Copies things such as startpt, endpt, range, width.
+ // Does not copy things such as partners, boxes, or constraints.
+ // This is useful if you only need vector information for processing, such
+ // as in the table detection code.
+ TabVector* ShallowCopy() const;
+
+ // Simple accessors.
+ const ICOORD& startpt() const {
+ return startpt_;
+ }
+ const ICOORD& endpt() const {
+ return endpt_;
+ }
+ int extended_ymax() const {
+ return extended_ymax_;
+ }
+ int extended_ymin() const {
+ return extended_ymin_;
+ }
+ int sort_key() const {
+ return sort_key_;
+ }
+ int mean_width() const {
+ return mean_width_;
+ }
+ void set_top_constraints(TabConstraint_LIST* constraints) {
+ top_constraints_ = constraints;
+ }
+ void set_bottom_constraints(TabConstraint_LIST* constraints) {
+ bottom_constraints_ = constraints;
+ }
+ TabVector_CLIST* partners() {
+ return &partners_;
+ }
+ void set_startpt(const ICOORD& start) {
+ startpt_ = start;
+ }
+ void set_endpt(const ICOORD& end) {
+ endpt_ = end;
+ }
+ bool intersects_other_lines() const {
+ return intersects_other_lines_;
+ }
+ void set_intersects_other_lines(bool value) {
+ intersects_other_lines_ = value;
+ }
+
+ // Inline quasi-accessors that require some computation.
+
+ // Compute the x coordinate at the given y coordinate.
+  int XAtY(int y) const {
+    const int height = endpt_.y() - startpt_.y();
+    // With no vertical extent there is no slope to follow: use the start x.
+    if (height == 0) {
+      return startpt_.x();
+    }
+    // Integer linear interpolation between startpt_ and endpt_.
+    const int width = endpt_.x() - startpt_.x();
+    return (y - startpt_.y()) * width / height + startpt_.x();
+  }
+
+ // Compute the vertical overlap with the other TabVector.
+  int VOverlap(const TabVector& other) const {
+    // The shared span runs from the higher of the two starts to the lower
+    // of the two ends; the result is negative when the spans are disjoint.
+    const int lower_end = std::min(other.endpt_.y(), endpt_.y());
+    const int higher_start = std::max(other.startpt_.y(), startpt_.y());
+    return lower_end - higher_start;
+  }
+ // Compute the vertical overlap with the given y bounds.
+  int VOverlap(int top_y, int bottom_y) const {
+    // Widen the end points to int once so std::min/std::max see one type.
+    const int own_top = endpt_.y();
+    const int own_bottom = startpt_.y();
+    return std::min(top_y, own_top) - std::max(bottom_y, own_bottom);
+  }
+ // Compute the extended vertical overlap with the given y bounds.
+  int ExtendedOverlap(int top_y, int bottom_y) const {
+    // Same computation as VOverlap, but against the extended y limits.
+    const int shared_top = std::min(top_y, extended_ymax_);
+    const int shared_bottom = std::max(bottom_y, extended_ymin_);
+    return shared_top - shared_bottom;
+  }
+
+ // Return true if this is a left tab stop, either aligned, or ragged.
+ bool IsLeftTab() const {
+ return alignment_ == TA_LEFT_ALIGNED || alignment_ == TA_LEFT_RAGGED;
+ }
+ // Return true if this is a right tab stop, either aligned, or ragged.
+ bool IsRightTab() const {
+ return alignment_ == TA_RIGHT_ALIGNED || alignment_ == TA_RIGHT_RAGGED;
+ }
+ // Return true if this is a separator.
+ bool IsSeparator() const {
+ return alignment_ == TA_SEPARATOR;
+ }
+ // Return true if this is a center aligned tab stop.
+ bool IsCenterTab() const {
+ return alignment_ == TA_CENTER_JUSTIFIED;
+ }
+ // Return true if this is a ragged tab stop, either left or right.
+ bool IsRagged() const {
+ return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED;
+ }
+
+ // Return true if this vector is to the left of the other in terms
+ // of sort_key_.
+ bool IsLeftOf(const TabVector& other) const {
+ return sort_key_ < other.sort_key_;
+ }
+
+ // Return true if the vector has no partners.
+ bool Partnerless() {
+ return partners_.empty();
+ }
+
+ // Return the number of tab boxes in this vector.
+ int BoxCount() {
+ return boxes_.length();
+ }
+
+ // Lock the vector from refits by clearing the boxes_ list.
+ void Freeze() {
+ boxes_.shallow_clear();
+ }
+
+ // Flip x and y on the ends so a vector can be created from flipped input.
+  void XYFlip() {
+    // Read both coordinates of each end before writing either back.
+    const int start_x = startpt_.x();
+    const int start_y = startpt_.y();
+    startpt_.set_x(start_y);
+    startpt_.set_y(start_x);
+    const int end_x = endpt_.x();
+    const int end_y = endpt_.y();
+    endpt_.set_x(end_y);
+    endpt_.set_y(end_x);
+  }
+
+ // Reflect the tab vector in the y-axis.
+  void ReflectInYAxis() {
+    // Mirror the geometry and the sort key.
+    startpt_.set_x(-startpt_.x());
+    endpt_.set_x(-endpt_.x());
+    sort_key_ = -sort_key_;
+    // Left and right swap roles; all other alignments are unaffected.
+    switch (alignment_) {
+      case TA_LEFT_ALIGNED:
+        alignment_ = TA_RIGHT_ALIGNED;
+        break;
+      case TA_RIGHT_ALIGNED:
+        alignment_ = TA_LEFT_ALIGNED;
+        break;
+      case TA_LEFT_RAGGED:
+        alignment_ = TA_RIGHT_RAGGED;
+        break;
+      case TA_RIGHT_RAGGED:
+        alignment_ = TA_LEFT_RAGGED;
+        break;
+      default:
+        break;
+    }
+  }
+
+ // Separate function to compute the sort key for a given coordinate pair.
+  static int SortKey(const ICOORD& vertical, int x, int y) {
+    // The key is whatever ICOORD's operator* yields for (x, y) and vertical.
+    ICOORD point(x, y);
+    const int key = point * vertical;
+    return key;
+  }
+
+ // Return the x at the given y for the given sort key.
+  static int XAtY(const ICOORD& vertical, int sort_key, int y) {
+    const int dy = vertical.y();
+    // Avoid dividing by zero: with no y component the key is returned as is.
+    if (dy == 0) {
+      return sort_key;
+    }
+    return (vertical.x() * y + sort_key) / dy;
+  }
+
+ // Sort function for E2LIST::sort to sort by sort_key_.
+  static int SortVectorsByKey(const void* v1, const void* v2) {
+    // Each argument points at a TabVector pointer, not at a TabVector.
+    const TabVector* tv1 = *static_cast<const TabVector* const*>(v1);
+    const TabVector* tv2 = *static_cast<const TabVector* const*>(v2);
+    // Compare instead of subtracting: the difference of two ints overflows
+    // (undefined behaviour) when the keys are far apart with opposite signs,
+    // which could invert the ordering. Returns -1, 0 or 1.
+    const int key1 = tv1->sort_key_;
+    const int key2 = tv2->sort_key_;
+    return (key1 > key2) - (key1 < key2);
+  }
+
+ // More complex members.
+
+ // Extend this vector to include the supplied blob if it doesn't
+ // already have it.
+ void ExtendToBox(BLOBNBOX* blob);
+
+ // Set the ycoord of the start and move the xcoord to match.
+ void SetYStart(int start_y);
+ // Set the ycoord of the end and move the xcoord to match.
+ void SetYEnd(int end_y);
+
+ // Rotate the ends by the given vector.
+ void Rotate(const FCOORD& rotation);
+
+ // Setup the initial constraints, being the limits of
+ // the vector and the extended ends.
+ void SetupConstraints();
+
+ // Setup the constraints between the partners of this TabVector.
+ void SetupPartnerConstraints();
+
+ // Setup the constraints between this and its partner.
+ void SetupPartnerConstraints(TabVector* partner);
+
+ // Use the constraints to modify the top and bottom.
+ void ApplyConstraints();
+
+ // Merge close tab vectors of the same side that overlap.
+ static void MergeSimilarTabVectors(const ICOORD& vertical,
+ TabVector_LIST* vectors, BlobGrid* grid);
+
+ // Return true if this vector is the same side, overlaps, and close
+ // enough to the other to be merged.
+ bool SimilarTo(const ICOORD& vertical,
+ const TabVector& other, BlobGrid* grid) const;
+
+ // Eat the other TabVector into this and delete it.
+ void MergeWith(const ICOORD& vertical, TabVector* other);
+
+ // Add a new element to the list of partner TabVectors.
+ // Partners must be added in order of increasing y coordinate of the text line
+ // that makes them partners.
+ // Groups of identical partners are merged into one.
+ void AddPartner(TabVector* partner);
+
+ // Return true if other is a partner of this.
+ bool IsAPartner(const TabVector* other);
+
+ // Print basic information about this tab vector.
+ void Print(const char* prefix);
+
+ // Print basic information about this tab vector and every box in it.
+ void Debug(const char* prefix);
+
+ // Draw this tabvector in place in the given window.
+ void Display(ScrollView* tab_win);
+
+ // Refit the line and/or re-evaluate the vector if the dirty flags are set.
+ void FitAndEvaluateIfNeeded(const ICOORD& vertical, TabFind* finder);
+
+ // Evaluate the vector in terms of coverage of its length by good-looking
+ // box edges. A good looking box is one where its nearest neighbour on the
+ // inside is nearer than half the distance its nearest neighbour on the
+ // outside of the putative column. Bad boxes are removed from the line.
+ // A second pass then further filters boxes by requiring that the gutter
+ // width be a minimum fraction of the mean gutter along the line.
+ void Evaluate(const ICOORD& vertical, TabFind* finder);
+
+ // (Re)Fit a line to the stored points. Returns false if the line
+ // is degenerate. Although the TabVector code mostly doesn't care about the
+ // direction of lines, XAtY would give silly results for a horizontal line.
+ // The class is mostly aimed at use for vertical lines representing
+ // horizontal tab stops.
+ bool Fit(ICOORD vertical, bool force_parallel);
+
+ // Return the partner of this TabVector if the vector qualifies as
+ // being a vertical text line, otherwise nullptr.
+ TabVector* VerticalTextlinePartner();
+
+ // Return the matching tabvector if there is exactly one partner, or
+ // nullptr otherwise. This can be used after matching is done, eg. by
+ // VerticalTextlinePartner(), without checking if the line is vertical.
+ TabVector* GetSinglePartner();
+
+ private:
+ // Constructor is private as the static factory is the external way
+ // to build a TabVector.
+ TabVector(int extended_ymin, int extended_ymax,
+ TabAlignment alignment, BLOBNBOX_CLIST* boxes);
+
+ // Delete this, but first, repoint all the partners to point to
+ // replacement. If replacement is nullptr, then partner relationships
+ // are removed.
+ void Delete(TabVector* replacement);
+
+ private:
+ // The bottom of the tab line.
+ ICOORD startpt_;
+ // The top of the tab line.
+ ICOORD endpt_;
+ // The lowest y that the vector might extend to.
+ int extended_ymin_ = 0;
+ // The highest y that the vector might extend to.
+ int extended_ymax_ = 0;
+ // Perpendicular distance of vector from a given vertical for sorting.
+ int sort_key_ = 0;
+ // Result of Evaluate 0-100. Coverage of line with good boxes.
+ int percent_score_ = 0;
+ // The mean width of the blobs. Meaningful only for separator lines.
+ int mean_width_ = 0;
+ // True if the boxes_ list has been modified, so a refit is needed.
+ bool needs_refit_ = false;
+ // True if a fit has been done, so re-evaluation is needed.
+ bool needs_evaluation_ = false;
+ // True if a separator line intersects at least 2 other lines.
+ bool intersects_other_lines_ = false;
+ // The type of this TabVector.
+ TabAlignment alignment_ = TA_LEFT_ALIGNED;
+ // The list of boxes whose edges are aligned at this TabVector.
+ BLOBNBOX_CLIST boxes_;
+ // List of TabVectors that have a connection with this via a text line.
+ TabVector_CLIST partners_;
+ // Constraints used to resolve the exact location of the top and bottom
+ // of the tab line.
+ TabConstraint_LIST* top_constraints_ = nullptr;
+ TabConstraint_LIST* bottom_constraints_ = nullptr;
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_TABVECTOR_H_
diff --git a/tesseract/src/textord/textlineprojection.cpp b/tesseract/src/textord/textlineprojection.cpp
new file mode 100644
index 00000000..e52abaa0
--- /dev/null
+++ b/tesseract/src/textord/textlineprojection.cpp
@@ -0,0 +1,779 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: rays@google.com (Ray Smith)
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "textlineprojection.h"
+#include "allheaders.h"
+#include "bbgrid.h" // Base class.
+#include "blobbox.h" // BlobNeighbourDir.
+#include "blobs.h"
+#include "colpartition.h"
+#include "normalis.h"
+
+#include <algorithm>
+
+// Padding factor to use on definitely oriented blobs
+const int kOrientedPadFactor = 8;
+// Padding factor to use on not definitely oriented blobs.
+const int kDefaultPadFactor = 2;
+// Penalty factor for going away from the line center.
+const int kWrongWayPenalty = 4;
+// Ratio between parallel gap and perpendicular gap used to measure total
+// distance of a box from a target box in curved textline space.
+// parallel-gap is treated more favorably by this factor to allow catching
+// quotes and ellipses at the end of textlines.
+const int kParaPerpDistRatio = 4;
+// Multiple of scale_factor_ that the inter-line gap must be before we start
+// padding the increment box perpendicular to the text line.
+const int kMinLineSpacingFactor = 4;
+// Maximum tab-stop overrun for horizontal padding, in projection pixels.
+const int kMaxTabStopOverrun = 6;
+
+namespace tesseract {
+
+TextlineProjection::TextlineProjection(int resolution)
+    : x_origin_(0),
+      y_origin_(0),
+      pix_(nullptr) {
+  // The projection map should be about 100 ppi, whatever the input, so the
+  // scale factor is the input resolution in (rounded) units of 100 ppi.
+  const double kTargetPpi = 100.0;
+  scale_factor_ = IntCastRounded(resolution / kTargetPpi);
+  // Never let the factor drop below 1.
+  if (scale_factor_ < 1) {
+    scale_factor_ = 1;
+  }
+}
+TextlineProjection::~TextlineProjection() {
+ // Release the projection map held in pix_.
+ // NOTE(review): pix_ is still nullptr if ConstructProjection was never
+ // called; presumably pixDestroy tolerates that - confirm against Leptonica.
+ pixDestroy(&pix_);
+}
+
+// Build the projection profile given the input_block containing lists of
+// blobs, a rotation to convert to image coords,
+// and a full-resolution nontext_map, marking out areas to avoid.
+// During construction, we have the following assumptions:
+// The rotation is a multiple of 90 degrees, ie no deskew yet.
+// The blobs have had their left and right rules set to also limit
+// the range of projection.
+void TextlineProjection::ConstructProjection(TO_BLOCK* input_block,
+                                             const FCOORD& rotation,
+                                             Pix* nontext_map) {
+  // Drop any map left over from an earlier call.
+  pixDestroy(&pix_);
+  // The projection covers the same area as the nontext_map.
+  TBOX image_box(0, 0, pixGetWidth(nontext_map), pixGetHeight(nontext_map));
+  x_origin_ = 0;
+  y_origin_ = image_box.height();
+  // Scale the image size down by scale_factor_, rounding up.
+  const int map_width =
+      (image_box.width() + scale_factor_ - 1) / scale_factor_;
+  const int map_height =
+      (image_box.height() + scale_factor_ - 1) / scale_factor_;
+
+  // 8 bits per pixel map; the regular blobs go in first, then the large ones.
+  pix_ = pixCreate(map_width, map_height, 8);
+  ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map);
+  ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map);
+
+  // Replace the raw map with its block-convolved version.
+  Pix* smoothed = pixBlockconv(pix_, 1, 1);
+  pixDestroy(&pix_);
+  pix_ = smoothed;
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Display the blobs in the window colored according to textline quality.
+void TextlineProjection::PlotGradedBlobs(BLOBNBOX_LIST* blobs,
+                                         ScrollView* win) {
+  BLOBNBOX_IT blob_it(blobs);
+  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+    BLOBNBOX* blob = blob_it.data();
+    const TBOX& box = blob->bounding_box();
+    const bool off_textline = BoxOutOfHTextline(box, nullptr, false);
+    // Yellow for uniquely vertical blobs; otherwise red when the box is
+    // out of the horizontal textline and blue when it is not.
+    if (blob->UniquelyVertical()) {
+      win->Pen(ScrollView::YELLOW);
+    } else if (off_textline) {
+      win->Pen(ScrollView::RED);
+    } else {
+      win->Pen(ScrollView::BLUE);
+    }
+    win->Rectangle(box.left(), box.bottom(), box.right(), box.top());
+  }
+  win->Update();
+}
+
+#endif // !GRAPHICS_DISABLED
+
+// Moves blobs that look like they don't sit well on a textline from the
+// input blobs list to the output small_blobs list.
+// This gets them away from initial textline finding to stop diacritics
+// from forming incorrect textlines. (Introduced mainly to fix Thai.)
+void TextlineProjection::MoveNonTextlineBlobs(
+    BLOBNBOX_LIST* blobs, BLOBNBOX_LIST* small_blobs) const {
+  BLOBNBOX_IT src_it(blobs);
+  BLOBNBOX_IT dest_it(small_blobs);
+  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
+    BLOBNBOX* blob = src_it.data();
+    const TBOX& box = blob->bounding_box();
+    const bool debug =
+        AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
+    // Blobs that sit on a textline, or that are uniquely vertical, stay put.
+    if (!BoxOutOfHTextline(box, nullptr, debug) || blob->UniquelyVertical()) {
+      continue;
+    }
+    // Detach the blob from its neighbours and move it to the output list.
+    blob->ClearNeighbours();
+    dest_it.add_to_end(src_it.extract());
+  }
+}
+
+#ifndef GRAPHICS_DISABLED
+
+// Create a window and display the projection in it.
+void TextlineProjection::DisplayProjection() const {
+  const int width = pixGetWidth(pix_);
+  const int height = pixGetHeight(pix_);
+  // 32 bit image of the same size that receives the colour rendering.
+  Pix* pixc = pixCreate(width, height, 32);
+  const int src_wpl = pixGetWpl(pix_);
+  const int col_wpl = pixGetWpl(pixc);
+  uint32_t* src_row = pixGetData(pix_);
+  uint32_t* col_row = pixGetData(pixc);
+  for (int y = 0; y < height; ++y) {
+    for (int x = 0; x < width; ++x) {
+      const int level = GET_DATA_BYTE(src_row, x);
+      l_uint32 rgb;
+      // Three colour bands, split at projection values 17 and 145.
+      if (level > 145) {
+        composeRGBPixel((level - 145) * 2, 255, 255, &rgb);
+      } else if (level > 17) {
+        composeRGBPixel(0, (level - 17) * 2, 255, &rgb);
+      } else {
+        composeRGBPixel(0, 0, level * 15, &rgb);
+      }
+      col_row[x] = rgb;
+    }
+    // Advance both images by one row of words.
+    src_row += src_wpl;
+    col_row += col_wpl;
+  }
+  // The window object is not deleted here.
+  auto* win = new ScrollView("Projection", 0, 0,
+                             width, height, width, height);
+  win->Image(pixc, 0, 0);
+  win->Update();
+  pixDestroy(&pixc);
+}
+
+#endif // !GRAPHICS_DISABLED
+
+// Compute the distance of the box from the partition using curved projection
+// space. As DistanceOfBoxFromBox, except that the direction is taken from
+// the ColPartition and the median bounds of the ColPartition are used as
+// the to_box.
+int TextlineProjection::DistanceOfBoxFromPartition(const TBOX& box,
+                                                   const ColPartition& part,
+                                                   const DENORM* denorm,
+                                                   bool debug) const {
+  // Start from the partition's bounding box, then tighten the pair of edges
+  // perpendicular to the text direction to the partition medians.
+  TBOX target_box = part.bounding_box();
+  const bool horizontal = part.IsHorizontalType();
+  if (horizontal) {
+    target_box.set_top(part.median_top());
+    target_box.set_bottom(part.median_bottom());
+  } else {
+    target_box.set_left(part.median_left());
+    target_box.set_right(part.median_right());
+  }
+  // The box-to-box routine does the actual measurement.
+  return DistanceOfBoxFromBox(box, target_box, horizontal, denorm, debug);
+}
+
+// Compute the distance from the from_box to the to_box using curved
+// projection space. Separation that involves a decrease in projection
+// density (moving from the from_box to the to_box) is weighted more heavily
+// than constant density, and an increase is weighted less.
+// If horizontal_textline is true, then curved space is used vertically,
+// as for a diacritic on the edge of a textline.
+// The projection uses original image coords, so denorm is used to get
+// back to the image coords from box/part space.
+// How the calculation works: Think of a diacritic near a textline.
+// Distance is measured from the far side of the from_box to the near side of
+// the to_box. Shown is the horizontal textline case.
+// |------^-----|
+// | from | box |
+// |------|-----|
+// perpendicular |
+// <------v-------->|--------------------|
+// parallel | to box |
+// |--------------------|
+// Perpendicular distance uses "curved space" See VerticalDistance below.
+// Parallel distance is linear.
+// Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio.
+int TextlineProjection::DistanceOfBoxFromBox(const TBOX& from_box,
+ const TBOX& to_box,
+ bool horizontal_textline,
+ const DENORM* denorm,
+ bool debug) const {
+ // The parallel_gap is the horizontal gap between a horizontal textline and
+ // the box. Analogous for vertical.
+ int parallel_gap = 0;
+ // start_pt is the box end of the line to be modified for curved space.
+ TPOINT start_pt;
+ // end_pt is the partition end of the line to be modified for curved space.
+ TPOINT end_pt;
+ if (horizontal_textline) {
+ // The from_box width is added to the gap, so the parallel distance is
+ // taken from the far side of from_box.
+ parallel_gap = from_box.x_gap(to_box) + from_box.width();
+ // The perpendicular segment is vertical, at the x centre of from_box.
+ start_pt.x = (from_box.left() + from_box.right()) / 2;
+ end_pt.x = start_pt.x;
+ // Start from whichever of the top/bottom edges of from_box sticks out
+ // further beyond the matching edge of to_box (ties go to the top).
+ if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) {
+ start_pt.y = from_box.top();
+ // The min keeps end_pt from lying above start_pt; the two coincide when
+ // the from_box top does not rise above the to_box top.
+ end_pt.y = std::min(to_box.top(), start_pt.y);
+ } else {
+ start_pt.y = from_box.bottom();
+ // The max keeps end_pt from lying below start_pt.
+ end_pt.y = std::max(to_box.bottom(), start_pt.y);
+ }
+ } else {
+ // Vertical textline: the same construction with x and y exchanged.
+ parallel_gap = from_box.y_gap(to_box) + from_box.height();
+ if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) {
+ start_pt.x = from_box.right();
+ end_pt.x = std::min(to_box.right(), start_pt.x);
+ } else {
+ start_pt.x = from_box.left();
+ end_pt.x = std::max(to_box.left(), start_pt.x);
+ }
+ start_pt.y = (from_box.bottom() + from_box.top()) / 2;
+ end_pt.y = start_pt.y;
+ }
+ // The perpendicular gap is the max vertical distance gap out of:
+ // top of from_box to to_box top and bottom of from_box to to_box bottom.
+ // This value is then modified for curved projection space.
+ // Analogous for vertical.
+ int perpendicular_gap = 0;
+ // If start_pt == end_pt, then the from_box lies entirely within the to_box
+ // (in the perpendicular direction), so we don't need to calculate the
+ // perpendicular_gap.
+ if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) {
+ if (denorm != nullptr) {
+ // Denormalize the start and end.
+ denorm->DenormTransform(nullptr, start_pt, &start_pt);
+ denorm->DenormTransform(nullptr, end_pt, &end_pt);
+ }
+ // Measure along whichever axis now has the larger extent.
+ // NOTE(review): presumably the denorm can rotate the segment, so the
+ // axis is re-derived here rather than taken from horizontal_textline.
+ if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) {
+ // Only the x of start_pt is used; any x difference is ignored.
+ perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y,
+ end_pt.y);
+ } else {
+ // Only the y of start_pt is used; any y difference is ignored.
+ perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x,
+ start_pt.y);
+ }
+ }
+ // The parallel_gap weighs less than the perpendicular_gap.
+ return perpendicular_gap + parallel_gap / kParaPerpDistRatio;
+}
+
+// Compute the distance between (x, y1) and (x, y2) using the rule that
+// a decrease in textline density is weighted more heavily than an increase.
+// The coordinates are in source image space, ie processed by any denorm
+// already, but not yet scaled by scale_factor_.
+// Going from the outside of a textline to the inside should measure much
+// less distance than going from the inside of a textline to the outside.
+// How it works:
+// An increase is cheap (getting closer to a textline).
+// Constant costs unity.
+// A decrease is expensive (getting further from a textline).
+// Pixels in projection map Counted distance
+// 2
+// 3 1/x
+// 3 1
+// 2 x
+// 5 1/x
+// 7 1/x
+// Total: 1 + x + 3/x where x = kWrongWayPenalty.
+int TextlineProjection::VerticalDistance(bool debug, int x,
+                                         int y1, int y2) const {
+  // Convert everything to projection map coordinates.
+  x = ImageXToProjectionX(x);
+  y1 = ImageYToProjectionY(y1);
+  y2 = ImageYToProjectionY(y2);
+  if (y1 == y2) {
+    return 0;
+  }
+  const int wpl = pixGetWpl(pix_);
+  const int direction = (y1 < y2) ? 1 : -1;
+  // Words to add to the row pointer for one step in the travel direction.
+  const int row_stride = wpl * direction;
+  uint32_t* row = pixGetData(pix_) + y1 * wpl;
+  int last_value = GET_DATA_BYTE(row, x);
+  // Steps at constant density count 1 each and steps of decreasing density
+  // count kWrongWayPenalty each; increasing steps are tallied separately.
+  int weighted_steps = 0;
+  int uphill_steps = 0;
+  int y = y1;
+  while (y != y2) {
+    row += row_stride;
+    y += direction;
+    const int value = GET_DATA_BYTE(row, x);
+    if (debug) {
+      tprintf("At (%d,%d), pix = %d, prev=%d\n",
+              x, y, value, last_value);
+    }
+    if (value < last_value) {
+      weighted_steps += kWrongWayPenalty;
+    } else if (value > last_value) {
+      ++uphill_steps;
+    } else {
+      ++weighted_steps;
+    }
+    last_value = value;
+  }
+  // Scale back to image pixels; increasing steps are divided by the penalty.
+  return weighted_steps * scale_factor_ +
+         uphill_steps * scale_factor_ / kWrongWayPenalty;
+}
+
+// Compute the distance between (x1, y) and (x2, y) using the rule that
+// a decrease in textline density is weighted more heavily than an increase.
+int TextlineProjection::HorizontalDistance(bool debug, int x1, int x2,
+                                           int y) const {
+  // Convert everything to projection map coordinates.
+  x1 = ImageXToProjectionX(x1);
+  x2 = ImageXToProjectionX(x2);
+  y = ImageYToProjectionY(y);
+  if (x1 == x2) {
+    return 0;
+  }
+  const int wpl = pixGetWpl(pix_);
+  const int direction = (x1 < x2) ? 1 : -1;
+  // All the samples come from the single row at y.
+  uint32_t* row = pixGetData(pix_) + y * wpl;
+  int last_value = GET_DATA_BYTE(row, x1);
+  // Steps at constant density count 1 each and steps of decreasing density
+  // count kWrongWayPenalty each; increasing steps are tallied separately.
+  int weighted_steps = 0;
+  int uphill_steps = 0;
+  int x = x1;
+  while (x != x2) {
+    x += direction;
+    const int value = GET_DATA_BYTE(row, x);
+    if (debug) {
+      tprintf("At (%d,%d), pix = %d, prev=%d\n",
+              x, y, value, last_value);
+    }
+    if (value < last_value) {
+      weighted_steps += kWrongWayPenalty;
+    } else if (value > last_value) {
+      ++uphill_steps;
+    } else {
+      ++weighted_steps;
+    }
+    last_value = value;
+  }
+  // Scale back to image pixels; increasing steps are divided by the penalty.
+  return weighted_steps * scale_factor_ +
+         uphill_steps * scale_factor_ / kWrongWayPenalty;
+}
+
+// Returns true if the blob appears to be outside of a textline.
+// Such blobs are potentially diacritics (even if large in Thai) and should
+// be kept away from initial textline finding.
+bool TextlineProjection::BoxOutOfHTextline(const TBOX& box,
+                                           const DENORM* denorm,
+                                           bool debug) const {
+  // Only the two horizontal-textline gradients are requested.
+  int top_grad = 0;
+  int bottom_grad = 0;
+  EvaluateBoxInternal(box, denorm, debug, &top_grad, &bottom_grad,
+                      nullptr, nullptr);
+  // A combined gradient of 6 or more counts as strongly in a textline.
+  const bool strongly_in_line = top_grad + bottom_grad >= 6;
+  // Below that, a negative gradient on either edge means the box is likely
+  // outside the body of the textline.
+  const bool has_negative_edge = std::min(top_grad, bottom_grad) < 0;
+  return !strongly_in_line && has_negative_edge;
+}
+
+// Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
+// but uses the median top/bottom for horizontal and median left/right for
+// vertical instead of the bounding box edges.
+// Evaluates for both horizontal and vertical and returns the best result,
+// with a positive value for horizontal and a negative value for vertical.
+int TextlineProjection::EvaluateColPartition(const ColPartition& part,
+                                             const DENORM* denorm,
+                                             bool debug) const {
+  // A singleton partition is evaluated directly on its bounding box.
+  if (part.IsSingleton()) {
+    return EvaluateBox(part.bounding_box(), denorm, debug);
+  }
+  // Vertical test: bounding box with the median left/right edges.
+  TBOX v_box = part.bounding_box();
+  v_box.set_left(part.median_left());
+  v_box.set_right(part.median_right());
+  const int vresult = EvaluateBox(v_box, denorm, debug);
+
+  // Horizontal test: bounding box with the median top/bottom edges.
+  TBOX h_box = part.bounding_box();
+  h_box.set_top(part.median_top());
+  h_box.set_bottom(part.median_bottom());
+  const int hresult = EvaluateBox(h_box, denorm, debug);
+
+  if (debug) {
+    tprintf("Partition hresult=%d, vresult=%d from:", hresult, vresult);
+    part.bounding_box().print();
+    part.Print();
+  }
+  // Pick hresult unless it is smaller than the negated vresult.
+  if (hresult >= -vresult) {
+    return hresult;
+  }
+  return vresult;
+}
+
+// Computes the mean projection gradients over the horizontal and vertical
+// edges of the box:
+// -h-h-h-h-h-h
+// |------------| mean=htop -v|+v--------+v|-v
+// |+h+h+h+h+h+h| -v|+v +v|-v
+// | | -v|+v +v|-v
+// | box | -v|+v box +v|-v
+// | | -v|+v +v|-v
+// |+h+h+h+h+h+h| -v|+v +v|-v
+// |------------| mean=hbot -v|+v--------+v|-v
+// -h-h-h-h-h-h
+// mean=vleft mean=vright
+//
+// Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
+// for a horizontal textline, a negative number for a vertical textline,
+// and near zero for undecided. Undecided is most likely non-text.
+// All the gradients are truncated to remain non-negative, since negative
+// horizontal gradients don't give any indication of being vertical and
+// vice versa.
+// Additional complexity: The coordinates have to be transformed to original
+// image coordinates with denorm (if not null), scaled to match the projection
+// pix, and THEN step out 2 pixels each way from the edge to compute the
+// gradient, and tries 3 positions, each measuring the gradient over a
+// 4-pixel spread: (+3/-1), (+2/-2), (+1/-3). This complexity is handled by
+// several layers of helpers below.
+int TextlineProjection::EvaluateBox(const TBOX& box, const DENORM* denorm,
+ bool debug) const {
+ // Delegate to the internal version, passing nullptr for all four gradient
+ // outputs since only the combined result is wanted here.
+ return EvaluateBoxInternal(box, denorm, debug, nullptr, nullptr, nullptr, nullptr);
+}
+
+// Internal version of EvaluateBox returns the unclipped gradients as well
+// as the result of EvaluateBox.
+// hgrad1 and hgrad2 are the gradients for the horizontal textline.
+int TextlineProjection::EvaluateBoxInternal(const TBOX& box,
+                                            const DENORM* denorm, bool debug,
+                                            int* hgrad1, int* hgrad2,
+                                            int* vgrad1, int* vgrad2) const {
+  // Unclipped gradients at the four edges. The bottom and right values are
+  // the negated helper results.
+  const int top = BestMeanGradientInRow(denorm, box.left(), box.right(),
+                                        box.top(), true);
+  const int bottom = -BestMeanGradientInRow(denorm, box.left(), box.right(),
+                                            box.bottom(), false);
+  const int left = BestMeanGradientInColumn(denorm, box.left(), box.bottom(),
+                                            box.top(), true);
+  const int right = -BestMeanGradientInColumn(denorm, box.right(),
+                                              box.bottom(), box.top(), false);
+  if (debug) {
+    tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:",
+            top, bottom, left, right);
+    box.print();
+  }
+  // Clip each gradient at zero, then take the stronger edge of each pair.
+  const int horizontal_strength =
+      std::max(std::max(top, 0), std::max(bottom, 0));
+  const int vertical_strength =
+      std::max(std::max(left, 0), std::max(right, 0));
+  // Each pair of outputs is written only if both of its pointers are given,
+  // and receives the unclipped values.
+  if (hgrad1 != nullptr && hgrad2 != nullptr) {
+    *hgrad1 = top;
+    *hgrad2 = bottom;
+  }
+  if (vgrad1 != nullptr && vgrad2 != nullptr) {
+    *vgrad1 = left;
+    *vgrad2 = right;
+  }
+  return horizontal_strength - vertical_strength;
+}
+
+// Finds the best mean gradient across the horizontal row at y (external
+// coordinates) between min_x and max_x. Each candidate is the mean of the row
+// sampled at a positive offset minus the mean sampled at a negative offset 4
+// pixels away; the offset pairs tried are -2/+2, -1/+3 and -3/+1.
+// Positive for a good top edge, negative for a bottom edge. Keeps the
+int TextlineProjection::BestMeanGradientInRow(const DENORM* denorm,
+ int16_t min_x, int16_t max_x, int16_t y,
+ bool best_is_max) const {
+ // largest candidate if best_is_max, otherwise the smallest.
+ static const int kNegativeOffsets[] = {-2, -1, -3};
+ const TPOINT row_start(min_x, y);
+ const TPOINT row_end(max_x, y);
+ int chosen = 0;
+ for (int i = 0; i < 3; ++i) {
+ const int neg_offset = kNegativeOffsets[i];
+ const int neg_mean =
+ MeanPixelsInLineSegment(denorm, neg_offset, row_start, row_end);
+ const int pos_mean =
+ MeanPixelsInLineSegment(denorm, neg_offset + 4, row_start, row_end);
+ const int candidate = pos_mean - neg_mean;
+ // The first pair seeds the result; later pairs have to displace it.
+ if (i == 0 || (candidate > chosen) == best_is_max) chosen = candidate;
+ }
+ return chosen;
+}
+
+// Finds the best mean gradient across the vertical column at x (external
+// coordinates) between min_y and max_y. Each candidate is the mean of the
+// column sampled at a positive offset minus the mean sampled at a negative
+// offset 4 pixels away; the offset pairs tried are -2/+2, -1/+3 and -3/+1.
+// This gives a positive value for a good left edge and negative for right.
+// Keeps the largest candidate if best_is_max, otherwise the smallest.
+int TextlineProjection::BestMeanGradientInColumn(const DENORM* denorm, int16_t x,
+ int16_t min_y, int16_t max_y,
+ bool best_is_max) const {
+ // Negative-side offset of each sampled pair; its partner is 4 pixels on.
+ static const int kNegativeOffsets[] = {-2, -1, -3};
+ const TPOINT column_start(x, min_y);
+ const TPOINT column_end(x, max_y);
+ int chosen = 0;
+ for (int i = 0; i < 3; ++i) {
+ const int neg_offset = kNegativeOffsets[i];
+ const int neg_mean =
+ MeanPixelsInLineSegment(denorm, neg_offset, column_start, column_end);
+ const int pos_mean = MeanPixelsInLineSegment(denorm, neg_offset + 4,
+ column_start, column_end);
+ const int candidate = pos_mean - neg_mean;
+ // The first pair seeds the result; later pairs have to displace it.
+ if (i == 0 || (candidate > chosen) == best_is_max) chosen = candidate;
+ }
+ return chosen;
+}
+
+// Helper returns the mean pixel value over the line between the start_pt and
+// end_pt (both inclusive), shifted perpendicular to the line in the
+// projection image by offset pixels. The vector is assumed to be either
+// nearly horizontal or nearly vertical, so it also works on skewed textlines.
+// The end points are in external coordinates, and are denormalized with the
+// denorm (if not nullptr) before further conversion to pix coordinates.
+// Only after all the conversions is the offset applied, so it is measured in
+// projection image pixels, which gives the caller a guaranteed displacement
+// between the pixels used to calculate gradients.
+// Returns 0 if both end points land on the same projection pixel.
+int TextlineProjection::MeanPixelsInLineSegment(const DENORM* denorm,
+ int offset,
+ TPOINT start_pt,
+ TPOINT end_pt) const {
+ TransformToPixCoords(denorm, &start_pt);
+ TransformToPixCoords(denorm, &end_pt);
+ TruncateToImageBounds(&start_pt);
+ TruncateToImageBounds(&end_pt);
+ int wpl = pixGetWpl(pix_);
+ uint32_t* data = pixGetData(pix_);
+ int total = 0;
+ int count = 0;
+ int x_delta = end_pt.x - start_pt.x;
+ int y_delta = end_pt.y - start_pt.y;
+ if (abs(x_delta) >= abs(y_delta)) {
+ if (x_delta == 0)
+ return 0;  // Then y_delta is 0 too: the segment is a single pixel.
+ // Horizontal line. Add the offset vertically.
+ int x_step = x_delta > 0 ? 1 : -1;
+ // Correct offset for rotation, keeping it anti-clockwise of the delta.
+ offset *= x_step;
+ start_pt.y += offset;
+ end_pt.y += offset;
+ TruncateToImageBounds(&start_pt);  // The shifted row may leave the image.
+ TruncateToImageBounds(&end_pt);
+ x_delta = end_pt.x - start_pt.x;
+ y_delta = end_pt.y - start_pt.y;
+ count = x_delta * x_step + 1;  // Number of pixels, both ends included.
+ for (int x = start_pt.x; x != end_pt.x + x_step; x += x_step) {  // Visits end_pt too, so that total covers exactly count pixels.
+ int y = start_pt.y + DivRounded(y_delta * (x - start_pt.x), x_delta);
+ total += GET_DATA_BYTE(data + wpl * y, x);
+ }
+ } else {
+ // Vertical line. Add the offset horizontally.
+ int y_step = y_delta > 0 ? 1 : -1;
+ // Correct offset for rotation, keeping it anti-clockwise of the delta.
+ // Pix holds the image with y=0 at the top, so the offset is negated.
+ offset *= -y_step;
+ start_pt.x += offset;
+ end_pt.x += offset;
+ TruncateToImageBounds(&start_pt);  // The shifted column may leave the image.
+ TruncateToImageBounds(&end_pt);
+ x_delta = end_pt.x - start_pt.x;
+ y_delta = end_pt.y - start_pt.y;
+ count = y_delta * y_step + 1;  // Number of pixels, both ends included.
+ for (int y = start_pt.y; y != end_pt.y + y_step; y += y_step) {  // Visits end_pt too, so that total covers exactly count pixels.
+ int x = start_pt.x + DivRounded(x_delta * (y - start_pt.y), y_delta);
+ total += GET_DATA_BYTE(data + wpl * y, x);
+ }
+ }
+ return DivRounded(total, count);
+}
+
+// Shrinks box, side by side, until it tightly bounds the foreground pixels of
+// pix that lie inside it, using pixClipBoxToForeground.
+// box and the result are in tesseract coords, and the conversion to and from
+// pix coords assumes the pix is full resolution, the size of the original.
+// Returns a default-constructed TBOX when no clipped box is produced.
+static TBOX BoundsWithinBox(Pix* pix, const TBOX& box) {
+ const int pix_height = pixGetHeight(pix);
+ Box* search_box = boxCreate(box.left(), pix_height - box.top(),
+ box.width(), box.height());
+ Box* fg_box = nullptr;
+ pixClipBoxToForeground(pix, search_box, nullptr, &fg_box);
+ boxDestroy(&search_box);
+ TBOX bounds;
+ if (fg_box == nullptr) return bounds;
+ l_int32 fg_x, fg_y, fg_width, fg_height;
+ boxGetGeometry(fg_box, &fg_x, &fg_y, &fg_width, &fg_height);
+ boxDestroy(&fg_box);
+ // Convert back from pix rows to tesseract y by subtracting from the height.
+ bounds.set_left(fg_x);
+ bounds.set_right(fg_x + fg_width);
+ bounds.set_top(pix_height - fg_y);
+ bounds.set_bottom(bounds.top() - fg_height);
+ return bounds;
+}
+
+// Cuts *bbox in two at x_middle (if split_on_x) or y_middle (otherwise) and
+// looks for nontext_map foreground in each half with BoundsWithinBox. The
+// outer edge of each half is moved in to the near side of what was found, and
+// *bbox becomes the union of the halves. The result may be an empty box.
+static void TruncateBoxToMissNonText(int x_middle, int y_middle,
+ bool split_on_x, Pix* nontext_map,
+ TBOX* bbox) {
+ TBOX first_half(*bbox);
+ TBOX second_half(*bbox);
+ if (split_on_x) {  // first_half is the left part, second_half the right.
+ first_half.set_right(x_middle);
+ const TBOX left_fg = BoundsWithinBox(nontext_map, first_half);
+ if (!left_fg.null_box()) first_half.set_left(left_fg.right());
+ second_half.set_left(x_middle);
+ const TBOX right_fg = BoundsWithinBox(nontext_map, second_half);
+ if (!right_fg.null_box()) second_half.set_right(right_fg.left());
+ } else {  // first_half is the upper part, second_half the lower.
+ first_half.set_bottom(y_middle);
+ const TBOX upper_fg = BoundsWithinBox(nontext_map, first_half);
+ if (!upper_fg.null_box()) first_half.set_top(upper_fg.bottom());
+ second_half.set_top(y_middle);
+ const TBOX lower_fg = BoundsWithinBox(nontext_map, second_half);
+ if (!lower_fg.null_box()) second_half.set_bottom(lower_fg.top());
+ }
+ // Recombine the two trimmed halves into the caller's box.
+ first_half += second_half;
+ *bbox = first_half;
+}
+
+
+// Adds 1, stopping at 255, to every byte-sized pixel of the internal
+// projection pix_ that is covered by box, given in source image coords.
+void TextlineProjection::IncrementRectangle8Bit(const TBOX& box) {
+ const int first_col = ImageXToProjectionX(box.left());
+ const int first_row = ImageYToProjectionY(box.top());
+ const int last_col = ImageXToProjectionX(box.right());
+ const int last_row = ImageYToProjectionY(box.bottom());
+ const int words_per_line = pixGetWpl(pix_);
+ uint32_t* const pixels = pixGetData(pix_);
+ for (int row = first_row; row <= last_row; ++row) {
+ uint32_t* line = pixels + row * words_per_line;
+ for (int col = first_col; col <= last_col; ++col) {
+ const int value = GET_DATA_BYTE(line, col);
+ // Saturate instead of wrapping the byte back round to 0.
+ if (value < 255) SET_DATA_BYTE(line, col, value + 1);
+ }
+ }
+}
+
+// Inserts a list of blobs into the projection, one padded box per blob.
+// Rotation is a multiple of 90 degrees to get from blob coords to
+// nontext_map coords, nontext_map_box is the bounds of the nontext_map.
+// Blobs are spread horizontally or vertically according to their internal
+// flags (see PadBlobBox), but the spreading is truncated by set pixels in the
+// nontext_map and also by the left/right rule line limits on the blobs.
+void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs,
+ const FCOORD& rotation,
+ const TBOX& nontext_map_box,
+ Pix* nontext_map) {
+ BLOBNBOX_IT blob_it(blobs);
+ for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+ BLOBNBOX* blob = blob_it.data();
+ TBOX bbox = blob->bounding_box();
+ ICOORD middle((bbox.left() + bbox.right()) / 2,  // Centre of the blob box, taken before it is padded.
+ (bbox.bottom() + bbox.top()) / 2);
+ bool spreading_horizontally = PadBlobBox(blob, &bbox);
+ // Rotate both the padded box and its centre to match the nontext_map.
+ bbox.rotate(rotation);
+ middle.rotate(rotation);
+ if (rotation.x() == 0.0f)  // NOTE(review): presumably a quarter-turn rotation, which swaps horizontal and vertical - confirm.
+ spreading_horizontally = !spreading_horizontally;
+ // Clip to the image before applying the increments.
+ bbox &= nontext_map_box; // This is in-place box intersection.
+ // Cut the box back so that the spreading stops at nontext_map pixels.
+ TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally,
+ nontext_map, &bbox);
+ if (bbox.area() > 0) {  // Nothing is added for a box with no area left.
+ IncrementRectangle8Bit(bbox);
+ }
+ }
+}
+
+// Pads *bbox, the bounding box of the given blob, according to whether the
+// blob is on a horizontal or vertical text line, then limits the result by
+// the blob's tab-stops. Returns true if padding was in the horizontal direction.
+bool TextlineProjection::PadBlobBox(BLOBNBOX* blob, TBOX* bbox) {
+ // Determine which direction to spread.
+ // If text is well spaced out, it can be useful to pad perpendicular to
+ // the textline direction, so as to ensure diacritics get absorbed
+ // correctly, but if the text is tightly spaced, this will destroy the
+ // blank space between textlines in the projection map, and that would
+ // be very bad.
+ int pad_limit = scale_factor_ * kMinLineSpacingFactor;  // A neighbour gap larger than this counts as well spaced.
+ int xpad = 0;
+ int ypad = 0;
+ bool padding_horizontally = false;
+ if (blob->UniquelyHorizontal()) {
+ xpad = bbox->height() * kOrientedPadFactor;
+ padding_horizontally = true;
+ // If the text appears to be very well spaced, pad the other direction by a
+ // single pixel in the projection profile space to help join diacritics to
+ // the textline. A missing neighbour is treated as well spaced.
+ if ((blob->neighbour(BND_ABOVE) == nullptr ||
+ bbox->y_gap(blob->neighbour(BND_ABOVE)->bounding_box()) > pad_limit) &&
+ (blob->neighbour(BND_BELOW) == nullptr ||
+ bbox->y_gap(blob->neighbour(BND_BELOW)->bounding_box()) > pad_limit)) {
+ ypad = scale_factor_;  // scale_factor_ image pixels make one projection pixel.
+ }
+ } else if (blob->UniquelyVertical()) {  // Mirror of the case above; padding_horizontally stays false.
+ ypad = bbox->width() * kOrientedPadFactor;
+ if ((blob->neighbour(BND_LEFT) == nullptr ||
+ bbox->x_gap(blob->neighbour(BND_LEFT)->bounding_box()) > pad_limit) &&
+ (blob->neighbour(BND_RIGHT) == nullptr ||
+ bbox->x_gap(blob->neighbour(BND_RIGHT)->bounding_box()) > pad_limit)) {
+ xpad = scale_factor_;
+ }
+ } else {  // Neither uniquely horizontal nor uniquely vertical.
+ if ((blob->neighbour(BND_ABOVE) != nullptr &&  // Pad vertically if a vertical neighbour names this blob back.
+ blob->neighbour(BND_ABOVE)->neighbour(BND_BELOW) == blob) ||
+ (blob->neighbour(BND_BELOW) != nullptr &&
+ blob->neighbour(BND_BELOW)->neighbour(BND_ABOVE) == blob)) {
+ ypad = bbox->width() * kDefaultPadFactor;
+ }
+ if ((blob->neighbour(BND_RIGHT) != nullptr &&  // Pad horizontally if a horizontal neighbour names this blob back.
+ blob->neighbour(BND_RIGHT)->neighbour(BND_LEFT) == blob) ||
+ (blob->neighbour(BND_LEFT) != nullptr &&
+ blob->neighbour(BND_LEFT)->neighbour(BND_RIGHT) == blob)) {
+ xpad = bbox->height() * kDefaultPadFactor;
+ padding_horizontally = true;
+ }
+ }
+ bbox->pad(xpad, ypad);
+ pad_limit = scale_factor_ * kMaxTabStopOverrun;  // Reused: now the allowed overrun past a tab-stop.
+ // Now shrink horizontally to avoid stepping more than pad_limit over a
+ // tab-stop (the blob's left_rule and right_rule).
+ if (bbox->left() < blob->left_rule() - pad_limit) {
+ bbox->set_left(blob->left_rule() - pad_limit);
+ }
+ if (bbox->right() > blob->right_rule() + pad_limit) {
+ bbox->set_right(blob->right_rule() + pad_limit);
+ }
+ return padding_horizontally;
+}
+
+// Maps *pt in place into pix_ coordinates: first through the denorm's
+// DenormTransform when denorm is not nullptr, and then through
+// ImageXToProjectionX and ImageYToProjectionY.
+void TextlineProjection::TransformToPixCoords(const DENORM* denorm,
+ TPOINT* pt) const {
+ if (denorm != nullptr) denorm->DenormTransform(nullptr, *pt, pt);
+ const int pix_x = ImageXToProjectionX(pt->x);
+ const int pix_y = ImageYToProjectionY(pt->y);
+ pt->x = pix_x;
+ pt->y = pix_y;
+}
+
+#if defined(_MSC_VER) && !defined(__clang__)
+#pragma optimize("g", off)
+#endif // _MSC_VER. NOTE(review): the reason for this pragma is not stated here; presumably an MSVC optimizer workaround - confirm.
+// Helper clamps the TPOINT in place to lie within pix_: x to [0, width - 1] and y to [0, height - 1].
+void TextlineProjection::TruncateToImageBounds(TPOINT* pt) const {
+ pt->x = ClipToRange<int>(pt->x, 0, pixGetWidth(pix_) - 1);
+ pt->y = ClipToRange<int>(pt->y, 0, pixGetHeight(pix_) - 1);
+}
+#if defined(_MSC_VER) && !defined(__clang__)
+#pragma optimize("", on)
+#endif // _MSC_VER. Restores the optimization settings changed before the function.
+
+// Converts a tesseract image x to a column of pix_, clipped to lie inside it.
+int TextlineProjection::ImageXToProjectionX(int x) const {
+ const int scaled = (x - x_origin_) / scale_factor_;
+ return ClipToRange(scaled, 0, pixGetWidth(pix_) - 1);
+}
+int TextlineProjection::ImageYToProjectionY(int y) const {
+ const int scaled = (y_origin_ - y) / scale_factor_;  // Measured down from y_origin_, as pix rows count downwards.
+ return ClipToRange(scaled, 0, pixGetHeight(pix_) - 1);
+}
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/textlineprojection.h b/tesseract/src/textord/textlineprojection.h
new file mode 100644
index 00000000..c35ae350
--- /dev/null
+++ b/tesseract/src/textord/textlineprojection.h
@@ -0,0 +1,206 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: rays@google.com (Ray Smith)
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
+#define TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
+
+#include "blobgrid.h" // For BlobGrid
+
+struct Pix;
+
+namespace tesseract {
+
+class DENORM;
+struct TPOINT;
+class ColPartition;
+
+// Simple class to encapsulate the computation of an image representing
+// local textline density, and function(s) to make use of it.
+// The underlying principle is that if you smear connected components
+// horizontally (vertically for components on a vertically written textline)
+// and count the number of smeared components at each pixel, then the
+// resulting image shows the density of the textlines at each image position.
+class TESS_API TextlineProjection {
+ public:
+ // The down-scaling factor is computed to obtain a projection resolution
+ // of about 100 dpi, whatever the input.
+ explicit TextlineProjection(int resolution);
+ ~TextlineProjection();
+
+ // Build the projection profile given the input_block containing lists of
+ // blobs, a rotation to convert to image coords,
+ // and a full-resolution nontext_map, marking out areas to avoid.
+ // During construction, we have the following assumptions:
+ // The rotation is a multiple of 90 degrees, ie no deskew yet.
+ // The blobs have had their left and right rules set to also limit
+ // the range of projection.
+ void ConstructProjection(TO_BLOCK* input_block,
+ const FCOORD& rotation, Pix* nontext_map);
+
+ // Display the blobs in the window colored according to textline quality.
+ void PlotGradedBlobs(BLOBNBOX_LIST* blobs, ScrollView* win);
+
+ // Moves blobs that look like they don't sit well on a textline from the
+ // input blobs list to the output small_blobs list.
+ // This gets them away from initial textline finding to stop diacritics
+ // from forming incorrect textlines. (Introduced mainly to fix Thai.)
+ void MoveNonTextlineBlobs(BLOBNBOX_LIST* blobs,
+ BLOBNBOX_LIST* small_blobs) const;
+
+ // Create a window and display the projection in it.
+ void DisplayProjection() const;
+
+ // Compute the distance of the box from the partition using curved projection
+ // space. As DistanceOfBoxFromBox, except that the direction is taken from
+ // the ColPartition and the median bounds of the ColPartition are used as
+ // the to_box.
+ int DistanceOfBoxFromPartition(const TBOX& box, const ColPartition& part,
+ const DENORM* denorm, bool debug) const;
+
+ // Compute the distance from the from_box to the to_box using curved
+ // projection space. Separation that involves a decrease in projection
+ // density (moving from the from_box to the to_box) is weighted more heavily
+ // than constant density, and an increase is weighted less.
+ // If horizontal_textline is true, then curved space is used vertically,
+ // as for a diacritic on the edge of a textline.
+ // The projection uses original image coords, so denorm is used to get
+ // back to the image coords from box/part space.
+ int DistanceOfBoxFromBox(const TBOX& from_box, const TBOX& to_box,
+ bool horizontal_textline,
+ const DENORM* denorm, bool debug) const;
+
+ // Compute the distance between (x, y1) and (x, y2) using the rule that
+ // a decrease in textline density is weighted more heavily than an increase.
+ // The coordinates are in source image space, ie processed by any denorm
+ // already, but not yet scaled by scale_factor_.
+ // Going from the outside of a textline to the inside should measure much
+ // less distance than going from the inside of a textline to the outside.
+ int VerticalDistance(bool debug, int x, int y1, int y2) const;
+
+ // Compute the distance between (x1, y) and (x2, y) using the rule that
+ // a decrease in textline density is weighted more heavily than an increase.
+ int HorizontalDistance(bool debug, int x1, int x2, int y) const;
+
+ // Returns true if the blob appears to be outside of a horizontal textline.
+ // Such blobs are potentially diacritics (even if large in Thai) and should
+ // be kept away from initial textline finding.
+ bool BoxOutOfHTextline(const TBOX& box, const DENORM* denorm,
+ bool debug) const;
+
+ // Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
+ // but uses the median top/bottom for horizontal and median left/right for
+ // vertical instead of the bounding box edges.
+ // Evaluates for both horizontal and vertical and returns the best result,
+ // with a positive value for horizontal and a negative value for vertical.
+ int EvaluateColPartition(const ColPartition& part, const DENORM* denorm,
+ bool debug) const;
+
+ // Computes the mean projection gradients over the horizontal and vertical
+ // edges of the box:
+ //   -h-h-h-h-h-h
+ //  |------------| mean=htop   -v|+v--------+v|-v
+ //  |+h+h+h+h+h+h|             -v|+v        +v|-v
+ //  |            |             -v|+v        +v|-v
+ //  |    box     |             -v|+v  box   +v|-v
+ //  |            |             -v|+v        +v|-v
+ //  |+h+h+h+h+h+h|             -v|+v        +v|-v
+ //  |------------| mean=hbot   -v|+v--------+v|-v
+ //   -h-h-h-h-h-h
+ //                           mean=vleft  mean=vright
+ //
+ // Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
+ // for a horizontal textline, a negative number for a vertical textline,
+ // and near zero for undecided. Undecided is most likely non-text.
+ int EvaluateBox(const TBOX& box, const DENORM* denorm, bool debug) const;
+
+ private:
+ // Internal version of EvaluateBox that also returns the unclipped gradients:
+ // top/bottom in hgrad1/hgrad2 and left/right in vgrad1/vgrad2. A pair is
+ // only written when both of its pointers are non-null.
+ int EvaluateBoxInternal(const TBOX& box, const DENORM* denorm, bool debug,
+ int* hgrad1, int* hgrad2,
+ int* vgrad1, int* vgrad2) const;
+
+ // Helper returns the mean gradient value for the horizontal row at the given
+ // y, (in the external coordinates) by subtracting the mean of the transformed
+ // row 2 pixels above from the mean of the transformed row 2 pixels below.
+ // This gives a positive value for a good top edge and negative for bottom.
+ // Returns the best (max if best_is_max, else min) of +2/-2, +3/-1, +1/-3.
+ int BestMeanGradientInRow(const DENORM* denorm, int16_t min_x, int16_t max_x,
+ int16_t y, bool best_is_max) const;
+
+ // Helper returns the mean gradient value for the vertical column at the
+ // given x, (in the external coordinates) by subtracting the mean of the
+ // transformed column 2 pixels left from the mean of the transformed column
+ // 2 pixels to the right.
+ // This gives a positive value for a good left edge and negative for right.
+ // Returns the best (max if best_is_max, else min) of +2/-2, +3/-1, +1/-3.
+ int BestMeanGradientInColumn(const DENORM* denorm, int16_t x, int16_t min_y,
+ int16_t max_y, bool best_is_max) const;
+
+ // Helper returns the mean pixel value over the line between the start_pt and
+ // end_pt (inclusive), but shifted perpendicular to the line in the projection
+ // image by offset pixels. For simplicity, it is assumed that the vector is
+ // either nearly horizontal or nearly vertical. It works on skewed textlines!
+ // The end points are in external coordinates, and will be denormalized with
+ // the denorm if not nullptr before further conversion to pix coordinates.
+ // After all the conversions, the offset is added to the direction
+ // perpendicular to the line direction. The offset is thus in projection image
+ // coordinates, which allows the caller to get a guaranteed displacement
+ // between pixels used to calculate gradients.
+ int MeanPixelsInLineSegment(const DENORM* denorm, int offset,
+ TPOINT start_pt, TPOINT end_pt) const;
+
+ // Helper function to add 1 (up to a maximum of 255) to the pixels of the
+ // internal projection pix_ under a rectangle given in source image coords.
+ void IncrementRectangle8Bit(const TBOX& box);
+ // Inserts a list of blobs into the projection.
+ // Rotation is a multiple of 90 degrees to get from blob coords to
+ // nontext_map coords, image_box is the bounds of the nontext_map.
+ // Blobs are spread horizontally or vertically according to their internal
+ // flags, but the spreading is truncated by set pixels in the nontext_map
+ // and also by the left/right rule line limits on the blobs.
+ void ProjectBlobs(BLOBNBOX_LIST* blobs, const FCOORD& rotation,
+ const TBOX& image_box, Pix* nontext_map);
+ // Pads the bounding box of the given blob according to whether it is on
+ // a horizontal or vertical text line, taking into account tab-stops near
+ // the blob. Returns true if padding was in the horizontal direction.
+ bool PadBlobBox(BLOBNBOX* blob, TBOX* bbox);
+
+ // Helper denormalizes the TPOINT with the denorm if not nullptr, then
+ // converts to pix_ coordinates. The point is modified in place.
+ void TransformToPixCoords(const DENORM* denorm, TPOINT* pt) const;
+
+ // Helper truncates the TPOINT, in place, to be within the pix_.
+ void TruncateToImageBounds(TPOINT* pt) const;
+
+ // Transform tesseract coordinates to coordinates used in the pix, clipped
+ int ImageXToProjectionX(int x) const;  // to lie within the pix.
+ int ImageYToProjectionY(int y) const;
+
+ // The down-sampling scale factor used in building the image.
+ int scale_factor_;
+ // The blob coordinates of the top-left (origin of the pix_) in tesseract
+ // coordinates. Used to transform the bottom-up tesseract coordinates to
+ // the top-down coordinates of the pix.
+ int x_origin_;
+ int y_origin_;
+ // The image of horizontally smeared blob boxes summed to provide a
+ // textline density map. As with a horizontal projection, the map has
+ // dips in the gaps between textlines. Pixels are read and written as bytes.
+ Pix* pix_;
+};
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
diff --git a/tesseract/src/textord/textord.cpp b/tesseract/src/textord/textord.cpp
new file mode 100644
index 00000000..756ca78a
--- /dev/null
+++ b/tesseract/src/textord/textord.cpp
@@ -0,0 +1,349 @@
+///////////////////////////////////////////////////////////////////////
+// File: textord.cpp
+// Description: The top-level text line and word finding functionality.
+// Author: Ray Smith
+// Created: Fri Mar 13 14:43:01 PDT 2009
+//
+// (C) Copyright 2009, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "baselinedetect.h"
+#include "drawtord.h"
+#include "textord.h"
+#include "makerow.h"
+#include "pageres.h"
+#include "tordmain.h"
+#include "wordseg.h"
+
+namespace tesseract {
+
+Textord::Textord(CCStruct* ccstruct)  // Every *_MEMBER below gets its name, default, help text and ccstruct_->params().
+ : ccstruct_(ccstruct),  // Declared first in the class, so it is set before the ccstruct_->params() uses below.
+ use_cjk_fp_model_(false),
+ // makerow.cpp ///////////////////////////////////////////
+ BOOL_MEMBER(textord_single_height_mode, false,
+ "Script has no xheight, so use a single mode",
+ ccstruct_->params()),
+ // tospace.cpp ///////////////////////////////////////////
+ BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?",
+ ccstruct_->params()),
+ BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false,
+ "Constrain relative values of inter and intra-word gaps for "
+ "old_to_method.",
+ ccstruct_->params()),
+ BOOL_MEMBER(tosp_only_use_prop_rows, true,
+ "Block stats to use fixed pitch rows?", ccstruct_->params()),
+ BOOL_MEMBER(tosp_force_wordbreak_on_punct, false,
+ "Force word breaks on punct to break long lines in non-space "
+ "delimited langs",
+ ccstruct_->params()),
+ BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?",
+ ccstruct_->params()),
+ BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code",
+ ccstruct_->params()),
+ BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces",
+ ccstruct_->params()),
+ BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces",
+ ccstruct_->params()),
+ BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces",
+ ccstruct_->params()),
+ BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces",
+ ccstruct_->params()),
+ BOOL_MEMBER(tosp_recovery_isolated_row_stats, true,
+ "Use row alone when inadequate cert spaces",
+ ccstruct_->params()),
+ BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess",
+ ccstruct_->params()),
+ BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?",
+ ccstruct_->params()),
+ BOOL_MEMBER(tosp_fuzzy_limit_all, true,
+ "Don't restrict kn->sp fuzzy limit to tables",
+ ccstruct_->params()),
+ BOOL_MEMBER(tosp_stats_use_xht_gaps, true,
+ "Use within xht gap for wd breaks", ccstruct_->params()),
+ BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks",
+ ccstruct_->params()),
+ BOOL_MEMBER(tosp_only_use_xht_gaps, false,
+ "Only use within xht gap for wd breaks", ccstruct_->params()),
+ BOOL_MEMBER(tosp_rule_9_test_punct, false,
+ "Don't chng kn to space next to punct", ccstruct_->params()),
+ BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip",
+ ccstruct_->params()),
+ BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip",
+ ccstruct_->params()),
+ BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic",
+ ccstruct_->params()),
+ INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params()),
+ INT_MEMBER(tosp_enough_space_samples_for_median, 3,
+ "or should we use mean", ccstruct_->params()),
+ INT_MEMBER(tosp_redo_kern_limit, 10,
+ "No.samples reqd to reestimate for row", ccstruct_->params()),
+ INT_MEMBER(tosp_few_samples, 40,
+ "No.gaps reqd with 1 large gap to treat as a table",
+ ccstruct_->params()),
+ INT_MEMBER(tosp_short_row, 20,
+ "No.gaps reqd with few cert spaces to use certs",
+ ccstruct_->params()),
+ INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly",
+ ccstruct_->params()),
+ double_MEMBER(tosp_old_sp_kn_th_factor, 2.0,
+ "Factor for defining space threshold in terms of space and "
+ "kern sizes",
+ ccstruct_->params()),
+ double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?",
+ ccstruct_->params()),
+ double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?",
+ ccstruct_->params()),
+ double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow",
+ ccstruct_->params()),
+ double_MEMBER(tosp_narrow_aspect_ratio, 0.48,
+ "narrow if w/h less than this", ccstruct_->params()),
+ double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide",
+ ccstruct_->params()),
+ double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this",
+ ccstruct_->params()),
+ double_MEMBER(tosp_fuzzy_space_factor, 0.6,
+ "Fract of xheight for fuzz sp", ccstruct_->params()),
+ double_MEMBER(tosp_fuzzy_space_factor1, 0.5,
+ "Fract of xheight for fuzz sp", ccstruct_->params()),
+ double_MEMBER(tosp_fuzzy_space_factor2, 0.72,
+ "Fract of xheight for fuzz sp", ccstruct_->params()),
+ double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern",
+ ccstruct_->params()),
+ double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp",
+ ccstruct_->params()),
+ double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp",
+ ccstruct_->params()),
+ double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp",
+ ccstruct_->params()),
+ double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier",
+ ccstruct_->params()),
+ double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier",
+ ccstruct_->params()),
+ double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space",
+ ccstruct_->params()),
+ double_MEMBER(tosp_enough_small_gaps, 0.65,
+ "Fract of kerns reqd for isolated row stats",
+ ccstruct_->params()),
+ double_MEMBER(tosp_table_kn_sp_ratio, 2.25,
+ "Min difference of kn & sp in table", ccstruct_->params()),
+ double_MEMBER(tosp_table_xht_sp_ratio, 0.33,
+ "Expect spaces bigger than this", ccstruct_->params()),
+ double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0,
+ "Fuzzy if less than this", ccstruct_->params()),
+ double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg",
+ ccstruct_->params()),
+ double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg",
+ ccstruct_->params()),
+ double_MEMBER(tosp_min_sane_kn_sp, 1.5,
+ "Don't trust spaces less than this time kn",
+ ccstruct_->params()),
+ double_MEMBER(tosp_init_guess_kn_mult, 2.2,
+ "Thresh guess - mult kn by this", ccstruct_->params()),
+ double_MEMBER(tosp_init_guess_xht_mult, 0.28,
+ "Thresh guess - mult xht by this", ccstruct_->params()),
+ double_MEMBER(tosp_max_sane_kn_thresh, 5.0,
+ "Multiplier on kn to limit thresh", ccstruct_->params()),
+ double_MEMBER(tosp_flip_caution, 0.0,
+ "Don't autoflip kn to sp when large separation",
+ ccstruct_->params()),
+ double_MEMBER(tosp_large_kerning, 0.19,
+ "Limit use of xht gap with large kns", ccstruct_->params()),
+ double_MEMBER(tosp_dont_fool_with_small_kerns, -1,
+ "Limit use of xht gap with odd small kns",
+ ccstruct_->params()),
+ double_MEMBER(tosp_near_lh_edge, 0,
+ "Don't reduce box if the top left is non blank",
+ ccstruct_->params()),
+ double_MEMBER(tosp_silly_kn_sp_gap, 0.2,
+ "Don't let sp minus kn get too small", ccstruct_->params()),
+ double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75,
+ "How wide fuzzies need context", ccstruct_->params()),
+ // tordmain.cpp ///////////////////////////////////////////
+ BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs",
+ ccstruct_->params()),
+ BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs",
+ ccstruct_->params()),
+ BOOL_MEMBER(textord_show_boxes, false, "Display boxes",
+ ccstruct_->params()),
+ INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise",
+ ccstruct_->params()),
+ INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level",
+ ccstruct_->params()),
+ double_MEMBER(textord_noise_area_ratio, 0.7,
+ "Fraction of bounding box for noise", ccstruct_->params()),
+ double_MEMBER(textord_initialx_ile, 0.75,
+ "Ile of sizes for xheight guess", ccstruct_->params()),
+ double_MEMBER(textord_initialasc_ile, 0.90,
+ "Ile of sizes for xheight guess", ccstruct_->params()),
+ INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima",
+ ccstruct_->params()),
+ double_MEMBER(textord_noise_sizelimit, 0.5,
+ "Fraction of x for big t count", ccstruct_->params()),
+ INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob",
+ ccstruct_->params()),
+ double_MEMBER(textord_noise_normratio, 2.0,
+ "Dot to norm ratio for deletion", ccstruct_->params()),
+ BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words",
+ ccstruct_->params()),
+ BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows",
+ ccstruct_->params()),
+ double_MEMBER(textord_noise_syfract, 0.2,
+ "xh fract height error for norm blobs",
+ ccstruct_->params()),
+ double_MEMBER(textord_noise_sxfract, 0.4,
+ "xh fract width error for norm blobs", ccstruct_->params()),
+ double_MEMBER(textord_noise_hfract, 1.0 / 64,
+ "Height fraction to discard outlines as speckle noise",
+ ccstruct_->params()),
+ INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row",
+ ccstruct_->params()),
+ double_MEMBER(textord_noise_rowratio, 6.0,
+ "Dot to norm ratio for deletion", ccstruct_->params()),
+ BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector",
+ ccstruct_->params()),
+ double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift",
+ ccstruct_->params()),
+ double_MEMBER(textord_blshift_xfraction, 9.99,
+ "Min size of baseline shift", ccstruct_->params()) {}
+
+// Make the textlines and words inside each block; textord.h documents the argument contract.
+void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew,
+ int width, int height, Pix* binary_pix,
+ Pix* thresholds_pix, Pix* grey_pix,
+ bool use_box_bottoms, BLOBNBOX_LIST* diacritic_blobs,
+ BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
+ page_tr_.set_x(width);
+ page_tr_.set_y(height);
+ if (to_blocks->empty()) {
+ // AutoPageSeg was not used, so we need to find_components first.
+ find_components(binary_pix, blocks, to_blocks);
+ TO_BLOCK_IT it(to_blocks);
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ TO_BLOCK* to_block = it.data();
+ // Compute the edge offsets whether or not there is a grey_pix.
+ // Auto page seg was by-passed, so this step has to be run here.
+ // By page segmentation mode there is no non-text to avoid running on.
+ to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
+ }
+ } else if (!PSM_SPARSE(pageseg_mode)) {
+ // AutoPageSeg does not need to find_components as it did that already.
+ // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
+ filter_blobs(page_tr_, to_blocks, true);
+ }
+
+ ASSERT_HOST(!to_blocks->empty());
+ if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
+ const FCOORD anticlockwise90(0.0f, 1.0f);
+ const FCOORD clockwise90(0.0f, -1.0f);
+ TO_BLOCK_IT it(to_blocks);
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
+ TO_BLOCK* to_block = it.data();
+ BLOCK* block = to_block->block;
+ // Create a fake poly_block in block from its bounding box.
+ block->pdblk.set_poly_block(new POLY_BLOCK(block->pdblk.bounding_box(),
+ PT_VERTICAL_TEXT));
+ // Rotate the to_block along with its contained block and blobnbox lists.
+ to_block->rotate(anticlockwise90);
+ // Set the block's rotation values to obey the convention followed in
+ // layout analysis for vertical text.
+ block->set_re_rotation(clockwise90);
+ block->set_classify_rotation(clockwise90);
+ }
+ }
+
+ TO_BLOCK_IT to_block_it(to_blocks);
+ TO_BLOCK* to_block = to_block_it.data(); // First block; used by the single-row and single-word paths.
+ // Make the rows in the block.
+ float gradient;
+ // Do it the old fashioned way.
+ if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
+ gradient = make_rows(page_tr_, to_blocks);
+ } else if (!PSM_SPARSE(pageseg_mode)) {
+ // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
+ gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE,
+ to_block, to_blocks);
+ } else {
+ gradient = 0.0f;
+ }
+ BaselineDetect baseline_detector(textord_baseline_debug,
+ reskew, to_blocks);
+ baseline_detector.ComputeStraightBaselines(use_box_bottoms);
+ baseline_detector.ComputeBaselineSplinesAndXheights(
+ page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr,
+ textord_show_final_rows, this);
+ // Now make the words in the lines.
+ if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
+ // SINGLE_LINE uses the old word maker on the single line.
+ make_words(this, page_tr_, gradient, blocks, to_blocks);
+ } else {
+ // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
+ // single word, and in SINGLE_CHAR mode, all the outlines
+ // go in a single blob.
+ // to_block is still the first block: to_block_it has not been moved.
+ make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
+ to_block->get_rows(), to_block->block->row_list());
+ }
+ // Remove empties.
+ cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);
+ TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
+ // Compute the margins for each row in the block, to be used later for
+ // paragraph detection.
+ BLOCK_IT b_it(blocks);
+ for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
+ b_it.data()->compute_row_margins();
+ }
+#ifndef GRAPHICS_DISABLED
+ close_to_win();
+#endif
+}
+
+// For page segmentation modes that should yield exactly one textline, keeps
+// the row with the highest mean word certainty and deletes all other words.
+void Textord::CleanupSingleRowResult(PageSegMode pageseg_mode,
+ PAGE_RES* page_res) {
+ const bool multi_row_mode =
+ PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode);
+ if (multi_row_mode) return; // No cleanup required.
+ PAGE_RES_IT res_it(page_res);
+ // Pass 1: accumulate word certainties per row and remember the best mean.
+ ROW_RES* winner = nullptr;
+ float winner_mean = 0.0f;
+ float conf_sum = 0.0f;
+ int words_in_row = 0;
+ for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
+ conf_sum += res_it.word()->best_choice->certainty();
+ ++words_in_row;
+ if (res_it.next_row() == res_it.row()) continue; // Row not finished yet.
+ // Last word of the row: score the row, then reset the accumulators.
+ const float mean_conf = conf_sum / words_in_row;
+ if (winner == nullptr || winner_mean < mean_conf) {
+ winner = res_it.row();
+ winner_mean = mean_conf;
+ }
+ conf_sum = 0.0f;
+ words_in_row = 0;
+ }
+ // Pass 2: drop every word whose row is not the winner.
+ for (res_it.restart_page(); res_it.word() != nullptr; res_it.forward()) {
+ if (res_it.row() == winner) continue;
+ res_it.DeleteCurrentWord();
+ }
+}
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/textord.h b/tesseract/src/textord/textord.h
new file mode 100644
index 00000000..b2ca7079
--- /dev/null
+++ b/tesseract/src/textord/textord.h
@@ -0,0 +1,403 @@
+///////////////////////////////////////////////////////////////////////
+// File: textord.h
+// Description: The Textord class definition gathers text line and word
+// finding functionality.
+// Author: Ray Smith
+// Created: Fri Mar 13 14:29:01 PDT 2009
+//
+// (C) Copyright 2009, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_TEXTORD_H_
+#define TESSERACT_TEXTORD_TEXTORD_H_
+
+#include "ccstruct.h"
+#include "bbgrid.h"
+#include "blobbox.h"
+#include "gap_map.h"
+
+#include <tesseract/publictypes.h> // For PageSegMode.
+
+namespace tesseract {
+
+class FCOORD;
+class BLOCK_LIST;
+class PAGE_RES;
+class TO_BLOCK;
+class TO_BLOCK_LIST;
+class ScrollView;
+
+// Pairs a word with a padded copy of its bounding box so that BBGrid
+// searches can find the words that nearby diacritics may belong to.
+class WordWithBox {
+ public:
+ WordWithBox() : word_(nullptr) {}
+ explicit WordWithBox(WERD *word)
+ : word_(word), bounding_box_(word->bounding_box()) {
+ const int margin = bounding_box_.height();  // Pad by the box's own height.
+ bounding_box_.pad(margin, margin);
+ }
+
+ const WERD *word() const { return word_; }
+ C_BLOB_LIST *RejBlobs() const { return word_->rej_cblob_list(); }
+ const TBOX &bounding_box() const { return bounding_box_; }
+ // The word's own box, covering only its good blobs.
+ TBOX true_bounding_box() const { return word_->true_bounding_box(); }
+
+ private:
+ // Not owned: the word lives elsewhere and must outlive this object.
+ WERD *word_;
+ // Word bounding box cached at construction, expanded all round by its height.
+ TBOX bounding_box_;
+};
+
+// Make WordWithBox usable by BBGrid; the _CLIST/_C_IT names used below presumably come from CLISTIZEH.
+CLISTIZEH(WordWithBox)
+using WordGrid = BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>;
+using WordSearch = GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>;
+
+class Textord {  // Gathers the text line and word finding functionality.
+ public:
+ explicit Textord(CCStruct* ccstruct);
+ ~Textord() = default;
+
+ // Make the textlines and words inside each block.
+ // binary_pix is mandatory and is the binarized input after line removal.
+ // grey_pix is optional, but if present must match the binary_pix in size,
+ // and must be a *real* grey image instead of binary_pix * 255.
+ // thresholds_pix is expected to be present iff grey_pix is present and
+ // can be an integer factor reduction of the grey_pix. It represents the
+ // thresholds that were used to create the binary_pix from the grey_pix.
+ // diacritic_blobs contain small confusing components that should be added
+ // to the appropriate word(s) in case they are really diacritics.
+ void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width,
+ int height, Pix *binary_pix, Pix *thresholds_pix,
+ Pix *grey_pix, bool use_box_bottoms,
+ BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks,
+ TO_BLOCK_LIST *to_blocks);
+
+ // If we were supposed to return only a single textline, and there is more
+ // than one, clean up and leave only the best.
+ void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES* page_res);
+
+ bool use_cjk_fp_model() const {
+ return use_cjk_fp_model_;
+ }
+ void set_use_cjk_fp_model(bool flag) {
+ use_cjk_fp_model_ = flag;
+ }
+
+ // tospace.cpp ///////////////////////////////////////////
+ void to_spacing(
+ ICOORD page_tr, // top right of page
+ TO_BLOCK_LIST *blocks // blocks on page
+ );
+ ROW *make_prop_words(TO_ROW *row, // row to make
+ FCOORD rotation // for drawing
+ );
+ ROW *make_blob_words(TO_ROW *row, // row to make
+ FCOORD rotation // for drawing
+ );
+ // tordmain.cpp ///////////////////////////////////////////
+ void find_components(Pix* pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks);
+ void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST* blocks, bool testing_on);
+
+ private:
+ // For underlying memory management and other utilities.
+ CCStruct* ccstruct_;
+
+ // The size of the input image.
+ ICOORD page_tr_;
+
+ bool use_cjk_fp_model_;  // Backing flag for use_cjk_fp_model() / set_use_cjk_fp_model().
+
+ // makerow.cpp ///////////////////////////////////////////
+ // Make the textlines inside each block.
+ void MakeRows(PageSegMode pageseg_mode, const FCOORD& skew,
+ int width, int height, TO_BLOCK_LIST* to_blocks);
+ // Make the textlines inside a single block.
+ void MakeBlockRows(int min_spacing, int max_spacing,
+ const FCOORD& skew, TO_BLOCK* block,
+ ScrollView* win);
+
+ public:
+ void compute_block_xheight(TO_BLOCK *block, float gradient);
+ void compute_row_xheight(TO_ROW *row, // row to do
+ const FCOORD& rotation,
+ float gradient, // global skew
+ int block_line_size);
+ void make_spline_rows(TO_BLOCK* block, // block to do
+ float gradient, // gradient to fit
+ bool testing_on);
+ private:
+ //// oldbasel.cpp ////////////////////////////////////////
+ void make_old_baselines(TO_BLOCK* block, // block to do
+ bool testing_on, // correct orientation
+ float gradient);
+ void correlate_lines(TO_BLOCK *block, float gradient);
+ void correlate_neighbours(TO_BLOCK *block, // block rows are in.
+ TO_ROW **rows, // rows of block.
+ int rowcount); // no of rows to do.
+ int correlate_with_stats(TO_ROW **rows, // rows of block.
+ int rowcount, // no of rows to do.
+ TO_BLOCK* block);
+ void find_textlines(TO_BLOCK *block, // block row is in
+ TO_ROW *row, // row to do
+ int degree, // required approximation
+ QSPLINE *spline); // starting spline
+ // tospace.cpp ///////////////////////////////////////////
+ // DEBUG USE ONLY
+ void block_spacing_stats(TO_BLOCK* block,
+ GAPMAP* gapmap,
+ bool& old_text_ord_proportional,
+ // resulting estimate
+ int16_t& block_space_gap_width,
+ // resulting estimate
+ int16_t& block_non_space_gap_width
+ );
+ void row_spacing_stats(TO_ROW *row,
+ GAPMAP *gapmap,
+ int16_t block_idx,
+ int16_t row_idx,
+ // estimate for block
+ int16_t block_space_gap_width,
+ // estimate for block
+ int16_t block_non_space_gap_width
+ );
+ void old_to_method(TO_ROW *row,
+ STATS *all_gap_stats,
+ STATS *space_gap_stats,
+ STATS *small_gap_stats,
+ int16_t block_space_gap_width,
+ // estimate for block
+ int16_t block_non_space_gap_width
+ );
+ bool isolated_row_stats(TO_ROW* row,
+ GAPMAP* gapmap,
+ STATS* all_gap_stats,
+ bool suspected_table,
+ int16_t block_idx,
+ int16_t row_idx);
+ int16_t stats_count_under(STATS *stats, int16_t threshold);
+ void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats);
+ bool make_a_word_break(TO_ROW* row, // row being made
+ TBOX blob_box, // for next_blob // how many blanks?
+ int16_t prev_gap,
+ TBOX prev_blob_box,
+ int16_t real_current_gap,
+ int16_t within_xht_current_gap,
+ TBOX next_blob_box,
+ int16_t next_gap,
+ uint8_t& blanks,
+ bool& fuzzy_sp,
+ bool& fuzzy_non,
+ bool& prev_gap_was_a_space,
+ bool& break_at_next_gap);
+ bool narrow_blob(TO_ROW* row, TBOX blob_box);
+ bool wide_blob(TO_ROW* row, TBOX blob_box);
+ bool suspected_punct_blob(TO_ROW* row, TBOX box);
+ void peek_at_next_gap(TO_ROW *row,
+ BLOBNBOX_IT box_it,
+ TBOX &next_blob_box,
+ int16_t &next_gap,
+ int16_t &next_within_xht_gap);
+ void mark_gap(TBOX blob, // blob following gap
+ int16_t rule, // heuristic id
+ int16_t prev_gap,
+ int16_t prev_blob_width,
+ int16_t current_gap,
+ int16_t next_blob_width,
+ int16_t next_gap);
+ float find_mean_blob_spacing(WERD *word);
+ bool ignore_big_gap(TO_ROW* row,
+ int32_t row_length,
+ GAPMAP* gapmap,
+ int16_t left,
+ int16_t right);
+ // get bounding box
+ TBOX reduced_box_next(TO_ROW *row, // current row
+ BLOBNBOX_IT *it // iterator to blobs
+ );
+ TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, int16_t *left_above_xht);
+ // tordmain.cpp ///////////////////////////////////////////
+ float filter_noise_blobs(BLOBNBOX_LIST *src_list,
+ BLOBNBOX_LIST *noise_list,
+ BLOBNBOX_LIST *small_list,
+ BLOBNBOX_LIST *large_list);
+ // Fixes the block so it obeys all the rules:
+ // Must have at least one ROW.
+ // Must have at least one WERD.
+ // WERDs contain a fake blob.
+ void cleanup_nontext_block(BLOCK* block);
+ void cleanup_blocks(bool clean_noise, BLOCK_LIST *blocks);
+ bool clean_noise_from_row(ROW* row);
+ void clean_noise_from_words(ROW *row);
+ // Remove outlines that are a tiny fraction in either width or height
+ // of the word height.
+ void clean_small_noise_from_words(ROW *row);
+ // Groups blocks by rotation, then, for each group, makes a WordGrid and calls
+ // TransferDiacriticsToWords to copy the diacritic blobs to the most
+ // appropriate words in the group of blocks. Source blobs are not touched.
+ void TransferDiacriticsToBlockGroups(BLOBNBOX_LIST* diacritic_blobs,
+ BLOCK_LIST* blocks);
+ // Places a copy of blobs that are near a word (after applying rotation to the
+ // blob) in the most appropriate word, unless there is doubt, in which case a
+ // blob can end up in two words. Source blobs are not touched.
+ void TransferDiacriticsToWords(BLOBNBOX_LIST *diacritic_blobs,
+ const FCOORD &rotation, WordGrid *word_grid);
+
+ public:  // Parameter declarations: each *_VAR_H entry gives a variable name, a default value and help text.
+ // makerow.cpp ///////////////////////////////////////////
+ BOOL_VAR_H(textord_single_height_mode, false,
+ "Script has no xheight, so use a single mode for horizontal text");
+ // tospace.cpp ///////////////////////////////////////////
+ BOOL_VAR_H(tosp_old_to_method, false, "Space stats use prechopping?");
+ BOOL_VAR_H(tosp_old_to_constrain_sp_kn, false,
+ "Constrain relative values of inter and intra-word gaps for "
+ "old_to_method.");
+ BOOL_VAR_H(tosp_only_use_prop_rows, true,
+ "Block stats to use fixed pitch rows?");
+ BOOL_VAR_H(tosp_force_wordbreak_on_punct, false,
+ "Force word breaks on punct to break long lines in non-space "
+ "delimited langs");
+ BOOL_VAR_H(tosp_use_pre_chopping, false,
+ "Space stats use prechopping?");
+ BOOL_VAR_H(tosp_old_to_bug_fix, false,
+ "Fix suspected bug in old code");
+ BOOL_VAR_H(tosp_block_use_cert_spaces, true,
+ "Only stat OBVIOUS spaces");
+ BOOL_VAR_H(tosp_row_use_cert_spaces, true,
+ "Only stat OBVIOUS spaces");
+ BOOL_VAR_H(tosp_narrow_blobs_not_cert, true,
+ "Only stat OBVIOUS spaces");
+ BOOL_VAR_H(tosp_row_use_cert_spaces1, true,
+ "Only stat OBVIOUS spaces");
+ BOOL_VAR_H(tosp_recovery_isolated_row_stats, true,
+ "Use row alone when inadequate cert spaces");
+ BOOL_VAR_H(tosp_only_small_gaps_for_kern, false, "Better guess");
+ BOOL_VAR_H(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?");
+ BOOL_VAR_H(tosp_fuzzy_limit_all, true,
+ "Don't restrict kn->sp fuzzy limit to tables");
+ BOOL_VAR_H(tosp_stats_use_xht_gaps, true,
+ "Use within xht gap for wd breaks");
+ BOOL_VAR_H(tosp_use_xht_gaps, true,
+ "Use within xht gap for wd breaks");
+ BOOL_VAR_H(tosp_only_use_xht_gaps, false,
+ "Only use within xht gap for wd breaks");
+ BOOL_VAR_H(tosp_rule_9_test_punct, false,
+ "Don't chng kn to space next to punct");
+ BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp, true, "Default flip");
+ BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn, true, "Default flip");
+ BOOL_VAR_H(tosp_improve_thresh, false,
+ "Enable improvement heuristic");
+ INT_VAR_H(tosp_debug_level, 0, "Debug data");
+ INT_VAR_H(tosp_enough_space_samples_for_median, 3,
+ "or should we use mean");
+ INT_VAR_H(tosp_redo_kern_limit, 10,
+ "No.samples reqd to reestimate for row");
+ INT_VAR_H(tosp_few_samples, 40,
+ "No.gaps reqd with 1 large gap to treat as a table");
+ INT_VAR_H(tosp_short_row, 20,
+ "No.gaps reqd with few cert spaces to use certs");
+ INT_VAR_H(tosp_sanity_method, 1, "How to avoid being silly");
+ double_VAR_H(tosp_old_sp_kn_th_factor, 2.0,
+ "Factor for defining space threshold in terms of space and "
+ "kern sizes");
+ double_VAR_H(tosp_threshold_bias1, 0,
+ "how far between kern and space?");
+ double_VAR_H(tosp_threshold_bias2, 0,
+ "how far between kern and space?");
+ double_VAR_H(tosp_narrow_fraction, 0.3,
+ "Fract of xheight for narrow");
+ double_VAR_H(tosp_narrow_aspect_ratio, 0.48,
+ "narrow if w/h less than this");
+ double_VAR_H(tosp_wide_fraction, 0.52, "Fract of xheight for wide");
+ double_VAR_H(tosp_wide_aspect_ratio, 0.0,
+ "wide if w/h less than this");
+ double_VAR_H(tosp_fuzzy_space_factor, 0.6,
+ "Fract of xheight for fuzz sp");
+ double_VAR_H(tosp_fuzzy_space_factor1, 0.5,
+ "Fract of xheight for fuzz sp");
+ double_VAR_H(tosp_fuzzy_space_factor2, 0.72,
+ "Fract of xheight for fuzz sp");
+ double_VAR_H(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern");
+ double_VAR_H(tosp_kern_gap_factor1, 2.0,
+ "gap ratio to flip kern->sp");
+ double_VAR_H(tosp_kern_gap_factor2, 1.3,
+ "gap ratio to flip kern->sp");
+ double_VAR_H(tosp_kern_gap_factor3, 2.5,
+ "gap ratio to flip kern->sp");
+ double_VAR_H(tosp_ignore_big_gaps, -1, "xht multiplier");
+ double_VAR_H(tosp_ignore_very_big_gaps, 3.5, "xht multiplier");
+ double_VAR_H(tosp_rep_space, 1.6, "rep gap multiplier for space");
+ double_VAR_H(tosp_enough_small_gaps, 0.65,
+ "Fract of kerns reqd for isolated row stats");
+ double_VAR_H(tosp_table_kn_sp_ratio, 2.25,
+ "Min difference of kn & sp in table");
+ double_VAR_H(tosp_table_xht_sp_ratio, 0.33,
+ "Expect spaces bigger than this");
+ double_VAR_H(tosp_table_fuzzy_kn_sp_ratio, 3.0,
+ "Fuzzy if less than this");
+ double_VAR_H(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg");
+ double_VAR_H(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg");
+ double_VAR_H(tosp_min_sane_kn_sp, 1.5,
+ "Don't trust spaces less than this time kn");
+ double_VAR_H(tosp_init_guess_kn_mult, 2.2,
+ "Thresh guess - mult kn by this");
+ double_VAR_H(tosp_init_guess_xht_mult, 0.28,
+ "Thresh guess - mult xht by this");
+ double_VAR_H(tosp_max_sane_kn_thresh, 5.0,
+ "Multiplier on kn to limit thresh");
+ double_VAR_H(tosp_flip_caution, 0.0,
+ "Don't autoflip kn to sp when large separation");
+ double_VAR_H(tosp_large_kerning, 0.19,
+ "Limit use of xht gap with large kns");
+ double_VAR_H(tosp_dont_fool_with_small_kerns, -1,
+ "Limit use of xht gap with odd small kns");
+ double_VAR_H(tosp_near_lh_edge, 0,
+ "Don't reduce box if the top left is non blank");
+ double_VAR_H(tosp_silly_kn_sp_gap, 0.2,
+ "Don't let sp minus kn get too small");
+ double_VAR_H(tosp_pass_wide_fuzz_sp_to_context, 0.75,
+ "How wide fuzzies need context");
+ // tordmain.cpp ///////////////////////////////////////////
+ BOOL_VAR_H(textord_no_rejects, false, "Don't remove noise blobs");
+ BOOL_VAR_H(textord_show_blobs, false, "Display unsorted blobs");
+ BOOL_VAR_H(textord_show_boxes, false, "Display boxes");
+ INT_VAR_H(textord_max_noise_size, 7, "Pixel size of noise");
+ INT_VAR_H(textord_baseline_debug, 0, "Baseline debug level");
+ double_VAR_H(textord_noise_area_ratio, 0.7,
+ "Fraction of bounding box for noise");
+ double_VAR_H(textord_initialx_ile, 0.75, "Ile of sizes for xheight guess");
+ double_VAR_H(textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess");
+ INT_VAR_H(textord_noise_sizefraction, 10, "Fraction of size for maxima");
+ double_VAR_H(textord_noise_sizelimit, 0.5, "Fraction of x for big t count");
+ INT_VAR_H(textord_noise_translimit, 16, "Transitions for normal blob");
+ double_VAR_H(textord_noise_normratio, 2.0, "Dot to norm ratio for deletion");
+ BOOL_VAR_H(textord_noise_rejwords, true, "Reject noise-like words");
+ BOOL_VAR_H(textord_noise_rejrows, true, "Reject noise-like rows");
+ double_VAR_H(textord_noise_syfract, 0.2, "xh fract error for norm blobs");
+ double_VAR_H(textord_noise_sxfract, 0.4,
+ "xh fract width error for norm blobs");
+ double_VAR_H(textord_noise_hfract, 1.0/64,
+ "Height fraction to discard outlines as speckle noise");
+ INT_VAR_H(textord_noise_sncount, 1, "super norm blobs to save row");
+ double_VAR_H(textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion");
+ BOOL_VAR_H(textord_noise_debug, false, "Debug row garbage detector");
+ double_VAR_H(textord_blshift_maxshift, 0.00, "Max baseline shift");
+ double_VAR_H(textord_blshift_xfraction, 9.99, "Min size of baseline shift");
+};
+
+} // namespace tesseract
+
+#endif // TESSERACT_TEXTORD_TEXTORD_H_
diff --git a/tesseract/src/textord/topitch.cpp b/tesseract/src/textord/topitch.cpp
new file mode 100644
index 00000000..655f75bd
--- /dev/null
+++ b/tesseract/src/textord/topitch.cpp
@@ -0,0 +1,1847 @@
+/**********************************************************************
+ * File: topitch.cpp (Formerly to_pitch.c)
+ * Description: Code to determine fixed pitchness and the pitch if fixed.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+ // Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "topitch.h"
+
+#include "blobbox.h"
+#include "statistc.h"
+#include "drawtord.h"
+#include "makerow.h"
+#include "pitsync1.h"
+#include "pithsync.h"
+#include "tovars.h"
+#include "wordseg.h"
+
+#include "helpers.h"
+
+#include <memory>
+
+namespace tesseract {
+
+// Run-time parameters used by the pitch code in this file. The macro
+// arguments appear to be (name, default value, description) -- confirm
+// against the params header.
+// textord_all_prop: when set, fixed_pitch_row marks rows PITCH_DEF_PROP.
+static BOOL_VAR (textord_all_prop, false, "All doc is proportial text");
+BOOL_VAR (textord_debug_pitch_test, false,
+"Debug on fixed pitch test");
+static BOOL_VAR (textord_disable_pitch_test, false,
+"Turn off dp fixed pitch algorithm");
+BOOL_VAR (textord_fast_pitch_test, false,
+"Do even faster pitch algorithm");
+BOOL_VAR (textord_debug_pitch_metric, false,
+"Write full metric stuff");
+BOOL_VAR (textord_show_row_cuts, false, "Draw row-level cuts");
+BOOL_VAR (textord_show_page_cuts, false, "Draw page-level cuts");
+BOOL_VAR (textord_pitch_cheat, false,
+"Use correct answer for fixed/prop");
+// textord_blockndoc_fixed: try_doc_fixed returns at once unless this is set.
+BOOL_VAR (textord_blockndoc_fixed, false,
+"Attempt whole doc/block fixed pitch");
+double_VAR (textord_projection_scale, 0.200, "Ding rate for mid-cuts");
+double_VAR (textord_balance_factor, 1.0,
+"Ding rate for unbalanced char cells");
+
+// Capacity of the gap-cluster arrays used by row_pitch_stats.
+#define BLOCK_STATS_CLUSTERS 10
+// Upper end of the row-pitch histogram range built in try_doc_fixed.
+#define MAX_ALLOWED_PITCH 100 //max pixel pitch.
+
+// qsort comparator: orders the two floats pointed to by arg1 and arg2
+// ascending. Returns 1, -1 or 0 according to the sign of (*arg1 - *arg2).
+static int sort_floats(const void *arg1, const void *arg2) {
+  const float lhs = *reinterpret_cast<const float*>(arg1);
+  const float rhs = *reinterpret_cast<const float*>(arg2);
+  const float delta = lhs - rhs;
+  if (delta > 0) {
+    return 1;
+  }
+  return delta < 0 ? -1 : 0;
+}
+
+/**********************************************************************
+ * compute_fixed_pitch
+ *
+ * Driver for pitch detection over a list of blocks:
+ *  1. compute_block_pitch is run on every block;
+ *  2. unless try_doc_fixed returns true, every block is offered to
+ *     try_block_fixed and, if that fails, to try_rows_fixed;
+ *  3. fix_row_pitch is run on every row of every block that has no
+ *     poly block or whose poly block is text.
+ * Block and row numbers passed down start at 1.
+ **********************************************************************/
+
+void compute_fixed_pitch(ICOORD page_tr,              // top right
+                         TO_BLOCK_LIST* port_blocks,  // input list
+                         float gradient,              // page skew
+                         FCOORD rotation,             // for drawing
+                         bool testing_on) {           // correct orientation
+#ifndef GRAPHICS_DISABLED
+  // Create the debug window on first use.
+  if (textord_show_initial_words && testing_on && to_win == nullptr) {
+    create_to_win(page_tr);
+  }
+#endif
+
+  TO_BLOCK_IT blocks(port_blocks);
+
+  // Pass 1: per-block pitch measurements.
+  int block_no = 1;
+  for (blocks.mark_cycle_pt(); !blocks.cycled_list(); blocks.forward()) {
+    compute_block_pitch(blocks.data(), rotation, block_no, testing_on);
+    ++block_no;
+  }
+
+  // Pass 2: whole document first, then block by block, then row by row.
+  if (!try_doc_fixed(page_tr, port_blocks, gradient)) {
+    block_no = 1;
+    for (blocks.mark_cycle_pt(); !blocks.cycled_list(); blocks.forward()) {
+      TO_BLOCK* current = blocks.data();
+      if (!try_block_fixed(current, block_no)) {
+        try_rows_fixed(current, block_no, testing_on);
+      }
+      ++block_no;
+    }
+  }
+
+  // Pass 3: settle every row of the text blocks.
+  block_no = 1;
+  for (blocks.mark_cycle_pt(); !blocks.cycled_list(); blocks.forward()) {
+    TO_BLOCK* current = blocks.data();
+    POLY_BLOCK* poly = current->block->pdblk.poly_block();
+    const bool is_text = poly == nullptr || poly->IsText();
+    if (is_text) {
+      TO_ROW_IT rows(current->get_rows());
+      int row_no = 1;
+      for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
+        fix_row_pitch(rows.data(), current, port_blocks, row_no, block_no);
+        ++row_no;
+      }
+      // Non-text blocks are not numbered, matching the counting that
+      // fix_row_pitch does internally.
+      ++block_no;
+    }
+  }
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_initial_words && testing_on) {
+    ScrollView::Update();
+  }
+#endif
+}
+
+
+/**********************************************************************
+ * fix_row_pitch
+ *
+ * Get a pitch_decision for this row by voting among similar rows in the
+ * block, then similar rows over all the page, or any other rows at all.
+ *
+ * Rows whose decision is already PITCH_DEF_FIXED or PITCH_DEF_PROP skip
+ * the vote. Otherwise every row of every text block votes:
+ *  - block_votes: rows of similar size in block number block_target;
+ *  - like_votes:  rows of similar size in any other block;
+ *  - other_votes: rows that are not of similar size.
+ * A definite decision counts textord_words_veto_power votes, a maybe or
+ * corrected decision counts one; fixed votes are positive, prop votes
+ * negative. The row ends up PITCH_CORR_FIXED or PITCH_CORR_PROP and its
+ * spacing fields are filled in to match.
+ *
+ * row_target is used only in diagnostic messages.
+ **********************************************************************/
+
+void fix_row_pitch(TO_ROW *bad_row, // row to fix
+ TO_BLOCK *bad_block, // block of bad_row
+ TO_BLOCK_LIST *blocks, // blocks to scan
+ int32_t row_target, // number of row
+ int32_t block_target) { // number of block
+ int16_t mid_cuts;
+ int block_votes; //votes in block
+ int like_votes; //votes over page
+ int other_votes; //votes of unlike blocks
+ int block_index; //number of block
+ int row_index; //number of row
+ int maxwidth; //max pitch
+ TO_BLOCK_IT block_it = blocks; //block iterator
+ TO_BLOCK *block; //current block
+ TO_ROW *row; //current row
+ float sp_sd; //space deviation
+ STATS block_stats; //pitches in block
+ STATS like_stats; //pitches in page
+
+ block_votes = like_votes = other_votes = 0;
+ maxwidth = static_cast<int32_t>(ceil (bad_row->xheight * textord_words_maxspace));
+ // Only rows without a definite decision are put to the vote.
+ if (bad_row->pitch_decision != PITCH_DEF_FIXED
+ && bad_row->pitch_decision != PITCH_DEF_PROP) {
+ block_stats.set_range (0, maxwidth);
+ like_stats.set_range (0, maxwidth);
+ block_index = 1;
+ for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+ block_it.forward()) {
+ block = block_it.data();
+ POLY_BLOCK* pb = block->block->pdblk.poly_block();
+ // Skipped blocks do not advance block_index, so block numbers count
+ // text blocks only.
+ if (pb != nullptr && !pb->IsText()) continue; // Non text doesn't exist!
+ row_index = 1;
+ TO_ROW_IT row_it(block->get_rows());
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
+ row_it.forward ()) {
+ row = row_it.data ();
+ // A row is "like" bad_row when its size is within a fraction
+ // textord_pitch_rowsimilarity of bad_row's size. The size compared is
+ // xheight + ascrise if bad_row is all caps, xheight otherwise.
+ if ((bad_row->all_caps
+ && row->xheight + row->ascrise
+ <
+ (bad_row->xheight + bad_row->ascrise) * (1 +
+ textord_pitch_rowsimilarity)
+ && row->xheight + row->ascrise >
+ (bad_row->xheight + bad_row->ascrise) * (1 -
+ textord_pitch_rowsimilarity))
+ || (!bad_row->all_caps
+ && row->xheight <
+ bad_row->xheight * (1 + textord_pitch_rowsimilarity)
+ && row->xheight >
+ bad_row->xheight * (1 - textord_pitch_rowsimilarity))) {
+ if (block_index == block_target) {
+ // Like row in the target block: fixed votes also record the pitch.
+ if (row->pitch_decision == PITCH_DEF_FIXED) {
+ block_votes += textord_words_veto_power;
+ block_stats.add (static_cast<int32_t>(row->fixed_pitch),
+ textord_words_veto_power);
+ }
+ else if (row->pitch_decision == PITCH_MAYBE_FIXED
+ || row->pitch_decision == PITCH_CORR_FIXED) {
+ block_votes++;
+ block_stats.add (static_cast<int32_t>(row->fixed_pitch), 1);
+ }
+ else if (row->pitch_decision == PITCH_DEF_PROP)
+ block_votes -= textord_words_veto_power;
+ else if (row->pitch_decision == PITCH_MAYBE_PROP
+ || row->pitch_decision == PITCH_CORR_PROP)
+ block_votes--;
+ }
+ else {
+ // Like row in some other block.
+ if (row->pitch_decision == PITCH_DEF_FIXED) {
+ like_votes += textord_words_veto_power;
+ like_stats.add (static_cast<int32_t>(row->fixed_pitch),
+ textord_words_veto_power);
+ }
+ else if (row->pitch_decision == PITCH_MAYBE_FIXED
+ || row->pitch_decision == PITCH_CORR_FIXED) {
+ like_votes++;
+ like_stats.add (static_cast<int32_t>(row->fixed_pitch), 1);
+ }
+ else if (row->pitch_decision == PITCH_DEF_PROP)
+ like_votes -= textord_words_veto_power;
+ else if (row->pitch_decision == PITCH_MAYBE_PROP
+ || row->pitch_decision == PITCH_CORR_PROP)
+ like_votes--;
+ }
+ }
+ else {
+ // Unlike row: counted for the warning below, no pitch recorded.
+ if (row->pitch_decision == PITCH_DEF_FIXED)
+ other_votes += textord_words_veto_power;
+ else if (row->pitch_decision == PITCH_MAYBE_FIXED
+ || row->pitch_decision == PITCH_CORR_FIXED)
+ other_votes++;
+ else if (row->pitch_decision == PITCH_DEF_PROP)
+ other_votes -= textord_words_veto_power;
+ else if (row->pitch_decision == PITCH_MAYBE_PROP
+ || row->pitch_decision == PITCH_CORR_PROP)
+ other_votes--;
+ }
+ row_index++;
+ }
+ block_index++;
+ }
+ // The block's own vote wins if it is strong enough; otherwise any
+ // positive page-wide vote makes the row fixed, using the median pitch of
+ // the voters. Everything else is proportional.
+ if (block_votes > textord_words_veto_power) {
+ bad_row->fixed_pitch = block_stats.ile (0.5);
+ bad_row->pitch_decision = PITCH_CORR_FIXED;
+ }
+ else if (block_votes <= textord_words_veto_power && like_votes > 0) {
+ bad_row->fixed_pitch = like_stats.ile (0.5);
+ bad_row->pitch_decision = PITCH_CORR_FIXED;
+ }
+ else {
+ bad_row->pitch_decision = PITCH_CORR_PROP;
+ if (block_votes == 0 && like_votes == 0 && other_votes > 0
+ && (textord_debug_pitch_test || textord_debug_pitch_metric))
+ tprintf
+ ("Warning:row %d of block %d set prop with no like rows against trend\n",
+ row_target, block_target);
+ }
+ }
+ if (textord_debug_pitch_metric) {
+ tprintf(":b_votes=%d:l_votes=%d:o_votes=%d",
+ block_votes, like_votes, other_votes);
+ tprintf("x=%g:asc=%g\n", bad_row->xheight, bad_row->ascrise);
+ }
+ if (bad_row->pitch_decision == PITCH_CORR_FIXED) {
+ // A pitch below textord_min_xheight is replaced by the voters' median,
+ // else by the row's xheight, and finally clamped to textord_min_xheight.
+ if (bad_row->fixed_pitch < textord_min_xheight) {
+ if (block_votes > 0)
+ bad_row->fixed_pitch = block_stats.ile (0.5);
+ else if (block_votes == 0 && like_votes > 0)
+ bad_row->fixed_pitch = like_stats.ile (0.5);
+ else {
+ tprintf
+ ("Warning:guessing pitch as xheight on row %d, block %d\n",
+ row_target, block_target);
+ bad_row->fixed_pitch = bad_row->xheight;
+ }
+ }
+ if (bad_row->fixed_pitch < textord_min_xheight)
+ bad_row->fixed_pitch = (float) textord_min_xheight;
+ // Derive the spacing fields from the chosen pitch.
+ bad_row->kern_size = bad_row->fixed_pitch / 4;
+ bad_row->min_space = static_cast<int32_t>(bad_row->fixed_pitch * 0.6);
+ bad_row->max_nonspace = static_cast<int32_t>(bad_row->fixed_pitch * 0.4);
+ bad_row->space_threshold =
+ (bad_row->min_space + bad_row->max_nonspace) / 2;
+ bad_row->space_size = bad_row->fixed_pitch;
+ // Rows that have blobs but no character cells yet get them from
+ // tune_row_pitch; sp_sd and mid_cuts are not read afterwards here.
+ if (bad_row->char_cells.empty() && !bad_row->blob_list()->empty()) {
+ tune_row_pitch (bad_row, &bad_row->projection,
+ bad_row->projection_left, bad_row->projection_right,
+ (bad_row->fixed_pitch +
+ bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch,
+ sp_sd, mid_cuts, &bad_row->char_cells, false);
+ }
+ }
+ else if (bad_row->pitch_decision == PITCH_CORR_PROP
+ || bad_row->pitch_decision == PITCH_DEF_PROP) {
+ // Proportional rows carry no pitch and no character cells.
+ bad_row->fixed_pitch = 0.0f;
+ bad_row->char_cells.clear ();
+ }
+}
+
+
+/**********************************************************************
+ * compute_block_pitch
+ *
+ * Reset the block's spacing estimates from its xheight and, if the
+ * block has rows, look for repeated characters and compute the pitch
+ * data of each row via compute_rows_pitch.
+ **********************************************************************/
+
+void compute_block_pitch(TO_BLOCK* block,      // input list
+                         FCOORD rotation,      // for drawing
+                         int32_t block_index,  // block number
+                         bool testing_on) {    // correct orientation
+  TBOX bounds = block->block->pdblk.bounding_box();
+  if (testing_on && textord_debug_pitch_test) {
+    tprintf ("Block %d at (%d,%d)->(%d,%d)\n",
+             block_index,
+             bounds.left(), bounds.bottom(),
+             bounds.right(), bounds.top());
+  }
+
+  // Default spacing estimates, all scaled from the block xheight.
+  block->fixed_pitch = 0.0f;
+  block->min_space = static_cast<int32_t>(
+      floor(block->xheight * textord_words_default_minspace));
+  block->space_size = static_cast<float>(block->min_space);
+  block->max_nonspace = static_cast<int32_t>(
+      ceil(block->xheight * textord_words_default_nonspace));
+  block->kern_size = static_cast<float>(block->max_nonspace);
+  block->pr_nonsp = block->xheight * words_default_prop_nonspace;
+  block->pr_space = block->pr_nonsp * textord_spacesize_ratioprop;
+
+  if (block->get_rows()->empty()) {
+    return;  // nothing to measure
+  }
+
+  ASSERT_HOST (block->xheight > 0);
+  const bool show_words = textord_show_initial_words && testing_on;
+  find_repeated_chars(block, show_words);
+#ifndef GRAPHICS_DISABLED
+  if (show_words) {
+    ScrollView::Update();
+  }
+#endif
+  compute_rows_pitch(block, block_index,
+                     textord_debug_pitch_test && testing_on);
+}
+
+
+/**********************************************************************
+ * compute_rows_pitch
+ *
+ * For every row of the block: build its vertical projection, gather gap
+ * statistics with row_pitch_stats and estimate a pitch with
+ * find_row_pitch. Rows for which either step fails are reset to
+ * fixed_pitch 0 and PITCH_DUNNO. Always returns false.
+ **********************************************************************/
+
+bool compute_rows_pitch(     //find line stats
+    TO_BLOCK* block,         //block to do
+    int32_t block_index,     //block number
+    bool testing_on          //correct orientation
+) {
+  TO_ROW_IT row_it(block->get_rows());
+  int32_t row_number = 1;
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    TO_ROW* row = row_it.data();
+    ASSERT_HOST (row->xheight > 0);
+    row->compute_vertical_projection();
+    const int32_t max_gap =
+        static_cast<int32_t>(ceil(row->xheight * textord_words_maxspace));
+    const bool measured =
+        row_pitch_stats(row, max_gap, testing_on) &&
+        find_row_pitch(row, max_gap, textord_dotmatrix_gap + 1, block,
+                       block_index, row_number, testing_on);
+    if (!measured) {
+      row->fixed_pitch = 0.0f;  //insufficient data
+      row->pitch_decision = PITCH_DUNNO;
+    } else if (row->fixed_pitch == 0) {
+      // No pitch found: fall back to the proportional estimates.
+      const float nonspace = row->pr_nonsp;
+      const float space = row->pr_space;
+      row->space_size = space;
+      row->kern_size = nonspace;
+    }
+    ++row_number;
+  }
+  return false;
+}
+
+
+/**********************************************************************
+ * try_doc_fixed
+ *
+ * Attempt to call the entire document fixed pitch.
+ *
+ * Does nothing unless textord_blockndoc_fixed is set. Otherwise it
+ * builds one page-wide projection from the projections of all rows
+ * (each shifted according to the page skew), takes the median of the
+ * positive row pitches and runs tune_row_pitch on that projection,
+ * using the first row of the first block as the master row. The outcome
+ * is only printed/plotted for debugging; the function always returns
+ * false.
+ *
+ * page_tr     top right of the page (not used here).
+ * port_blocks blocks whose rows are examined.
+ * gradient    page skew.
+ **********************************************************************/
+
+bool try_doc_fixed(                //determine pitch
+    ICOORD page_tr,                //top right
+    TO_BLOCK_LIST* port_blocks,    //input list
+    float gradient                 //page skew
+) {
+  int16_t master_x;                //uniform shifts
+  int16_t pitch;                   //median pitch.
+  int x;                           //profile coord
+  int prop_blocks;                 //correct counts
+  int fixed_blocks;
+  int total_row_count;             //total in page
+  TO_BLOCK_IT block_it = port_blocks;
+  TO_BLOCK *block;                 //current block;
+  TO_ROW *row;                     //current row
+  TO_ROW *master_row;              //row that receives the page-level cells
+  int16_t projection_left;         //edges
+  int16_t projection_right;
+  int16_t row_left;                //edges of row
+  int16_t row_right;
+  float master_y;                  //uniform shifts
+  float shift_factor;              //page skew correction
+  float final_pitch;               //output pitch
+  float row_y;                     //baseline
+  STATS projection;                //entire page
+  STATS pitches (0, MAX_ALLOWED_PITCH);  //for median
+  float sp_sd;                     //space sd
+  int16_t mid_cuts;                //no of cheap cuts
+  float pitch_sd;                  //sync rating
+
+  if (block_it.empty () || !textord_blockndoc_fixed)
+    return false;
+  shift_factor = gradient / (gradient * gradient + 1);
+  TO_ROW_IT row_it(block_it.data ()->get_rows());
+  // The first row of the first block anchors the projection; without one
+  // there is nothing to anchor to and data() must not be dereferenced.
+  if (row_it.empty ())
+    return false;
+  master_x = row_it.data ()->projection_left;
+  master_y = row_it.data ()->baseline.y (master_x);
+  projection_left = INT16_MAX;
+  projection_right = -INT16_MAX;
+  prop_blocks = 0;
+  fixed_blocks = 0;
+  total_row_count = 0;
+
+  // Pass 1: collect the row pitches and the extent of the shifted rows.
+  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
+       block_it.forward ()) {
+    block = block_it.data ();
+    row_it.set_to_list (block->get_rows ());
+    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+      row = row_it.data ();
+      total_row_count++;
+      if (row->fixed_pitch > 0)
+        pitches.add (static_cast<int32_t>(row->fixed_pitch), 1);
+      //find median
+      row_y = row->baseline.y (master_x);
+      row_left =
+        static_cast<int16_t>(row->projection_left -
+                             shift_factor * (master_y - row_y));
+      row_right =
+        static_cast<int16_t>(row->projection_right -
+                             shift_factor * (master_y - row_y));
+      if (row_left < projection_left)
+        projection_left = row_left;
+      if (row_right > projection_right)
+        projection_right = row_right;
+    }
+  }
+  if (pitches.get_total () == 0)
+    return false;
+  projection.set_range (projection_left, projection_right);
+
+  // Pass 2: accumulate every row's projection into the page projection.
+  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
+       block_it.forward ()) {
+    block = block_it.data ();
+    row_it.set_to_list (block->get_rows ());
+    for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+      row = row_it.data ();
+      row_y = row->baseline.y (master_x);
+      row_left =
+        static_cast<int16_t>(row->projection_left -
+                             shift_factor * (master_y - row_y));
+      for (x = row->projection_left; x < row->projection_right;
+           x++, row_left++) {
+        projection.add (row_left, row->projection.pile_count (x));
+      }
+    }
+  }
+
+  row_it.set_to_list (block_it.data ()->get_rows ());
+  master_row = row_it.data ();
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_page_cuts && to_win != nullptr)
+    projection.plot (to_win, projection_left,
+                     master_row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
+#endif
+  final_pitch = pitches.ile (0.5);
+  pitch = static_cast<int16_t>(final_pitch);
+  pitch_sd =
+    tune_row_pitch (master_row, &projection, projection_left, projection_right,
+                    pitch * 0.75, final_pitch, sp_sd, mid_cuts,
+                    &master_row->char_cells, false);
+
+  if (textord_debug_pitch_metric)
+    tprintf
+      ("try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
+       prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd,
+       pitch_sd / total_row_count, pitch_sd / pitch,
+       pitch_sd / total_row_count / pitch);
+
+#ifndef GRAPHICS_DISABLED
+  if (textord_show_page_cuts && to_win != nullptr) {
+    float row_shift;               //shift for row
+    ICOORDELT_LIST *master_cells;  //cells for page
+    master_cells = &master_row->char_cells;
+    for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
+         block_it.forward ()) {
+      block = block_it.data ();
+      row_it.set_to_list (block->get_rows ());
+      for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
+           row_it.forward ()) {
+        row = row_it.data ();
+        row_y = row->baseline.y (master_x);
+        row_shift = shift_factor * (master_y - row_y);
+        plot_row_cells(to_win, ScrollView::GOLDENROD, row, row_shift, master_cells);
+      }
+    }
+  }
+#endif
+  // Discard the page-level cells from the row that actually holds them.
+  // The plotting loop above walks `row` over the whole page, so the master
+  // row is tracked separately.
+  master_row->char_cells.clear ();
+  return false;
+}
+
+
+/**********************************************************************
+ * try_block_fixed
+ *
+ * Try to call the entire block fixed.
+ *
+ * Currently a stub: both arguments are ignored and false is always
+ * returned, so compute_fixed_pitch always goes on to try_rows_fixed.
+ **********************************************************************/
+
+bool try_block_fixed( //find line stats
+ TO_BLOCK* block, //block to do
+ int32_t block_index //block number
+) {
+ return false;
+}
+
+
+/**********************************************************************
+ * try_rows_fixed
+ *
+ * Run fixed_pitch_row on every row of the block that already has a
+ * positive pitch estimate, then summarise the row decisions into the
+ * block's pitch_decision. Always returns false.
+ **********************************************************************/
+
+bool try_rows_fixed(         //find line stats
+    TO_BLOCK* block,         //block to do
+    int32_t block_index,     //block number
+    bool testing_on          //correct orientation
+) {
+  TO_ROW_IT row_it(block->get_rows());
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    TO_ROW* row = row_it.data();
+    ASSERT_HOST (row->xheight > 0);
+    if (!(row->fixed_pitch > 0)) {
+      continue;  // no pitch estimate to test
+    }
+    if (!fixed_pitch_row(row, block->block, block_index)) {
+      continue;
+    }
+    if (row->fixed_pitch == 0) {
+      // Pitch was withdrawn: use the proportional estimates instead.
+      const float nonspace = row->pr_nonsp;
+      const float space = row->pr_space;
+      row->space_size = space;
+      row->kern_size = nonspace;
+    }
+  }
+
+  // Tally the row decisions.
+  int32_t def_fixed = 0;
+  int32_t def_prop = 0;
+  int32_t maybe_fixed = 0;
+  int32_t maybe_prop = 0;
+  int32_t corr_fixed = 0;
+  int32_t corr_prop = 0;
+  int32_t dunno = 0;
+  count_block_votes(block, def_fixed, def_prop, maybe_fixed, maybe_prop,
+                    corr_fixed, corr_prop, dunno);
+
+  const bool want_report =
+      testing_on && (textord_debug_pitch_test || textord_blocksall_prop ||
+                     textord_blocksall_fixed);
+  if (want_report) {
+    tprintf ("Initially:");
+    print_block_counts(block, block_index);
+  }
+
+  if (def_fixed > 0 || def_prop > 0) {
+    // Definite rows exist: one side must outnumber the other by the veto
+    // factor, otherwise the block stays undecided.
+    if (def_fixed > def_prop * textord_words_veto_power) {
+      block->pitch_decision = PITCH_DEF_FIXED;
+    } else if (def_prop > def_fixed * textord_words_veto_power) {
+      block->pitch_decision = PITCH_DEF_PROP;
+    } else {
+      block->pitch_decision = PITCH_DUNNO;
+    }
+  } else {
+    // Only tentative rows: same rule on the maybe counts.
+    if (maybe_fixed > maybe_prop * textord_words_veto_power) {
+      block->pitch_decision = PITCH_MAYBE_FIXED;
+    } else if (maybe_prop > maybe_fixed * textord_words_veto_power) {
+      block->pitch_decision = PITCH_MAYBE_PROP;
+    } else {
+      block->pitch_decision = PITCH_DUNNO;
+    }
+  }
+  return false;
+}
+
+
+/**********************************************************************
+ * print_block_counts
+ *
+ * Count the rows of the block by pitch_decision (via count_block_votes)
+ * and print the totals. Counts that contradict textord_blocksall_prop or
+ * textord_blocksall_fixed are flagged " (Wrongly)".
+ **********************************************************************/
+
+void print_block_counts(     //find line stats
+    TO_BLOCK *block,         //block to do
+    int32_t block_index      //block number
+    ) {
+  int32_t n_def_fixed = 0;
+  int32_t n_def_prop = 0;
+  int32_t n_maybe_fixed = 0;
+  int32_t n_maybe_prop = 0;
+  int32_t n_corr_fixed = 0;
+  int32_t n_corr_prop = 0;
+  int32_t n_dunno = 0;
+  count_block_votes(block, n_def_fixed, n_def_prop, n_maybe_fixed,
+                    n_maybe_prop, n_corr_fixed, n_corr_prop, n_dunno);
+
+  const bool any_fixed =
+      n_def_fixed != 0 || n_maybe_fixed != 0 || n_corr_fixed != 0;
+  const bool any_prop =
+      n_def_prop != 0 || n_maybe_prop != 0 || n_corr_prop != 0;
+
+  tprintf ("Block %d has (%d,%d,%d)",
+           block_index, n_def_fixed, n_maybe_fixed, n_corr_fixed);
+  if (textord_blocksall_prop && any_fixed) {
+    tprintf (" (Wrongly)");
+  }
+  tprintf (" fixed, (%d,%d,%d)", n_def_prop, n_maybe_prop, n_corr_prop);
+  if (textord_blocksall_fixed && any_prop) {
+    tprintf (" (Wrongly)");
+  }
+  tprintf (" prop, %d dunno\n", n_dunno);
+}
+
+
+/**********************************************************************
+ * count_block_votes
+ *
+ * Add one to the matching counter for every row of the block, according
+ * to the row's pitch_decision. The counters are only incremented, never
+ * reset, so the caller chooses the starting values.
+ **********************************************************************/
+
+void count_block_votes(      //find line stats
+    TO_BLOCK *block,         //block to do
+    int32_t &def_fixed,      //add to counts
+    int32_t &def_prop,
+    int32_t &maybe_fixed,
+    int32_t &maybe_prop,
+    int32_t &corr_fixed,
+    int32_t &corr_prop,
+    int32_t &dunno) {
+  TO_ROW_IT rows(block->get_rows());
+  for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
+    switch (rows.data()->pitch_decision) {
+      case PITCH_DEF_FIXED:
+        ++def_fixed;
+        break;
+      case PITCH_MAYBE_FIXED:
+        ++maybe_fixed;
+        break;
+      case PITCH_CORR_FIXED:
+        ++corr_fixed;
+        break;
+      case PITCH_DEF_PROP:
+        ++def_prop;
+        break;
+      case PITCH_MAYBE_PROP:
+        ++maybe_prop;
+        break;
+      case PITCH_CORR_PROP:
+        ++corr_prop;
+        break;
+      case PITCH_DUNNO:
+        ++dunno;
+        break;
+    }
+  }
+}
+
+
+/**********************************************************************
+ * row_pitch_stats
+ *
+ * Gather the gaps between the blobs of the row, cluster them, and use
+ * the cluster medians to set the row's initial spacing estimates:
+ * pr_nonsp/pr_space (proportional) and fp_nonsp/fp_space (fixed pitch).
+ *
+ * Only gaps narrower than maxwidth are counted. Returns false, leaving
+ * the estimates untouched, when no gap was counted or no cluster was
+ * found; returns true otherwise. testing_on enables debug output.
+ **********************************************************************/
+
+bool row_pitch_stats( //find line stats
+ TO_ROW* row, //current row
+ int32_t maxwidth, //of spaces
+ bool testing_on //correct orientation
+) {
+ BLOBNBOX *blob; //current blob
+ int gap_index; //current gap
+ int32_t prev_x; //end of prev blob
+ int32_t cluster_count; //no of clusters
+ int32_t prev_count; //of clusters
+ int32_t smooth_factor; //for smoothing stats
+ TBOX blob_box; //bounding box
+ float lower, upper; //cluster thresholds
+ //gap sizes
+ float gaps[BLOCK_STATS_CLUSTERS];
+ //blobs
+ BLOBNBOX_IT blob_it = row->blob_list ();
+ STATS gap_stats (0, maxwidth);
+ STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
+ //clusters
+
+ smooth_factor =
+ static_cast<int32_t>(row->xheight * textord_wordstats_smooth_factor + 1.5);
+ // Histogram the gap from the right edge of the previous blob to the left
+ // edge of each blob that is not flagged joined_to_prev.
+ if (!blob_it.empty ()) {
+ prev_x = blob_it.data ()->bounding_box ().right ();
+ blob_it.forward ();
+ while (!blob_it.at_first ()) {
+ blob = blob_it.data ();
+ if (!blob->joined_to_prev ()) {
+ blob_box = blob->bounding_box ();
+ if (blob_box.left () - prev_x < maxwidth)
+ gap_stats.add (blob_box.left () - prev_x, 1);
+ prev_x = blob_box.right ();
+ }
+ blob_it.forward ();
+ }
+ }
+ if (gap_stats.get_total () == 0) {
+ return false;
+ }
+ cluster_count = 0;
+ lower = row->xheight * words_initial_lower;
+ upper = row->xheight * words_initial_upper;
+ gap_stats.smooth (smooth_factor);
+ // Repeat the clustering until it stops producing more clusters or the
+ // array capacity is reached.
+ do {
+ prev_count = cluster_count;
+ cluster_count = gap_stats.cluster (lower, upper,
+ textord_spacesize_ratioprop,
+ BLOCK_STATS_CLUSTERS, cluster_stats);
+ }
+ while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS);
+ if (cluster_count < 1) {
+ return false;
+ }
+ // Clusters are read from cluster_stats[1] onwards -- presumably slot 0 is
+ // reserved by STATS::cluster; confirm against statistc.h.
+ for (gap_index = 0; gap_index < cluster_count; gap_index++)
+ gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
+ //get medians
+ if (testing_on) {
+ tprintf ("cluster_count=%d:", cluster_count);
+ for (gap_index = 0; gap_index < cluster_count; gap_index++)
+ tprintf (" %g(%d)", gaps[gap_index],
+ cluster_stats[gap_index + 1].get_total ());
+ tprintf ("\n");
+ }
+ // Ascending order, so the scans below can stop at the first median that
+ // reaches a threshold.
+ qsort (gaps, cluster_count, sizeof (float), sort_floats);
+
+ //Try to find proportional non-space and space for row.
+ lower = row->xheight * words_default_prop_nonspace;
+ upper = row->xheight * textord_words_min_minspace;
+ // gap_index becomes the index of the first median not below lower.
+ for (gap_index = 0; gap_index < cluster_count
+ && gaps[gap_index] < lower; gap_index++);
+ if (gap_index == 0) {
+ if (testing_on)
+ tprintf ("No clusters below nonspace threshold!!\n");
+ if (cluster_count > 1) {
+ row->pr_nonsp = gaps[0];
+ row->pr_space = gaps[1];
+ }
+ else {
+ row->pr_nonsp = lower;
+ row->pr_space = gaps[0];
+ }
+ }
+ else {
+ // Non-space is the largest median below lower; space is the first
+ // median that is not below upper, if there is one.
+ row->pr_nonsp = gaps[gap_index - 1];
+ while (gap_index < cluster_count && gaps[gap_index] < upper)
+ gap_index++;
+ if (gap_index == cluster_count) {
+ if (testing_on)
+ tprintf ("No clusters above nonspace threshold!!\n");
+ row->pr_space = lower * textord_spacesize_ratioprop;
+ }
+ else
+ row->pr_space = gaps[gap_index];
+ }
+
+ //Now try to find the fixed pitch space and non-space.
+ upper = row->xheight * words_default_fixed_space;
+ // gap_index becomes the index of the first median not below upper.
+ for (gap_index = 0; gap_index < cluster_count
+ && gaps[gap_index] < upper; gap_index++);
+ if (gap_index == 0) {
+ if (testing_on)
+ tprintf ("No clusters below space threshold!!\n");
+ row->fp_nonsp = upper;
+ row->fp_space = gaps[0];
+ }
+ else {
+ row->fp_nonsp = gaps[gap_index - 1];
+ if (gap_index == cluster_count) {
+ if (testing_on)
+ tprintf ("No clusters above space threshold!!\n");
+ row->fp_space = row->xheight;
+ }
+ else
+ row->fp_space = gaps[gap_index];
+ }
+ if (testing_on) {
+ tprintf
+ ("Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, fp_space=%g\n",
+ row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space);
+ }
+ return true; //computed some stats
+}
+
+
+/**********************************************************************
+ * find_row_pitch
+ *
+ * Check to see if this row could be fixed pitch using the given spacings.
+ * Blobs with gaps smaller than the lower threshold are assumed to be one.
+ * The larger threshold is the word gap threshold.
+ *
+ * Pitch statistics are gathered twice with count_pitch_stats: once with
+ * dm_gap as the ignorable gap (the "dm" version) and once with 0. The
+ * version with the better pitch-to-gap interquartile-range ratio is kept.
+ *
+ * Returns false with pitch_decision PITCH_DUNNO when both versions have
+ * a pitch IQR above maxwidth. Otherwise sets pitch_decision to
+ * PITCH_MAYBE_FIXED or PITCH_MAYBE_PROP, fills in fixed_pitch and the
+ * row's spacing fields, and returns true. block_index and row_index are
+ * used only in debug output.
+ **********************************************************************/
+
+bool find_row_pitch( //find lines
+ TO_ROW* row, //row to do
+ int32_t maxwidth, //max permitted space
+ int32_t dm_gap, //ignorable gaps
+ TO_BLOCK* block, //block of row
+ int32_t block_index, //block_number
+ int32_t row_index, //number of row
+ bool testing_on //correct orientation
+) {
+ bool used_dm_model; //looks like dot matrix
+ float min_space; //estimate threshold
+ float non_space; //gap size
+ float gap_iqr; //interquartile range
+ float pitch_iqr;
+ float dm_gap_iqr; //interquartile range
+ float dm_pitch_iqr;
+ float dm_pitch; //pitch with dm on
+ float pitch; //revised estimate
+ float initial_pitch; //guess at pitch
+ STATS gap_stats (0, maxwidth);
+ //centre-centre
+ STATS pitch_stats (0, maxwidth);
+
+ row->fixed_pitch = 0.0f;
+ // Start from the estimates left on the row (fp_space, fp_nonsp), keeping
+ // the pitch near the xheight and the non-space no larger than the pitch.
+ initial_pitch = row->fp_space;
+ if (initial_pitch > row->xheight * (1 + words_default_fixed_limit))
+ initial_pitch = row->xheight;//keep pitch decent
+ non_space = row->fp_nonsp;
+ if (non_space > initial_pitch)
+ non_space = initial_pitch;
+ min_space = (initial_pitch + non_space) / 2;
+
+ // First run: gaps up to dm_gap are ignorable. On failure the pitch IQR is
+ // set to a multiple of maxwidth so the insufficient-data test below sees it.
+ if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
+ initial_pitch, min_space, true, false, dm_gap)) {
+ dm_gap_iqr = 0.0001f;
+ dm_pitch_iqr = maxwidth * 2.0f;
+ dm_pitch = initial_pitch;
+ }
+ else {
+ dm_gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
+ dm_pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
+ dm_pitch = pitch_stats.ile (0.5);
+ }
+ gap_stats.clear ();
+ pitch_stats.clear ();
+ // Second run: no ignorable gaps.
+ if (!count_pitch_stats (row, &gap_stats, &pitch_stats,
+ initial_pitch, min_space, true, false, 0)) {
+ gap_iqr = 0.0001f;
+ pitch_iqr = maxwidth * 3.0f;
+ }
+ else {
+ gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
+ pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
+ if (testing_on)
+ tprintf
+ ("First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
+ initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
+ initial_pitch = pitch_stats.ile (0.5);
+ // If the measured median pitch is below the space threshold, repeat the
+ // count using the median as both pitch and threshold.
+ if (min_space > initial_pitch
+ && count_pitch_stats (row, &gap_stats, &pitch_stats,
+ initial_pitch, initial_pitch, true, false, 0)) {
+ min_space = initial_pitch;
+ gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
+ pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
+ if (testing_on)
+ tprintf
+ ("Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
+ initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5));
+ initial_pitch = pitch_stats.ile (0.5);
+ }
+ }
+ if (textord_debug_pitch_metric)
+ tprintf("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:",
+ block_index, row_index, 'X',
+ pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
+ pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ? 'D' :
+ (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ? 'S' : 'M'));
+ if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
+ row->pitch_decision = PITCH_DUNNO;
+ if (textord_debug_pitch_metric)
+ tprintf ("\n");
+ return false; //insufficient data
+ }
+ // Cross-multiplied comparison of pitch_iqr/gap_iqr against
+ // dm_pitch_iqr/dm_gap_iqr: the smaller ratio wins, ties go to non-dm.
+ if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
+ if (testing_on)
+ tprintf
+ ("Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
+ pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
+ gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25);
+ pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25);
+ pitch = pitch_stats.ile (0.5);
+ used_dm_model = false;
+ }
+ else {
+ if (testing_on)
+ tprintf
+ ("Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
+ pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
+ gap_iqr = dm_gap_iqr;
+ pitch_iqr = dm_pitch_iqr;
+ pitch = dm_pitch;
+ used_dm_model = true;
+ }
+ if (textord_debug_pitch_metric) {
+ tprintf ("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:",
+ pitch_iqr, gap_iqr, pitch);
+ tprintf ("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:",
+ pitch_iqr / gap_iqr, pitch_iqr / block->xheight,
+ pitch_iqr < gap_iqr * textord_fpiqr_ratio
+ && pitch_iqr < block->xheight * textord_max_pitch_iqr
+ && pitch < block->xheight * textord_words_default_maxspace
+ ? 'F' : 'P');
+ }
+ // Tentatively fixed only if the pitch spread is small relative to both
+ // the gap spread and the block xheight, and the pitch itself is not too
+ // large for the block xheight.
+ if (pitch_iqr < gap_iqr * textord_fpiqr_ratio
+ && pitch_iqr < block->xheight * textord_max_pitch_iqr
+ && pitch < block->xheight * textord_words_default_maxspace)
+ row->pitch_decision = PITCH_MAYBE_FIXED;
+ else
+ row->pitch_decision = PITCH_MAYBE_PROP;
+ row->fixed_pitch = pitch;
+ // NOTE(review): gap_stats was last filled by a run with no ignorable gap,
+ // so kern_size comes from that run even when the dm version was chosen --
+ // confirm this is intended.
+ row->kern_size = gap_stats.ile (0.5);
+ // The sum is truncated to an integer before the division by 2.
+ row->min_space = static_cast<int32_t>(row->fixed_pitch + non_space) / 2;
+ if (row->min_space > row->fixed_pitch)
+ row->min_space = static_cast<int32_t>(row->fixed_pitch);
+ row->max_nonspace = row->min_space;
+ row->space_size = row->fixed_pitch;
+ row->space_threshold = (row->max_nonspace + row->min_space) / 2;
+ row->used_dm_model = used_dm_model;
+ return true;
+}
+
+
+/**********************************************************************
+ * fixed_pitch_row
+ *
+ * Classify row->pitch_decision (DEF/MAYBE FIXED, DEF/MAYBE PROP or DUNNO)
+ * from the cell-size sd returned by tune_row_pitch for row->fixed_pitch.
+ * Non-text blocks and textord_all_prop force PITCH_DEF_PROP. Returns true.
+ **********************************************************************/
+
+bool fixed_pitch_row(TO_ROW* row, // row to do
+ BLOCK* block, // block of row, may be nullptr
+ int32_t block_index // block_number
+) {
+ const char *res_string; // pitch result
+ int16_t mid_cuts = 0; // no of cheap cuts; tune_row_pitch can return without setting it
+ float non_space; // gap size
+ float pitch_sd; // error on pitch
+ float sp_sd = 0.0f; // space sd
+
+ non_space = row->fp_nonsp;
+ if (non_space > row->fixed_pitch)
+ non_space = row->fixed_pitch; // non-space gap is capped at the pitch
+ POLY_BLOCK* pb = block != nullptr ? block->pdblk.poly_block() : nullptr;
+ if (textord_all_prop || (pb != nullptr && !pb->IsText())) {
+ // Set the decision to definitely proportional.
+ pitch_sd = textord_words_def_prop * row->fixed_pitch;
+ row->pitch_decision = PITCH_DEF_PROP;
+ } else {
+ // tune_row_pitch may also update row->fixed_pitch (passed by reference).
+ pitch_sd = tune_row_pitch (row, &row->projection, row->projection_left,
+ row->projection_right,
+ (row->fixed_pitch + non_space * 3) / 4,
+ row->fixed_pitch, sp_sd, mid_cuts,
+ &row->char_cells,
+ block_index == textord_debug_block);
+ if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch
+ && ((pitsync_linear_version & 3) < 3
+ || ((pitsync_linear_version & 3) >= 3 && (row->used_dm_model
+ || sp_sd > 20
+ || (pitch_sd == 0 && sp_sd > 10))))) {
+ if (pitch_sd < textord_words_def_fixed * row->fixed_pitch
+ && !row->all_caps
+ && ((pitsync_linear_version & 3) < 3 || sp_sd > 20))
+ row->pitch_decision = PITCH_DEF_FIXED;
+ else
+ row->pitch_decision = PITCH_MAYBE_FIXED;
+ }
+ else if ((pitsync_linear_version & 3) < 3
+ || sp_sd > 20
+ || mid_cuts > 0
+ || pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) {
+ if (pitch_sd < textord_words_def_prop * row->fixed_pitch)
+ row->pitch_decision = PITCH_MAYBE_PROP;
+ else
+ row->pitch_decision = PITCH_DEF_PROP;
+ }
+ else
+ row->pitch_decision = PITCH_DUNNO;
+ }
+
+ if (textord_debug_pitch_metric) {
+ res_string = "??";
+ switch (row->pitch_decision) {
+ case PITCH_DEF_PROP:
+ res_string = "DP";
+ break;
+ case PITCH_MAYBE_PROP:
+ res_string = "MP";
+ break;
+ case PITCH_DEF_FIXED:
+ res_string = "DF";
+ break;
+ case PITCH_MAYBE_FIXED:
+ res_string = "MF";
+ break;
+ default:
+ res_string = "??";
+ }
+ tprintf (":sd/p=%g:occ=%g:init_res=%s\n",
+ pitch_sd / row->fixed_pitch, sp_sd, res_string);
+ }
+ return true;
+}
+
+
+/**********************************************************************
+ * count_pitch_stats
+ *
+ * Count up the gap and pitch stats on the row to see if it is fixed pitch.
+ * Blobs separated by less than dm_gap are merged into one joined box.
+ * A gap of min_space or more breaks the run, so no sample spans it.
+ * The return value is true when at least 3 gap samples were collected.
+ **********************************************************************/
+
+bool count_pitch_stats( //collect gap/pitch stats
+ TO_ROW* row, //row to do
+ STATS* gap_stats, //blob gaps
+ STATS* pitch_stats, //centre-centre stats
+ float initial_pitch, //guess at pitch
+ float min_space, //estimate space size
+ bool ignore_outsize, //discard big objects
+ bool split_outsize, //split big objects
+ int32_t dm_gap //ignorable gaps
+) {
+ bool prev_valid; //not word broken
+ BLOBNBOX *blob; //current blob
+ //blobs
+ BLOBNBOX_IT blob_it = row->blob_list ();
+ int32_t prev_right; //end of prev blob
+ int32_t prev_centre; //centre of previous blob
+ int32_t x_centre; //centre of this blob
+ int32_t blob_width; //width of blob
+ int32_t width_units; //no of widths in blob
+ float width; //blob width
+ TBOX blob_box; //bounding box
+ TBOX joined_box; //of super blob
+
+ gap_stats->clear ();
+ pitch_stats->clear ();
+ if (blob_it.empty ())
+ return false;
+ prev_valid = false;
+ prev_centre = 0;
+ prev_right = 0; // stop compiler warning
+ joined_box = blob_it.data ()->bounding_box ();
+ do {
+ blob_it.forward ();
+ blob = blob_it.data ();
+ if (!blob->joined_to_prev ()) {
+ blob_box = blob->bounding_box ();
+ if ((blob_box.left () - joined_box.right () < dm_gap
+ && !blob_it.at_first ())
+ || blob->cblob() == nullptr)
+ joined_box += blob_box; //merge blobs
+ else {
+ blob_width = joined_box.width ();
+ if (split_outsize) {
+ width_units =
+ static_cast<int32_t>(floor (static_cast<float>(blob_width) / initial_pitch + 0.5));
+ if (width_units < 1)
+ width_units = 1;
+ width_units--; //extra pitches beyond the first
+ }
+ else if (ignore_outsize) {
+ width = static_cast<float>(blob_width) / initial_pitch;
+ width_units = width < 1 + words_default_fixed_limit
+ && width > 1 - words_default_fixed_limit ? 0 : -1; //-1 rejects the blob
+ }
+ else
+ width_units = 0; //everything in
+ x_centre = static_cast<int32_t>(joined_box.left ()
+ + (blob_width -
+ width_units * initial_pitch) / 2);
+ if (prev_valid && width_units >= 0) {
+ // if (width_units>0)
+ // {
+ // tprintf("wu=%d, width=%d, xc=%d, adding %d\n",
+ // width_units,blob_width,x_centre,x_centre-prev_centre);
+ // }
+ gap_stats->add (joined_box.left () - prev_right, 1);
+ pitch_stats->add (x_centre - prev_centre, 1);
+ }
+ prev_centre = static_cast<int32_t>(x_centre + width_units * initial_pitch);
+ prev_right = joined_box.right ();
+ prev_valid = blob_box.left () - joined_box.right () < min_space;
+ prev_valid = prev_valid && width_units >= 0;
+ joined_box = blob_box; //start the next super blob
+ }
+ }
+ }
+ while (!blob_it.at_first ()); //stop once the iterator is back at the list start
+ return gap_stats->get_total () >= 3;
+}
+
+
+/**********************************************************************
+ * tune_row_pitch
+ *
+ * Try pitches up to textord_pitch_range either side of initial_pitch, keep the
+ * one with the lowest compute_pitch_sd, store it in initial_pitch, return its sd.
+ **********************************************************************/
+
+float tune_row_pitch( //find fp cells
+ TO_ROW* row, //row to do
+ STATS* projection, //vertical projection
+ int16_t projection_left, //edge of projection
+ int16_t projection_right, //edge of projection
+ float space_size, //size of blank
+ float& initial_pitch, //guess at pitch, updated to best pitch
+ float& best_sp_sd, //space sd
+ int16_t& best_mid_cuts, //no of cheap cuts
+ ICOORDELT_LIST* best_cells, //row cells
+ bool testing_on //individual words
+) {
+ int pitch_delta; //offset pitch
+ int16_t mid_cuts; //cheap cuts
+ float pitch_sd; //current sd
+ float best_sd; //best result
+ float best_pitch; //pitch for best result
+ float initial_sd; //starting error
+ float sp_sd; //space sd
+ ICOORDELT_LIST test_cells; //row cells
+ ICOORDELT_IT best_it; //start of best list
+
+ if (textord_fast_pitch_test)
+ return tune_row_pitch2 (row, projection, projection_left,
+ projection_right, space_size, initial_pitch,
+ best_sp_sd,
+ //space sd
+ best_mid_cuts, best_cells, testing_on);
+ if (textord_disable_pitch_test) {
+ best_sp_sd = initial_pitch; //NOTE: best_mid_cuts and best_cells are not written on this path
+ return initial_pitch;
+ }
+ initial_sd =
+ compute_pitch_sd(row,
+ projection,
+ projection_left,
+ projection_right,
+ space_size,
+ initial_pitch,
+ best_sp_sd,
+ best_mid_cuts,
+ best_cells,
+ testing_on);
+ best_sd = initial_sd;
+ best_pitch = initial_pitch;
+ if (testing_on)
+ tprintf ("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
+ for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) { //search larger pitches
+ pitch_sd =
+ compute_pitch_sd (row, projection, projection_left, projection_right,
+ space_size, initial_pitch + pitch_delta, sp_sd,
+ mid_cuts, &test_cells, testing_on);
+ if (testing_on)
+ tprintf ("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta,
+ pitch_sd);
+ if (pitch_sd < best_sd) {
+ best_sd = pitch_sd;
+ best_mid_cuts = mid_cuts;
+ best_sp_sd = sp_sd;
+ best_pitch = initial_pitch + pitch_delta;
+ best_cells->clear ();
+ best_it.set_to_list (best_cells);
+ best_it.add_list_after (&test_cells); //move the trial cells into best_cells
+ }
+ else
+ test_cells.clear ();
+ if (pitch_sd > initial_sd)
+ break; //getting worse
+ }
+ for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) { //search smaller pitches
+ pitch_sd =
+ compute_pitch_sd (row, projection, projection_left, projection_right,
+ space_size, initial_pitch - pitch_delta, sp_sd,
+ mid_cuts, &test_cells, testing_on);
+ if (testing_on)
+ tprintf ("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta,
+ pitch_sd);
+ if (pitch_sd < best_sd) {
+ best_sd = pitch_sd;
+ best_mid_cuts = mid_cuts;
+ best_sp_sd = sp_sd;
+ best_pitch = initial_pitch - pitch_delta;
+ best_cells->clear ();
+ best_it.set_to_list (best_cells);
+ best_it.add_list_after (&test_cells);
+ }
+ else
+ test_cells.clear ();
+ if (pitch_sd > initial_sd)
+ break; //getting worse
+ }
+ initial_pitch = best_pitch; //report the chosen pitch to the caller
+
+ if (textord_debug_pitch_metric)
+ print_pitch_sd(row,
+ projection,
+ projection_left,
+ projection_right,
+ space_size,
+ best_pitch);
+
+ return best_sd;
+}
+
+
+/**********************************************************************
+ * tune_row_pitch2
+ *
+ * Fast variant: fold the projection modulo each candidate integer pitch, take
+ * the pitch with the lowest bucket, and score it with one compute_pitch_sd call.
+ **********************************************************************/
+
+float tune_row_pitch2( //find fp cells
+ TO_ROW* row, //row to do
+ STATS* projection, //vertical projection
+ int16_t projection_left, //edge of projection
+ int16_t projection_right, //edge of projection
+ float space_size, //size of blank
+ float& initial_pitch, //guess at pitch, updated to best pitch
+ float& best_sp_sd, //space sd
+ int16_t& best_mid_cuts, //no of cheap cuts
+ ICOORDELT_LIST* best_cells, //row cells
+ bool testing_on //individual words
+) {
+ int pitch_delta; //offset pitch
+ int16_t pixel; //pixel coord
+ int16_t best_pixel; //pixel coord
+ int16_t best_delta; //best pitch offset
+ int16_t best_pitch; //best pitch
+ int16_t start; //of good range
+ int16_t end; //of good range
+ int32_t best_count; //lowest sum
+ float best_sd; //best result
+
+ best_sp_sd = initial_pitch;
+
+ best_pitch = static_cast<int>(initial_pitch);
+ if (textord_disable_pitch_test || best_pitch <= textord_pitch_range) { //also keeps every modulus below positive
+ return initial_pitch; //NOTE: best_mid_cuts and best_cells are not written on this path
+ }
+ std::unique_ptr<STATS[]> sum_proj(new STATS[textord_pitch_range * 2 + 1]); //summed projection
+
+ for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
+ pitch_delta++)
+ sum_proj[textord_pitch_range + pitch_delta].set_range (0,
+ best_pitch +
+ pitch_delta + 1);
+ for (pixel = projection_left; pixel <= projection_right; pixel++) {
+ for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
+ pitch_delta++) {
+ sum_proj[textord_pitch_range + pitch_delta].add(
+ (pixel - projection_left) % (best_pitch + pitch_delta),
+ projection->pile_count(pixel));
+ }
+ }
+ best_count = sum_proj[textord_pitch_range].pile_count (0);
+ best_delta = 0;
+ best_pixel = 0;
+ for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range;
+ pitch_delta++) {
+ for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
+ if (sum_proj[textord_pitch_range + pitch_delta].pile_count (pixel)
+ < best_count) {
+ best_count =
+ sum_proj[textord_pitch_range +
+ pitch_delta].pile_count (pixel);
+ best_delta = pitch_delta;
+ best_pixel = pixel;
+ }
+ }
+ }
+ if (testing_on)
+ tprintf ("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n",
+ initial_pitch, best_delta, best_count);
+ best_pitch += best_delta;
+ initial_pitch = best_pitch;
+ best_count++;
+ best_count += best_count; //threshold is now 2 * (lowest sum + 1)
+ for (start = best_pixel - 2; start > best_pixel - best_pitch
+ && sum_proj[textord_pitch_range +
+ best_delta].pile_count (start % best_pitch) <= best_count;
+ start--); //extend left while within threshold
+ for (end = best_pixel + 2;
+ end < best_pixel + best_pitch
+ && sum_proj[textord_pitch_range +
+ best_delta].pile_count (end % best_pitch) <= best_count;
+ end++); //extend right while within threshold
+
+ best_sd =
+ compute_pitch_sd(row,
+ projection,
+ projection_left,
+ projection_right,
+ space_size,
+ initial_pitch,
+ best_sp_sd,
+ best_mid_cuts,
+ best_cells,
+ testing_on,
+ start,
+ end);
+ if (testing_on)
+ tprintf ("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch,
+ best_sd);
+
+ if (textord_debug_pitch_metric)
+ print_pitch_sd(row,
+ projection,
+ projection_left,
+ projection_right,
+ space_size,
+ initial_pitch);
+
+ return best_sd;
+}
+
+
+/**********************************************************************
+ * compute_pitch_sd
+ *
+ * Split the row into words at gaps >= space_size, pitch-sync each word, and
+ * return the rms sync error; sp_sd receives the rms inter-word space error.
+ **********************************************************************/
+
+float compute_pitch_sd( //find fp cells
+ TO_ROW* row, //row to do
+ STATS* projection, //vertical projection
+ int16_t projection_left, //edge
+ int16_t projection_right, //edge
+ float space_size, //size of blank
+ float initial_pitch, //guess at pitch
+ float& sp_sd, //space sd
+ int16_t& mid_cuts, //no of free cuts
+ ICOORDELT_LIST* row_cells, //list of chop pts
+ bool testing_on, //individual words
+ int16_t start, //start of good range
+ int16_t end //end of good range
+) {
+ int16_t occupation; //no of cells in word.
+ //blobs
+ BLOBNBOX_IT blob_it = row->blob_list ();
+ BLOBNBOX_IT start_it; //start of word
+ BLOBNBOX_IT plot_it; //for plotting
+ int16_t blob_count; //no of blobs
+ TBOX blob_box; //bounding box
+ TBOX prev_box; //of super blob
+ int32_t prev_right; //of word sync
+ int scale_factor; //on scores for big words
+ int32_t sp_count; //spaces
+ FPSEGPT_LIST seg_list; //char cells
+ FPSEGPT_IT seg_it; //iterator
+ int16_t segpos; //position of segment
+ int16_t cellpos; //previous cell boundary
+ //iterator
+ ICOORDELT_IT cell_it = row_cells;
+ ICOORDELT *cell; //new cell
+ double sqsum; //sum of squares
+ double spsum; //of spaces
+ double sp_var; //space error
+ double word_sync; //result for word
+ int32_t total_count; //total blobs
+
+ if ((pitsync_linear_version & 3) > 1) { //whole-row mode: delegate to compute_pitch_sd2
+ word_sync = compute_pitch_sd2 (row, projection, projection_left,
+ projection_right, initial_pitch,
+ occupation, mid_cuts, row_cells,
+ testing_on, start, end);
+ sp_sd = occupation; //in this mode sp_sd carries the occupation count
+ return word_sync;
+ }
+ mid_cuts = 0;
+ cellpos = 0;
+ total_count = 0;
+ sqsum = 0;
+ sp_count = 0;
+ spsum = 0;
+ prev_right = -1; //negative means no previous word yet
+ if (blob_it.empty ())
+ return space_size * 10;
+#ifndef GRAPHICS_DISABLED
+ if (testing_on && to_win != nullptr) {
+ blob_box = blob_it.data ()->bounding_box (); //overwritten by box_next below
+ projection->plot (to_win, projection_left,
+ row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
+ }
+#endif
+ start_it = blob_it;
+ blob_count = 0;
+ blob_box = box_next (&blob_it);//first blob
+ blob_it.mark_cycle_pt ();
+ do {
+ for (; blob_count > 0; blob_count--)
+ box_next(&start_it); //advance start_it past the previous word
+ do {
+ prev_box = blob_box;
+ blob_count++;
+ blob_box = box_next (&blob_it);
+ }
+ while (!blob_it.cycled_list ()
+ && blob_box.left () - prev_box.right () < space_size);
+ plot_it = start_it;
+ if (pitsync_linear_version & 3)
+ word_sync =
+ check_pitch_sync2 (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
+ projection, projection_left, projection_right,
+ row->xheight * textord_projection_scale,
+ occupation, &seg_list, start, end);
+ else
+ word_sync =
+ check_pitch_sync (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
+ projection, &seg_list);
+ if (testing_on) {
+ tprintf ("Word ending at (%d,%d), len=%d, sync rating=%g, ",
+ prev_box.right (), prev_box.top (),
+ seg_list.length () - 1, word_sync);
+ seg_it.set_to_list (&seg_list);
+ for (seg_it.mark_cycle_pt (); !seg_it.cycled_list ();
+ seg_it.forward ()) {
+ if (seg_it.data ()->faked)
+ tprintf ("(F)");
+ tprintf ("%d, ", seg_it.data ()->position ());
+ // tprintf("C=%g, s=%g, sq=%g\n",
+ // seg_it.data()->cost_function(),
+ // seg_it.data()->sum(),
+ // seg_it.data()->squares());
+ }
+ tprintf ("\n");
+ }
+#ifndef GRAPHICS_DISABLED
+ if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr)
+ plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
+#endif
+ seg_it.set_to_list (&seg_list);
+ if (prev_right >= 0) {
+ sp_var = seg_it.data ()->position () - prev_right;
+ sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch; //residual from nearest pitch multiple
+ sp_var *= sp_var;
+ spsum += sp_var;
+ sp_count++;
+ }
+ for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
+ segpos = seg_it.data ()->position ();
+ if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) {
+ //big gap
+ while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) {
+ cell = new ICOORDELT (cellpos + static_cast<int16_t>(initial_pitch), 0);
+ cell_it.add_after_then_move (cell);
+ cellpos += static_cast<int16_t>(initial_pitch);
+ }
+ //make new one
+ cell = new ICOORDELT (segpos, 0);
+ cell_it.add_after_then_move (cell);
+ cellpos = segpos;
+ }
+ else if (segpos > cellpos - initial_pitch / 2) {
+ cell = cell_it.data ();
+ //average positions
+ cell->set_x ((cellpos + segpos) / 2);
+ cellpos = cell->x ();
+ }
+ }
+ seg_it.move_to_last ();
+ prev_right = seg_it.data ()->position ();
+ if (textord_pitch_scalebigwords) {
+ scale_factor = (seg_list.length () - 2) / 2;
+ if (scale_factor < 1)
+ scale_factor = 1;
+ }
+ else
+ scale_factor = 1;
+ sqsum += word_sync * scale_factor;
+ total_count += (seg_list.length () - 1) * scale_factor;
+ seg_list.clear ();
+ }
+ while (!blob_it.cycled_list ());
+ sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
+ return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
+}
+
+
+/**********************************************************************
+ * compute_pitch_sd2
+ *
+ * Pitch-sync the whole row as one word with check_pitch_sync2, append a cell
+ * per segment point to row_cells, and return sqrt(sync error / occupation).
+ **********************************************************************/
+
+float compute_pitch_sd2( //find fp cells
+ TO_ROW* row, //row to do
+ STATS* projection, //vertical projection
+ int16_t projection_left, //edge
+ int16_t projection_right, //edge
+ float initial_pitch, //guess at pitch
+ int16_t& occupation, //no of occupied cells
+ int16_t& mid_cuts, //no of free cuts
+ ICOORDELT_LIST* row_cells, //list of chop pts
+ bool testing_on, //individual words
+ int16_t start, //start of good range
+ int16_t end //end of good range
+) {
+ //blobs
+ BLOBNBOX_IT blob_it = row->blob_list ();
+ BLOBNBOX_IT plot_it;
+ int16_t blob_count; //no of blobs
+ TBOX blob_box; //bounding box
+ FPSEGPT_LIST seg_list; //char cells
+ FPSEGPT_IT seg_it; //iterator
+ int16_t segpos; //position of segment
+ //iterator
+ ICOORDELT_IT cell_it = row_cells;
+ ICOORDELT *cell; //new cell
+ double word_sync; //result for word
+
+ mid_cuts = 0;
+ if (blob_it.empty ()) {
+ occupation = 0;
+ return initial_pitch * 10;
+ }
+#ifndef GRAPHICS_DISABLED
+ if (testing_on && to_win != nullptr) {
+ projection->plot (to_win, projection_left,
+ row->intercept (), 1.0f, -1.0f, ScrollView::CORAL);
+ }
+#endif
+ blob_count = 0;
+ blob_it.mark_cycle_pt ();
+ do {
+ //count the boxes returned by box_next over one full cycle of the list
+ blob_box = box_next (&blob_it);
+ blob_count++;
+ }
+ while (!blob_it.cycled_list ());
+ plot_it = blob_it;
+ word_sync = check_pitch_sync2 (&blob_it, blob_count, static_cast<int16_t>(initial_pitch),
+ 2, projection, projection_left,
+ projection_right,
+ row->xheight * textord_projection_scale,
+ occupation, &seg_list, start, end);
+ if (testing_on) {
+ tprintf ("Row ending at (%d,%d), len=%d, sync rating=%g, ",
+ blob_box.right (), blob_box.top (),
+ seg_list.length () - 1, word_sync);
+ seg_it.set_to_list (&seg_list);
+ for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
+ if (seg_it.data ()->faked)
+ tprintf ("(F)");
+ tprintf ("%d, ", seg_it.data ()->position ());
+ // tprintf("C=%g, s=%g, sq=%g\n",
+ // seg_it.data()->cost_function(),
+ // seg_it.data()->sum(),
+ // seg_it.data()->squares());
+ }
+ tprintf ("\n");
+ }
+#ifndef GRAPHICS_DISABLED
+ if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr)
+ plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list);
+#endif
+ seg_it.set_to_list (&seg_list);
+ for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
+ segpos = seg_it.data ()->position ();
+ //make new one
+ cell = new ICOORDELT (segpos, 0);
+ cell_it.add_after_then_move (cell);
+ if (seg_it.at_last ())
+ mid_cuts = seg_it.data ()->cheap_cuts (); //taken from the final segment point
+ }
+ seg_list.clear ();
+ return occupation > 0 ? sqrt (word_sync / occupation) : initial_pitch * 10;
+}
+
+
+/**********************************************************************
+ * print_pitch_sd
+ *
+ * Debug output only: recompute the per-word and whole-row pitch sd with
+ * check_pitch_sync2 and tprintf the values and the implied fixed/prop result.
+ **********************************************************************/
+
+void print_pitch_sd( //print pitch sd stats
+ TO_ROW *row, //row to do
+ STATS *projection, //vertical projection
+ int16_t projection_left, //edges of projection
+ int16_t projection_right,
+ float space_size, //size of blank
+ float initial_pitch //guess at pitch
+ ) {
+ const char *res2; //pitch result
+ int16_t occupation; //used cells
+ float sp_sd; //space sd
+ //blobs
+ BLOBNBOX_IT blob_it = row->blob_list ();
+ BLOBNBOX_IT start_it; //start of word
+ BLOBNBOX_IT row_start; //start of row
+ int16_t blob_count; //no of blobs
+ int16_t total_blob_count; //total blobs in line
+ TBOX blob_box; //bounding box
+ TBOX prev_box; //of super blob
+ int32_t prev_right; //of word sync
+ int scale_factor; //on scores for big words
+ int32_t sp_count; //spaces
+ FPSEGPT_LIST seg_list; //char cells
+ FPSEGPT_IT seg_it; //iterator
+ double sqsum; //sum of squares
+ double spsum; //of spaces
+ double sp_var; //space error
+ double word_sync; //result for word
+ double total_count; //total cuts
+
+ if (blob_it.empty ())
+ return;
+ row_start = blob_it;
+ total_blob_count = 0;
+
+ total_count = 0;
+ sqsum = 0;
+ sp_count = 0;
+ spsum = 0;
+ prev_right = -1; //negative means no previous word yet
+ blob_it = row_start;
+ start_it = blob_it;
+ blob_count = 0;
+ blob_box = box_next (&blob_it);//first blob
+ blob_it.mark_cycle_pt ();
+ do {
+ for (; blob_count > 0; blob_count--)
+ box_next(&start_it); //advance start_it past the previous word
+ do {
+ prev_box = blob_box;
+ blob_count++;
+ blob_box = box_next (&blob_it);
+ }
+ while (!blob_it.cycled_list ()
+ && blob_box.left () - prev_box.right () < space_size);
+ word_sync =
+ check_pitch_sync2 (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2,
+ projection, projection_left, projection_right,
+ row->xheight * textord_projection_scale,
+ occupation, &seg_list, 0, 0);
+ total_blob_count += blob_count;
+ seg_it.set_to_list (&seg_list);
+ if (prev_right >= 0) {
+ sp_var = seg_it.data ()->position () - prev_right;
+ sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch; //residual from nearest pitch multiple
+ sp_var *= sp_var;
+ spsum += sp_var;
+ sp_count++;
+ }
+ seg_it.move_to_last ();
+ prev_right = seg_it.data ()->position ();
+ if (textord_pitch_scalebigwords) {
+ scale_factor = (seg_list.length () - 2) / 2;
+ if (scale_factor < 1)
+ scale_factor = 1;
+ }
+ else
+ scale_factor = 1;
+ sqsum += word_sync * scale_factor;
+ total_count += (seg_list.length () - 1) * scale_factor;
+ seg_list.clear ();
+ }
+ while (!blob_it.cycled_list ());
+ sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
+ word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
+ tprintf ("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:",
+ word_sync, word_sync / initial_pitch, sp_sd,
+ word_sync < textord_words_pitchsd_threshold * initial_pitch
+ ? 'F' : 'P');
+
+ start_it = row_start;
+ blob_it = row_start;
+ word_sync =
+ check_pitch_sync2 (&blob_it, total_blob_count, static_cast<int16_t>(initial_pitch), 2,
+ projection, projection_left, projection_right,
+ row->xheight * textord_projection_scale, occupation,
+ &seg_list, 0, 0); //second pass: whole row as a single word
+ if (occupation > 1)
+ word_sync /= occupation;
+ word_sync = sqrt (word_sync);
+
+#ifndef GRAPHICS_DISABLED
+ if (textord_show_row_cuts && to_win != nullptr)
+ plot_fp_cells2(to_win, ScrollView::CORAL, row, &seg_list);
+#endif
+ seg_list.clear ();
+ if (word_sync < textord_words_pitchsd_threshold * initial_pitch) {
+ if (word_sync < textord_words_def_fixed * initial_pitch
+ && !row->all_caps)
+ res2 = "DF";
+ else
+ res2 = "MF";
+ }
+ else
+ res2 = word_sync < textord_words_def_prop * initial_pitch ? "MP" : "DP";
+ tprintf
+ ("row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n",
+ word_sync, word_sync / initial_pitch,
+ word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P',
+ occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps);
+}
+
+/**********************************************************************
+ * find_repeated_chars
+ *
+ * Turn each run of blobs sharing a repeated_set id into a word flagged
+ * W_REP_CHAR and W_DONT_CHOP, appended to that row's rep_words list.
+ **********************************************************************/
+void find_repeated_chars(TO_BLOCK* block, // Block to search.
+ bool testing_on) { // Debug mode (not referenced in the body).
+ POLY_BLOCK* pb = block->block->pdblk.poly_block();
+ if (pb != nullptr && !pb->IsText())
+ return; // Don't find repeated chars in non-text blocks.
+
+ TO_ROW *row;
+ BLOBNBOX_IT box_it;
+ BLOBNBOX_IT search_it; // forward search
+ WERD *word; // new word
+ TBOX word_box; // for plotting (not referenced below)
+ int blobcount, repeated_set;
+
+ TO_ROW_IT row_it = block->get_rows();
+ if (row_it.empty()) return; // empty block
+ for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+ row = row_it.data();
+ box_it.set_to_list(row->blob_list());
+ if (box_it.empty()) continue; // no blobs in this row
+ if (!row->rep_chars_marked()) {
+ mark_repeated_chars(row);
+ }
+ if (row->num_repeated_sets() == 0) continue; // nothing to do for this row
+ // new words
+ WERD_IT word_it(&row->rep_words);
+ do {
+ if (box_it.data()->repeated_set() != 0 &&
+ !box_it.data()->joined_to_prev()) {
+ blobcount = 1;
+ repeated_set = box_it.data()->repeated_set();
+ search_it = box_it;
+ search_it.forward();
+ while (!search_it.at_first() &&
+ search_it.data()->repeated_set() == repeated_set) {
+ blobcount++; // count following blobs in the same set
+ search_it.forward();
+ }
+ // After the call to make_real_word() all the blobs from this
+ // repeated set will be removed from the blob list. box_it will be
+ // set to point to the blob after the end of the extracted sequence.
+ word = make_real_word(&box_it, blobcount, box_it.at_first(), 1);
+ if (!box_it.empty() && box_it.data()->joined_to_prev()) {
+ tprintf("Bad box joined to prev at");
+ box_it.data()->bounding_box().print();
+ tprintf("After repeated word:");
+ word->bounding_box().print();
+ }
+ ASSERT_HOST(box_it.empty() || !box_it.data()->joined_to_prev());
+ word->set_flag(W_REP_CHAR, true);
+ word->set_flag(W_DONT_CHOP, true);
+ word_it.add_after_then_move(word);
+ } else {
+ box_it.forward();
+ }
+ } while (!box_it.at_first());
+ }
+}
+
+
+/**********************************************************************
+ * plot_fp_word
+ *
+ * Plot a block of words as if fixed pitch; overwrites each row's space limits.
+ **********************************************************************/
+
+#ifndef GRAPHICS_DISABLED
+void plot_fp_word( //draw block of words
+ TO_BLOCK *block, //block to draw
+ float pitch, //pitch to draw with
+ float nonspace //for space threshold
+ ) {
+ TO_ROW *row; //current row
+ TO_ROW_IT row_it = block->get_rows ();
+
+ for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+ row = row_it.data ();
+ row->min_space = static_cast<int32_t>((pitch + nonspace) / 2); //midway between pitch and nonspace
+ row->max_nonspace = row->min_space;
+ row->space_threshold = row->min_space;
+ plot_word_decisions (to_win, static_cast<int16_t>(pitch), row);
+ }
+}
+#endif
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/topitch.h b/tesseract/src/textord/topitch.h
new file mode 100644
index 00000000..39b239f2
--- /dev/null
+++ b/tesseract/src/textord/topitch.h
@@ -0,0 +1,191 @@
+/**********************************************************************
+ * File: topitch.h (Formerly to_pitch.h)
+ * Description: Code to determine fixed pitchness and the pitch if fixed.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef TOPITCH_H
+#define TOPITCH_H
+
+#include "blobbox.h"
+
+namespace tesseract {
+
+class Tesseract;
+
+extern BOOL_VAR_H (textord_debug_pitch_test, false,
+"Debug on fixed pitch test");
+extern BOOL_VAR_H (textord_debug_pitch_metric, false,
+"Write full metric stuff");
+extern BOOL_VAR_H (textord_show_row_cuts, false, "Draw row-level cuts");
+extern BOOL_VAR_H (textord_show_page_cuts, false, "Draw page-level cuts");
+extern BOOL_VAR_H (textord_pitch_cheat, false,
+"Use correct answer for fixed/prop");
+extern BOOL_VAR_H (textord_blockndoc_fixed, true,
+"Attempt whole doc/block fixed pitch");
+extern BOOL_VAR_H (textord_fast_pitch_test, false,
+"Do even faster pitch algorithm");
+extern double_VAR_H (textord_projection_scale, 0.125,
+"Ding rate for mid-cuts");
+extern double_VAR_H (textord_balance_factor, 2.0,
+"Ding rate for unbalanced char cells");
+
+void compute_fixed_pitch(ICOORD page_tr, // top right
+ TO_BLOCK_LIST* port_blocks, // input list
+ float gradient, // page skew
+ FCOORD rotation, // for drawing
+ bool testing_on); // correct orientation
+void fix_row_pitch( //get some value
+ TO_ROW *bad_row, //row to fix
+ TO_BLOCK *bad_block, //block of bad_row
+ TO_BLOCK_LIST *blocks, //blocks to scan
+ int32_t row_target, //number of row
+ int32_t block_target //number of block
+ );
+void compute_block_pitch(TO_BLOCK* block, // input list
+ FCOORD rotation, // for drawing
+ int32_t block_index, // block number
+ bool testing_on); // correct orientation
+bool compute_rows_pitch( //find line stats
+ TO_BLOCK* block, //block to do
+ int32_t block_index, //block number
+ bool testing_on //correct orientation
+);
+bool try_doc_fixed( //determine pitch
+ ICOORD page_tr, //top right
+ TO_BLOCK_LIST* port_blocks, //input list
+ float gradient //page skew
+);
+bool try_block_fixed( //find line stats
+ TO_BLOCK* block, //block to do
+ int32_t block_index //block number
+);
+bool try_rows_fixed( //find line stats
+ TO_BLOCK* block, //block to do
+ int32_t block_index, //block number
+ bool testing_on //correct orientation
+);
+void print_block_counts( //find line stats
+ TO_BLOCK *block, //block to do
+ int32_t block_index //block number
+ );
+void count_block_votes( //find line stats
+ TO_BLOCK *block, //block to do
+ int32_t &def_fixed, //add to counts
+ int32_t &def_prop,
+ int32_t &maybe_fixed,
+ int32_t &maybe_prop,
+ int32_t &corr_fixed,
+ int32_t &corr_prop,
+ int32_t &dunno);
+bool row_pitch_stats( //find line stats
+ TO_ROW* row, //current row
+ int32_t maxwidth, //of spaces
+ bool testing_on //correct orientation
+);
+bool find_row_pitch( //find lines
+ TO_ROW* row, //row to do
+ int32_t maxwidth, //max permitted space
+ int32_t dm_gap, //ignorable gaps
+ TO_BLOCK* block, //block of row
+ int32_t block_index, //block_number
+ int32_t row_index, //number of row
+ bool testing_on //correct orientation
+);
+bool fixed_pitch_row( //set row pitch decision
+ TO_ROW* row, //row to do
+ BLOCK* block, //block of row, may be nullptr
+ int32_t block_index //block_number
+);
+bool count_pitch_stats( //collect gap/pitch stats
+ TO_ROW* row, //row to do
+ STATS* gap_stats, //blob gaps
+ STATS* pitch_stats, //centre-centre stats
+ float initial_pitch, //guess at pitch
+ float min_space, //estimate space size
+ bool ignore_outsize, //discard big objects
+ bool split_outsize, //split big objects
+ int32_t dm_gap //ignorable gaps
+);
+float tune_row_pitch( //find fp cells
+ TO_ROW* row, //row to do
+ STATS* projection, //vertical projection
+ int16_t projection_left, //edge of projection
+ int16_t projection_right, //edge of projection
+ float space_size, //size of blank
+ float& initial_pitch, //guess at pitch, updated to best pitch
+ float& best_sp_sd, //space sd
+ int16_t& best_mid_cuts, //no of cheap cuts
+ ICOORDELT_LIST* best_cells, //row cells
+ bool testing_on //individual words
+);
+float tune_row_pitch2( //find fp cells (fast version)
+ TO_ROW* row, //row to do
+ STATS* projection, //vertical projection
+ int16_t projection_left, //edge of projection
+ int16_t projection_right, //edge of projection
+ float space_size, //size of blank
+ float& initial_pitch, //guess at pitch, updated to best pitch
+ float& best_sp_sd, //space sd
+ int16_t& best_mid_cuts, //no of cheap cuts
+ ICOORDELT_LIST* best_cells, //row cells
+ bool testing_on //individual words
+);
+float compute_pitch_sd( //find fp cells
+ TO_ROW* row, //row to do
+ STATS* projection, //vertical projection
+ int16_t projection_left, //edge
+ int16_t projection_right, //edge
+ float space_size, //size of blank
+ float initial_pitch, //guess at pitch
+ float& sp_sd, //space sd
+ int16_t& mid_cuts, //no of free cuts
+ ICOORDELT_LIST* row_cells, //list of chop pts
+ bool testing_on, //individual words
+ int16_t start = 0, //start of good range
+ int16_t end = 0 //end of good range
+);
+float compute_pitch_sd2( //find fp cells (whole row)
+ TO_ROW* row, //row to do
+ STATS* projection, //vertical projection
+ int16_t projection_left, //edge
+ int16_t projection_right, //edge
+ float initial_pitch, //guess at pitch
+ int16_t& occupation, //no of occupied cells
+ int16_t& mid_cuts, //no of free cuts
+ ICOORDELT_LIST* row_cells, //list of chop pts
+ bool testing_on, //individual words
+ int16_t start = 0, //start of good range
+ int16_t end = 0 //end of good range
+);
+void print_pitch_sd( //print pitch sd stats
+ TO_ROW *row, //row to do
+ STATS *projection, //vertical projection
+ int16_t projection_left, //edges of projection
+ int16_t projection_right,
+ float space_size, //size of blank
+ float initial_pitch //guess at pitch
+ );
+void find_repeated_chars(TO_BLOCK* block, // Block to search.
+ bool testing_on); // Debug mode.
+void plot_fp_word( //draw block of words
+ TO_BLOCK *block, //block to draw
+ float pitch, //pitch to draw with
+ float nonspace //for space threshold
+ );
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/tordmain.cpp b/tesseract/src/textord/tordmain.cpp
new file mode 100644
index 00000000..7f91b1ff
--- /dev/null
+++ b/tesseract/src/textord/tordmain.cpp
@@ -0,0 +1,994 @@
+/**********************************************************************
+ * File: tordmain.cpp (Formerly textordp.c)
+ * Description: C++ top level textord code.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#define _USE_MATH_DEFINES // for M_PI
+
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "tordmain.h"
+
+#include "arrayaccess.h" // for GET_DATA_BYTE
+#include "blobbox.h" // for BLOBNBOX_IT, BLOBNBOX, TO_BLOCK, TO_B...
+#include "ccstruct.h" // for CCStruct, CCStruct::kXHeightFraction
+#include "clst.h" // for CLISTIZE
+#include "coutln.h" // for C_OUTLINE_IT, C_OUTLINE_LIST, C_OUTLINE
+#include "drawtord.h" // for plot_box_list, to_win, create_to_win
+#include "edgblob.h" // for extract_edges
+#include "errcode.h" // for ASSERT_HOST, ...
+#include "makerow.h" // for textord_test_x, textord_test_y, texto...
+#include "ocrblock.h" // for BLOCK_IT, BLOCK, BLOCK_LIST (ptr only)
+#include "ocrrow.h" // for ROW, ROW_IT, ROW_LIST, tweak_row_base...
+#include "params.h" // for DoubleParam, BoolParam, IntParam
+#include "pdblock.h" // for PDBLK
+#include "points.h" // for FCOORD, ICOORD
+#include "polyblk.h" // for POLY_BLOCK
+#include "quadratc.h" // for QUAD_COEFFS
+#include "quspline.h" // for QSPLINE, tweak_row_baseline
+#include "rect.h" // for TBOX
+#include "scrollview.h" // for ScrollView, ScrollView::WHITE
+#include "statistc.h" // for STATS
+#include "stepblob.h" // for C_BLOB_IT, C_BLOB, C_BLOB_LIST
+#include "textord.h" // for Textord, WordWithBox, WordGrid, WordS...
+#include "tprintf.h" // for tprintf
+#include "werd.h" // for WERD_IT, WERD, WERD_LIST, W_DONT_CHOP
+
+#include "genericvector.h" // for PointerVector, GenericVector
+
+#include "allheaders.h" // for pixDestroy, pixGetHeight, boxCreate
+
+#include <cfloat> // for FLT_MAX
+#include <cmath> // for ceil, floor, M_PI
+#include <cstdint> // for INT16_MAX, uint32_t, int32_t, int16_t
+
+namespace tesseract {
+
+#define MAX_NEAREST_DIST 600 //upper range limit of the blob-height STATS in filter_noise_blobs (comment formerly read: for block skew stats)
+
+CLISTIZE(WordWithBox)
+
+/**********************************************************************
+ * SetBlobStrokeWidth
+ *
+ * Set the horizontal and vertical stroke widths in the blob, measured on a distance image of the blob's rectangle clipped from pix.
+ **********************************************************************/
+void SetBlobStrokeWidth(Pix* pix, BLOBNBOX* blob) {
+ // Cut the blob rectangle into a Pix.
+ int pix_height = pixGetHeight(pix);
+ const TBOX& box = blob->bounding_box();
+ int width = box.width();
+ int height = box.height();
+ Box* blob_pix_box = boxCreate(box.left(), pix_height - box.top(), // y is flipped against the pix height
+ width, height);
+ Pix* pix_blob = pixClipRectangle(pix, blob_pix_box, nullptr); // NOTE(review): result is not null-checked
+ boxDestroy(&blob_pix_box);
+ Pix* dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG); // 4-connected, 8 bpp output; NOTE(review): not null-checked
+ pixDestroy(&pix_blob);
+ // Compute the stroke widths.
+ uint32_t* data = pixGetData(dist_pix);
+ int wpl = pixGetWpl(dist_pix); // 32-bit words per image row
+ // Horizontal width of stroke.
+ STATS h_stats(0, width + 1);
+ for (int y = 0; y < height; ++y) {
+ uint32_t* pixels = data + y*wpl; // start of row y
+ int prev_pixel = 0;
+ int pixel = GET_DATA_BYTE(pixels, 0);
+ for (int x = 1; x < width; ++x) {
+ int next_pixel = GET_DATA_BYTE(pixels, x); // pixel is at x - 1, prev_pixel at x - 2
+ // We are looking for a pixel that is equal to its vertical neighbours,
+ // yet greater than its left neighbour.
+ if (prev_pixel < pixel &&
+ (y == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
+ (y == height - 1 || pixel == GET_DATA_BYTE(pixels + wpl, x - 1))) {
+ if (pixel > next_pixel) {
+ // Single local max, so an odd width.
+ h_stats.add(pixel * 2 - 1, 1);
+ } else if (pixel == next_pixel && x + 1 < width &&
+ pixel > GET_DATA_BYTE(pixels, x + 1)) {
+ // Double local max, so an even width.
+ h_stats.add(pixel * 2, 1);
+ }
+ }
+ prev_pixel = pixel;
+ pixel = next_pixel;
+ }
+ }
+ // Vertical width of stroke.
+ STATS v_stats(0, height + 1);
+ for (int x = 0; x < width; ++x) {
+ int prev_pixel = 0;
+ int pixel = GET_DATA_BYTE(data, x); // row 0 of column x
+ for (int y = 1; y < height; ++y) {
+ uint32_t* pixels = data + y*wpl; // row y; pixel is at row y - 1, i.e. pixels - wpl
+ int next_pixel = GET_DATA_BYTE(pixels, x);
+ // We are looking for a pixel that is equal to its horizontal neighbours,
+ // yet greater than its upper neighbour.
+ if (prev_pixel < pixel &&
+ (x == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) &&
+ (x == width - 1 || pixel == GET_DATA_BYTE(pixels - wpl, x + 1))) {
+ if (pixel > next_pixel) {
+ // Single local max, so an odd width.
+ v_stats.add(pixel * 2 - 1, 1);
+ } else if (pixel == next_pixel && y + 1 < height &&
+ pixel > GET_DATA_BYTE(pixels + wpl, x)) {
+ // Double local max, so an even width.
+ v_stats.add(pixel * 2, 1);
+ }
+ }
+ prev_pixel = pixel;
+ pixel = next_pixel;
+ }
+ }
+ pixDestroy(&dist_pix);
+ // Store the horizontal and vertical width in the blob, keeping both
+ // widths if there is enough information, otherwise only the one with
+ // the most samples. "Enough" means at least (width + height) / 4 samples.
+ // If there are insufficient samples, store zero, rather than using
+ // 2*area/perimeter, as the numbers that gives do not match the numbers
+ // from the distance method.
+ if (h_stats.get_total() >= (width + height) / 4) {
+ blob->set_horz_stroke_width(h_stats.ile(0.5f)); // ile(0.5f): presumably the median -- TODO confirm
+ if (v_stats.get_total() >= (width + height) / 4)
+ blob->set_vert_stroke_width(v_stats.ile(0.5f));
+ else
+ blob->set_vert_stroke_width(0.0f);
+ } else {
+ if (v_stats.get_total() >= (width + height) / 4 ||
+ v_stats.get_total() > h_stats.get_total()) {
+ blob->set_horz_stroke_width(0.0f);
+ blob->set_vert_stroke_width(v_stats.ile(0.5f));
+ } else {
+ blob->set_horz_stroke_width(h_stats.get_total() > 2 ? h_stats.ile(0.5f) // needs more than 2 samples
+ : 0.0f);
+ blob->set_vert_stroke_width(0.0f);
+ }
+ }
+}
+
+/**********************************************************************
+ * assign_blobs_to_blocks2
+ *
+ * Make a TO_BLOCK for each BLOCK and append it to port_blocks, moving the block's blobs and reject blobs into it as BLOBNBOXes with stroke widths set.
+ **********************************************************************/
+
+void assign_blobs_to_blocks2(Pix* pix,
+ BLOCK_LIST *blocks, // blocks to process
+ TO_BLOCK_LIST *port_blocks) { // output list
+ BLOCK *block; // current block
+ BLOBNBOX *newblob; // created blob
+ C_BLOB *blob; // current blob
+ BLOCK_IT block_it = blocks;
+ C_BLOB_IT blob_it; // iterator over the source C_BLOB lists
+ BLOBNBOX_IT port_box_it; // iterator over the destination BLOBNBOX lists
+ // destination iterator
+ TO_BLOCK_IT port_block_it = port_blocks;
+ TO_BLOCK *port_block; // created block
+
+ for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
+ block = block_it.data();
+ port_block = new TO_BLOCK(block);
+
+ // Convert the good blobs from block->blob_list() into port_block->blobs.
+ port_box_it.set_to_list(&port_block->blobs);
+ blob_it.set_to_list(block->blob_list());
+ for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+ blob = blob_it.extract(); // takes the blob out of the source list
+ newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX.
+ SetBlobStrokeWidth(pix, newblob);
+ port_box_it.add_after_then_move(newblob);
+ }
+
+ // Put the blobs from block->reject_blobs() in port_block->noise_blobs, which
+ // allows them to be reconsidered and sorted back into rows and recover outlines
+ // mistakenly rejected.
+ port_box_it.set_to_list(&port_block->noise_blobs);
+ blob_it.set_to_list(block->reject_blobs());
+ for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+ blob = blob_it.extract(); // takes the blob out of the reject list
+ newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX.
+ SetBlobStrokeWidth(pix, newblob);
+ port_box_it.add_after_then_move(newblob);
+ }
+
+ port_block_it.add_after_then_move(port_block);
+ }
+}
+
+/**********************************************************************
+ * find_components
+ *
+ * Find the C_OUTLINEs of the connected components in each block, put them
+ * in C_BLOBs, and filter them by size, putting the different size
+ * grades on different lists in the matching TO_BLOCK in to_blocks.
+ **********************************************************************/
+
+void Textord::find_components(Pix* pix, BLOCK_LIST *blocks,
+ TO_BLOCK_LIST *to_blocks) {
+ int width = pixGetWidth(pix);
+ int height = pixGetHeight(pix);
+ if (width > INT16_MAX || height > INT16_MAX) {
+ tprintf("Input image too large! (%d, %d)\n", width, height);
+ return; // Can't handle it; to_blocks is left untouched.
+ }
+
+ BLOCK_IT block_it(blocks); // iterator
+ for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+ block_it.forward()) {
+ BLOCK* block = block_it.data();
+ if (block->pdblk.poly_block() == nullptr || block->pdblk.poly_block()->IsText()) { // skip blocks whose poly_block is non-text
+ extract_edges(pix, block);
+ }
+ }
+
+ assign_blobs_to_blocks2(pix, blocks, to_blocks);
+ ICOORD page_tr(width, height); // top right corner of the page
+ filter_blobs(page_tr, to_blocks, !textord_test_landscape); // third argument is filter_blobs' testing_on
+}
+
+/**********************************************************************
+ * filter_blobs
+ *
+ * Sort the blobs into sizes in all the blocks for later work, and set each block's line_size, line_spacing and max_blob_size.
+ **********************************************************************/
+
+void Textord::filter_blobs(ICOORD page_tr, // top right
+ TO_BLOCK_LIST* blocks, // blocks to filter in place
+ bool testing_on) { // for plotting
+ TO_BLOCK_IT block_it = blocks; // iterator over blocks
+ TO_BLOCK *block; // current block
+
+ #ifndef GRAPHICS_DISABLED
+ if (to_win != nullptr)
+ to_win->Clear();
+ #endif // !GRAPHICS_DISABLED
+
+ for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+ block_it.forward()) {
+ block = block_it.data();
+ block->line_size = filter_noise_blobs(&block->blobs,
+ &block->noise_blobs,
+ &block->small_blobs,
+ &block->large_blobs);
+ if (block->line_size == 0) block->line_size = 1; // avoid a zero line size
+ block->line_spacing = block->line_size *
+ (tesseract::CCStruct::kDescenderFraction +
+ tesseract::CCStruct::kXHeightFraction +
+ 2 * tesseract::CCStruct::kAscenderFraction) /
+ tesseract::CCStruct::kXHeightFraction;
+ block->line_size *= textord_min_linesize; // scaled only after line_spacing was computed
+ block->max_blob_size = block->line_size * textord_excess_blobsize;
+
+ #ifndef GRAPHICS_DISABLED
+ if (textord_show_blobs && testing_on) {
+ if (to_win == nullptr)
+ create_to_win(page_tr);
+ block->plot_graded_blobs(to_win);
+ }
+ if (textord_show_boxes && testing_on) {
+ if (to_win == nullptr)
+ create_to_win(page_tr);
+ plot_box_list(to_win, &block->noise_blobs, ScrollView::WHITE);
+ plot_box_list(to_win, &block->small_blobs, ScrollView::WHITE);
+ plot_box_list(to_win, &block->large_blobs, ScrollView::WHITE);
+ plot_box_list(to_win, &block->blobs, ScrollView::WHITE);
+ }
+ #endif // !GRAPHICS_DISABLED
+ }
+}
+
+/**********************************************************************
+ * filter_noise_blobs
+ *
+ * Move noise, small and large blobs from src_list to separate lists and return a size estimate derived from the heights of the blobs that remain.
+ **********************************************************************/
+
+float Textord::filter_noise_blobs(
+ BLOBNBOX_LIST *src_list, // original list
+ BLOBNBOX_LIST *noise_list, // noise list
+ BLOBNBOX_LIST *small_list, // small blobs
+ BLOBNBOX_LIST *large_list) { // large blobs
+ int16_t height; //height of blob
+ int16_t width; //width of blob
+ BLOBNBOX *blob; //current blob
+ float initial_x; //first guess
+ BLOBNBOX_IT src_it = src_list; //iterators
+ BLOBNBOX_IT noise_it = noise_list;
+ BLOBNBOX_IT small_it = small_list;
+ BLOBNBOX_IT large_it = large_list;
+ STATS size_stats (0, MAX_NEAREST_DIST);
+ //blob heights
+ float min_y; //size limits
+ float max_y;
+ float max_x;
+ float max_height; //of good blobs
+
+ for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { // pass 1: pull out noise and dense blobs
+ blob = src_it.data();
+ if (blob->bounding_box().height() < textord_max_noise_size)
+ noise_it.add_after_then_move(src_it.extract());
+ else if (blob->enclosed_area() >= blob->bounding_box().height()
+ * blob->bounding_box().width() * textord_noise_area_ratio)
+ small_it.add_after_then_move(src_it.extract()); // parked in small_list; re-examined below
+ }
+ for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { // heights of what is left
+ size_stats.add(src_it.data()->bounding_box().height(), 1);
+ }
+ initial_x = size_stats.ile(textord_initialx_ile);
+ max_y = ceil(initial_x *
+ (tesseract::CCStruct::kDescenderFraction +
+ tesseract::CCStruct::kXHeightFraction +
+ 2 * tesseract::CCStruct::kAscenderFraction) /
+ tesseract::CCStruct::kXHeightFraction);
+ min_y = floor (initial_x / 2);
+ max_x = ceil (initial_x * textord_width_limit);
+ small_it.move_to_first ();
+ for (small_it.mark_cycle_pt (); !small_it.cycled_list ();
+ small_it.forward ()) { // pass 2: re-grade the small list against the height limits
+ height = small_it.data()->bounding_box().height();
+ if (height > max_y)
+ large_it.add_after_then_move(small_it.extract ());
+ else if (height >= min_y)
+ src_it.add_after_then_move(small_it.extract ()); // back into the source list
+ }
+ size_stats.clear ();
+ for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) { // pass 3: final grading of the source list
+ height = src_it.data ()->bounding_box ().height ();
+ width = src_it.data ()->bounding_box ().width ();
+ if (height < min_y)
+ small_it.add_after_then_move (src_it.extract ());
+ else if (height > max_y || width > max_x)
+ large_it.add_after_then_move (src_it.extract ());
+ else
+ size_stats.add (height, 1); // only blobs kept in src_list are counted
+ }
+ max_height = size_stats.ile (textord_initialasc_ile);
+ // tprintf("max_y=%g, min_y=%g, initial_x=%g, max_height=%g,",
+ // max_y,min_y,initial_x,max_height);
+ max_height *= tesseract::CCStruct::kXHeightCapRatio;
+ if (max_height > initial_x)
+ initial_x = max_height; // return the larger of the two estimates
+ // tprintf(" ret=%g\n",initial_x);
+ return initial_x;
+}
+
+// Fixes the block so it obeys all the rules:
+// Must have at least one ROW.
+// Must have at least one WERD.
+// WERDs contain a fake blob.
+void Textord::cleanup_nontext_block(BLOCK* block) {
+ // Non-text blocks must contain at least one row.
+ ROW_IT row_it(block->row_list());
+ if (row_it.empty()) {
+ const TBOX& box = block->pdblk.bounding_box();
+ float height = box.height();
+ int32_t xstarts[2] = {box.left(), box.right()}; // one segment spanning the block box
+ double coeffs[3] = {0.0, 0.0, static_cast<double>(box.bottom())}; // constant baseline at box.bottom()
+ ROW* row = new ROW(1, xstarts, coeffs, height / 2.0f, height / 4.0f, // NOTE(review): confirm the meaning of the size arguments against the ROW constructor
+ height / 4.0f, 0, 1);
+ row_it.add_after_then_move(row);
+ }
+ // Each row must contain at least one word.
+ for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+ ROW* row = row_it.data();
+ WERD_IT w_it(row->word_list());
+ if (w_it.empty()) {
+ // Make a fake blob to put in the word, using the block box if this is the only row.
+ TBOX box = block->row_list()->singleton() ? block->pdblk.bounding_box()
+ : row->bounding_box();
+ C_BLOB* blob = C_BLOB::FakeBlob(box);
+ C_BLOB_LIST blobs;
+ C_BLOB_IT blob_it(&blobs);
+ blob_it.add_after_then_move(blob);
+ WERD* word = new WERD(&blobs, 0, nullptr);
+ w_it.add_after_then_move(word);
+ }
+ // Each word must contain at least one blob.
+ for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
+ WERD* word = w_it.data();
+ // Just assert that this is true, as it would be useful to find
+ // out why it isn't.
+ ASSERT_HOST(!word->cblob_list()->empty());
+ }
+ row->recalc_bounding_box();
+ }
+}
+
+/**********************************************************************
+ * cleanup_blocks
+ *
+ * Delete empty blocks, rows from the page. Non-text blocks are repaired by cleanup_nontext_block instead; rows are only examined when clean_noise is true.
+ **********************************************************************/
+
+void Textord::cleanup_blocks(bool clean_noise, BLOCK_LIST* blocks) {
+ BLOCK_IT block_it = blocks; //iterator
+ ROW_IT row_it; //row iterator
+
+ int num_rows = 0; // rows kept in the current block
+ int num_rows_all = 0; // rows seen in the current block
+ int num_blocks = 0; // text blocks kept
+ int num_blocks_all = 0; // text blocks seen
+ for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+ block_it.forward()) {
+ BLOCK* block = block_it.data();
+ if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) {
+ cleanup_nontext_block(block);
+ continue; // non-text blocks are not counted below
+ }
+ num_rows = 0;
+ num_rows_all = 0;
+ if (clean_noise) {
+ row_it.set_to_list(block->row_list());
+ for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+ ROW* row = row_it.data();
+ ++num_rows_all;
+ clean_small_noise_from_words(row);
+ if ((textord_noise_rejrows && !row->word_list()->empty() &&
+ clean_noise_from_row(row)) ||
+ row->word_list()->empty()) {
+ delete row_it.extract(); // lose empty row, or row judged to be noise.
+ } else {
+ if (textord_noise_rejwords)
+ clean_noise_from_words(row_it.data());
+ if (textord_blshift_maxshift >= 0)
+ tweak_row_baseline(row, textord_blshift_maxshift,
+ textord_blshift_xfraction);
+ ++num_rows;
+ }
+ }
+ }
+ if (block->row_list()->empty()) {
+ delete block_it.extract(); // Lose empty text blocks.
+ } else {
+ ++num_blocks;
+ }
+ ++num_blocks_all;
+ if (textord_noise_debug)
+ tprintf("cleanup_blocks: # rows = %d / %d\n", num_rows, num_rows_all);
+ }
+ if (textord_noise_debug)
+ tprintf("cleanup_blocks: # blocks = %d / %d\n", num_blocks, num_blocks_all);
+}
+
+
+/**********************************************************************
+ * clean_noise_from_row
+ *
+ * Count small, normal and character-like blobs/outlines in the row and return true if the row looks like garbage. Nothing is moved or deleted here.
+ **********************************************************************/
+
+bool Textord::clean_noise_from_row( //test row for noise
+ ROW* row //row to test
+) {
+ bool testing_on;
+ TBOX blob_box; //bounding box
+ C_BLOB *blob; //current blob
+ C_OUTLINE *outline; //current outline
+ WERD *word; //current word
+ int32_t blob_size; //larger of width and height
+ int32_t trans_count = 0; //no of transitions
+ int32_t trans_threshold; //noise tolerance
+ int32_t dot_count; //small objects
+ int32_t norm_count; //normal objects
+ int32_t super_norm_count; //real char-like
+ //words of row
+ WERD_IT word_it = row->word_list ();
+ C_BLOB_IT blob_it; //blob iterator
+ C_OUTLINE_IT out_it; //outline iterator
+
+ testing_on = textord_test_y > row->base_line (textord_test_x)
+ && textord_show_blobs
+ && textord_test_y < row->base_line (textord_test_x) + row->x_height ();
+ dot_count = 0;
+ norm_count = 0;
+ super_norm_count = 0;
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
+ word = word_it.data (); //current word
+ //blobs in word
+ blob_it.set_to_list (word->cblob_list ());
+ for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+ blob_it.forward ()) {
+ blob = blob_it.data ();
+ if (!word->flag (W_DONT_CHOP)) {
+ //get outlines
+ out_it.set_to_list (blob->out_list ());
+ for (out_it.mark_cycle_pt (); !out_it.cycled_list ();
+ out_it.forward ()) {
+ outline = out_it.data ();
+ blob_box = outline->bounding_box ();
+ blob_size =
+ blob_box.width () >
+ blob_box.height ()? blob_box.width () : blob_box.
+ height();
+ if (blob_size < textord_noise_sizelimit * row->x_height ())
+ dot_count++; //count small outlines
+ if (!outline->child ()->empty ()
+ && blob_box.height () <
+ (1 + textord_noise_syfract) * row->x_height ()
+ && blob_box.height () >
+ (1 - textord_noise_syfract) * row->x_height ()
+ && blob_box.width () <
+ (1 + textord_noise_sxfract) * row->x_height ()
+ && blob_box.width () >
+ (1 - textord_noise_sxfract) * row->x_height ())
+ super_norm_count++; //count x-height-sized outlines that have children
+ }
+ }
+ else
+ super_norm_count++; //each blob of a W_DONT_CHOP word counts as char-like
+ blob_box = blob->bounding_box ();
+ blob_size =
+ blob_box.width () >
+ blob_box.height ()? blob_box.width () : blob_box.height ();
+ if (blob_size >= textord_noise_sizelimit * row->x_height ()
+ && blob_size < row->x_height () * 2) {
+ trans_threshold = blob_size / textord_noise_sizefraction;
+ trans_count = blob->count_transitions (trans_threshold);
+ if (trans_count < textord_noise_translimit)
+ norm_count++;
+ }
+ else if (blob_box.height () > row->x_height () * 2
+ && (!word_it.at_first () || !blob_it.at_first ()))
+ dot_count += 2; //tall blob that is not the first blob of the first word
+ if (testing_on) {
+ tprintf
+ ("Blob at (%d,%d) -> (%d,%d), ols=%d, tc=%d, bldiff=%g\n",
+ blob_box.left (), blob_box.bottom (), blob_box.right (),
+ blob_box.top (), blob->out_list ()->length (), trans_count,
+ blob_box.bottom () - row->base_line (blob_box.left ()));
+ }
+ }
+ }
+ if (textord_noise_debug) { // NOTE(review): the verdict printed here uses textord_noise_normratio, the return below uses textord_noise_rowratio and super_norm_count
+ tprintf ("Row ending at (%d,%g):",
+ blob_box.right (), row->base_line (blob_box.right ()));
+ tprintf (" R=%g, dc=%d, nc=%d, %s\n",
+ norm_count > 0 ? static_cast<float>(dot_count) / norm_count : 9999,
+ dot_count, norm_count,
+ dot_count > norm_count * textord_noise_normratio
+ && dot_count > 2 ? "REJECTED" : "ACCEPTED");
+ }
+ return super_norm_count < textord_noise_sncount
+ && dot_count > norm_count * textord_noise_rowratio && dot_count > 2;
+}
+
+/**********************************************************************
+ * clean_noise_from_words
+ *
+ * Grade each word of the row as ok, suspect or dud from its dot/normal counts, then call WERD::CleanNoise on the words judged noisy.
+ **********************************************************************/
+
+void Textord::clean_noise_from_words( //clean noisy words
+ ROW *row //row to clean
+ ) {
+ TBOX blob_box; //bounding box
+ C_BLOB *blob; //current blob
+ C_OUTLINE *outline; //current outline
+ WERD *word; //current word
+ int32_t blob_size; //larger of width and height
+ int32_t trans_count; //no of transitions
+ int32_t trans_threshold; //noise tolerance
+ int32_t dot_count; //small objects
+ int32_t norm_count; //normal objects
+ int32_t dud_words; //number graded 2
+ int32_t ok_words; //number graded 0 or 1
+ int32_t word_index; //current word
+ //words of row
+ WERD_IT word_it = row->word_list ();
+ C_BLOB_IT blob_it; //blob iterator
+ C_OUTLINE_IT out_it; //outline iterator
+
+ ok_words = word_it.length ();
+ if (ok_words == 0 || textord_no_rejects)
+ return;
+ // per-word grade: 0 = ok, 1 = suspect, 2 = dud
+ std::vector<int8_t> word_dud(ok_words);
+ dud_words = 0;
+ ok_words = 0;
+ word_index = 0;
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
+ word = word_it.data (); //current word
+ dot_count = 0;
+ norm_count = 0;
+ //blobs in word
+ blob_it.set_to_list (word->cblob_list ());
+ for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+ blob_it.forward ()) {
+ blob = blob_it.data ();
+ if (!word->flag (W_DONT_CHOP)) {
+ //get outlines
+ out_it.set_to_list (blob->out_list ());
+ for (out_it.mark_cycle_pt (); !out_it.cycled_list ();
+ out_it.forward ()) {
+ outline = out_it.data ();
+ blob_box = outline->bounding_box ();
+ blob_size =
+ blob_box.width () >
+ blob_box.height ()? blob_box.width () : blob_box.
+ height();
+ if (blob_size < textord_noise_sizelimit * row->x_height ())
+ dot_count++; //count small outlines
+ if (!outline->child ()->empty ()
+ && blob_box.height () <
+ (1 + textord_noise_syfract) * row->x_height ()
+ && blob_box.height () >
+ (1 - textord_noise_syfract) * row->x_height ()
+ && blob_box.width () <
+ (1 + textord_noise_sxfract) * row->x_height ()
+ && blob_box.width () >
+ (1 - textord_noise_sxfract) * row->x_height ())
+ norm_count++; //count x-height-sized outlines that have children
+ }
+ }
+ else
+ norm_count++; //each blob of a W_DONT_CHOP word counts as normal
+ blob_box = blob->bounding_box ();
+ blob_size =
+ blob_box.width () >
+ blob_box.height ()? blob_box.width () : blob_box.height ();
+ if (blob_size >= textord_noise_sizelimit * row->x_height ()
+ && blob_size < row->x_height () * 2) {
+ trans_threshold = blob_size / textord_noise_sizefraction;
+ trans_count = blob->count_transitions (trans_threshold);
+ if (trans_count < textord_noise_translimit)
+ norm_count++;
+ }
+ else if (blob_box.height () > row->x_height () * 2
+ && (!word_it.at_first () || !blob_it.at_first ()))
+ dot_count += 2; //tall blob that is not the first blob of the first word
+ }
+ if (dot_count > 2 && !word->flag(W_REP_CHAR)) { //W_REP_CHAR words are always graded 0
+ if (dot_count > norm_count * textord_noise_normratio * 2)
+ word_dud[word_index] = 2;
+ else if (dot_count > norm_count * textord_noise_normratio)
+ word_dud[word_index] = 1;
+ else
+ word_dud[word_index] = 0;
+ } else {
+ word_dud[word_index] = 0;
+ }
+ if (word_dud[word_index] == 2)
+ dud_words++;
+ else
+ ok_words++;
+ word_index++;
+ }
+
+ word_index = 0;
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
+ if (word_dud[word_index] == 2
+ || (word_dud[word_index] == 1 && dud_words > ok_words)) { // suspects go too when duds outnumber the rest
+ word = word_it.data(); // Current word.
+ // Previously we threw away the entire word.
+ // Now just aggressively throw all small blobs into the reject list, where
+ // the classifier can decide whether they are actually needed.
+ word->CleanNoise(textord_noise_sizelimit * row->x_height());
+ }
+ word_index++;
+ }
+}
+
+// Remove outlines that are a tiny fraction in either width or height
+// of the word height, then delete blobs and words that end up empty.
+void Textord::clean_small_noise_from_words(ROW *row) {
+ WERD_IT word_it(row->word_list());
+ for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) {
+ WERD* word = word_it.data();
+ int min_size = static_cast<int>(
+ textord_noise_hfract * word->bounding_box().height() + 0.5); // rounded to nearest int
+ C_BLOB_IT blob_it(word->cblob_list());
+ for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
+ C_BLOB* blob = blob_it.data();
+ C_OUTLINE_IT out_it(blob->out_list());
+ for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
+ C_OUTLINE* outline = out_it.data();
+ outline->RemoveSmallRecursive(min_size, &out_it); // given the iterator, presumably so it can remove the outline itself -- TODO confirm
+ }
+ if (blob->out_list()->empty()) {
+ delete blob_it.extract(); // blob lost all of its outlines
+ }
+ }
+ if (word->cblob_list()->empty()) {
+ if (!word_it.at_last()) {
+ // The next word is no longer a fuzzy non space if it was before,
+ // since the word before is about to be deleted.
+ WERD* next_word = word_it.data_relative(1);
+ if (next_word->flag(W_FUZZY_NON)) {
+ next_word->set_flag(W_FUZZY_NON, false);
+ }
+ }
+ delete word_it.extract(); // word lost all of its blobs
+ }
+ }
+}
+
+// Local struct to hold a group of blocks that share (nearly) the same rotation.
+struct BlockGroup {
+ BlockGroup() : rotation(1.0f, 0.0f), angle(0.0f), min_xheight(1.0f) {}
+ explicit BlockGroup(BLOCK* block) // starts a group from its first block
+ : bounding_box(block->pdblk.bounding_box()),
+ rotation(block->re_rotation()),
+ angle(block->re_rotation().angle()),
+ min_xheight(block->x_height()) {
+ blocks.push_back(block);
+ }
+ // Union of block bounding boxes.
+ TBOX bounding_box;
+ // Common rotation of the blocks (re_rotation of the first block).
+ FCOORD rotation;
+ // Angle of rotation.
+ float angle;
+ // Min xheight of the blocks.
+ float min_xheight;
+ // Collection of borrowed pointers to the blocks in the group.
+ GenericVector<BLOCK*> blocks;
+};
+
+// Groups blocks by rotation, then, for each group, makes a WordGrid and calls
+// TransferDiacriticsToWords to copy the diacritic blobs to the most
+// appropriate words in the group of blocks. Source blobs are not touched.
+void Textord::TransferDiacriticsToBlockGroups(BLOBNBOX_LIST* diacritic_blobs,
+ BLOCK_LIST* blocks) {
+ // Angle difference larger than this is too much to consider equal.
+ // They should only be in multiples of M_PI/2 anyway.
+ const double kMaxAngleDiff = 0.01; // About 0.6 degrees.
+ PointerVector<BlockGroup> groups;
+ BLOCK_IT bk_it(blocks);
+ for (bk_it.mark_cycle_pt(); !bk_it.cycled_list(); bk_it.forward()) {
+ BLOCK* block = bk_it.data();
+ if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) {
+ continue; // non-text blocks take no diacritics
+ }
+ // Linear search of the groups to find a matching rotation.
+ float block_angle = block->re_rotation().angle();
+ int best_g = 0;
+ float best_angle_diff = FLT_MAX; // stays FLT_MAX while there are no groups, forcing a new one
+ for (int g = 0; g < groups.size(); ++g) {
+ double angle_diff = fabs(block_angle - groups[g]->angle);
+ if (angle_diff > M_PI) angle_diff = fabs(angle_diff - 2.0 * M_PI); // fold the difference back into [0, pi]
+ if (angle_diff < best_angle_diff) {
+ best_angle_diff = angle_diff;
+ best_g = g;
+ }
+ }
+ if (best_angle_diff > kMaxAngleDiff) {
+ groups.push_back(new BlockGroup(block)); // no close match: start a new group
+ } else {
+ groups[best_g]->blocks.push_back(block);
+ groups[best_g]->bounding_box += block->pdblk.bounding_box();
+ float x_height = block->x_height();
+ if (x_height < groups[best_g]->min_xheight)
+ groups[best_g]->min_xheight = x_height;
+ }
+ }
+ // Now process each group of blocks.
+ PointerVector<WordWithBox> word_ptrs; // holds every WordWithBox until this function returns
+ for (int g = 0; g < groups.size(); ++g) {
+ const BlockGroup* group = groups[g];
+ if (group->bounding_box.null_box()) continue;
+ WordGrid word_grid(group->min_xheight, group->bounding_box.botleft(), // first argument is the group's min x-height
+ group->bounding_box.topright());
+ for (int b = 0; b < group->blocks.size(); ++b) {
+ ROW_IT row_it(group->blocks[b]->row_list());
+ for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+ ROW* row = row_it.data();
+ // Put the words of the row into the grid.
+ WERD_IT w_it(row->word_list());
+ for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) {
+ WERD* word = w_it.data();
+ auto* box_word = new WordWithBox(word);
+ word_grid.InsertBBox(true, true, box_word);
+ // Save the pointer where it will be auto-deleted.
+ word_ptrs.push_back(box_word);
+ }
+ }
+ }
+ FCOORD rotation = group->rotation;
+ // Make it a forward rotation that will transform blob coords to block.
+ rotation.set_y(-rotation.y());
+ TransferDiacriticsToWords(diacritic_blobs, rotation, &word_grid);
+ }
+}
+
+// Places a copy of blobs that are near a word (after applying rotation to the
+// blob) in the most appropriate word, unless there is doubt, in which case a
+// blob can end up in two words. Source blobs are not touched.
+void Textord::TransferDiacriticsToWords(BLOBNBOX_LIST* diacritic_blobs,
+ const FCOORD& rotation,
+ WordGrid* word_grid) {
+ WordSearch ws(word_grid);
+ BLOBNBOX_IT b_it(diacritic_blobs);
+ // Apply rotation to each blob before finding the nearest words. The rotation
+ // allows us to only consider above/below placement and not left/right on
+ // vertical text, because all text is horizontal here.
+ for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
+ BLOBNBOX* blobnbox = b_it.data();
+ TBOX blob_box = blobnbox->bounding_box(); // a copy; the source blob's box is not rotated
+ blob_box.rotate(rotation);
+ ws.StartRectSearch(blob_box);
+ // Above/below refer to word position relative to diacritic. Since some
+ // scripts eg Kannada/Telugu habitually put diacritics below words, and
+ // others eg Thai/Vietnamese/Latin put most diacritics above words, try
+ // for both if there isn't much in it.
+ WordWithBox* best_above_word = nullptr;
+ WordWithBox* best_below_word = nullptr;
+ int best_above_distance = 0;
+ int best_below_distance = 0;
+ for (WordWithBox* word = ws.NextRectSearch(); word != nullptr;
+ word = ws.NextRectSearch()) {
+ if (word->word()->flag(W_REP_CHAR)) continue; // W_REP_CHAR words never receive diacritics
+ TBOX word_box = word->true_bounding_box();
+ int x_distance = blob_box.x_gap(word_box);
+ int y_distance = blob_box.y_gap(word_box);
+ if (x_distance > 0) {
+ // Arbitrarily divide x-distance by 2 if there is a major y overlap,
+ // and the word is to the left of the diacritic. If the
+ // diacritic is a dropped broken character between two words, this will
+ // help send all the pieces to a single word, instead of splitting them
+ // over the 2 words.
+ if (word_box.major_y_overlap(blob_box) &&
+ blob_box.left() > word_box.right()) {
+ x_distance /= 2;
+ }
+ y_distance += x_distance; // a horizontal gap is added to the distance score
+ }
+ if (word_box.y_middle() > blob_box.y_middle() &&
+ (best_above_word == nullptr || y_distance < best_above_distance)) {
+ best_above_word = word;
+ best_above_distance = y_distance;
+ }
+ if (word_box.y_middle() <= blob_box.y_middle() &&
+ (best_below_word == nullptr || y_distance < best_below_distance)) {
+ best_below_word = word;
+ best_below_distance = y_distance;
+ }
+ }
+ bool above_good = // above wins unless below is closer by at least the blob height
+ best_above_word != nullptr &&
+ (best_below_word == nullptr ||
+ best_above_distance < best_below_distance + blob_box.height());
+ bool below_good = // same test mirrored
+ best_below_word != nullptr && best_below_word != best_above_word &&
+ (best_above_word == nullptr ||
+ best_below_distance < best_above_distance + blob_box.height());
+ if (below_good) {
+ C_BLOB* copied_blob = C_BLOB::deep_copy(blobnbox->cblob());
+ copied_blob->rotate(rotation);
+ // Put the blob into the word's reject blobs list.
+ C_BLOB_IT blob_it(best_below_word->RejBlobs());
+ blob_it.add_to_end(copied_blob);
+ }
+ if (above_good) {
+ C_BLOB* copied_blob = C_BLOB::deep_copy(blobnbox->cblob()); // separate copy, so both words may get one
+ copied_blob->rotate(rotation);
+ // Put the blob into the word's reject blobs list.
+ C_BLOB_IT blob_it(best_above_word->RejBlobs());
+ blob_it.add_to_end(copied_blob);
+ }
+ }
+}
+
+/**********************************************************************
+ * tweak_row_baseline
+ *
+ * Shift baseline to fit the blobs more accurately where they are
+ * close enough. Rebuilds row->baseline from flat per-blob segments mixed with the original spline segments.
+ **********************************************************************/
+
+void tweak_row_baseline(ROW *row,
+ double blshift_maxshift,
+ double blshift_xfraction) {
+ TBOX blob_box; //bounding box
+ C_BLOB *blob; //current blob
+ WERD *word; //current word
+ int32_t blob_count; //no of blobs
+ int32_t src_index; //source segment
+ int32_t dest_index; //destination segment
+ float ydiff; //baseline error
+ float x_centre; //centre of blob
+ //words of row
+ WERD_IT word_it = row->word_list ();
+ C_BLOB_IT blob_it; //blob iterator
+
+ blob_count = 0;
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
+ word = word_it.data (); //current word
+ //get total blobs
+ blob_count += word->cblob_list ()->length ();
+ }
+ if (blob_count == 0)
+ return; //nothing to fit; baseline left as it is
+ // spline segment boundaries: one more entry than the segment capacity of coeffs
+ std::vector<int32_t> xstarts(blob_count + row->baseline.segments + 1);
+ // spline coeffs: 3 (a, b, c) per segment
+ std::vector<double> coeffs((blob_count + row->baseline.segments) * 3);
+
+ src_index = 0;
+ dest_index = 0;
+ xstarts[0] = row->baseline.xcoords[0];
+ for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) {
+ word = word_it.data (); //current word
+ //blobs in word
+ blob_it.set_to_list (word->cblob_list ());
+ for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+ blob_it.forward ()) {
+ blob = blob_it.data ();
+ blob_box = blob->bounding_box ();
+ x_centre = (blob_box.left () + blob_box.right ()) / 2.0;
+ ydiff = blob_box.bottom () - row->base_line (x_centre);
+ if (ydiff < 0)
+ ydiff = -ydiff / row->x_height (); //absolute error as a fraction of x-height
+ else
+ ydiff = ydiff / row->x_height ();
+ if (ydiff < blshift_maxshift
+ && blob_box.height () / row->x_height () > blshift_xfraction) { //close enough and tall enough
+ if (xstarts[dest_index] >= x_centre)
+ xstarts[dest_index] = blob_box.left ();
+ coeffs[dest_index * 3] = 0; //flat segment at the blob bottom
+ coeffs[dest_index * 3 + 1] = 0;
+ coeffs[dest_index * 3 + 2] = blob_box.bottom ();
+ //shift it
+ dest_index++;
+ xstarts[dest_index] = blob_box.right () + 1;
+ }
+ else {
+ if (xstarts[dest_index] <= x_centre) { //otherwise copy source segments up to the blob centre
+ while (row->baseline.xcoords[src_index + 1] <= x_centre
+ && src_index < row->baseline.segments - 1) {
+ if (row->baseline.xcoords[src_index + 1] >
+ xstarts[dest_index]) {
+ coeffs[dest_index * 3] =
+ row->baseline.quadratics[src_index].a;
+ coeffs[dest_index * 3 + 1] =
+ row->baseline.quadratics[src_index].b;
+ coeffs[dest_index * 3 + 2] =
+ row->baseline.quadratics[src_index].c;
+ dest_index++;
+ xstarts[dest_index] =
+ row->baseline.xcoords[src_index + 1];
+ }
+ src_index++;
+ }
+ coeffs[dest_index * 3] =
+ row->baseline.quadratics[src_index].a;
+ coeffs[dest_index * 3 + 1] =
+ row->baseline.quadratics[src_index].b;
+ coeffs[dest_index * 3 + 2] =
+ row->baseline.quadratics[src_index].c;
+ dest_index++;
+ xstarts[dest_index] = row->baseline.xcoords[src_index + 1];
+ }
+ }
+ }
+ }
+ while (src_index < row->baseline.segments
+ && row->baseline.xcoords[src_index + 1] <= xstarts[dest_index])
+ src_index++; //skip source segments that end at or before the current start
+ while (src_index < row->baseline.segments) { //copy the remaining source segments
+ coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a;
+ coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b;
+ coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c;
+ dest_index++;
+ src_index++;
+ xstarts[dest_index] = row->baseline.xcoords[src_index];
+ }
+ //turn to spline with dest_index segments
+ row->baseline = QSPLINE(dest_index, &xstarts[0], &coeffs[0]);
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/tordmain.h b/tesseract/src/textord/tordmain.h
new file mode 100644
index 00000000..2a6e31fa
--- /dev/null
+++ b/tesseract/src/textord/tordmain.h
@@ -0,0 +1,45 @@
+/**********************************************************************
+ * File: tordmain.h (Formerly textordp.h)
+ * Description: C++ top level textord code.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef TORDMAIN_H
+#define TORDMAIN_H
+
+#include "params.h"
+#include "ocrblock.h"
+#include "blobs.h"
+#include "blobbox.h"
+
+#include <ctime>
+
+struct Pix;
+
+namespace tesseract {
+
+class Tesseract;
+
+void SetBlobStrokeWidth(Pix* pix, BLOBNBOX* blob);
+void assign_blobs_to_blocks2(Pix* pix, BLOCK_LIST *blocks,
+ TO_BLOCK_LIST *port_blocks);
+
+void tweak_row_baseline(ROW *row, // row whose baseline is replaced by a rebuilt QSPLINE
+ double blshift_maxshift, // a blob shifts the baseline only if its ydiff / x_height is below this
+ double blshift_xfraction); // ...and its height / x_height is above this
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/tospace.cpp b/tesseract/src/textord/tospace.cpp
new file mode 100644
index 00000000..6ab17a64
--- /dev/null
+++ b/tesseract/src/textord/tospace.cpp
@@ -0,0 +1,1894 @@
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+/**********************************************************************
+ * tospace.cpp
+ *
+ * Compute fuzzy word spacing thresholds for each row.
+ * I.e. set : max_nonspace
+ * space_threshold
+ * min_space
+ * kern_size
+ * space_size
+ * for each row.
+ * ONLY FOR PROPORTIONAL BLOCKS - FIXED PITCH IS ASSUMED ALREADY DONE
+ *
+ * Note: functions in this file were originally not members of any
+ * class or enclosed by any namespace. Now they are all static members
+ * of the Textord class.
+ *
+ **********************************************************************/
+
+#include "drawtord.h"
+#include "statistc.h"
+#include "textord.h"
+#include "tovars.h"
+
+// Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include <algorithm>
+#include <memory>
+
+#define MAXSPACING 128 /*max expected spacing in pix */
+
+namespace tesseract {
+void Textord::to_spacing(ICOORD page_tr,        // topright of page
+                         TO_BLOCK_LIST *blocks  // blocks on page
+                         ) {
+  // For every block: build its gap map, estimate the block-wide space and
+  // non-space gap widths, then derive per-row spacing thresholds for each
+  // row whose pitch decision is proportional.
+  TO_BLOCK_IT block_it(blocks);
+  int block_index = 1;  // 1-based block number, used in debug output
+  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
+       block_it.forward(), block_index++) {
+    TO_BLOCK *block = block_it.data();
+    // map of big vert gaps in blk
+    std::unique_ptr<GAPMAP> gapmap(new GAPMAP(block));
+
+    bool old_text_ord_proportional;     // old fixed/prop result
+    int16_t block_space_gap_width;      // estimated width of real spaces
+    int16_t block_non_space_gap_width;  // estimated width of non space gaps
+    block_spacing_stats(block, gapmap.get(), old_text_ord_proportional,
+                        block_space_gap_width, block_non_space_gap_width);
+
+    // Keep the block-level space : non-space ratio at 3:1 or more. The same
+    // ratio is used inside block_spacing_stats to correct the space width.
+    // Useful for arabic and hindi, where the non-space gap width is often
+    // over-estimated and should not be trusted.
+    if (tosp_old_to_method && tosp_old_to_constrain_sp_kn) {
+      if (static_cast<float>(block_space_gap_width) /
+              block_non_space_gap_width < 3.0) {
+        block_non_space_gap_width =
+            static_cast<int16_t>(floor(block_space_gap_width / 3.0));
+      }
+    }
+
+    TO_ROW_IT row_it(block->get_rows());
+    int row_index = 1;  // 1-based row number, used in debug output
+    for (row_it.mark_cycle_pt(); !row_it.cycled_list();
+         row_it.forward(), row_index++) {
+      TO_ROW *row = row_it.data();
+      const bool proportional_row =
+          (row->pitch_decision == PITCH_DEF_PROP) ||
+          (row->pitch_decision == PITCH_CORR_PROP);
+      if (proportional_row) {
+        if ((tosp_debug_level > 0) && !old_text_ord_proportional) {
+          tprintf("Block %d Row %d: Now Proportional\n",
+                  block_index, row_index);
+        }
+        row_spacing_stats(row, gapmap.get(), block_index, row_index,
+                          block_space_gap_width, block_non_space_gap_width);
+      } else if ((tosp_debug_level > 0) && old_text_ord_proportional) {
+        tprintf("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
+                block_index, row_index, row->pitch_decision,
+                row->fixed_pitch);
+      }
+#ifndef GRAPHICS_DISABLED
+      if (textord_show_initial_words) {
+        plot_word_decisions(to_win, static_cast<int16_t>(row->fixed_pitch),
+                            row);
+      }
+#endif
+    }
+  }
+}
+
+
+/*************************************************************************
+ * block_spacing_stats()
+ *************************************************************************/
+
+// Estimates the block-wide gap widths from the gaps between successive blob
+// boxes of the block's rows.
+//   block                      block whose rows are scanned.
+//   gapmap                     passed through to ignore_big_gap().
+//   old_text_ord_proportional  out: debug-only fixed/proportional verdict.
+//   block_space_gap_width      out: estimated real-space width, or -1 when
+//                              there are too few samples.
+//   block_non_space_gap_width  out: estimated non-space (kern) gap width.
+// Rows with an empty blob list are skipped; when tosp_only_use_prop_rows is
+// set, only rows with a proportional pitch decision contribute.
+void Textord::block_spacing_stats(
+    TO_BLOCK* block,
+    GAPMAP* gapmap,
+    bool& old_text_ord_proportional,
+    int16_t& block_space_gap_width,     // resulting estimate
+    int16_t& block_non_space_gap_width  // resulting estimate
+) {
+  TO_ROW *row;          // current row
+  BLOBNBOX_IT blob_it;  // iterator
+
+  // DEBUG USE ONLY
+  STATS centre_to_centre_stats(0, MAXSPACING);
+  STATS all_gap_stats(0, MAXSPACING);
+  STATS space_gap_stats(0, MAXSPACING);
+  int16_t minwidth = MAXSPACING;  // narrowest blob
+  TBOX blob_box;
+  TBOX prev_blob_box;
+  int16_t centre_to_centre;
+  int16_t gap_width;
+  float real_space_threshold;
+  float iqr_centre_to_centre;  // DEBUG USE ONLY
+  float iqr_all_gap_stats;     // DEBUG USE ONLY
+  int32_t end_of_row;
+  int32_t row_length;
+
+  // Pass 1: histogram every gap that is not rejected by ignore_big_gap().
+  TO_ROW_IT row_it(block->get_rows());
+  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+    row = row_it.data();
+    if (!row->blob_list()->empty() &&
+        (!tosp_only_use_prop_rows ||
+         (row->pitch_decision == PITCH_DEF_PROP) ||
+         (row->pitch_decision == PITCH_CORR_PROP))) {
+      blob_it.set_to_list(row->blob_list());
+      blob_it.mark_cycle_pt();
+      end_of_row = blob_it.data_relative(-1)->bounding_box().right();
+      if (tosp_use_pre_chopping)
+        blob_box = box_next_pre_chopped(&blob_it);
+      else if (tosp_stats_use_xht_gaps)
+        blob_box = reduced_box_next(row, &blob_it);
+      else
+        blob_box = box_next(&blob_it);
+      row_length = end_of_row - blob_box.left();
+      if (blob_box.width() < minwidth)
+        minwidth = blob_box.width();
+      prev_blob_box = blob_box;
+      while (!blob_it.cycled_list()) {
+        if (tosp_use_pre_chopping)
+          blob_box = box_next_pre_chopped(&blob_it);
+        else if (tosp_stats_use_xht_gaps)
+          blob_box = reduced_box_next(row, &blob_it);
+        else
+          blob_box = box_next(&blob_it);
+        if (blob_box.width() < minwidth)
+          minwidth = blob_box.width();
+        int16_t left = prev_blob_box.right();
+        int16_t right = blob_box.left();
+        gap_width = right - left;
+        if (!ignore_big_gap(row, row_length, gapmap, left, right)) {
+          all_gap_stats.add(gap_width, 1);
+
+          // Distance between the centres of the two boxes (DEBUG only).
+          centre_to_centre = (right + blob_box.right() -
+                              (prev_blob_box.left() + left)) / 2;
+          centre_to_centre_stats.add(centre_to_centre, 1);
+        }
+        prev_blob_box = blob_box;
+      }
+    }
+  }
+
+  // Inadequate samples
+  if (all_gap_stats.get_total() <= 1) {
+    block_non_space_gap_width = minwidth;
+    block_space_gap_width = -1;  // No est. space width
+    // DEBUG
+    old_text_ord_proportional = true;
+  } else {
+    /* For debug only ..... */
+    iqr_centre_to_centre = centre_to_centre_stats.ile(0.75) -
+                           centre_to_centre_stats.ile(0.25);
+    iqr_all_gap_stats = all_gap_stats.ile(0.75) - all_gap_stats.ile(0.25);
+    old_text_ord_proportional =
+        iqr_centre_to_centre * 2 > iqr_all_gap_stats;
+    /* .......For debug only */
+
+    /*
+    The median of the gaps is used as an estimate of the NON-SPACE gap width.
+    This RELIES on the assumption that there are more gaps WITHIN words than
+    BETWEEN words in a block
+
+    Now try to estimate the width of a real space for all real spaces in the
+    block. Do this by using a crude threshold to ignore "narrow" gaps, then
+    find the median of the "wide" gaps and use this.
+    */
+    // median gap
+    block_non_space_gap_width =
+        static_cast<int16_t>(floor(all_gap_stats.median()));
+
+    // Pass 2: histogram only the gaps wider than the crude per-row threshold.
+    row_it.set_to_list(block->get_rows());
+    for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
+      row = row_it.data();
+      if (!row->blob_list()->empty() &&
+          (!tosp_only_use_prop_rows ||
+           (row->pitch_decision == PITCH_DEF_PROP) ||
+           (row->pitch_decision == PITCH_CORR_PROP))) {
+        real_space_threshold =
+            std::max(tosp_init_guess_kn_mult * block_non_space_gap_width,
+                     tosp_init_guess_xht_mult * row->xheight);
+        blob_it.set_to_list(row->blob_list());
+        blob_it.mark_cycle_pt();
+        end_of_row = blob_it.data_relative(-1)->bounding_box().right();
+        if (tosp_use_pre_chopping)
+          blob_box = box_next_pre_chopped(&blob_it);
+        else if (tosp_stats_use_xht_gaps)
+          blob_box = reduced_box_next(row, &blob_it);
+        else
+          blob_box = box_next(&blob_it);
+        // Same orientation as pass 1 (end minus start). The previous
+        // "left - end_of_row" form handed ignore_big_gap() a non-positive
+        // row length in this pass only.
+        row_length = end_of_row - blob_box.left();
+        prev_blob_box = blob_box;
+        while (!blob_it.cycled_list()) {
+          if (tosp_use_pre_chopping)
+            blob_box = box_next_pre_chopped(&blob_it);
+          else if (tosp_stats_use_xht_gaps)
+            blob_box = reduced_box_next(row, &blob_it);
+          else
+            blob_box = box_next(&blob_it);
+          int16_t left = prev_blob_box.right();
+          int16_t right = blob_box.left();
+          gap_width = right - left;
+          if ((gap_width > real_space_threshold) &&
+              !ignore_big_gap(row, row_length, gapmap, left, right)) {
+            /*
+            If tosp_use_cert_spaces is enabled, the estimate of the space gap is
+            restricted to obvious spaces - those wider than half the xht or those
+            with wide blobs on both sides - i.e not things that are suspect 1's or
+            punctuation that is sometimes widely spaced.
+            */
+            if (!tosp_block_use_cert_spaces ||
+                (gap_width > tosp_fuzzy_space_factor2 * row->xheight) ||
+                ((gap_width > tosp_fuzzy_space_factor1 * row->xheight) &&
+                 (!tosp_narrow_blobs_not_cert ||
+                  (!narrow_blob(row, prev_blob_box) &&
+                   !narrow_blob(row, blob_box)))) ||
+                (wide_blob(row, prev_blob_box) &&
+                 wide_blob(row, blob_box)))
+              space_gap_stats.add(gap_width, 1);
+          }
+          prev_blob_box = blob_box;
+        }
+      }
+    }
+    // Inadequate samples
+    if (space_gap_stats.get_total() <= 2)
+      block_space_gap_width = -1;  // No est. space width
+    else
+      // Never let the space estimate drop below 3x the non-space estimate.
+      block_space_gap_width =
+          std::max(static_cast<int16_t>(floor(space_gap_stats.median())),
+                   static_cast<int16_t>(3 * block_non_space_gap_width));
+  }
+}
+
+
+/*************************************************************************
+ * row_spacing_stats()
+ * Set values for min_space, max_non_space based on row stats only
+ * If failure - return 0 values.
+ *
+ * Builds gap histograms for the row, chooses kern_size, space_size and
+ * space_threshold (via old_to_method(), isolated_row_stats() or the block
+ * estimates), applies the checks selected by tosp_sanity_method, and then
+ * derives the fuzzy limits min_space and max_nonspace.
+ * NOTE(review): the function is void, so "return 0 values" above looks
+ * stale - confirm.
+ *************************************************************************/
+void Textord::row_spacing_stats(
+ TO_ROW *row, // row to analyse; its spacing fields are written here
+ GAPMAP *gapmap, // passed through to ignore_big_gap() and isolated_row_stats()
+ int16_t block_idx, // block number, used in debug output
+ int16_t row_idx, // row number, used in debug output
+ int16_t block_space_gap_width, //estimate for block; <= 0 means no usable estimate
+ int16_t block_non_space_gap_width //estimate for block
+ ) {
+ //iterator
+ BLOBNBOX_IT blob_it = row->blob_list ();
+ STATS all_gap_stats (0, MAXSPACING); // every gap not rejected by ignore_big_gap()
+ STATS cert_space_gap_stats (0, MAXSPACING); // gaps >= threshold that also pass the "certain space" test
+ STATS all_space_gap_stats (0, MAXSPACING); // all gaps >= real_space_threshold
+ STATS small_gap_stats (0, MAXSPACING); // gaps < real_space_threshold
+ TBOX blob_box;
+ TBOX prev_blob_box;
+ int16_t gap_width;
+ int16_t real_space_threshold = 0;
+ int16_t max = 0; // tallest histogram pile seen so far in the max_nonspace scan
+ int16_t index;
+ int16_t large_gap_count = 0; // gaps rejected by ignore_big_gap()
+ bool suspected_table;
+ int32_t max_max_nonspace; //upper bound
+ bool good_block_space_estimate = block_space_gap_width > 0;
+ int32_t end_of_row;
+ int32_t row_length = 0;
+ float sane_space;
+ int32_t sane_threshold;
+
+ /* Collect first pass stats for row */
+
+ if (!good_block_space_estimate)
+ block_space_gap_width = int16_t (floor (row->xheight / 2)); // fall back to half the row xheight
+ if (!row->blob_list ()->empty ()) {
+ if (tosp_threshold_bias1 > 0)
+ real_space_threshold =
+ block_non_space_gap_width +
+ int16_t (floor (0.5 +
+ tosp_threshold_bias1 * (block_space_gap_width -
+ block_non_space_gap_width)));
+ else
+ real_space_threshold = //Old TO method
+ (block_space_gap_width + block_non_space_gap_width) / 2;
+ blob_it.set_to_list (row->blob_list ());
+ blob_it.mark_cycle_pt ();
+ end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
+ if (tosp_use_pre_chopping)
+ blob_box = box_next_pre_chopped (&blob_it);
+ else if (tosp_stats_use_xht_gaps)
+ blob_box = reduced_box_next (row, &blob_it);
+ else
+ blob_box = box_next (&blob_it);
+ row_length = end_of_row - blob_box.left ();
+ prev_blob_box = blob_box;
+ while (!blob_it.cycled_list ()) {
+ if (tosp_use_pre_chopping)
+ blob_box = box_next_pre_chopped (&blob_it);
+ else if (tosp_stats_use_xht_gaps)
+ blob_box = reduced_box_next (row, &blob_it);
+ else
+ blob_box = box_next (&blob_it);
+ int16_t left = prev_blob_box.right();
+ int16_t right = blob_box.left();
+ gap_width = right - left;
+ if (ignore_big_gap(row, row_length, gapmap, left, right)) {
+ large_gap_count++; // counted only; the gap is left out of every histogram
+ } else {
+ if (gap_width >= real_space_threshold) {
+ if (!tosp_row_use_cert_spaces ||
+ (gap_width > tosp_fuzzy_space_factor2 * row->xheight) ||
+ ((gap_width > tosp_fuzzy_space_factor1 * row->xheight)
+ && (!tosp_narrow_blobs_not_cert
+ || (!narrow_blob (row, prev_blob_box)
+ && !narrow_blob (row, blob_box))))
+ || (wide_blob (row, prev_blob_box)
+ && wide_blob (row, blob_box)))
+ cert_space_gap_stats.add (gap_width, 1); // the only statement governed by the if above
+ all_space_gap_stats.add (gap_width, 1); // added whether or not the gap was "certain"
+ }
+ else
+ small_gap_stats.add (gap_width, 1);
+ all_gap_stats.add (gap_width, 1);
+ }
+ prev_blob_box = blob_box;
+ }
+ }
+ suspected_table = (large_gap_count > 1) ||
+ ((large_gap_count > 0) &&
+ (all_gap_stats.get_total () <= tosp_few_samples));
+
+ /* Now determine row kern size, space size and threshold */
+
+ if ((cert_space_gap_stats.get_total () >=
+ tosp_enough_space_samples_for_median) ||
+ ((suspected_table ||
+ all_gap_stats.get_total () <= tosp_short_row) &&
+ cert_space_gap_stats.get_total () > 0)) {
+ old_to_method(row,
+ &all_gap_stats,
+ &cert_space_gap_stats,
+ &small_gap_stats,
+ block_space_gap_width,
+ block_non_space_gap_width);
+ } else {
+ if (!tosp_recovery_isolated_row_stats ||
+ !isolated_row_stats (row, gapmap, &all_gap_stats, suspected_table,
+ block_idx, row_idx)) {
+ if (tosp_row_use_cert_spaces && (tosp_debug_level > 5))
+ tprintf ("B:%d R:%d -- Inadequate certain spaces.\n",
+ block_idx, row_idx);
+ if (tosp_row_use_cert_spaces1 && good_block_space_estimate) {
+ //Use block default
+ row->space_size = block_space_gap_width;
+ if (all_gap_stats.get_total () > tosp_redo_kern_limit)
+ row->kern_size = all_gap_stats.median ();
+ else
+ row->kern_size = block_non_space_gap_width;
+ row->space_threshold =
+ int32_t (floor ((row->space_size + row->kern_size) /
+ tosp_old_sp_kn_th_factor));
+ }
+ else
+ old_to_method(row,
+ &all_gap_stats,
+ &all_space_gap_stats,
+ &small_gap_stats,
+ block_space_gap_width,
+ block_non_space_gap_width); // same as the first call, but fed all_space_gap_stats rather than the certain spaces
+ }
+ }
+
+ if (tosp_improve_thresh && !suspected_table)
+ improve_row_threshold(row, &all_gap_stats);
+
+ /* Now lets try to be careful not to do anything silly with tables when we
+ are ignoring big gaps*/
+ if (tosp_sanity_method == 0) {
+ if (suspected_table &&
+ (row->space_size < tosp_table_kn_sp_ratio * row->kern_size)) {
+ if (tosp_debug_level > 5)
+ tprintf("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f.\n", block_idx,
+ row_idx, row->kern_size, row->space_threshold, row->space_size);
+ row->space_threshold =
+ static_cast<int32_t>(tosp_table_kn_sp_ratio * row->kern_size);
+ row->space_size = std::max(row->space_threshold + 1.0f, row->xheight);
+ }
+ }
+ else if (tosp_sanity_method == 1) {
+ sane_space = row->space_size;
+ /* NEVER let space size get too close to kern size */
+ if ((row->space_size < tosp_min_sane_kn_sp * std::max(row->kern_size, 2.5f))
+ || ((row->space_size - row->kern_size) <
+ (tosp_silly_kn_sp_gap * row->xheight))) {
+ if (good_block_space_estimate &&
+ (block_space_gap_width >= tosp_min_sane_kn_sp * row->kern_size))
+ sane_space = block_space_gap_width;
+ else
+ sane_space =
+ std::max(static_cast<float>(tosp_min_sane_kn_sp) * std::max(row->kern_size, 2.5f),
+ row->xheight / 2.0f);
+ if (tosp_debug_level > 5)
+ tprintf("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n",
+ block_idx, row_idx, row->kern_size, row->space_threshold,
+ row->space_size, sane_space);
+ row->space_size = sane_space;
+ row->space_threshold =
+ int32_t (floor ((row->space_size + row->kern_size) /
+ tosp_old_sp_kn_th_factor));
+ }
+ /* NEVER let threshold get VERY far away from kern */
+ sane_threshold = int32_t (floor (tosp_max_sane_kn_thresh *
+ std::max(row->kern_size, 2.5f)));
+ if (row->space_threshold > sane_threshold) {
+ if (tosp_debug_level > 5)
+ tprintf("B:%d R:%d -- DON'T BELIEVE THRESH %3.2f %d %3.2f->%d.\n",
+ block_idx, row_idx, row->kern_size, row->space_threshold,
+ row->space_size, sane_threshold);
+ row->space_threshold = sane_threshold;
+ if (row->space_size <= sane_threshold)
+ row->space_size = row->space_threshold + 1.0f;
+ }
+ /* Beware of tables - there may be NO spaces */
+ if (suspected_table) {
+ sane_space = std::max(tosp_table_kn_sp_ratio * row->kern_size,
+ tosp_table_xht_sp_ratio * row->xheight);
+ sane_threshold = int32_t (floor ((sane_space + row->kern_size) / 2));
+
+ if ((row->space_size < sane_space) ||
+ (row->space_threshold < sane_threshold)) {
+ if (tosp_debug_level > 5)
+ tprintf ("B:%d R:%d -- SUSPECT NO SPACES %3.2f %d %3.2f.\n",
+ block_idx, row_idx,
+ row->kern_size,
+ row->space_threshold, row->space_size);
+ //the minimum sane value
+ row->space_threshold = static_cast<int32_t>(sane_space);
+ row->space_size = std::max(row->space_threshold + 1.0f, row->xheight);
+ }
+ }
+ }
+
+ /* Now lets try to put some error limits on the threshold */
+
+ if (tosp_old_to_method) {
+ /* Old textord made a space if gap >= threshold */
+ //NO FUZZY SPACES YET
+ row->max_nonspace = row->space_threshold;
+ //NO FUZZY SPACES YET
+ row->min_space = row->space_threshold + 1;
+ }
+ else {
+ /* Any gap greater than 0.6 x-ht is bound to be a space (isn't it:-) */
+ row->min_space =
+ std::min(int32_t (ceil (tosp_fuzzy_space_factor * row->xheight)),
+ int32_t (row->space_size));
+ if (row->min_space <= row->space_threshold)
+ // Don't be silly
+ row->min_space = row->space_threshold + 1;
+ /*
+ Lets try to guess the max certain kern gap by looking at the cluster of
+ kerns for the row. The row is proportional so the kerns should cluster
+ tightly at the bottom of the distribution. We also expect most gaps to be
+ kerns. Find the maximum of the kern piles between 0 and twice the kern
+ estimate. Piles before the first one with less than 1/10 the maximum
+ number of samples can be taken as certain kerns.
+
+ Of course, there are some cases where the kern peak and space peaks merge,
+ so we will put an UPPER limit on the max certain kern gap of some fraction
+ below the threshold.
+ */
+
+ max_max_nonspace = int32_t ((row->space_threshold + row->kern_size) / 2);
+
+ //default
+ row->max_nonspace = max_max_nonspace;
+ for (index = 0; index <= max_max_nonspace; index++) {
+ if (all_gap_stats.pile_count (index) > max)
+ max = all_gap_stats.pile_count (index);
+ if ((index > row->kern_size) &&
+ (all_gap_stats.pile_count (index) < 0.1 * max)) {
+ row->max_nonspace = index; // first pile above kern_size holding < 1/10 of the running maximum
+ break;
+ }
+ }
+ }
+
+ /* Yet another algorithm - simpler this time - just choose a fraction of the
+ threshold to space range */
+
+ if ((tosp_fuzzy_sp_fraction > 0) &&
+ (row->space_size > row->space_threshold))
+ row->min_space = std::max(row->min_space,
+ static_cast<int32_t>(ceil (row->space_threshold +
+ tosp_fuzzy_sp_fraction *
+ (row->space_size -
+ row->space_threshold))));
+
+ /* Ensure that ANY space less than some multiplier times the kern size is
+ fuzzy. In tables there is a risk of erroneously setting a small space size
+ when there are no real spaces. Sometimes tables have text squashed into
+ columns so that the kn->sp ratio is small anyway - this means that we can't
+ use this to force a wider separation - hence we rely on context to join any
+ dubious breaks. */
+
+ if ((tosp_table_fuzzy_kn_sp_ratio > 0) &&
+ (suspected_table || tosp_fuzzy_limit_all))
+ row->min_space = std::max(row->min_space,
+ static_cast<int32_t>(ceil (tosp_table_fuzzy_kn_sp_ratio *
+ row->kern_size)));
+
+ if ((tosp_fuzzy_kn_fraction > 0) && (row->kern_size < row->space_threshold)) {
+ row->max_nonspace = static_cast<int32_t>(floor (0.5 + row->kern_size +
+ tosp_fuzzy_kn_fraction *
+ (row->space_threshold -
+ row->kern_size))); // overrides whatever max_nonspace was chosen above
+ }
+ if (row->max_nonspace > row->space_threshold) {
+ // Don't be silly
+ row->max_nonspace = row->space_threshold;
+ }
+
+ if (tosp_debug_level > 5)
+ tprintf
+ ("B:%d R:%d L:%d-- Kn:%d Sp:%d Thr:%d -- Kn:%3.2f (%d) Thr:%d (%d) Sp:%3.2f\n",
+ block_idx, row_idx, row_length, block_non_space_gap_width,
+ block_space_gap_width, real_space_threshold, row->kern_size,
+ row->max_nonspace, row->space_threshold, row->min_space,
+ row->space_size);
+ if (tosp_debug_level > 10)
+ tprintf("row->kern_size = %3.2f, row->space_size = %3.2f, "
+ "row->space_threshold = %d\n",
+ row->kern_size, row->space_size, row->space_threshold);
+}
+
+void Textord::old_to_method(
+    TO_ROW *row,
+    STATS *all_gap_stats,
+    STATS *space_gap_stats,
+    STATS *small_gap_stats,
+    int16_t block_space_gap_width,     // estimate for block
+    int16_t block_non_space_gap_width  // estimate for block
+    ) {
+  // Step 1: row space size.
+  // With enough samples take the median (old to condition was > 2); with
+  // only one or two take the mean; with none use the block default.
+  const bool enough_for_median =
+      space_gap_stats->get_total() >= tosp_enough_space_samples_for_median;
+  if (enough_for_median || space_gap_stats->get_total() >= 1) {
+    if (enough_for_median)
+      row->space_size = space_gap_stats->median();
+    else
+      row->space_size = space_gap_stats->mean();
+
+    // Limit the estimate if it seems wildly out against the block estimate.
+    if (row->space_size > block_space_gap_width * 1.5) {
+      if (tosp_old_to_bug_fix)
+        row->space_size = block_space_gap_width * 1.5;
+      else
+        // BUG??? should be *1.5
+        row->space_size = block_space_gap_width;
+    }
+
+    // Lower limit: 2x the block kern (+1) for a median, 3x (+1) for a mean.
+    const int kern_multiple = enough_for_median ? 2 : 3;
+    if (row->space_size < (block_non_space_gap_width * kern_multiple) + 1)
+      row->space_size = (block_non_space_gap_width * kern_multiple) + 1;
+  } else {
+    // Use block default
+    row->space_size = block_space_gap_width;
+  }
+
+  // Step 2: row kern size.
+  if (tosp_only_small_gaps_for_kern &&
+      (small_gap_stats->get_total() > tosp_redo_kern_limit)) {
+    row->kern_size = small_gap_stats->median();
+  } else if (all_gap_stats->get_total() > tosp_redo_kern_limit) {
+    row->kern_size = all_gap_stats->median();
+  } else {
+    // old TO -SAME FOR ALL ROWS
+    row->kern_size = block_non_space_gap_width;
+  }
+
+  // Step 3: row space threshold.
+  if (tosp_threshold_bias2 > 0) {
+    row->space_threshold =
+        int32_t(floor(0.5 + row->kern_size +
+                      tosp_threshold_bias2 * (row->space_size -
+                                              row->kern_size)));
+  } else {
+    /*
+      NOTE old text ord uses (space_size + kern_size + 1)/2 as the threshold
+      and holds this in a float. The use is with a >= test
+      NEW textord uses an integer threshold and a > test
+      It comes to the same thing.
+      (Though there is a difference in that old textor has integer space_size
+      and kern_size.)
+    */
+    row->space_threshold =
+        int32_t(floor((row->space_size + row->kern_size) / 2));
+  }
+
+  // Step 4: apply the same logic and ratios as in row_spacing_stats to
+  // restrict relative values of the row's space_size, kern_size, and
+  // space_threshold.
+  if (tosp_old_to_constrain_sp_kn && tosp_sanity_method == 1) {
+    const bool space_too_close_to_kern =
+        (row->space_size <
+         tosp_min_sane_kn_sp * std::max(row->kern_size, 2.5f)) ||
+        ((row->space_size - row->kern_size) <
+         tosp_silly_kn_sp_gap * row->xheight);
+    if (space_too_close_to_kern) {
+      if (row->kern_size > 2.5)
+        row->kern_size = row->space_size / tosp_min_sane_kn_sp;
+      row->space_threshold =
+          int32_t(floor((row->space_size + row->kern_size) /
+                        tosp_old_sp_kn_th_factor));
+    }
+  }
+}
+
+
+/*************************************************************************
+ * isolated_row_stats()
+ * Set values for kern_size, space_size and space_threshold based on row
+ * stats only. Returns false when the row stats cannot be used.
+ *************************************************************************/
+bool Textord::isolated_row_stats(TO_ROW* row,
+                                 GAPMAP* gapmap,
+                                 STATS* all_gap_stats,
+                                 bool suspected_table,
+                                 int16_t block_idx,
+                                 int16_t row_idx) {
+  // Crude kern estimate and kern/space split, taken from the gaps already
+  // histogrammed by the caller.
+  const float kern_estimate = all_gap_stats->median();
+  const float crude_threshold_estimate =
+      std::max(tosp_init_guess_kn_mult * kern_estimate,
+               tosp_init_guess_xht_mult * row->xheight);
+  const int16_t small_gaps_count = stats_count_under(
+      all_gap_stats, static_cast<int16_t>(ceil(crude_threshold_estimate)));
+  const int16_t total = all_gap_stats->get_total();
+
+  // Give up when there are too few gaps, too small a share of small gaps,
+  // or no gap at all at or above the crude threshold.
+  const bool stats_unusable =
+      (total <= tosp_redo_kern_limit) ||
+      ((small_gaps_count / static_cast<float>(total)) <
+       tosp_enough_small_gaps) ||
+      (total - small_gaps_count < 1);
+  if (stats_unusable) {
+    if (tosp_debug_level > 5)
+      tprintf("B:%d R:%d -- Can't do isolated row stats.\n", block_idx,
+              row_idx);
+    return false;
+  }
+
+  STATS cert_space_gap_stats(0, MAXSPACING);
+  STATS all_space_gap_stats(0, MAXSPACING);
+  STATS small_gap_stats(0, MAXSPACING);
+
+  // iterator
+  BLOBNBOX_IT blob_it = row->blob_list();
+  blob_it.set_to_list(row->blob_list());
+  blob_it.mark_cycle_pt();
+  const int32_t end_of_row =
+      blob_it.data_relative(-1)->bounding_box().right();
+
+  // Fetches the next box using whichever scheme the tosp_ settings select.
+  auto next_box = [&]() -> TBOX {
+    if (tosp_use_pre_chopping)
+      return box_next_pre_chopped(&blob_it);
+    if (tosp_stats_use_xht_gaps)
+      return reduced_box_next(row, &blob_it);
+    return box_next(&blob_it);
+  };
+
+  TBOX prev_blob_box = next_box();
+  const int32_t row_length = end_of_row - prev_blob_box.left();
+  while (!blob_it.cycled_list()) {
+    TBOX blob_box = next_box();
+    const int16_t left = prev_blob_box.right();
+    const int16_t right = blob_box.left();
+    const int16_t gap_width = right - left;
+    if (!ignore_big_gap(row, row_length, gapmap, left, right) &&
+        (gap_width > crude_threshold_estimate)) {
+      const bool certain_space =
+          (gap_width > tosp_fuzzy_space_factor2 * row->xheight) ||
+          ((gap_width > tosp_fuzzy_space_factor1 * row->xheight) &&
+           (!tosp_narrow_blobs_not_cert ||
+            (!narrow_blob(row, prev_blob_box) &&
+             !narrow_blob(row, blob_box)))) ||
+          (wide_blob(row, prev_blob_box) && wide_blob(row, blob_box));
+      if (certain_space)
+        cert_space_gap_stats.add(gap_width, 1);
+      all_space_gap_stats.add(gap_width, 1);
+    }
+    if (gap_width < crude_threshold_estimate)
+      small_gap_stats.add(gap_width, 1);
+
+    prev_blob_box = blob_box;
+  }
+
+  // Space size: prefer the certain spaces, then fall back to all spaces.
+  if (cert_space_gap_stats.get_total() >=
+      tosp_enough_space_samples_for_median) {
+    // median
+    row->space_size = cert_space_gap_stats.median();
+  } else if (suspected_table && (cert_space_gap_stats.get_total() > 0)) {
+    // mean, to avoid spaced 1's in tables
+    row->space_size = cert_space_gap_stats.mean();
+  } else if (all_space_gap_stats.get_total() >=
+             tosp_enough_space_samples_for_median) {
+    // median
+    row->space_size = all_space_gap_stats.median();
+  } else {
+    row->space_size = all_space_gap_stats.mean();
+  }
+
+  // Kern size, then the threshold halfway between kern and space.
+  if (tosp_only_small_gaps_for_kern)
+    row->kern_size = small_gap_stats.median();
+  else
+    row->kern_size = all_gap_stats->median();
+  row->space_threshold =
+      int32_t(floor((row->space_size + row->kern_size) / 2));
+
+  /* Sanity check */
+  const bool insane = (row->kern_size >= row->space_threshold) ||
+                      (row->space_threshold >= row->space_size) ||
+                      (row->space_threshold <= 0);
+  if (insane) {
+    if (tosp_debug_level > 5)
+      tprintf("B:%d R:%d -- Isolated row stats SANITY FAILURE: %f %d %f\n",
+              block_idx, row_idx,
+              row->kern_size, row->space_threshold, row->space_size);
+    row->kern_size = 0.0f;
+    row->space_threshold = 0;
+    row->space_size = 0.0f;
+    return false;
+  }
+
+  if (tosp_debug_level > 5)
+    tprintf("B:%d R:%d -- Isolated row stats: %f %d %f\n",
+            block_idx, row_idx,
+            row->kern_size, row->space_threshold, row->space_size);
+  return true;
+}
+
+int16_t Textord::stats_count_under(STATS *stats, int16_t threshold) {
+  // Sum the histogram piles for the values 0 .. threshold - 1.
+  int16_t samples_below = 0;
+  int16_t bucket = 0;
+  while (bucket < threshold) {
+    samples_below += stats->pile_count(bucket);
+    ++bucket;
+  }
+  return samples_below;
+}
+
+
+/*************************************************************************
+ * improve_row_threshold()
+ * Try to recognise a "normal line" -
+ * > 25 gaps
+ * && space > 3 * kn && space > 10
+ * (I.e. reasonably large space and kn:sp ratio)
+ * && > 3/4 # gaps < kn + (sp - kn)/3
+ * (I.e. most gaps are well away from space estimate)
+ * && a gap of max(3, (sp - kn) / 3) empty histogram positions is found
+ * somewhere in the histogram between kn and sp
+ * THEN set the threshold and fuzzy limits to this gap - ie NO fuzzies
+ * NO!!!!! the bristol line has "11" with a gap of 12 between the 1's!!!
+ * try moving the default threshold to within this band but leave the
+ * fuzzy limit calculation as at present.
+ *************************************************************************/
+// Moves row->space_threshold into an empty band of the gap histogram when
+// the row looks like a "normal line".
+//   row            row whose space_threshold may be adjusted; space_size and
+//                  kern_size are only read.
+//   all_gap_stats  histogram of the row's gaps.
+// Nothing is changed unless there are more than 25 gaps, sp > 10,
+// sp > 3 * kn, at least 3/4 of the gaps lie below kn + (sp - kn) / 3, and a
+// wide enough run of empty piles is found between kn and sp.
+void Textord::improve_row_threshold(TO_ROW *row, STATS *all_gap_stats) {
+  float sp = row->space_size;
+  float kn = row->kern_size;
+  int16_t reqd_zero_width = 0;
+  int16_t zero_width = 0;
+  int16_t zero_start = 0;
+  int16_t index = 0;
+
+  // The level > 10 debug output is built up piecewise on one line:
+  // "Improve row threshold 0", then " 1", the run details, then " 2".
+  if (tosp_debug_level > 10)
+    tprintf("Improve row threshold 0");
+  if ((all_gap_stats->get_total() <= 25) ||
+      (sp <= 10) ||
+      (sp <= 3 * kn) ||
+      (stats_count_under(all_gap_stats,
+                         static_cast<int16_t>(ceil(kn + (sp - kn) / 3 + 0.5))) <
+       (0.75 * all_gap_stats->get_total())))
+    return;
+  if (tosp_debug_level > 10)
+    tprintf(" 1");
+  /*
+  Look for the first region of all 0's in the histogram which is wider than
+  max(3, (sp - kn) / 3) and starts between kn and sp. If found, and current
+  threshold is not within it, move the threshold so that it is just inside it.
+  */
+  reqd_zero_width = static_cast<int16_t>(floor((sp - kn) / 3 + 0.5));
+  if (reqd_zero_width < 3)
+    reqd_zero_width = 3;
+
+  for (index = int16_t(ceil(kn)); index < int16_t(floor(sp)); index++) {
+    if (all_gap_stats->pile_count(index) == 0) {
+      if (zero_width == 0)
+        zero_start = index;
+      zero_width++;
+    } else {
+      if (zero_width >= reqd_zero_width)
+        break;  // first wide enough run has just ended
+      else
+        zero_width = 0;  // run too short, start looking again
+    }
+  }
+  // Step back so index is the last pile examined before the loop stopped.
+  index--;
+  if (tosp_debug_level > 10)
+    // Terminated with a real newline; the previous "/n" printed the two
+    // characters literally and never ended the debug line.
+    tprintf(" reqd_z_width: %d found %d 0's, starting %d; thresh: %d\n",
+            reqd_zero_width, zero_width, zero_start, row->space_threshold);
+  // Nothing to do when no suitable run was found or the threshold already
+  // lies inside [zero_start, index].
+  if ((zero_width < reqd_zero_width) ||
+      ((row->space_threshold >= zero_start) &&
+       (row->space_threshold <= index)))
+    return;
+  if (tosp_debug_level > 10)
+    tprintf(" 2");
+  if (row->space_threshold < zero_start) {
+    if (tosp_debug_level > 5)
+      tprintf("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n",
+              kn, sp, zero_start, index, row->space_threshold, zero_start);
+    row->space_threshold = zero_start;
+  }
+  if (row->space_threshold > index) {
+    if (tosp_debug_level > 5)
+      tprintf("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n",
+              kn, sp, zero_start, index, row->space_threshold, index);
+    row->space_threshold = index;
+  }
+}
+
+
+/**********************************************************************
+ * make_prop_words
+ *
+ * Convert a TO_ROW to a ROW.
+ *
+ * Walks the row's blob list once (the loop ends when the iterator is back
+ * at the first blob), collecting C_BLOBs into a pending list. A WERD is
+ * closed whenever the next real blob lies to the right of the next
+ * repeated-character word, make_a_word_break() reports a break, or the
+ * iterator has wrapped round. Words taken from row->rep_words are spliced
+ * in by x position; any still left after the loop are appended.
+ *
+ * row: the row to convert. The C_BLOBs of its blobs are handed to the WERD
+ * constructor and its rep_words list is emptied by extract().
+ * rotation: not referenced in the body of this function.
+ * Returns a ROW allocated with new that holds the words, or nullptr when
+ * the row's blob list is empty (rep_words is then left untouched).
+ **********************************************************************/
+ROW *Textord::make_prop_words(
+ TO_ROW *row, // row to make
+ FCOORD rotation // for drawing; not referenced in the body below
+ ) {
+ bool bol; // start of line
+ /* prev_ values are for start of word being built. non prev_ values are for
+ the gap between the word being built and the next one. */
+ bool prev_fuzzy_sp; // probably space
+ bool prev_fuzzy_non; // probably not
+ uint8_t prev_blanks; // in front of word
+ bool fuzzy_sp = false; // probably space
+ bool fuzzy_non = false; // probably not
+ uint8_t blanks = 0; // in front of word
+ bool prev_gap_was_a_space = false; // state carried between make_a_word_break calls
+ bool break_at_next_gap = false; // state carried between make_a_word_break calls
+ ROW *real_row; // output row
+ C_OUTLINE_IT cout_it;
+ C_BLOB_LIST cblobs; // blobs collected for the word being built
+ C_BLOB_IT cblob_it = &cblobs;
+ WERD_LIST words; // finished words, moved into real_row at the end
+ WERD *word; // new word
+ int32_t next_rep_char_word_right = INT32_MAX; // INT32_MAX when no repeated-char word is pending
+ float repetition_spacing; // gap between repetitions
+ int32_t xstarts[2]; // row ends; NOTE(review): written below but never read in this function
+ int32_t prev_x; // end of prev blob
+ BLOBNBOX *bblob; // current blob
+ TBOX blob_box; // bounding box
+ BLOBNBOX_IT box_it; // iterator
+ TBOX prev_blob_box;
+ TBOX next_blob_box;
+ int16_t prev_gap = INT16_MAX;
+ int16_t current_gap = INT16_MAX;
+ int16_t next_gap = INT16_MAX;
+ int16_t prev_within_xht_gap = INT16_MAX;
+ int16_t current_within_xht_gap = INT16_MAX;
+ int16_t next_within_xht_gap = INT16_MAX;
+ int16_t word_count = 0; // counts words built here; repeated-char words are not counted
+
+ // repeated char words
+ WERD_IT rep_char_it(&(row->rep_words));
+ if (!rep_char_it.empty ()) {
+ next_rep_char_word_right =
+ rep_char_it.data ()->bounding_box ().right ();
+ }
+
+ prev_x = -INT16_MAX;
+ cblob_it.set_to_list (&cblobs);
+ box_it.set_to_list (row->blob_list ());
+ // new words
+ WERD_IT word_it(&words);
+ bol = true;
+ prev_blanks = 0;
+ prev_fuzzy_sp = false;
+ prev_fuzzy_non = false;
+ if (!box_it.empty ()) {
+ xstarts[0] = box_it.data ()->bounding_box ().left ();
+ if (xstarts[0] > next_rep_char_word_right) {
+ /* We need to insert a repeated char word at the start of the row */
+ word = rep_char_it.extract ();
+ word_it.add_after_then_move (word);
+ /* Set spaces before repeated char word */
+ word->set_flag (W_BOL, true);
+ bol = false;
+ word->set_blanks (0);
+ //NO uncertainty
+ word->set_flag (W_FUZZY_SP, false);
+ word->set_flag (W_FUZZY_NON, false);
+ xstarts[0] = word->bounding_box ().left ();
+ /* Set spaces after repeated char word (and leave current word set) */
+ repetition_spacing = find_mean_blob_spacing (word);
+ current_gap = box_it.data ()->bounding_box ().left () -
+ next_rep_char_word_right;
+ current_within_xht_gap = current_gap;
+ if (current_gap > tosp_rep_space * repetition_spacing) {
+ prev_blanks = static_cast<uint8_t>(floor (current_gap / row->space_size)); // NOTE(review): no range clip before the uint8_t cast
+ if (prev_blanks < 1)
+ prev_blanks = 1;
+ }
+ else
+ prev_blanks = 0;
+ if (tosp_debug_level > 5)
+ tprintf ("Repch wd at BOL(%d, %d). rep spacing %5.2f; Rgap:%d ",
+ box_it.data ()->bounding_box ().left (),
+ box_it.data ()->bounding_box ().bottom (),
+ repetition_spacing, current_gap);
+ prev_fuzzy_sp = false;
+ prev_fuzzy_non = false;
+ if (rep_char_it.empty ()) {
+ next_rep_char_word_right = INT32_MAX;
+ }
+ else {
+ rep_char_it.forward ();
+ next_rep_char_word_right =
+ rep_char_it.data ()->bounding_box ().right ();
+ }
+ }
+
+ peek_at_next_gap(row, // prime next_blob_box / next_gap before the first iteration
+ box_it,
+ next_blob_box,
+ next_gap,
+ next_within_xht_gap);
+ do {
+ bblob = box_it.data ();
+ blob_box = bblob->bounding_box ();
+ if (bblob->joined_to_prev ()) {
+ if (bblob->cblob () != nullptr) {
+ cout_it.set_to_list (cblob_it.data ()->out_list ()); // append this blob's outlines to the last collected C_BLOB
+ cout_it.move_to_last ();
+ cout_it.add_list_after (bblob->cblob ()->out_list ());
+ delete bblob->cblob (); // NOTE(review): bblob still holds this pointer afterwards - confirm nothing reads it later
+ }
+ } else {
+ if (bblob->cblob() != nullptr)
+ cblob_it.add_after_then_move (bblob->cblob ());
+ prev_x = blob_box.right ();
+ }
+ box_it.forward (); //next one
+ bblob = box_it.data ();
+ blob_box = bblob->bounding_box ();
+
+ if (!bblob->joined_to_prev() && bblob->cblob() != nullptr) {
+ /* Real Blob - not multiple outlines or pre-chopped */
+ prev_gap = current_gap; // shift the prev/current/next gap window along by one blob
+ prev_within_xht_gap = current_within_xht_gap;
+ prev_blob_box = next_blob_box;
+ current_gap = next_gap;
+ current_within_xht_gap = next_within_xht_gap;
+ peek_at_next_gap(row,
+ box_it,
+ next_blob_box,
+ next_gap,
+ next_within_xht_gap);
+
+ int16_t prev_gap_arg = prev_gap;
+ int16_t next_gap_arg = next_gap;
+ if (tosp_only_use_xht_gaps) {
+ prev_gap_arg = prev_within_xht_gap;
+ next_gap_arg = next_within_xht_gap;
+ }
+ // Decide if a word-break should be inserted
+ if (blob_box.left () > next_rep_char_word_right ||
+ make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box,
+ current_gap, current_within_xht_gap,
+ next_blob_box, next_gap_arg,
+ blanks, fuzzy_sp, fuzzy_non,
+ prev_gap_was_a_space,
+ break_at_next_gap) ||
+ box_it.at_first()) { // iterator wrapped: the last word of the row is always closed
+ /* Form a new word out of the blobs collected */
+ word = new WERD (&cblobs, prev_blanks, nullptr);
+ word_count++;
+ word_it.add_after_then_move (word);
+ if (bol) {
+ word->set_flag (W_BOL, true);
+ bol = false;
+ }
+ if (prev_fuzzy_sp)
+ //probably space
+ word->set_flag (W_FUZZY_SP, true);
+ else if (prev_fuzzy_non)
+ word->set_flag (W_FUZZY_NON, true);
+ //probably not
+
+ if (blob_box.left () > next_rep_char_word_right) {
+ /* We need to insert a repeated char word */
+ word = rep_char_it.extract ();
+ word_it.add_after_then_move (word);
+
+ /* Set spaces before repeated char word */
+ repetition_spacing = find_mean_blob_spacing (word);
+ current_gap = word->bounding_box ().left () - prev_x;
+ current_within_xht_gap = current_gap;
+ if (current_gap > tosp_rep_space * repetition_spacing) {
+ blanks =
+ static_cast<uint8_t>(floor (current_gap / row->space_size));
+ if (blanks < 1)
+ blanks = 1;
+ }
+ else
+ blanks = 0;
+ if (tosp_debug_level > 5)
+ tprintf
+ ("Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);",
+ word->bounding_box ().left (),
+ word->bounding_box ().bottom (),
+ repetition_spacing, current_gap, blanks);
+ word->set_blanks (blanks);
+ //NO uncertainty
+ word->set_flag (W_FUZZY_SP, false);
+ word->set_flag (W_FUZZY_NON, false);
+
+ /* Set spaces after repeated char word (and leave current word set) */
+ current_gap =
+ blob_box.left () - next_rep_char_word_right;
+ if (current_gap > tosp_rep_space * repetition_spacing) {
+ blanks = static_cast<uint8_t>(current_gap / row->space_size);
+ if (blanks < 1)
+ blanks = 1;
+ }
+ else
+ blanks = 0;
+ if (tosp_debug_level > 5)
+ tprintf (" Rgap:%d (%d blanks)\n",
+ current_gap, blanks);
+ fuzzy_sp = false;
+ fuzzy_non = false;
+
+ if (rep_char_it.empty ()) {
+ next_rep_char_word_right = INT32_MAX;
+ }
+ else {
+ rep_char_it.forward ();
+ next_rep_char_word_right =
+ rep_char_it.data ()->bounding_box ().right ();
+ }
+ }
+
+ if (box_it.at_first () && rep_char_it.empty ()) {
+ //at end of line
+ word->set_flag (W_EOL, true);
+ xstarts[1] = prev_x;
+ }
+ else {
+ prev_blanks = blanks; // the values just computed describe the gap in front of the next word
+ prev_fuzzy_sp = fuzzy_sp;
+ prev_fuzzy_non = fuzzy_non;
+ }
+ }
+ }
+ }
+ while (!box_it.at_first ()); //until back at start
+
+ /* Insert any further repeated char words */
+ while (!rep_char_it.empty ()) {
+ word = rep_char_it.extract ();
+ word_it.add_after_then_move (word);
+
+ /* Set spaces before repeated char word */
+ repetition_spacing = find_mean_blob_spacing (word);
+ current_gap = word->bounding_box ().left () - prev_x;
+ if (current_gap > tosp_rep_space * repetition_spacing) {
+ blanks = static_cast<uint8_t>(floor (current_gap / row->space_size));
+ if (blanks < 1)
+ blanks = 1;
+ }
+ else
+ blanks = 0;
+ if (tosp_debug_level > 5)
+ tprintf(
+ "Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n",
+ word->bounding_box().left(), word->bounding_box().bottom(),
+ repetition_spacing, current_gap, blanks);
+ word->set_blanks (blanks);
+ //NO uncertainty
+ word->set_flag (W_FUZZY_SP, false);
+ word->set_flag (W_FUZZY_NON, false);
+ prev_x = word->bounding_box ().right ();
+ if (rep_char_it.empty ()) {
+ //at end of line
+ word->set_flag (W_EOL, true);
+ xstarts[1] = prev_x;
+ }
+ else {
+ rep_char_it.forward ();
+ }
+ }
+ real_row = new ROW (row,
+ static_cast<int16_t>(row->kern_size), static_cast<int16_t>(row->space_size));
+ word_it.set_to_list (real_row->word_list ());
+ //put words in row
+ word_it.add_list_after (&words);
+ real_row->recalc_bounding_box ();
+
+ if (tosp_debug_level > 4) {
+ tprintf ("Row: Made %d words in row ((%d,%d)(%d,%d))\n",
+ word_count,
+ real_row->bounding_box ().left (),
+ real_row->bounding_box ().bottom (),
+ real_row->bounding_box ().right (),
+ real_row->bounding_box ().top ());
+ }
+ return real_row;
+ }
+ return nullptr; // row had no blobs
+}
+
+/**********************************************************************
+ * make_blob_words
+ *
+ * Converts words into blobs so that each blob is a single character.
+ * Used for chopper test.
+ *
+ * Every blob that is not flagged joined_to_prev starts a new WERD (built
+ * with a blank count of 1); the outlines of joined_to_prev blobs are
+ * appended to the C_BLOB collected just before them.
+ *
+ * row: the row to convert.
+ * rotation: not referenced in the body of this function.
+ * Returns a ROW allocated with new, or nullptr when the row has no blobs.
+ **********************************************************************/
+ROW *Textord::make_blob_words(
+    TO_ROW *row,     // row to make
+    FCOORD rotation  // for drawing
+    ) {
+  C_BLOB_LIST cblobs;            // blobs collected for the next word
+  C_BLOB_IT cblob_it = &cblobs;
+  cblob_it.set_to_list(&cblobs);
+
+  BLOBNBOX_IT box_it;            // iterator over the row's blobs
+  box_it.set_to_list(row->blob_list());
+
+  WERD_LIST words;               // finished words
+  WERD_IT word_it(&words);
+
+  if (box_it.empty())
+    return nullptr;
+
+  C_OUTLINE_IT cout_it;
+  bool bol = true;               // next word is the first of the line
+  int16_t word_count = 0;
+
+  do {
+    BLOBNBOX *bblob = box_it.data();
+    if (bblob->joined_to_prev()) {
+      if (bblob->cblob() != nullptr) {
+        // Move the outlines onto the end of the previously collected blob.
+        cout_it.set_to_list(cblob_it.data()->out_list());
+        cout_it.move_to_last();
+        cout_it.add_list_after(bblob->cblob()->out_list());
+        delete bblob->cblob();
+      }
+    } else if (bblob->cblob() != nullptr) {
+      cblob_it.add_after_then_move(bblob->cblob());
+    }
+
+    box_it.forward();  // next one
+    bblob = box_it.data();
+
+    // The upcoming blob starts a new character, so flush what was collected.
+    if (!bblob->joined_to_prev() && !cblobs.empty()) {
+      WERD *word = new WERD(&cblobs, 1, nullptr);
+      word_count++;
+      word_it.add_after_then_move(word);
+      if (bol) {
+        word->set_flag(W_BOL, true);
+        bol = false;
+      }
+      if (box_it.at_first()) {  // at end of line
+        word->set_flag(W_EOL, true);
+      }
+    }
+  } while (!box_it.at_first());  // until back at start
+
+  /* Setup the row with created words. */
+  ROW *real_row = new ROW(row, static_cast<int16_t>(row->kern_size),
+                          static_cast<int16_t>(row->space_size));
+  word_it.set_to_list(real_row->word_list());
+  // put words in row
+  word_it.add_list_after(&words);
+  real_row->recalc_bounding_box();
+  if (tosp_debug_level > 4) {
+    tprintf ("Row:Made %d words in row ((%d,%d)(%d,%d))\n",
+             word_count,
+             real_row->bounding_box().left(),
+             real_row->bounding_box().bottom(),
+             real_row->bounding_box().right(),
+             real_row->bounding_box().top());
+  }
+  return real_row;
+}
+
+bool Textord::make_a_word_break( // Returns true if the gap in front of blob_box should end the current word.
+ TO_ROW* row, // row being made
+ TBOX blob_box, // box of the blob that follows the gap under test
+ int16_t prev_gap, // gap on the far side of prev_blob_box
+ TBOX prev_blob_box, // blob before the gap; a null box is taken as the beginning of the row
+ int16_t real_current_gap, // the gap under test
+ int16_t within_xht_current_gap, // the same gap measured between reduced boxes (see peek_at_next_gap)
+ TBOX next_blob_box, // blob after the gap, as filled in by peek_at_next_gap
+ int16_t next_gap, // gap on the far side of next_blob_box
+ uint8_t& blanks, // out: number of blanks to record for the break
+ bool& fuzzy_sp, // out: break is "probably a space"
+ bool& fuzzy_non, // out: break is "probably not a space"
+ bool& prev_gap_was_a_space, // in/out: state carried from call to call (new method only)
+ bool& break_at_next_gap) { // in/out: set to force a break on the following call
+ bool space; // the verdict that is returned
+ int16_t current_gap; // whichever of the real / within-xht gaps is selected below
+ float fuzzy_sp_to_kn_limit;
+
+ if (break_at_next_gap) { // a previous call asked for a forced break here
+ break_at_next_gap = false;
+ return true; // NOTE: blanks / fuzzy_sp / fuzzy_non are left as the caller passed them
+ }
+ /* Inhibit using the reduced gap if
+ The kerning is large - chars are not kerned and reducing "f"s can cause
+ erroneous blanks
+ OR The real gap is less than tosp_dont_fool_with_small_kerns times the
+ kerning estimate (this test is skipped when that parameter is negative)
+ */
+ if ((row->kern_size > tosp_large_kerning * row->xheight) ||
+ ((tosp_dont_fool_with_small_kerns >= 0) &&
+ (real_current_gap < tosp_dont_fool_with_small_kerns * row->kern_size)))
+ //Ignore the difference
+ within_xht_current_gap = real_current_gap;
+
+ if (tosp_use_xht_gaps && tosp_only_use_xht_gaps)
+ current_gap = within_xht_current_gap;
+ else
+ current_gap = real_current_gap;
+
+ if (tosp_old_to_method) {
+ //Boring old method
+ space = current_gap > row->max_nonspace;
+ if (space && (current_gap < INT16_MAX)) { // outputs are left untouched when the gap is INT16_MAX
+ if (current_gap < row->min_space) {
+ if (current_gap > row->space_threshold) {
+ blanks = 1;
+ fuzzy_sp = true;
+ fuzzy_non = false;
+ }
+ else {
+ blanks = 0;
+ fuzzy_sp = false;
+ fuzzy_non = true;
+ }
+ }
+ else {
+ blanks = static_cast<uint8_t>(current_gap / row->space_size); // NOTE(review): not range-clipped, unlike the ClipToRange used by the new method
+ if (blanks < 1)
+ blanks = 1;
+ fuzzy_sp = false;
+ fuzzy_non = false;
+ }
+ }
+ return space;
+ }
+ else {
+ /* New exciting heuristic method */
+ if (prev_blob_box.null_box ()) // Beginning of row
+ prev_gap_was_a_space = true;
+
+ //Default as old TO
+ space = current_gap > row->space_threshold;
+
+ /* Set defaults for the word break in case we find one. Currently there are
+ no fuzzy spaces. Depending on the reliability of the different heuristics
+ we may need to set PARTICULAR spaces to fuzzy or not. The values will ONLY
+ be used if the function returns true - ie the word is to be broken.
+ */
+ int num_blanks = current_gap; // used as-is when space_size is not above 1.0
+ if (row->space_size > 1.0f)
+ num_blanks = IntCastRounded(current_gap / row->space_size);
+ blanks = static_cast<uint8_t>(ClipToRange<int>(num_blanks, 1, UINT8_MAX));
+ fuzzy_sp = false;
+ fuzzy_non = false;
+ /*
+ If xht measure causes gap to flip one of the 3 thresholds act accordingly -
+ despite any other heuristics - the MINIMUM action is to pass a fuzzy kern to
+ context.
+ The three thresholds tested in turn are max_nonspace (rule 20),
+ space_threshold (rule 21) and min_space (rule 22). Only one branch of
+ the whole if / else-if chain below runs for a given call.
+ */
+ if (tosp_use_xht_gaps &&
+ (real_current_gap <= row->max_nonspace) &&
+ (within_xht_current_gap > row->max_nonspace)) {
+ space = true;
+ fuzzy_non = true;
+#ifndef GRAPHICS_DISABLED
+ mark_gap (blob_box, 20,
+ prev_gap, prev_blob_box.width (),
+ current_gap, next_blob_box.width (), next_gap);
+#endif
+ }
+ else if (tosp_use_xht_gaps &&
+ (real_current_gap <= row->space_threshold) &&
+ (within_xht_current_gap > row->space_threshold)) {
+ space = true;
+ if (tosp_flip_fuzz_kn_to_sp)
+ fuzzy_sp = true;
+ else
+ fuzzy_non = true;
+#ifndef GRAPHICS_DISABLED
+ mark_gap (blob_box, 21,
+ prev_gap, prev_blob_box.width (),
+ current_gap, next_blob_box.width (), next_gap);
+#endif
+ }
+ else if (tosp_use_xht_gaps &&
+ (real_current_gap < row->min_space) &&
+ (within_xht_current_gap >= row->min_space)) {
+ space = true;
+#ifndef GRAPHICS_DISABLED
+ mark_gap (blob_box, 22,
+ prev_gap, prev_blob_box.width (),
+ current_gap, next_blob_box.width (), next_gap);
+#endif
+ }
+ else if (tosp_force_wordbreak_on_punct &&
+ !suspected_punct_blob(row, prev_blob_box) &&
+ suspected_punct_blob(row, blob_box)) {
+ break_at_next_gap = true; // the break is forced on the NEXT call; space keeps its default here
+ }
+ /* Now continue with normal heuristics */
+ else if ((current_gap < row->min_space) &&
+ (current_gap > row->space_threshold)) {
+ /* Heuristics to turn dubious spaces to kerns */
+ if (tosp_pass_wide_fuzz_sp_to_context > 0)
+ fuzzy_sp_to_kn_limit = row->kern_size +
+ tosp_pass_wide_fuzz_sp_to_context *
+ (row->space_size - row->kern_size);
+ else
+ fuzzy_sp_to_kn_limit = 99999.0f; // effectively no limit
+
+ /* If current gap is significantly smaller than the previous space the other
+ side of a narrow blob then this gap is a kern. */
+ if ((prev_blob_box.width () > 0) &&
+ narrow_blob (row, prev_blob_box) &&
+ prev_gap_was_a_space &&
+ (current_gap <= tosp_gap_factor * prev_gap)) {
+ if ((tosp_all_flips_fuzzy) ||
+ (current_gap > fuzzy_sp_to_kn_limit)) {
+ if (tosp_flip_fuzz_sp_to_kn)
+ fuzzy_non = true;
+ else
+ fuzzy_sp = true;
+ }
+ else
+ space = false;
+#ifndef GRAPHICS_DISABLED
+ mark_gap (blob_box, 1,
+ prev_gap, prev_blob_box.width (),
+ current_gap, next_blob_box.width (), next_gap);
+#endif
+ }
+ /* If current gap not much bigger than the previous kern the other side of a
+ narrow blob then this gap is a kern as well */
+ else if ((prev_blob_box.width () > 0) &&
+ narrow_blob (row, prev_blob_box) &&
+ !prev_gap_was_a_space &&
+ (current_gap * tosp_gap_factor <= prev_gap)) {
+ if ((tosp_all_flips_fuzzy) ||
+ (current_gap > fuzzy_sp_to_kn_limit)) {
+ if (tosp_flip_fuzz_sp_to_kn)
+ fuzzy_non = true;
+ else
+ fuzzy_sp = true;
+ }
+ else
+ space = false;
+#ifndef GRAPHICS_DISABLED
+ mark_gap (blob_box, 2,
+ prev_gap, prev_blob_box.width (),
+ current_gap, next_blob_box.width (), next_gap);
+#endif
+ }
+ else if ((next_blob_box.width () > 0) && // rule 3: as rule 1 but judged against the gap after a narrow next blob
+ narrow_blob (row, next_blob_box) &&
+ (next_gap > row->space_threshold) &&
+ (current_gap <= tosp_gap_factor * next_gap)) {
+ if ((tosp_all_flips_fuzzy) ||
+ (current_gap > fuzzy_sp_to_kn_limit)) {
+ if (tosp_flip_fuzz_sp_to_kn)
+ fuzzy_non = true;
+ else
+ fuzzy_sp = true;
+ }
+ else
+ space = false;
+#ifndef GRAPHICS_DISABLED
+ mark_gap (blob_box, 3,
+ prev_gap, prev_blob_box.width (),
+ current_gap, next_blob_box.width (), next_gap);
+#endif
+ }
+ else if ((next_blob_box.width () > 0) && // rule 4: as rule 2 but judged against the gap after a narrow next blob
+ narrow_blob (row, next_blob_box) &&
+ (next_gap <= row->space_threshold) &&
+ (current_gap * tosp_gap_factor <= next_gap)) {
+ if ((tosp_all_flips_fuzzy) ||
+ (current_gap > fuzzy_sp_to_kn_limit)) {
+ if (tosp_flip_fuzz_sp_to_kn)
+ fuzzy_non = true;
+ else
+ fuzzy_sp = true;
+ }
+ else
+ space = false;
+#ifndef GRAPHICS_DISABLED
+ mark_gap (blob_box, 4,
+ prev_gap, prev_blob_box.width (),
+ current_gap, next_blob_box.width (), next_gap);
+#endif
+ }
+ else if ((((next_blob_box.width () > 0) && // rule 6: a narrow neighbour on either side only marks the space as fuzzy
+ narrow_blob (row, next_blob_box)) ||
+ ((prev_blob_box.width () > 0) &&
+ narrow_blob (row, prev_blob_box)))) {
+ fuzzy_sp = true;
+#ifndef GRAPHICS_DISABLED
+ mark_gap (blob_box, 6,
+ prev_gap, prev_blob_box.width (),
+ current_gap, next_blob_box.width (), next_gap);
+#endif
+ }
+ }
+ else if ((current_gap > row->max_nonspace) &&
+ (current_gap <= row->space_threshold)) {
+
+ /* Heuristics to turn dubious kerns to spaces */
+ /* TRIED THIS BUT IT MADE THINGS WORSE
+ if (prev_gap == INT16_MAX)
+ prev_gap = 0; // start of row
+ if (next_gap == INT16_MAX)
+ next_gap = 0; // end of row
+ */
+ if ((prev_blob_box.width () > 0) && // rule 7: gap is large relative to both neighbouring gaps and both neighbours are wide
+ (next_blob_box.width () > 0) &&
+ (current_gap >=
+ tosp_kern_gap_factor1 * std::max(prev_gap, next_gap)) &&
+ wide_blob (row, prev_blob_box) &&
+ wide_blob (row, next_blob_box)) {
+
+ space = true;
+ /*
+ tosp_flip_caution is an attempt to stop the default changing in cases
+ where there is a large difference between the kern and space estimates.
+ See problem in 'chiefs' where "have" gets split in the quotation.
+ */
+ if ((tosp_flip_fuzz_kn_to_sp) &&
+ ((tosp_flip_caution <= 0) ||
+ (tosp_flip_caution * row->kern_size > row->space_size)))
+ fuzzy_sp = true;
+ else
+ fuzzy_non = true;
+#ifndef GRAPHICS_DISABLED
+ mark_gap (blob_box, 7,
+ prev_gap, prev_blob_box.width (),
+ current_gap, next_blob_box.width (), next_gap);
+#endif
+ } else if (prev_blob_box.width() > 0 &&
+ next_blob_box.width() > 0 &&
+ current_gap > 5 && // Rule 9 handles small gap, big ratio.
+ current_gap >=
+ tosp_kern_gap_factor2 * std::max(prev_gap, next_gap) &&
+ !(narrow_blob(row, prev_blob_box) ||
+ suspected_punct_blob(row, prev_blob_box)) &&
+ !(narrow_blob(row, next_blob_box) ||
+ suspected_punct_blob(row, next_blob_box))) {
+ space = true;
+ fuzzy_non = true;
+#ifndef GRAPHICS_DISABLED
+ mark_gap (blob_box, 8,
+ prev_gap, prev_blob_box.width (),
+ current_gap, next_blob_box.width (), next_gap);
+#endif
+ }
+ else if ((tosp_kern_gap_factor3 > 0) && // rule 9 is switched off when the factor is not positive
+ (prev_blob_box.width () > 0) &&
+ (next_blob_box.width () > 0) &&
+ (current_gap >= tosp_kern_gap_factor3 * std::max(prev_gap, next_gap)) &&
+ (!tosp_rule_9_test_punct ||
+ (!suspected_punct_blob (row, prev_blob_box) &&
+ !suspected_punct_blob (row, next_blob_box)))) {
+ space = true;
+ fuzzy_non = true;
+#ifndef GRAPHICS_DISABLED
+ mark_gap (blob_box, 9,
+ prev_gap, prev_blob_box.width (),
+ current_gap, next_blob_box.width (), next_gap);
+#endif
+ }
+ }
+ if (tosp_debug_level > 10)
+ tprintf("word break = %d current_gap = %d, prev_gap = %d, "
+ "next_gap = %d\n", space ? 1 : 0, current_gap,
+ prev_gap, next_gap);
+ prev_gap_was_a_space = space && !(fuzzy_non); // a break flagged fuzzy_non is not remembered as a space
+ return space;
+ }
+}
+
+bool Textord::narrow_blob(TO_ROW* row, TBOX blob_box) {
+  // Narrow means either thin relative to the row's x-height, or tall and
+  // thin in its own right (width/height at or below the aspect limit).
+  if (blob_box.width() <= tosp_narrow_fraction * row->xheight)
+    return true;
+  return (static_cast<float>(blob_box.width()) / blob_box.height()) <=
+         tosp_narrow_aspect_ratio;
+}
+
+bool Textord::wide_blob(TO_ROW* row, TBOX blob_box) {
+  // Without a positive wide fraction, "wide" simply means "not narrow".
+  if (!(tosp_wide_fraction > 0))
+    return !narrow_blob(row, blob_box);
+
+  // Width test against the row's x-height.
+  if (!(blob_box.width() >= tosp_wide_fraction * row->xheight))
+    return false;
+
+  // The aspect-ratio test only applies when its limit is positive.
+  if (!(tosp_wide_aspect_ratio > 0))
+    return true;
+  return (static_cast<float>(blob_box.width()) / blob_box.height()) >
+         tosp_wide_aspect_ratio;
+}
+
+bool Textord::suspected_punct_blob(TO_ROW* row, TBOX box) {
+  // Small blobs are suspects outright.
+  if (box.height() <= 0.66 * row->xheight)
+    return true;
+
+  /* Baseline under the horizontal centre of the blob */
+  float centre_x = (box.right() + box.left()) / 2.0;
+  float base_y = row->baseline.y(centre_x);
+
+  // Otherwise the blob is a suspect when it lies wholly below, or wholly
+  // above, the line half an x-height over the baseline.
+  if (box.top() < base_y + row->xheight / 2.0)
+    return true;
+  return box.bottom() > base_y + row->xheight / 2.0;
+}
+
+
+void Textord::peek_at_next_gap(TO_ROW *row,
+                               BLOBNBOX_IT box_it,
+                               TBOX &next_blob_box,
+                               int16_t &next_gap,
+                               int16_t &next_within_xht_gap) {
+  // box_it is taken by value, so the caller's iterator does not move.
+  // A second copy is stepped in parallel to obtain the reduced boxes.
+  BLOBNBOX_IT reduced_it = box_it;
+
+  next_blob_box = box_next(&box_it);
+  TBOX reduced_here = reduced_box_next(row, &reduced_it);
+
+  if (box_it.at_first()) {
+    // Wrapped round: there is no blob beyond, so no gap to measure.
+    next_gap = INT16_MAX;
+    next_within_xht_gap = INT16_MAX;
+    return;
+  }
+
+  TBOX beyond = box_it.data()->bounding_box();
+  next_gap = beyond.left() - next_blob_box.right();
+
+  TBOX reduced_beyond = reduced_box_next(row, &reduced_it);
+  next_within_xht_gap = reduced_beyond.left() - reduced_here.right();
+}
+
+
+#ifndef GRAPHICS_DISABLED
+void Textord::mark_gap(
+    TBOX blob,                // blob following gap
+    int16_t rule,             // heuristic id
+    int16_t prev_gap,
+    int16_t prev_blob_width,
+    int16_t current_gap,
+    int16_t next_blob_width,
+    int16_t next_gap) {
+  // Colour of the ellipse marking the flipped gap, chosen by rule id.
+  ScrollView::Color col;
+  switch (rule) {
+    case 1:
+      col = ScrollView::RED;
+      break;
+    case 2:
+    case 20:
+      col = ScrollView::CYAN;
+      break;
+    case 3:
+    case 21:
+      col = ScrollView::GREEN;
+      break;
+    case 5:
+    case 22:
+      col = ScrollView::MAGENTA;
+      break;
+    case 6:
+      col = ScrollView::BLUE;
+      break;
+    case 7:
+      col = ScrollView::WHITE;
+      break;
+    case 8:
+      col = ScrollView::YELLOW;
+      break;
+    case 4:
+    case 9:
+    default:
+      col = ScrollView::BLACK;
+      break;
+  }
+
+  if (textord_show_initial_words) {
+    to_win->Pen(col);
+    // The ellipse fills the gap: as wide as the gap, as tall as the blob,
+    // centred half a gap to the left of the blob.
+    to_win->Ellipse(current_gap / 2.0f,                        // x radius
+                    blob.height() / 2.0f,                      // y radius
+                    blob.left() - current_gap / 2.0f,          // x centre
+                    blob.bottom() + blob.height() / 2.0f);     // y centre
+  }
+
+  if (tosp_debug_level > 5)
+    tprintf(" (%d,%d) Sp<->Kn Rule %d %d %d %d %d %d\n",
+            blob.left() - current_gap / 2, blob.bottom(), rule, prev_gap,
+            prev_blob_width, current_gap, next_blob_width, next_gap);
+}
+#endif
+
+float Textord::find_mean_blob_spacing(WERD *word) {
+  // Mean horizontal gap between consecutive blobs of the word, or 0 when
+  // the word holds fewer than two blobs.
+  C_BLOB_IT cblob_it;
+  cblob_it.set_to_list(word->cblob_list());
+  if (cblob_it.empty())
+    return 0.0f;
+
+  int32_t gap_sum = 0;
+  int16_t gap_count = 0;
+
+  cblob_it.mark_cycle_pt();
+  // first blob
+  int16_t prev_right = cblob_it.data()->bounding_box().right();
+  cblob_it.forward();
+  while (!cblob_it.cycled_list()) {
+    TBOX blob_box = cblob_it.data()->bounding_box();
+    gap_sum += blob_box.left() - prev_right;
+    gap_count++;
+    prev_right = blob_box.right();
+    cblob_it.forward();
+  }
+
+  if (gap_count > 0)
+    return (gap_sum / static_cast<float>(gap_count));
+  return 0.0f;
+}
+
+
+bool Textord::ignore_big_gap(TO_ROW* row,
+                             int32_t row_length,
+                             GAPMAP* gapmap,
+                             int16_t left,
+                             int16_t right) {
+  // Decide whether the gap spanning [left, right] should be ignored.
+  // tosp_ignore_big_gaps selects the policy:
+  //   > 999 : never ignore
+  //   > 0   : ignore gaps wider than that many x-heights
+  //   == 0  : built-in limits that depend on the row length / gap map
+  //   < 0   : only ignore gaps the gap map reports as table gaps
+  // In the last two modes, gaps beyond tosp_ignore_very_big_gaps x-heights
+  // are always ignored.
+  int16_t gap = right - left + 1;
+
+  if (tosp_ignore_big_gaps > 999)
+    return false;  // Don't ignore
+  if (tosp_ignore_big_gaps > 0)
+    return (gap > tosp_ignore_big_gaps * row->xheight);
+  if (gap > tosp_ignore_very_big_gaps * row->xheight)
+    return true;
+
+  if (tosp_ignore_big_gaps == 0) {
+    return ((gap > 2.1 * row->xheight) &&
+            (row_length > 20 * row->xheight)) ||
+           ((gap > 1.75 * row->xheight) &&
+            ((row_length > 35 * row->xheight) ||
+             gapmap->table_gap(left, right)));
+  }
+
+  /* ONLY time gaps < 3.0 * xht are ignored is when they are part of a table */
+  return (gap > gapmap_big_gaps * row->xheight) &&
+         gapmap->table_gap(left, right);
+}
+
+/**********************************************************************
+ * reduced_box_next
+ *
+ * Return the reduced bounding box of the blob at the iterator, folding in
+ * any following blobs that are joined to it (merging of x overlaps) but
+ * without pre-chopping, and leave the iterator on the next real blob.
+ * The result is stored on the head blob, so later calls return it directly.
+ * Small things - eg punctuation - are NOT reduced: they get the full box.
+ **********************************************************************/
+TBOX Textord::reduced_box_next(
+    TO_ROW *row,      // current row
+    BLOBNBOX_IT *it   // iterator to blobs
+    ) {
+  BLOBNBOX *const head_blob = it->data();  // place to store box
+  BLOBNBOX *blob = head_blob;              // current blob
+  TBOX reduced_box;                        // box of significant part
+
+  if (head_blob->red_box_set()) {
+    // Already computed: just skip ahead to the next real blob.
+    reduced_box = head_blob->reduced_box();
+    do {
+      it->forward();
+      blob = it->data();
+    } while (blob->cblob() == nullptr || blob->joined_to_prev());
+    return reduced_box;
+  }
+
+  TBOX full_box = head_blob->bounding_box();  // full blob bounding box
+  int16_t left_above_xht;                     // ABOVE xht left limit
+  reduced_box = reduced_box_for_blob(head_blob, row, &left_above_xht);
+
+  for (;;) {
+    it->forward();
+    blob = it->data();
+    if (blob->cblob() == nullptr) {
+      // was pre-chopped
+      full_box += blob->bounding_box();
+      continue;
+    }
+    if (!blob->joined_to_prev())
+      break;  // reached the next real blob
+    int16_t new_left_above_xht;
+    reduced_box += reduced_box_for_blob(blob, row, &new_left_above_xht);
+    left_above_xht = std::min(left_above_xht, new_left_above_xht);
+  }
+
+  // The reduced box is only kept for a non-empty, tall-enough blob whose
+  // near-left edge lies to the left of everything it has above the xheight.
+  const bool keep_reduced =
+      (reduced_box.width() > 0) &&
+      ((reduced_box.left() + tosp_near_lh_edge * reduced_box.width())
+       < left_above_xht) &&
+      (reduced_box.height() > 0.7 * row->xheight);
+
+  if (!keep_reduced)
+    reduced_box = full_box;
+#ifndef GRAPHICS_DISABLED
+  else if (textord_show_initial_words)
+    reduced_box.plot(to_win, ScrollView::YELLOW, ScrollView::YELLOW);
+#endif
+
+  head_blob->set_reduced_box(reduced_box);
+  return reduced_box;
+}
+
+
+/*************************************************************************
+ * reduced_box_for_blob()
+ * Find box for blob which is the same height and y position as the whole blob,
+ * but whose left limit is the left most position of the blob ABOVE the
+ * baseline and whose right limit is the right most position of the blob BELOW
+ * the xheight.
+ *
+ * *left_above_xht receives the left most position of the blob above
+ * 1.1 * xheight, or INT16_MAX if the blob has nothing up there.
+ *
+ * !!!!!!! WON'T WORK WITH LARGE UPPER CASE CHARS - T F V W - look at examples
+ * on "home". Perhaps we need something which says: if the width ABOVE the
+ * xht alone includes the whole of the reduced width, then use the full
+ * blob box - Might still fail on italic F
+ *
+ * Alternatively we could be a little less severe and only reduce the
+ * left and right edges by half the difference between the full box and
+ * the reduced box.
+ *
+ * NOTE that we need to rotate all the coordinates as
+ * find_blob_limits finds the y min and max within a specified x band
+ *************************************************************************/
+TBOX Textord::reduced_box_for_blob(
+    BLOBNBOX *blob,
+    TO_ROW *row,
+    int16_t *left_above_xht) {
+  /* Find baseline of centre of blob */
+  TBOX blob_box = blob->bounding_box();
+  float blob_x_centre = (blob_box.left() + blob_box.right()) / 2.0;
+  float baseline = row->baseline.y(blob_x_centre);
+
+  /*
+  Find LH limit of blob ABOVE the xht. This is so that we can detect certain
+  caps ht chars which should NOT have their box reduced: T, Y, V, W etc
+  */
+  float upper_left = static_cast<float>(INT32_MAX);
+  float upper_right = static_cast<float>(-INT32_MAX);
+  find_cblob_hlimits(blob->cblob(), (baseline + 1.1 * row->xheight),
+                     static_cast<float>(INT16_MAX), upper_left, upper_right);
+  if (upper_left > upper_right) {
+    *left_above_xht = INT16_MAX;  // No area above xht
+  } else {
+    *left_above_xht = static_cast<int16_t>(floor(upper_left));
+  }
+
+  /*
+  Find reduced LH limit of blob - the left extent of the region ABOVE the
+  baseline.
+  */
+  float left_limit = static_cast<float>(INT32_MAX);
+  float unused_right = static_cast<float>(-INT32_MAX);
+  find_cblob_hlimits(blob->cblob(), baseline, static_cast<float>(INT16_MAX),
+                     left_limit, unused_right);
+  if (left_limit > unused_right)
+    return TBOX();  // no area within xht so return empty box
+
+  /*
+  Find reduced RH limit of blob - the right extent of the region BELOW the xht.
+  */
+  float unused_left = static_cast<float>(INT32_MAX);
+  float right_limit = static_cast<float>(-INT32_MAX);
+  find_cblob_hlimits(blob->cblob(), static_cast<float>(-INT16_MAX),
+                     (baseline + row->xheight), unused_left, right_limit);
+  if (unused_left > right_limit)
+    return TBOX();  // no area within xht so return empty box
+
+  // Keep the blob's vertical extent; only the horizontal limits are reduced.
+  ICOORD bottom_left(static_cast<int16_t>(floor(left_limit)),
+                     blob_box.bottom());
+  ICOORD top_right(static_cast<int16_t>(ceil(right_limit)), blob_box.top());
+  return TBOX(bottom_left, top_right);
+}
+} // namespace tesseract
diff --git a/tesseract/src/textord/tovars.cpp b/tesseract/src/textord/tovars.cpp
new file mode 100644
index 00000000..22e838f0
--- /dev/null
+++ b/tesseract/src/textord/tovars.cpp
@@ -0,0 +1,85 @@
+/**********************************************************************
+ * File: tovars.cpp (Formerly to_vars.c)
+ * Description: Variables used by textord.
+ * Author: Ray Smith
+ * Created: Tue Aug 24 16:55:02 BST 1993
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "tovars.h"
+#include "params.h"
+
+namespace tesseract {
+
+// Definitions of the textord word/pitch parameters. Each macro takes the
+// parameter name, its default value and its help string.
+
+// Display / debug switches.
+BOOL_VAR (textord_show_initial_words, false, "Display separate words");
+BOOL_VAR (textord_show_new_words, false, "Display separate words");
+BOOL_VAR (textord_show_fixed_words, false,
+"Display forced fixed pitch words");
+BOOL_VAR (textord_blocksall_fixed, false, "Moan about prop blocks");
+BOOL_VAR (textord_blocksall_prop, false,
+"Moan about fixed pitch blocks");
+BOOL_VAR (textord_blocksall_testing, false, "Dump stats when moaning");
+BOOL_VAR (textord_test_mode, false, "Do current test");
+// Help string previously read "pixed pitch" (typo for "fixed pitch").
+INT_VAR (textord_dotmatrix_gap, 3,
+"Max pixel gap for broken fixed pitch");
+INT_VAR (textord_debug_block, 0, "Block to do debug on");
+INT_VAR (textord_pitch_range, 2, "Max range test on pitch");
+
+// Gap / width statistics and word-spacing thresholds.
+double_VAR (textord_wordstats_smooth_factor, 0.05,
+"Smoothing gap stats");
+double_VAR (textord_width_smooth_factor, 0.10,
+"Smoothing width stats");
+double_VAR (textord_words_width_ile, 0.4,
+"Ile of blob widths for space est");
+double_VAR (textord_words_maxspace, 4.0, "Multiple of xheight");
+double_VAR (textord_words_default_maxspace, 3.5,
+"Max believable third space");
+double_VAR (textord_words_default_minspace, 0.6,
+"Fraction of xheight");
+double_VAR (textord_words_min_minspace, 0.3, "Fraction of xheight");
+double_VAR (textord_words_default_nonspace, 0.2,
+"Fraction of xheight");
+double_VAR(textord_words_initial_lower, 0.25,
+ "Max initial cluster size");
+double_VAR (textord_words_initial_upper, 0.15,
+"Min initial cluster spacing");
+double_VAR (textord_words_minlarge, 0.75,
+"Fraction of valid gaps needed");
+
+// Fixed-pitch versus proportional decisions.
+double_VAR (textord_words_pitchsd_threshold, 0.040,
+"Pitch sync threshold");
+double_VAR (textord_words_def_fixed, 0.016,
+"Threshold for definite fixed");
+double_VAR (textord_words_def_prop, 0.090,
+"Threshold for definite prop");
+INT_VAR (textord_words_veto_power, 5,
+"Rows required to outvote a veto");
+double_VAR (textord_pitch_rowsimilarity, 0.08,
+"Fraction of xheight for sameness");
+BOOL_VAR (textord_pitch_scalebigwords, false,
+"Scale scores on big words");
+double_VAR(words_initial_lower, 0.5, "Max initial cluster size");
+double_VAR (words_initial_upper, 0.15, "Min initial cluster spacing");
+double_VAR (words_default_prop_nonspace, 0.25, "Fraction of xheight");
+double_VAR (words_default_fixed_space, 0.75, "Fraction of xheight");
+double_VAR (words_default_fixed_limit, 0.6, "Allowed size variance");
+double_VAR (textord_words_definite_spread, 0.30,
+"Non-fuzzy spacing region");
+double_VAR (textord_spacesize_ratiofp, 2.8,
+"Min ratio space/nonspace");
+double_VAR (textord_spacesize_ratioprop, 2.0,
+"Min ratio space/nonspace");
+double_VAR (textord_fpiqr_ratio, 1.5, "Pitch IQR/Gap IQR threshold");
+double_VAR (textord_max_pitch_iqr, 0.20, "Xh fraction noise in pitch");
+double_VAR (textord_fp_min_width, 0.5, "Min width of decent blobs");
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/tovars.h b/tesseract/src/textord/tovars.h
new file mode 100644
index 00000000..79d297a4
--- /dev/null
+++ b/tesseract/src/textord/tovars.h
@@ -0,0 +1,94 @@
+/**********************************************************************
+ * File: tovars.h (Formerly to_vars.h)
+ * Description: Variables used by textord.
+ * Author: Ray Smith
+ * Created: Tue Aug 24 16:55:02 BST 1993
+ *
+ * (C) Copyright 1993, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef TOVARS_H
+#define TOVARS_H
+
+#include "params.h"
+
+namespace tesseract {
+
+// Declarations of the textord word-spacing and pitch parameters.
+// NOTE(review): the value and description arguments of the *_VAR_H macros
+// are presumably informational only (see params.h) - confirm. They are kept
+// in step with the definitions in tovars.cpp so that readers of this header
+// see the defaults that are really in effect.
+extern BOOL_VAR_H (textord_show_initial_words, false,
+"Display separate words");
+extern BOOL_VAR_H (textord_show_new_words, false, "Display separate words");
+extern BOOL_VAR_H (textord_show_fixed_words, false,
+"Display forced fixed pitch words");
+extern BOOL_VAR_H (textord_blocksall_fixed, false, "Moan about prop blocks");
+extern BOOL_VAR_H (textord_blocksall_prop, false,
+"Moan about fixed pitch blocks");
+extern BOOL_VAR_H (textord_blocksall_testing, false,
+"Dump stats when moaning");
+extern BOOL_VAR_H (textord_test_mode, false, "Do current test");
+extern INT_VAR_H (textord_dotmatrix_gap, 3,
+"Max pixel gap for broken pixed pitch");
+extern INT_VAR_H (textord_debug_block, 0, "Block to do debug on");
+extern INT_VAR_H (textord_pitch_range, 2, "Max range test on pitch");
+extern double_VAR_H (textord_wordstats_smooth_factor, 0.05,
+"Smoothing gap stats");
+extern double_VAR_H (textord_width_smooth_factor, 0.10,
+"Smoothing width stats");
+extern double_VAR_H (textord_words_width_ile, 0.4,
+"Ile of blob widths for space est");
+extern double_VAR_H (textord_words_maxspace, 4.0, "Multiple of xheight");
+extern double_VAR_H (textord_words_default_maxspace, 3.5,
+"Max believable third space");
+extern double_VAR_H (textord_words_default_minspace, 0.6,
+"Fraction of xheight");
+extern double_VAR_H (textord_words_min_minspace, 0.3, "Fraction of xheight");
+extern double_VAR_H (textord_words_default_nonspace, 0.2,
+"Fraction of xheight");
+extern double_VAR_H(textord_words_initial_lower, 0.25,
+ "Max initial cluster size");
+extern double_VAR_H (textord_words_initial_upper, 0.15,
+"Min initial cluster spacing");
+extern double_VAR_H (textord_words_minlarge, 0.75,
+"Fraction of valid gaps needed");
+// The next three defaults match the definitions in tovars.cpp
+// (0.040, 0.016 and 0.090).
+extern double_VAR_H (textord_words_pitchsd_threshold, 0.040,
+"Pitch sync threshold");
+extern double_VAR_H (textord_words_def_fixed, 0.016,
+"Threshold for definite fixed");
+extern double_VAR_H (textord_words_def_prop, 0.090,
+"Threshold for definite prop");
+extern INT_VAR_H (textord_words_veto_power, 5,
+"Rows required to outvote a veto");
+extern double_VAR_H (textord_pitch_rowsimilarity, 0.08,
+"Fraction of xheight for sameness");
+extern BOOL_VAR_H (textord_pitch_scalebigwords, false,
+"Scale scores on big words");
+extern double_VAR_H(words_initial_lower, 0.5, "Max initial cluster size");
+extern double_VAR_H (words_initial_upper, 0.15,
+"Min initial cluster spacing");
+extern double_VAR_H (words_default_prop_nonspace, 0.25,
+"Fraction of xheight");
+extern double_VAR_H (words_default_fixed_space, 0.75, "Fraction of xheight");
+extern double_VAR_H (words_default_fixed_limit, 0.6, "Allowed size variance");
+extern double_VAR_H (textord_words_definite_spread, 0.30,
+"Non-fuzzy spacing region");
+extern double_VAR_H (textord_spacesize_ratiofp, 2.8,
+"Min ratio space/nonspace");
+extern double_VAR_H (textord_spacesize_ratioprop, 2.0,
+"Min ratio space/nonspace");
+extern double_VAR_H (textord_fpiqr_ratio, 1.5, "Pitch IQR/Gap IQR threshold");
+extern double_VAR_H (textord_max_pitch_iqr, 0.20,
+"Xh fraction noise in pitch");
+extern double_VAR_H (textord_fp_min_width, 0.5, "Min width of decent blobs");
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/underlin.cpp b/tesseract/src/textord/underlin.cpp
new file mode 100644
index 00000000..6a732f27
--- /dev/null
+++ b/tesseract/src/textord/underlin.cpp
@@ -0,0 +1,278 @@
+/**********************************************************************
+ * File: underlin.cpp (Formerly undrline.c)
+ * Description: Code to chop blobs apart from underlines.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1994, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#include "underlin.h"
+
+namespace tesseract {
+
+double_VAR (textord_underline_offset, 0.1, "Fraction of x to ignore");  // multiplied by row->xheight in restore_underlined_blobs to give the baseline offset
+BOOL_VAR (textord_restore_underlines, true, "Chop underlines & put back");  // not read in this file; presumably checked by the caller - confirm
+
+/**********************************************************************
+ * restore_underlined_blobs
+ *
+ * Find underlined blobs and put them back in the row.
+ *
+ * Every blob on block->underlines is extracted and matched to its most
+ * overlapping row. find_underlined_blobs then reports the x-ranges of
+ * the blob that contain text. Each range wider than
+ * textord_fp_chop_error + 1 is cut out with split_to_blob: the piece to
+ * the left of the range is kept as a residual underline and the range
+ * itself is inserted into the row as a new blob. Whatever is left to the
+ * right of the last range also becomes a residual underline. Finally
+ * the residual underlines are moved back onto block->underlines.
+ *
+ * A blob with no qualifying range is deleted together with its C_BLOB.
+ * The function returns early if the block has no rows or if no row can
+ * be found for a blob.
+ **********************************************************************/
+
+void restore_underlined_blobs(                 //get chop points
+                              TO_BLOCK *block  //block to do
+                             ) {
+  int16_t chop_coord;            //chop boundary
+  TBOX blob_box;                 //of underline
+  BLOBNBOX *u_line;              //underline bit
+  TO_ROW *row;                   //best row for blob
+  ICOORDELT_LIST chop_cells;     //blobs to cut out
+                                 //real underlines
+  BLOBNBOX_LIST residual_underlines;
+  C_OUTLINE_LIST left_coutlines;
+  C_OUTLINE_LIST right_coutlines;
+  ICOORDELT_IT cell_it = &chop_cells;
+                                 //under lines
+  BLOBNBOX_IT under_it = &block->underlines;
+  BLOBNBOX_IT ru_it = &residual_underlines;
+
+  if (block->get_rows()->empty())
+    return;  // Don't crash if there are no rows.
+  for (under_it.mark_cycle_pt (); !under_it.cycled_list ();
+       under_it.forward ()) {
+    u_line = under_it.extract ();
+    blob_box = u_line->bounding_box ();
+    row = most_overlapping_row (block->get_rows (), u_line);
+    if (row == nullptr) {
+      // The blob has already been extracted from block->underlines, so
+      // nothing else owns it any more: free it here instead of leaking it.
+      delete u_line->cblob();
+      delete u_line;
+      return;  // Don't crash if there is no row.
+    }
+    find_underlined_blobs (u_line, &row->baseline, row->xheight,
+                           row->xheight * textord_underline_offset,
+                           &chop_cells);
+    cell_it.set_to_list (&chop_cells);
+    for (cell_it.mark_cycle_pt (); !cell_it.cycled_list ();
+         cell_it.forward ()) {
+      // Each cell holds the start (x) and end (y) of one text range.
+      chop_coord = cell_it.data ()->x ();
+      if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) {
+        // Cut at the start of the range: the left part is pure underline.
+        split_to_blob (u_line, chop_coord,
+                       textord_fp_chop_error + 0.5,
+                       &left_coutlines,
+                       &right_coutlines);
+        if (!left_coutlines.empty()) {
+          ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
+        }
+        // Cut at the end of the range: the left part now is the text blob.
+        chop_coord = cell_it.data ()->y ();
+        split_to_blob(nullptr, chop_coord, textord_fp_chop_error + 0.5,
+                      &left_coutlines, &right_coutlines);
+        if (!left_coutlines.empty()) {
+          row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines)));
+        }
+        u_line = nullptr;        //no more blobs to add
+      }
+      delete cell_it.extract();
+    }
+    if (!right_coutlines.empty ()) {
+      // Flush what remains to the right of the last range as underline.
+      split_to_blob(nullptr, blob_box.right(), textord_fp_chop_error + 0.5,
+                    &left_coutlines, &right_coutlines);
+      if (!left_coutlines.empty())
+        ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
+    }
+    if (u_line != nullptr) {
+      // The blob was never handed to split_to_blob, so it is still ours.
+      delete u_line->cblob();
+      delete u_line;
+    }
+  }
+  if (!ru_it.empty()) {
+    ru_it.move_to_first();
+    for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) {
+      under_it.add_after_then_move(ru_it.extract());
+    }
+  }
+}
+
+
+/**********************************************************************
+ * most_overlapping_row
+ *
+ * Return the row which most overlaps the blob.
+ *
+ * All comparisons are made at x, the horizontal centre of the blob's
+ * bounding box, where each row's baseline spline is evaluated.
+ *
+ * First loop: while baseline + descdrop of the current row is above the
+ * top of the blob, that row is recorded as the best so far with the
+ * score "top - baseline + descdrop" exactly as written in the code.
+ * Second loop: while baseline + xheight + ascrise of the current row is
+ * at or above the bottom of the blob, the vertical overlap between the
+ * blob and the row is computed and the row with the largest overlap is
+ * kept.
+ * Finally, if the best score is still negative, the row the iterator
+ * stopped on is returned instead when its baseline + xheight + ascrise
+ * minus the blob bottom exceeds the best score.
+ *
+ * Returns nullptr if the row list is empty.
+ * NOTE(review): the two loops look as if they assume rows are ordered
+ * from the top of the page downwards - confirm against the callers.
+ **********************************************************************/
+
+TO_ROW *most_overlapping_row( //find best row
+ TO_ROW_LIST *rows, //list of rows
+ BLOBNBOX *blob //blob to place
+ ) {
+  int16_t x = (blob->bounding_box ().left ()
+    + blob->bounding_box ().right ()) / 2; //centre x of the blob
+  TO_ROW_IT row_it = rows; //row iterator
+  TO_ROW *row; //current row
+  TO_ROW *best_row; //output row
+  float overlap; //of blob & row
+  float bestover; //best overlap
+
+  best_row = nullptr;
+  bestover = static_cast<float>(-INT32_MAX); //lower than any real score
+  if (row_it.empty ())
+    return nullptr;
+  row = row_it.data ();
+  row_it.mark_cycle_pt ();
+  while (row->baseline.y (x) + row->descdrop > blob->bounding_box ().top ()
+  && !row_it.cycled_list ()) {
+    best_row = row;
+    bestover =
+      blob->bounding_box ().top () - row->baseline.y (x) + row->descdrop;
+    row_it.forward ();
+    row = row_it.data ();
+  }
+  while (row->baseline.y (x) + row->xheight + row->ascrise
+  >= blob->bounding_box ().bottom () && !row_it.cycled_list ()) {
+    overlap = row->baseline.y (x) + row->xheight + row->ascrise; //top of row
+    if (blob->bounding_box ().top () < overlap)
+      overlap = blob->bounding_box ().top (); //lower of the two tops
+    if (blob->bounding_box ().bottom () >
+      row->baseline.y (x) + row->descdrop)
+      overlap -= blob->bounding_box ().bottom (); //minus higher of the two bottoms
+    else
+      overlap -= row->baseline.y (x) + row->descdrop;
+    if (overlap > bestover) {
+      bestover = overlap;
+      best_row = row;
+    }
+    row_it.forward ();
+    row = row_it.data ();
+  }
+  if (bestover < 0
+    && row->baseline.y (x) + row->xheight + row->ascrise
+    - blob->bounding_box ().bottom () > bestover)
+    best_row = row; //fall back to the row the iterator stopped on
+  return best_row;
+}
+
+
+/**********************************************************************
+ * find_underlined_blobs
+ *
+ * Locate the x-ranges of an underline blob that also carry text.
+ *
+ * The outlines of the blob are projected onto the x axis into three
+ * histograms (below the baseline, between baseline and xheight, above
+ * the xheight) by vertical_cunderline_projection. Every maximal run of
+ * columns with a positive count in the middle histogram is appended to
+ * chop_cells as an ICOORDELT whose x is the first column of the run and
+ * whose y is the first column after it.
+ **********************************************************************/
+
+void find_underlined_blobs(                            //get chop points
+                           BLOBNBOX *u_line,           //underlined unit
+                           QSPLINE *baseline,          //actual baseline
+                           float xheight,              //height of line
+                           float baseline_offset,      //amount to shrink it
+                           ICOORDELT_LIST *chop_cells  //places to chop
+                          ) {
+  TBOX underline_box = u_line->bounding_box ();
+  ICOORDELT_IT chop_it = chop_cells;
+  // One histogram bucket per pixel column spanned by the bounding box.
+  STATS above_xheight (underline_box.left (), underline_box.right () + 1);
+  STATS within_xheight (underline_box.left (), underline_box.right () + 1);
+  STATS below_baseline (underline_box.left (), underline_box.right () + 1);
+
+  ASSERT_HOST (u_line->cblob () != nullptr);
+
+  // Accumulate the projection of every top-level outline of the blob.
+  C_OUTLINE_IT outline_it;
+  outline_it.set_to_list (u_line->cblob ()->out_list ());
+  for (outline_it.mark_cycle_pt (); !outline_it.cycled_list ();
+       outline_it.forward ()) {
+    vertical_cunderline_projection (outline_it.data (),
+                                    baseline, xheight, baseline_offset,
+                                    &below_baseline, &within_xheight,
+                                    &above_xheight);
+  }
+
+  // Walk the columns, emitting one cell per run of occupied columns.
+  int16_t run_start = underline_box.left ();
+  while (run_start < underline_box.right ()) {
+    if (within_xheight.pile_count (run_start) > 0) {
+      int16_t run_end = run_start + 1;
+      while (run_end < underline_box.right ()
+             && within_xheight.pile_count (run_end) > 0) {
+        run_end++;
+      }
+      chop_it.add_after_then_move (new ICOORDELT (ICOORD (run_start, run_end)));
+      run_start = run_end;       //resume after the run
+    }
+    run_start++;
+  }
+}
+
+
+/**********************************************************************
+ * vertical_cunderline_projection
+ *
+ * Add the vertical projection of an outline, and recursively of all of
+ * its child outlines, to three STATS histograms indexed by x.
+ *
+ * For every horizontal step of the outline the column it covers is
+ * split at two heights: lower_y (baseline + baseline_offset) and
+ * upper_y (lower_y level plus xheight), both rounded to the nearest
+ * integer. The part of the column below lower_y goes to lower_proj, the
+ * part between the two limits to middle_proj and the part above upper_y
+ * to upper_proj. Steps in the +x direction contribute with a negative
+ * sign and steps in the -x direction with a positive sign. Vertical
+ * steps contribute nothing.
+ **********************************************************************/
+
+void vertical_cunderline_projection(                        //project outlines
+                                    C_OUTLINE *outline,     //outline to project
+                                    QSPLINE *baseline,      //actual baseline
+                                    float xheight,          //height of line
+                                    float baseline_offset,  //amount to shrink it
+                                    STATS *lower_proj,      //below baseline
+                                    STATS *middle_proj,     //centre region
+                                    STATS *upper_proj       //top region
+                                   ) {
+  ICOORD pos = outline->start_pos ();
+  const int32_t length = outline->pathlength ();
+
+  for (int16_t stepindex = 0; stepindex < length; stepindex++) {
+    ICOORD step = outline->step (stepindex);
+    if (step.x () != 0) {
+      // A rightward step covers the column at pos.x() and counts
+      // negatively; a leftward step covers the column at pos.x() - 1 and
+      // counts positively.
+      const bool rightward = step.x () > 0;
+      const int column = rightward ? pos.x () : pos.x () - 1;
+      const int sign = rightward ? -1 : 1;
+      const auto shifted_base = baseline->y (column) + baseline_offset;
+      const int16_t lower_y =
+        static_cast<int16_t>(floor (shifted_base + 0.5));
+      const int16_t upper_y =
+        static_cast<int16_t>(floor (shifted_base + xheight + 0.5));
+
+      if (pos.y () < lower_y) {
+        // Entirely below the shifted baseline.
+        lower_proj->add (column, sign * pos.y ());
+      } else {
+        lower_proj->add (column, sign * lower_y);
+        if (pos.y () < upper_y) {
+          middle_proj->add (column, sign * (pos.y () - lower_y));
+        } else {
+          middle_proj->add (column, sign * (upper_y - lower_y));
+          upper_proj->add (column, sign * (pos.y () - upper_y));
+        }
+      }
+    }
+    pos += step;
+  }
+
+  // Children are projected into the same histograms.
+  C_OUTLINE_IT child_it = outline->child ();
+  for (child_it.mark_cycle_pt (); !child_it.cycled_list ();
+       child_it.forward ()) {
+    vertical_cunderline_projection (child_it.data (),
+                                    baseline, xheight, baseline_offset,
+                                    lower_proj, middle_proj, upper_proj);
+  }
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/underlin.h b/tesseract/src/textord/underlin.h
new file mode 100644
index 00000000..09be1b40
--- /dev/null
+++ b/tesseract/src/textord/underlin.h
@@ -0,0 +1,56 @@
+/**********************************************************************
+ * File: underlin.h (Formerly undrline.h)
+ * Description: Code to chop blobs apart from underlines.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1994, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef UNDERLIN_H
+#define UNDERLIN_H
+
+#include "fpchop.h"
+
+namespace tesseract {
+
+// Parameters for underline removal. The defaults shown here mirror the
+// definitions in underlin.cpp.
+extern double_VAR_H (textord_underline_offset, 0.1,
+"Fraction of x to ignore");
+extern BOOL_VAR_H (textord_restore_underlines, true,
+"Chop underlines & put back");
+// Cuts the text out of every blob on block->underlines, inserts the text
+// pieces into their rows and leaves only the residual underlines on the list.
+void restore_underlined_blobs(                 //get chop points
+                              TO_BLOCK *block  //block to do
+                             );
+// Returns the row of the list that overlaps the blob most, or nullptr if
+// the list is empty.
+TO_ROW *most_overlapping_row(                    //find best row
+                             TO_ROW_LIST *rows,  //list of rows
+                             BLOBNBOX *blob      //blob to place
+                            );
+// Appends to chop_cells the start/end x coordinates of the parts of the
+// underline blob that contain text.
+void find_underlined_blobs(                            //get chop points
+                           BLOBNBOX *u_line,           //underlined unit
+                           QSPLINE *baseline,          //actual baseline
+                           float xheight,              //height of line
+                           float baseline_offset,      //amount to shrink it
+                           ICOORDELT_LIST *chop_cells  //places to chop
+                          );
+// Adds the vertical projection of an outline and its children to the
+// three histograms.
+void vertical_cunderline_projection(                        //project outlines
+                                    C_OUTLINE *outline,     //outline to project
+                                    QSPLINE *baseline,      //actual baseline
+                                    float xheight,          //height of line
+                                    float baseline_offset,  //amount to shrink it
+                                    STATS *lower_proj,      //below baseline
+                                    STATS *middle_proj,     //centre region
+                                    STATS *upper_proj       //top region
+                                   );
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/wordseg.cpp b/tesseract/src/textord/wordseg.cpp
new file mode 100644
index 00000000..d8b5516e
--- /dev/null
+++ b/tesseract/src/textord/wordseg.cpp
@@ -0,0 +1,625 @@
+/**********************************************************************
+ * File: wordseg.cpp (Formerly wspace.c)
+ * Description: Code to segment the blobs into words.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+ // Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H
+#include "config_auto.h"
+#endif
+
+#include "wordseg.h"
+
+#include "blobbox.h"
+#include "statistc.h"
+#include "drawtord.h"
+#include "makerow.h"
+#include "pitsync1.h"
+#include "tovars.h"
+#include "topitch.h"
+#include "cjkpitch.h"
+#include "textord.h"
+#include "fpchop.h"
+
+namespace tesseract {
+
+BOOL_VAR(textord_fp_chopping, true, "Do fixed pitch chopping");
+BOOL_VAR(textord_force_make_prop_words, false,
+ "Force proportional word segmentation on all rows"); // read in make_real_words: forces make_prop_words
+BOOL_VAR(textord_chopper_test, false,
+ "Chopper is being tested."); // read in make_real_words: selects make_blob_words
+
+#define BLOCK_STATS_CLUSTERS 10 // max number of gap clusters requested in row_words2; also sizes its arrays
+
+
+/**
+ * @name make_single_word
+ *
+ * Turn every TO_ROW of the list into a ROW holding exactly one word
+ * made of all the blobs of the row. No fixed pitch detection is done.
+ *
+ * The C_BLOBs are taken out of their BLOBNBOX wrappers, which are
+ * deleted. A blob flagged as joined to its predecessor has its outlines
+ * appended to the previous C_BLOB; with one_blob set this is done for
+ * every blob after the first, so the word ends up with a single blob.
+ * The word is flagged W_BOL and W_EOL, and W_DONT_CHOP is set to one_blob.
+ * The new rows are appended to real_rows.
+ */
+
+void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows) {
+  TO_ROW_IT src_row_it(rows);
+  ROW_IT dest_row_it(real_rows);
+  for (src_row_it.mark_cycle_pt(); !src_row_it.cycled_list();
+       src_row_it.forward()) {
+    TO_ROW* src_row = src_row_it.data();
+    // Collects the blobs that will make up the single word of this row.
+    C_BLOB_LIST word_blobs;
+    C_BLOB_IT word_blob_it(&word_blobs);
+    BLOBNBOX_IT src_blob_it(src_row->blob_list());
+    while (!src_blob_it.empty()) {
+      BLOBNBOX* wrapper = src_blob_it.extract();
+      const bool merge_with_previous =
+          wrapper->joined_to_prev() || (one_blob && !word_blob_it.empty());
+      C_BLOB* payload = wrapper->cblob();
+      if (payload != nullptr) {
+        if (merge_with_previous) {
+          // Move the outlines to the end of the previous blob's outline
+          // list, then discard the now empty blob.
+          C_OUTLINE_IT tail_it(word_blob_it.data()->out_list());
+          tail_it.move_to_last();
+          tail_it.add_list_after(payload->out_list());
+          delete payload;
+        } else {
+          word_blob_it.add_after_then_move(payload);
+        }
+      }
+      delete wrapper;
+      src_blob_it.forward();
+    }
+    // Build the output row and give it its only word.
+    ROW* dest_row = new ROW(src_row, static_cast<int16_t>(src_row->kern_size),
+                            static_cast<int16_t>(src_row->space_size));
+    WERD_IT dest_word_it(dest_row->word_list());
+    WERD* whole_line = new WERD(&word_blobs, 0, nullptr);
+    whole_line->set_flag(W_BOL, true);
+    whole_line->set_flag(W_EOL, true);
+    whole_line->set_flag(W_DONT_CHOP, one_blob);
+    dest_word_it.add_after_then_move(whole_line);
+    dest_row_it.add_after_then_move(dest_row);
+  }
+}
+
+/**
+ * make_words
+ *
+ * Arrange the blobs into words.
+ *
+ * Runs fixed pitch detection over port_blocks (the CJK variant when the
+ * Textord object asks for it), lets Textord compute the spacing and
+ * then converts every TO_BLOCK into real words with make_real_words.
+ * The blocks argument is not used here.
+ */
+void make_words(tesseract::Textord *textord,
+                ICOORD page_tr,                // top right
+                float gradient,                // page skew
+                BLOCK_LIST *blocks,            // block list
+                TO_BLOCK_LIST *port_blocks) {  // output list
+  const bool cjk_model = textord->use_cjk_fp_model();
+  if (!cjk_model) {
+    compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f),
+                        !bool(textord_test_landscape));
+  } else {
+    compute_fixed_pitch_cjk(page_tr, port_blocks);
+  }
+  textord->to_spacing(page_tr, port_blocks);
+
+  // The iterator is attached only now, after the pitch and spacing passes.
+  TO_BLOCK_IT port_it(port_blocks);
+  for (port_it.mark_cycle_pt(); !port_it.cycled_list(); port_it.forward()) {
+    make_real_words(textord, port_it.data(), FCOORD(1.0f, 0.0f));
+  }
+}
+
+
+/**
+ * @name set_row_spaces
+ *
+ * Fill in the spacing members of every row of the block that is not
+ * fixed pitch (fixed_pitch == 0) from the row's pr_space and pr_nonsp:
+ * min_space and max_nonspace are moved inwards from those two values by
+ * the fraction textord_words_definite_spread of their difference,
+ * space_threshold is their midpoint, and space_size / kern_size are
+ * copied from pr_space / pr_nonsp.
+ *
+ * With testing_on and textord_show_initial_words set, the assigned
+ * values are printed and (unless graphics are disabled) every row's
+ * word decisions are plotted. The rotation argument is not used.
+ */
+
+void set_row_spaces(                  //find space sizes
+                    TO_BLOCK* block,  //block to do
+                    FCOORD rotation,  //for drawing
+                    bool testing_on   //correct orientation
+) {
+  TO_ROW_IT row_it = block->get_rows ();
+
+  if (row_it.empty ())
+    return;                      //empty block
+  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+    TO_ROW *current = row_it.data ();
+    if (current->fixed_pitch == 0) {
+      // Width of the band at each end of the nonspace..space interval in
+      // which a gap is still classified with certainty.
+      const double definite_band =
+        (current->pr_space - current->pr_nonsp) * textord_words_definite_spread;
+      current->min_space =
+        static_cast<int32_t>(ceil (current->pr_space - definite_band));
+      current->max_nonspace =
+        static_cast<int32_t>(floor (current->pr_nonsp + definite_band));
+      if (testing_on && textord_show_initial_words) {
+        tprintf ("Assigning defaults %d non, %d space to row at %g\n",
+                 current->max_nonspace, current->min_space,
+                 current->intercept ());
+      }
+      current->space_threshold =
+        (current->max_nonspace + current->min_space) / 2;
+      current->space_size = current->pr_space;
+      current->kern_size = current->pr_nonsp;
+    }
+#ifndef GRAPHICS_DISABLED
+    if (textord_show_initial_words && testing_on) {
+      plot_word_decisions (to_win, static_cast<int16_t>(current->fixed_pitch),
+                           current);
+    }
+#endif
+  }
+}
+
+
+/**
+ * @name row_words
+ *
+ * Compute the max nonspace and min space for the row.
+ *
+ * The gaps between consecutive blobs (skipping blobs joined to the
+ * previous one and gaps of maxwidth or more) are collected in a
+ * histogram, smoothed and clustered into at most 3 clusters. From the
+ * cluster medians a "lower" (non-space) and an "upper" (space) gap size
+ * are chosen, and row->min_space, row->max_nonspace,
+ * row->space_threshold, row->space_size and row->kern_size are set
+ * from them.
+ *
+ * block: its xheight, min_space and max_nonspace are read.
+ * row: the row whose blobs are measured and whose fields are written.
+ * maxwidth: upper limit of the gap histogram; larger gaps are ignored.
+ * rotation: not used by this function.
+ * testing_on: with textord_show_initial_words set, enables debug output.
+ *
+ * Returns 0, with row->min_space and row->max_nonspace set to 0, when
+ * there is not enough evidence (no gaps, fewer than 2 clusters, or the
+ * space size below xheight * textord_words_min_minspace). Otherwise
+ * returns the total count of cluster_stats[2].
+ */
+
+int32_t row_words( //compute space size
+ TO_BLOCK* block, //block it came from
+ TO_ROW* row, //row to operate on
+ int32_t maxwidth, //max expected space size
+ FCOORD rotation, //for drawing
+ bool testing_on //for debug
+) {
+  bool testing_row; //contains testpt
+  bool prev_valid; //if decent size
+  int32_t prev_x; //end of prev blob
+  int32_t cluster_count; //no of clusters
+  int32_t gap_index; //which cluster
+  int32_t smooth_factor; //for smoothing stats
+  BLOBNBOX *blob; //current blob
+  float lower, upper; //clustering parameters
+  float gaps[3]; //gap cluster medians
+  ICOORD testpt;
+  TBOX blob_box; //bounding box
+  //iterator
+  BLOBNBOX_IT blob_it = row->blob_list ();
+  STATS gap_stats (0, maxwidth);
+  STATS cluster_stats[4]; //clusters
+
+  testpt = ICOORD (textord_test_x, textord_test_y);
+  smooth_factor =
+    static_cast<int32_t>(block->xheight * textord_wordstats_smooth_factor + 1.5);
+  // if (testing_on)
+  // tprintf("Row smooth factor=%d\n",smooth_factor);
+  prev_valid = false;
+  prev_x = -INT32_MAX;
+  testing_row = false;
+  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
+    blob = blob_it.data ();
+    blob_box = blob->bounding_box ();
+    if (blob_box.contains (testpt))
+      testing_row = true;
+    gap_stats.add (blob_box.width (), 1);
+  }
+  gap_stats.clear (); //the widths added above are discarded; only testing_row survives the first pass
+  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
+    blob = blob_it.data ();
+    if (!blob->joined_to_prev ()) {
+      blob_box = blob->bounding_box ();
+      if (prev_valid && blob_box.left () - prev_x < maxwidth) {
+        gap_stats.add (blob_box.left () - prev_x, 1);
+      }
+      prev_valid = true;
+      prev_x = blob_box.right ();
+    }
+  }
+  if (gap_stats.get_total () == 0) {
+    row->min_space = 0; //no evidence
+    row->max_nonspace = 0;
+    return 0;
+  }
+  gap_stats.smooth (smooth_factor);
+  lower = row->xheight * textord_words_initial_lower;
+  upper = row->xheight * textord_words_initial_upper;
+  cluster_count = gap_stats.cluster (lower, upper,
+    textord_spacesize_ratioprop, 3,
+    cluster_stats);
+  while (cluster_count < 2 && ceil (lower) < floor (upper)) {
+    //shrink gap
+    upper = (upper * 3 + lower) / 4;
+    lower = (lower * 3 + upper) / 4; //uses the already updated upper
+    cluster_count = gap_stats.cluster (lower, upper,
+      textord_spacesize_ratioprop, 3,
+      cluster_stats);
+  }
+  if (cluster_count < 2) {
+    row->min_space = 0; //no evidence
+    row->max_nonspace = 0;
+    return 0;
+  }
+  for (gap_index = 0; gap_index < cluster_count; gap_index++)
+    gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
+  //get medians
+  if (cluster_count > 2) {
+    if (testing_on && textord_show_initial_words) {
+      tprintf ("Row at %g has 3 sizes of gap:%g,%g,%g\n",
+        row->intercept (),
+        cluster_stats[1].ile (0.5),
+        cluster_stats[2].ile (0.5), cluster_stats[3].ile (0.5));
+    }
+    lower = gaps[0];
+    if (gaps[1] > lower) {
+      upper = gaps[1]; //prefer most frequent
+      if (upper < block->xheight * textord_words_min_minspace
+      && gaps[2] > gaps[1]) {
+        upper = gaps[2];
+      }
+    }
+    else if (gaps[2] > lower
+      && gaps[2] >= block->xheight * textord_words_min_minspace)
+      upper = gaps[2];
+    else if (lower >= block->xheight * textord_words_min_minspace) {
+      upper = lower; //not nice
+      lower = gaps[1];
+      if (testing_on && textord_show_initial_words) {
+        tprintf ("Had to switch most common from lower to upper!!\n");
+        gap_stats.print();
+      }
+    }
+    else {
+      row->min_space = 0; //no evidence
+      row->max_nonspace = 0;
+      return 0;
+    }
+  }
+  else {
+    if (gaps[1] < gaps[0]) {
+      if (testing_on && textord_show_initial_words) {
+        tprintf ("Had to switch most common from lower to upper!!\n");
+        gap_stats.print();
+      }
+      lower = gaps[1];
+      upper = gaps[0];
+    }
+    else {
+      upper = gaps[1];
+      lower = gaps[0];
+    }
+  }
+  if (upper < block->xheight * textord_words_min_minspace) {
+    row->min_space = 0; //no evidence
+    row->max_nonspace = 0;
+    return 0;
+  }
+  if (upper * 3 < block->min_space * 2 + block->max_nonspace
+  || lower * 3 > block->min_space * 2 + block->max_nonspace) {
+    if (testing_on && textord_show_initial_words) { //report only; the row values are still used
+      tprintf ("Disagreement between block and row at %g!!\n",
+        row->intercept ());
+      tprintf ("Lower=%g, upper=%g, Stats:\n", lower, upper);
+      gap_stats.print();
+    }
+  }
+  row->min_space =
+    static_cast<int32_t>(ceil (upper - (upper - lower) * textord_words_definite_spread));
+  row->max_nonspace =
+    static_cast<int32_t>(floor (lower + (upper - lower) * textord_words_definite_spread));
+  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
+  row->space_size = upper;
+  row->kern_size = lower;
+  if (testing_on && textord_show_initial_words) {
+    if (testing_row) {
+      tprintf ("GAP STATS\n");
+      gap_stats.print();
+      tprintf ("SPACE stats\n");
+      cluster_stats[2].print_summary();
+      tprintf ("NONSPACE stats\n");
+      cluster_stats[1].print_summary();
+    }
+    tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
+      row->intercept (), row->min_space, upper,
+      row->max_nonspace, lower);
+  }
+  return cluster_stats[2].get_total ();
+}
+
+
+/**
+ * @name row_words2
+ *
+ * Compute the max nonspace and min space for the row.
+ *
+ * Gaps are first collected only between neighbouring blobs that are
+ * both at least block->pr_space wide. If fewer than
+ * textord_words_minlarge of the candidate blobs yield such a gap, the
+ * histogram is rebuilt from the gaps between all blobs. In both passes
+ * blobs joined to the previous one are skipped and gaps of maxwidth or
+ * more are ignored.
+ *
+ * The smoothed histogram is clustered into up to BLOCK_STATS_CLUSTERS
+ * clusters. "lower" becomes the median of the first cluster that is not
+ * above block->max_nonspace (default block->pr_nonsp) and "upper" the
+ * median of the first cluster above it (default block->pr_space).
+ * row->min_space, row->max_nonspace, row->space_threshold,
+ * row->space_size and row->kern_size are then set from lower and upper.
+ *
+ * block: its xheight, pr_space, pr_nonsp and max_nonspace are read.
+ * row: the row whose blobs are measured and whose fields are written.
+ * maxwidth: upper limit of the gap histogram; larger gaps are ignored.
+ * rotation: not used by this function.
+ * testing_on: enables debug output.
+ *
+ * Returns 1 when the row fields were set, or 0 (with row->min_space and
+ * row->max_nonspace set to 0) when there are no gaps or no clusters.
+ */
+
+int32_t row_words2( //compute space size
+ TO_BLOCK* block, //block it came from
+ TO_ROW* row, //row to operate on
+ int32_t maxwidth, //max expected space size
+ FCOORD rotation, //for drawing
+ bool testing_on //for debug
+) {
+  bool prev_valid; //if decent size
+  bool this_valid; //current blob big enough
+  int32_t prev_x; //end of prev blob
+  int32_t min_width; //min interesting width
+  int32_t valid_count; //good gaps
+  int32_t total_count; //total gaps
+  int32_t cluster_count; //no of clusters
+  int32_t prev_count; //previous cluster_count
+  int32_t gap_index; //which cluster
+  int32_t smooth_factor; //for smoothing stats
+  BLOBNBOX *blob; //current blob
+  float lower, upper; //clustering parameters
+  ICOORD testpt;
+  TBOX blob_box; //bounding box
+  //iterator
+  BLOBNBOX_IT blob_it = row->blob_list ();
+  STATS gap_stats (0, maxwidth);
+  //gap sizes
+  float gaps[BLOCK_STATS_CLUSTERS];
+  STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1];
+  //clusters
+
+  testpt = ICOORD (textord_test_x, textord_test_y); //assigned but not read again in this function
+  smooth_factor =
+    static_cast<int32_t>(block->xheight * textord_wordstats_smooth_factor + 1.5);
+  // if (testing_on)
+  // tprintf("Row smooth factor=%d\n",smooth_factor);
+  prev_valid = false;
+  prev_x = -INT16_MAX;
+  const bool testing_row = false; //constant, so the detailed stats dump below never runs
+  //min blob size
+  min_width = static_cast<int32_t>(block->pr_space);
+  total_count = 0;
+  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
+    blob = blob_it.data ();
+    if (!blob->joined_to_prev ()) {
+      blob_box = blob->bounding_box ();
+      this_valid = blob_box.width () >= min_width;
+      if (this_valid && prev_valid
+      && blob_box.left () - prev_x < maxwidth) {
+        gap_stats.add (blob_box.left () - prev_x, 1);
+      }
+      total_count++; //count possibles
+      prev_x = blob_box.right ();
+      prev_valid = this_valid;
+    }
+  }
+  valid_count = gap_stats.get_total ();
+  if (valid_count < total_count * textord_words_minlarge) {
+    gap_stats.clear (); //too few gaps between wide blobs: start again using every blob
+    prev_x = -INT16_MAX;
+    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
+    blob_it.forward ()) {
+      blob = blob_it.data ();
+      if (!blob->joined_to_prev ()) {
+        blob_box = blob->bounding_box ();
+        if (blob_box.left () - prev_x < maxwidth) {
+          gap_stats.add (blob_box.left () - prev_x, 1);
+        }
+        prev_x = blob_box.right ();
+      }
+    }
+  }
+  if (gap_stats.get_total () == 0) {
+    row->min_space = 0; //no evidence
+    row->max_nonspace = 0;
+    return 0;
+  }
+
+  cluster_count = 0;
+  lower = block->xheight * words_initial_lower;
+  upper = block->xheight * words_initial_upper;
+  gap_stats.smooth (smooth_factor);
+  do {
+    prev_count = cluster_count;
+    cluster_count = gap_stats.cluster (lower, upper,
+      textord_spacesize_ratioprop,
+      BLOCK_STATS_CLUSTERS, cluster_stats);
+  }
+  while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS); //repeat with the same arguments until the count stops growing
+  if (cluster_count < 1) {
+    row->min_space = 0;
+    row->max_nonspace = 0;
+    return 0;
+  }
+  for (gap_index = 0; gap_index < cluster_count; gap_index++)
+    gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
+  //get medians
+  if (testing_on) {
+    tprintf ("cluster_count=%d:", cluster_count);
+    for (gap_index = 0; gap_index < cluster_count; gap_index++)
+      tprintf (" %g(%d)", gaps[gap_index],
+        cluster_stats[gap_index + 1].get_total ());
+    tprintf ("\n");
+  }
+
+  //Try to find proportional non-space and space for row.
+  for (gap_index = 0; gap_index < cluster_count
+    && gaps[gap_index] > block->max_nonspace; gap_index++); //empty body: skip clusters above the block threshold
+  if (gap_index < cluster_count)
+    lower = gaps[gap_index]; //most frequent below
+  else {
+    if (testing_on)
+      tprintf ("No cluster below block threshold!, using default=%g\n",
+        block->pr_nonsp);
+    lower = block->pr_nonsp;
+  }
+  for (gap_index = 0; gap_index < cluster_count
+    && gaps[gap_index] <= block->max_nonspace; gap_index++); //empty body: skip clusters at or below the block threshold
+  if (gap_index < cluster_count)
+    upper = gaps[gap_index]; //most frequent above
+  else {
+    if (testing_on)
+      tprintf ("No cluster above block threshold!, using default=%g\n",
+        block->pr_space);
+    upper = block->pr_space;
+  }
+  row->min_space =
+    static_cast<int32_t>(ceil (upper - (upper - lower) * textord_words_definite_spread));
+  row->max_nonspace =
+    static_cast<int32_t>(floor (lower + (upper - lower) * textord_words_definite_spread));
+  row->space_threshold = (row->max_nonspace + row->min_space) / 2;
+  row->space_size = upper;
+  row->kern_size = lower;
+  if (testing_on) {
+    if (testing_row) {
+      tprintf ("GAP STATS\n");
+      gap_stats.print();
+      tprintf ("SPACE stats\n");
+      cluster_stats[2].print_summary();
+      tprintf ("NONSPACE stats\n");
+      cluster_stats[1].print_summary();
+    }
+    tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n",
+      row->intercept (), row->min_space, upper,
+      row->max_nonspace, lower);
+  }
+  return 1;
+}
+
+
+/**
+ * @name make_real_words
+ *
+ * Convert a TO_BLOCK to a BLOCK.
+ *
+ * Every TO_ROW of the block is turned into a ROW, which is appended to
+ * the row list of block->block:
+ *  - a row with no blobs but with repeated-character words is built by
+ *    make_rep_words;
+ *  - a row with blobs is built by Textord::make_blob_words when
+ *    textord_chopper_test is set, by Textord::make_prop_words when
+ *    proportional segmentation is forced, the block is a non-text
+ *    region or the row was judged proportional, and by
+ *    fixed_pitch_words when the row was judged fixed pitch;
+ *  - a row with neither blobs nor repeated words produces no ROW.
+ * Afterwards the pitch statistics of the BLOCK are set from the
+ * TO_BLOCK and checked.
+ *
+ * The rotation argument is passed on to the word makers.
+ */
+
+void make_real_words(
+                     tesseract::Textord *textord,
+                     TO_BLOCK *block,   //block to do
+                     FCOORD rotation    //for drawing
+                    ) {
+  TO_ROW_IT row_it = block->get_rows ();
+  ROW_IT real_row_it = block->block->row_list ();
+
+  if (row_it.empty ())
+    return;                      //empty block
+  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
+    TO_ROW *row = row_it.data ();
+    // Must start as nullptr for every row: a row that has neither blobs
+    // nor repeated words takes neither branch below, and would otherwise
+    // insert the ROW of the previous iteration into the list a second time.
+    ROW *real_row = nullptr;
+    if (row->blob_list ()->empty () && !row->rep_words.empty ()) {
+      real_row = make_rep_words (row, block);
+    } else if (!row->blob_list()->empty()) {
+      // In a fixed pitch document, some lines may be detected as fixed pitch
+      // while others don't, and will go through different path.
+      // For non-space delimited language like CJK, fixed pitch chop always
+      // leave the entire line as one word. We can force consistent chopping
+      // with force_make_prop_words flag.
+      POLY_BLOCK* pb = block->block->pdblk.poly_block();
+      if (textord_chopper_test) {
+        real_row = textord->make_blob_words (row, rotation);
+      } else if (textord_force_make_prop_words ||
+                 (pb != nullptr && !pb->IsText()) ||
+                 row->pitch_decision == PITCH_DEF_PROP ||
+                 row->pitch_decision == PITCH_CORR_PROP) {
+        real_row = textord->make_prop_words (row, rotation);
+      } else if (row->pitch_decision == PITCH_DEF_FIXED ||
+                 row->pitch_decision == PITCH_CORR_FIXED) {
+        real_row = fixed_pitch_words (row, rotation);
+      } else {
+        // Every row with blobs must have a pitch decision by now.
+        ASSERT_HOST(false);
+      }
+    }
+    if (real_row != nullptr) {
+                                 //put row in block
+      real_row_it.add_after_then_move (real_row);
+    }
+  }
+  block->block->set_stats (block->fixed_pitch == 0,
+                           static_cast<int16_t>(block->kern_size),
+                           static_cast<int16_t>(block->space_size),
+                           static_cast<int16_t>(block->fixed_pitch));
+  block->block->check_pitch ();
+}
+
+
+/**
+ * @name make_rep_words
+ *
+ * Fabricate a real row from only the repeated blob words.
+ * Get the xheight from the block as it may be more meaningful.
+ *
+ * The row's xheight is overwritten with the block's xheight before the
+ * ROW is constructed with the block's kern and space sizes. All words
+ * of row->rep_words are then moved into the new ROW and its bounding
+ * box is recalculated.
+ *
+ * Returns the new ROW, or nullptr if the row has no repeated words.
+ */
+
+ROW *make_rep_words(                 //make a row
+                    TO_ROW *row,     //row to convert
+                    TO_BLOCK *block  //block it lives in
+                   ) {
+  WERD_IT word_it = &row->rep_words;
+
+  if (word_it.empty ())
+    return nullptr;
+  // The union of the word bounding boxes used to be accumulated here but
+  // was never read; recalc_bounding_box() below is what sizes the row.
+  row->xheight = block->xheight;
+  ROW *real_row = new ROW(row,
+                          static_cast<int16_t>(block->kern_size),
+                          static_cast<int16_t>(block->space_size));
+  word_it.set_to_list (real_row->word_list ());
+                                 //put words in row
+  word_it.add_list_after (&row->rep_words);
+  real_row->recalc_bounding_box ();
+  return real_row;
+}
+
+
+/**
+ * @name make_real_word
+ *
+ * Build a WERD out of the next blobcount entries of a BLOBNBOX list.
+ *
+ * Each entry is extracted from the list and deleted after its C_BLOB
+ * has been taken over: a blob flagged as joined to its predecessor has
+ * its outlines appended to the previous C_BLOB, any other blob is added
+ * to the word as a new C_BLOB. The iterator is left on the entry that
+ * follows the last one used.
+ *
+ * The word gets at least one leading blank. W_BOL is set when bol is
+ * true and W_EOL when the iterator has wrapped round to the first entry.
+ */
+
+WERD *make_real_word(BLOBNBOX_IT *box_it,  //iterator
+                     int32_t blobcount,    //no of blobs to use
+                     bool bol,             //start of line
+                     uint8_t blanks        //no of blanks
+                    ) {
+  C_BLOB_LIST word_blobs;
+  C_BLOB_IT word_blob_it = &word_blobs;
+
+  int32_t remaining = blobcount;
+  while (remaining > 0) {
+    BLOBNBOX *wrapper = box_it->extract();
+    C_BLOB *payload = wrapper->cblob();
+    if (wrapper->joined_to_prev()) {
+      if (payload != nullptr) {
+        // Append the outlines to the blob added before this one, then
+        // discard the emptied blob.
+        C_OUTLINE_IT tail_it(word_blob_it.data()->out_list());
+        tail_it.move_to_last();
+        tail_it.add_list_after(payload->out_list());
+        delete payload;
+      }
+    } else if (payload != nullptr) {
+      word_blob_it.add_after_then_move(payload);
+    }
+    delete wrapper;
+    box_it->forward();           // next one
+    remaining--;
+  }
+
+  const uint8_t leading_blanks = blanks < 1 ? 1 : blanks;
+  WERD *result = new WERD(&word_blobs, leading_blanks, nullptr);
+
+  if (bol) {
+    result->set_flag(W_BOL, true);
+  }
+  if (box_it->at_first()) {
+    result->set_flag(W_EOL, true);  // at end of line
+  }
+  return result;
+}
+
+} // namespace tesseract
diff --git a/tesseract/src/textord/wordseg.h b/tesseract/src/textord/wordseg.h
new file mode 100644
index 00000000..88e9cfdc
--- /dev/null
+++ b/tesseract/src/textord/wordseg.h
@@ -0,0 +1,78 @@
+/**********************************************************************
+ * File: wordseg.h (Formerly wspace.h)
+ * Description: Code to segment the blobs into words.
+ * Author: Ray Smith
+ *
+ * (C) Copyright 1992, Hewlett-Packard Ltd.
+ ** Licensed under the Apache License, Version 2.0 (the "License");
+ ** you may not use this file except in compliance with the License.
+ ** You may obtain a copy of the License at
+ ** http://www.apache.org/licenses/LICENSE-2.0
+ ** Unless required by applicable law or agreed to in writing, software
+ ** distributed under the License is distributed on an "AS IS" BASIS,
+ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ ** See the License for the specific language governing permissions and
+ ** limitations under the License.
+ *
+ **********************************************************************/
+
+#ifndef WORDSEG_H
+#define WORDSEG_H
+
+#include "params.h"
+#include "blobbox.h"
+#include "textord.h"
+
+namespace tesseract {
+class Tesseract;
+
+// Parameters used by the word segmentation code.
+extern BOOL_VAR_H(textord_fp_chopping, true, "Do fixed pitch chopping");
+extern BOOL_VAR_H(textord_force_make_prop_words, false,
+                  "Force proportional word segmentation on all rows");
+extern BOOL_VAR_H(textord_chopper_test, false, "Chopper is being tested.");
+
+void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows);
+
+void make_words(tesseract::Textord *textord,
+                ICOORD page_tr,               // top right
+                float gradient,               // page skew
+                BLOCK_LIST *blocks,           // block list
+                TO_BLOCK_LIST *port_blocks);  // output list
+
+// Find space sizes.
+void set_row_spaces(TO_BLOCK *block,   // block to do
+                    FCOORD rotation,   // for drawing
+                    bool testing_on);  // correct orientation
+
+// Compute space size.
+int32_t row_words(TO_BLOCK *block,    // block it came from
+                  TO_ROW *row,        // row to operate on
+                  int32_t maxwidth,   // max expected space size
+                  FCOORD rotation,    // for drawing
+                  bool testing_on);   // for debug
+
+// Compute space size.
+int32_t row_words2(TO_BLOCK *block,   // block it came from
+                   TO_ROW *row,       // row to operate on
+                   int32_t maxwidth,  // max expected space size
+                   FCOORD rotation,   // for drawing
+                   bool testing_on);  // for debug
+
+void make_real_words(tesseract::Textord *textord,
+                     TO_BLOCK *block,   // block to do
+                     FCOORD rotation);  // for drawing
+
+// Make a ROW from the rep_words of a TO_ROW. Returns nullptr when
+// row->rep_words is empty.
+ROW *make_rep_words(TO_ROW *row,       // row to convert
+                    TO_BLOCK *block);  // block it lives in
+
+// Make a WERD from blobcount adjacent entries in a list of BLOBNBOXs.
+WERD *make_real_word(BLOBNBOX_IT *box_it,  // iterator
+                     int32_t blobcount,    // no of blobs to use
+                     bool bol,             // start of line
+                     uint8_t blanks);      // no of blanks
+
+} // namespace tesseract
+
+#endif
diff --git a/tesseract/src/textord/workingpartset.cpp b/tesseract/src/textord/workingpartset.cpp
new file mode 100644
index 00000000..97ce70ae
--- /dev/null
+++ b/tesseract/src/textord/workingpartset.cpp
@@ -0,0 +1,144 @@
+///////////////////////////////////////////////////////////////////////
+// File: workingpartset.cpp
+// Description: Class to hold a working set of partitions of the page
+// during construction of text/image regions.
+// Author: Ray Smith
+// Created: Tue Oct 28 17:21:01 PDT 2008
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#include "workingpartset.h"
+#include "colpartition.h"
+
+namespace tesseract {
+
+// NOTE(review): presumably expands to the out-of-line list support code for
+// WorkingPartSet that pairs with ELISTIZEH(WorkingPartSet) in
+// workingpartset.h -- confirm against the macro's definition.
+ELISTIZE(WorkingPartSet)
+
+// Inserts part into this WorkingPartSet and records it as the latest part.
+// The insertion point is chosen as follows:
+//  * If nothing has been added yet, or part->SingletonPartner(true) is null,
+//    part goes at the end of the list.
+//  * Otherwise, if the latest part's SingletonPartner(false) is part, it is
+//    inserted right after the current iterator position.
+//  * Otherwise the list is scanned from the front and part is inserted after
+//    its partner, or after the last element if the partner is not found.
+void WorkingPartSet::AddPartition(ColPartition* part) {
+  ColPartition* partner = part->SingletonPartner(true);
+  if (partner != nullptr) {
+    // The partnership must be mutual.
+    ASSERT_HOST(partner->SingletonPartner(false) == part);
+  }
+
+  if (latest_part_ != nullptr && partner != nullptr) {
+    if (latest_part_->SingletonPartner(false) != part) {
+      // The iterator is not already next to the partner: look for it,
+      // stopping on the last element if it is not in the list.
+      part_it_.move_to_first();
+      while (!part_it_.at_last() && part_it_.data() != partner) {
+        part_it_.forward();
+      }
+    }
+  } else {
+    // No partner to stay with, so append.
+    part_it_.move_to_last();
+  }
+
+  part_it_.add_after_then_move(part);
+  latest_part_ = part;
+}
+
+// Converts any partitions still held by this WorkingPartSet into blocks
+// (via MakeBlocks), then appends completed_blocks_ to the end of blocks and
+// to_blocks_ to the end of to_blocks.
+// bleft, tright and resolution give the bounds and resolution of the source
+// image, so that blocks can be made to fit in the bounds.
+// The ColPartitions are handed to used_parts, as they need to be kept
+// around, but are no longer needed here.
+void WorkingPartSet::ExtractCompletedBlocks(const ICOORD& bleft,
+                                            const ICOORD& tright,
+                                            int resolution,
+                                            ColPartition_LIST* used_parts,
+                                            BLOCK_LIST* blocks,
+                                            TO_BLOCK_LIST* to_blocks) {
+  // Flush the pending partitions first.
+  MakeBlocks(bleft, tright, resolution, used_parts);
+
+  // Append the finished BLOCKs to the caller's list.
+  BLOCK_IT dest_block_it(blocks);
+  dest_block_it.move_to_last();
+  dest_block_it.add_list_after(&completed_blocks_);
+
+  // Append the matching TO_BLOCKs to the caller's list.
+  TO_BLOCK_IT dest_to_block_it(to_blocks);
+  dest_to_block_it.move_to_last();
+  dest_to_block_it.add_list_after(&to_blocks_);
+}
+
+// Puts the given blocks in front of completed_blocks_ and the given
+// to_blocks in front of to_blocks_, so that they can be kept in the correct
+// reading order.
+void WorkingPartSet::InsertCompletedBlocks(BLOCK_LIST* blocks,
+                                           TO_BLOCK_LIST* to_blocks) {
+  BLOCK_IT completed_it(&completed_blocks_);
+  completed_it.add_list_before(blocks);
+
+  TO_BLOCK_IT completed_to_it(&to_blocks_);
+  completed_to_it.add_list_before(to_blocks);
+}
+
+// Drains part_set_, turning its partitions into blocks that are added to
+// completed_blocks_ and to_blocks_.
+// Partitions are taken from the list in runs. A run continues while the next
+// list entry is the current partition's SingletonPartner(false), or is an
+// adjacent partition of a similar type that passes the merge test below.
+// A run that contains any partition of blob type BRT_UNKNOWN, or of text
+// type other than PT_TABLE, is passed to ColPartition::LineSpacingBlocks;
+// any other run is passed to ColPartition::MakeBlock.
+// On return part_it_ is rebound to part_set_ and latest_part_ is cleared.
+void WorkingPartSet::MakeBlocks(const ICOORD& bleft, const ICOORD& tright,
+                                int resolution, ColPartition_LIST* used_parts) {
+  part_it_.move_to_first();
+  while (!part_it_.empty()) {
+    // The partitions of the current run; text runs are later split by
+    // linespacing into smaller blocks.
+    ColPartition_LIST block_parts;
+    ColPartition_IT run_it(&block_parts);
+    ColPartition* successor = nullptr;
+    bool text_block = false;
+    do {
+      ColPartition* part = part_it_.extract();
+      if (part->blob_type() == BRT_UNKNOWN ||
+          (part->IsTextType() && part->type() != PT_TABLE)) {
+        text_block = true;
+      }
+      part->set_working_set(nullptr);
+      part_it_.forward();
+      run_it.add_after_then_move(part);
+
+      successor = part->SingletonPartner(false);
+      if (part_it_.empty() || successor != part_it_.data()) {
+        // Sequences of partitions can get split by titles.
+        successor = nullptr;
+      }
+
+      // Merge adjacent blocks that are of the same type and let the
+      // linespacing determine the real boundaries.
+      if (successor == nullptr && !part_it_.empty()) {
+        ColPartition* candidate = part_it_.data();
+        const TBOX& part_box = part->bounding_box();
+        const TBOX& candidate_box = candidate->bounding_box();
+        PolyBlockType part_type = part->type();
+        PolyBlockType candidate_type = candidate->type();
+
+        // Besides having a similar type, neither partition may be a line,
+        // the candidate's bottom must not be above this partition's top,
+        // and, unless this is a text run, this partition's bottom must not
+        // be above the candidate's top.
+        if (ColPartition::TypesSimilar(part_type, candidate_type) &&
+            !part->IsLineType() && !candidate->IsLineType() &&
+            candidate_box.bottom() <= part_box.top() &&
+            (text_block || part_box.bottom() <= candidate_box.top())) {
+          successor = candidate;
+        }
+      }
+    } while (!part_it_.empty() && successor != nullptr);
+
+    if (text_block) {
+      // Further sub-divide text blocks where linespacing changes.
+      ColPartition::LineSpacingBlocks(bleft, tright, resolution, &block_parts,
+                                      used_parts,
+                                      &completed_blocks_, &to_blocks_);
+    } else {
+      TO_BLOCK* to_block = ColPartition::MakeBlock(bleft, tright,
+                                                   &block_parts, used_parts);
+      if (to_block != nullptr) {
+        TO_BLOCK_IT to_block_it(&to_blocks_);
+        to_block_it.add_to_end(to_block);
+        BLOCK_IT completed_it(&completed_blocks_);
+        completed_it.add_to_end(to_block->block);
+      }
+    }
+  }
+
+  part_it_.set_to_list(&part_set_);
+  latest_part_ = nullptr;
+  // Both output lists must stay in step.
+  ASSERT_HOST(completed_blocks_.length() == to_blocks_.length());
+}
+
+} // namespace tesseract.
diff --git a/tesseract/src/textord/workingpartset.h b/tesseract/src/textord/workingpartset.h
new file mode 100644
index 00000000..6fb342aa
--- /dev/null
+++ b/tesseract/src/textord/workingpartset.h
@@ -0,0 +1,88 @@
+///////////////////////////////////////////////////////////////////////
+// File: workingpartset.h
+// Description: Class to hold a working set of partitions of the page
+// during construction of text/image regions.
+// Author: Ray Smith
+// Created: Tue Oct 28 17:21:01 PDT 2008
+//
+// (C) Copyright 2008, Google Inc.
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+///////////////////////////////////////////////////////////////////////
+
+#ifndef TESSERACT_TEXTORD_WORKINGPARSET_H_
+#define TESSERACT_TEXTORD_WORKINGPARSET_H_
+
+#include "blobbox.h" // For TO_BLOCK_LIST and BLOCK_LIST.
+#include "colpartition.h" // For ColPartition_LIST.
+
+namespace tesseract {
+
+// A WorkingPartSet is the working collection of ColPartitions used while
+// converting from the grid-based storage to regions in logical reading
+// order. It is only used during construction of the regions.
+class WorkingPartSet : public ELIST_LINK {
+ public:
+  // Creates an empty set for the given column, with part_it_ bound to
+  // part_set_ and no latest part.
+  explicit WorkingPartSet(ColPartition* column)
+      : column_(column), latest_part_(nullptr), part_it_(&part_set_) {}
+
+  // Simple accessors.
+  ColPartition* column() const { return column_; }
+  void set_column(ColPartition* col) { column_ = col; }
+
+  // Adds the partition to this WorkingPartSet. Partitions are generally
+  // kept in arrival order, except that a partition with a SingletonPartner
+  // is placed so that it stays with its partner.
+  void AddPartition(ColPartition* part);
+
+  // Makes blocks out of any partitions in this WorkingPartSet and appends
+  // them to the end of the blocks and to_blocks lists. bleft, tright and
+  // resolution give the bounds and resolution of the source image, so that
+  // blocks can be made to fit in the bounds.
+  // All ColPartitions go in the used_parts list, as they need to be kept
+  // around, but are no longer needed.
+  void ExtractCompletedBlocks(const ICOORD& bleft, const ICOORD& tright,
+                              int resolution, ColPartition_LIST* used_parts,
+                              BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
+
+  // Inserts the given blocks at the front of the completed_blocks_ list so
+  // they can be kept in the correct reading order.
+  void InsertCompletedBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks);
+
+ private:
+  // Converts the part_set_ into blocks, starting a new block at a break
+  // in partnerships, or a change in linespacing (for text).
+  void MakeBlocks(const ICOORD& bleft, const ICOORD& tright, int resolution,
+                  ColPartition_LIST* used_parts);
+
+  // NOTE: part_set_ must stay declared before part_it_, because the
+  // constructor binds part_it_ to part_set_.
+
+  // The column that this working set applies to. Used by the caller.
+  ColPartition* column_;
+  // The most recently added partition.
+  ColPartition* latest_part_;
+  // All the partitions in the block that is currently under construction.
+  ColPartition_LIST part_set_;
+  // Iterator on part_set_ pointing to the most recent addition.
+  ColPartition_IT part_it_;
+  // The blocks that have been made so far and belong before the current
+  // block, with their corresponding TO_BLOCKs.
+  BLOCK_LIST completed_blocks_;
+  TO_BLOCK_LIST to_blocks_;
+};
+
+// NOTE(review): presumably declares the list and iterator types for
+// WorkingPartSet elements -- confirm against the macro's definition.
+ELISTIZEH(WorkingPartSet)
+
+} // namespace tesseract.
+
+#endif // TESSERACT_TEXTORD_WORKINGPARSET_H_