diff options
Diffstat (limited to 'tesseract/src/textord')
79 files changed, 43799 insertions, 0 deletions
diff --git a/tesseract/src/textord/alignedblob.cpp b/tesseract/src/textord/alignedblob.cpp new file mode 100644 index 00000000..4c17584b --- /dev/null +++ b/tesseract/src/textord/alignedblob.cpp @@ -0,0 +1,535 @@ +/////////////////////////////////////////////////////////////////////// +// File: alignedblob.cpp +// Description: Subclass of BBGrid to find vertically aligned blobs. +// Author: Ray Smith +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "alignedblob.h" + +#include <algorithm> + +namespace tesseract { + +// Debug parameters for tab finding. The four textord_testregion_* values +// bound the rectangle tested by AlignedBlob::WithinTestRegion. +INT_VAR(textord_debug_tabfind, 0, "Debug tab finding"); +INT_VAR(textord_debug_bugs, 0, "Turn on output related to bugs in tab finding"); +static INT_VAR(textord_testregion_left, -1, "Left edge of debug reporting rectangle in Leptonica coords (bottom=0/top=height), with horizontal lines x/y-flipped"); +static INT_VAR(textord_testregion_top, INT32_MAX, "Top edge of debug reporting rectangle in Leptonica coords (bottom=0/top=height), with horizontal lines x/y-flipped"); +static INT_VAR(textord_testregion_right, INT32_MAX, "Right edge of debug rectangle in Leptonica coords (bottom=0/top=height), with horizontal lines x/y-flipped"); +static INT_VAR(textord_testregion_bottom, -1, "Bottom edge of debug rectangle in Leptonica coords (bottom=0/top=height), with horizontal lines 
x/y-flipped"); +BOOL_VAR(textord_debug_printable, false, "Make debug windows printable"); + +// Fraction of resolution used as alignment tolerance for aligned tabs. +const double kAlignedFraction = 0.03125; +// Fraction of resolution used as alignment tolerance for ragged tabs. +const double kRaggedFraction = 2.5; +// Fraction of height used as a minimum gutter gap for aligned blobs. +const double kAlignedGapFraction = 0.75; +// Fraction of height used as a minimum gutter gap for ragged tabs. +const double kRaggedGapFraction = 1.0; +// Constant number of pixels used as alignment tolerance for line finding. +const int kVLineAlignment = 3; +// Constant number of pixels used as gutter gap tolerance for line finding. +const int kVLineGutter = 1; +// Constant number of pixels used as the search size for line finding. +const int kVLineSearchSize = 150; +// Min number of points to accept for a ragged tab stop. +const int kMinRaggedTabs = 5; +// Min number of points to accept for an aligned tab stop. +const int kMinAlignedTabs = 4; +// Constant number of pixels minimum height of a vertical line. +const int kVLineMinLength = 300; +// Minimum gradient for a vertical tab vector. Used to prune away junk +// tab vectors with what would be a ridiculously large skew angle. +// Value corresponds to tan(90 - max allowed skew angle) +const double kMinTabGradient = 4.0; +// Tolerance to skew on top of current estimate of skew. Divide x or y length +// by kMaxSkewFactor to get the y or x skew distance. +// If the angle is small, the angle in degrees is roughly 60/kMaxSkewFactor. +const int kMaxSkewFactor = 15; + +// Constructor to set the parameters for finding aligned and ragged tabs. +// Vertical_x and vertical_y are the current estimates of the true vertical +// direction (up) in the image. Height is the height of the starter blob. +// v_gap_multiple is the multiple of height that will be used as a limit +// on vertical gap before giving up and calling the line ended. 
+// resolution is the original image resolution, and align0 indicates the +// type of tab stop to be found. +// min_gutter_width is a lower bound applied to min_gutter after it has been +// computed from height and gutter_fraction. +AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, + int height, int v_gap_multiple, + int min_gutter_width, + int resolution, TabAlignment align0) + : right_tab(align0 == TA_RIGHT_RAGGED || align0 == TA_RIGHT_ALIGNED), + ragged(align0 == TA_LEFT_RAGGED || align0 == TA_RIGHT_RAGGED), + alignment(align0), + confirmed_type(TT_CONFIRMED), + min_length(0) { + // Set the tolerances according to the type of line sought. + // For tab search, these are based on the image resolution for most, or + // the height of the starting blob for the maximum vertical gap. + // The + 0.5 before each static_cast<int> rounds to nearest for non-negative + // values instead of truncating. + max_v_gap = height * v_gap_multiple; + if (ragged) { + // In the case of a ragged edge, we are much more generous with the + // inside alignment fraction, but also require a much bigger gutter. + gutter_fraction = kRaggedGapFraction; + if (alignment == TA_RIGHT_RAGGED) { + l_align_tolerance = static_cast<int>(resolution * kRaggedFraction + 0.5); + r_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); + } else { + l_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); + r_align_tolerance = static_cast<int>(resolution * kRaggedFraction + 0.5); + } + min_points = kMinRaggedTabs; + } else { + gutter_fraction = kAlignedGapFraction; + l_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); + r_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); + min_points = kMinAlignedTabs; + } + min_gutter = static_cast<int>(height * gutter_fraction + 0.5); + if (min_gutter < min_gutter_width) + min_gutter = min_gutter_width; + // Fit the vertical vector into an ICOORD, which is 16 bit. + set_vertical(vertical_x, vertical_y); +} + +// Constructor to set the parameters for finding vertical lines. +// Vertical_x and vertical_y are the current estimates of the true vertical +// direction (up) in the image. 
Width is the width of the starter blob. +AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, + int width) + : gutter_fraction(0.0), + right_tab(false), + ragged(false), + alignment(TA_SEPARATOR), + confirmed_type(TT_VLINE), + max_v_gap(kVLineSearchSize), + min_gutter(kVLineGutter), + min_points(1), + min_length(kVLineMinLength) { + // Compute threshold for left and right alignment. + l_align_tolerance = std::max(kVLineAlignment, width); + r_align_tolerance = std::max(kVLineAlignment, width); + + // Fit the vertical vector into an ICOORD, which is 16 bit. + set_vertical(vertical_x, vertical_y); +} + +// Fit the vertical vector into an ICOORD, which is 16 bit. +// Both components are divided by the same factor, chosen so that vertical_y +// fits in 16 bits. NOTE(review): only vertical_y > INT16_MAX is checked; a +// large vertical_x or a large negative vertical_y is not rescaled -- confirm +// that callers never pass one. +void AlignedBlobParams::set_vertical(int vertical_x, int vertical_y) { + int factor = 1; + if (vertical_y > INT16_MAX) + factor = vertical_y / INT16_MAX + 1; + vertical.set_x(vertical_x / factor); + vertical.set_y(vertical_y / factor); +} + + +// Constructor: forwards the grid geometry to the BlobGrid base class. +AlignedBlob::AlignedBlob(int gridsize, + const ICOORD& bleft, const ICOORD& tright) + : BlobGrid(gridsize, bleft, tright) { +} + +// Return true if the given coordinates are within the test rectangle +// and the debug level is at least the given detail level. +// The rectangle comes from the textord_testregion_* parameters; its edges are +// inclusive. +bool AlignedBlob::WithinTestRegion(int detail_level, int x, int y) { + if (textord_debug_tabfind < detail_level) + return false; + return x >= textord_testregion_left && x <= textord_testregion_right && + y <= textord_testregion_top && y >= textord_testregion_bottom; +} + +#ifndef GRAPHICS_DISABLED + +// Display the tab codes of the BLOBNBOXes in this grid. +// A window is created with MakeWindow if tab_win is nullptr; the window drawn +// on is returned. Left edges are drawn blue (TT_MAYBE_ALIGNED), yellow +// (TT_MAYBE_RAGGED), green (TT_CONFIRMED) or grey (any other type except +// TT_NONE); right edges use magenta, orange, red and grey respectively. +ScrollView* AlignedBlob::DisplayTabs(const char* window_name, + ScrollView* tab_win) { + if (tab_win == nullptr) + tab_win = MakeWindow(0, 50, window_name); + // For every tab in the grid, display it. 
+ GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this); + gsearch.StartFullSearch(); + BLOBNBOX* bbox; + while ((bbox = gsearch.NextFullSearch()) != nullptr) { + const TBOX& box = bbox->bounding_box(); + int left_x = box.left(); + int right_x = box.right(); + int top_y = box.top(); + int bottom_y = box.bottom(); + TabType tabtype = bbox->left_tab_type(); + if (tabtype != TT_NONE) { + if (tabtype == TT_MAYBE_ALIGNED) + tab_win->Pen(ScrollView::BLUE); + else if (tabtype == TT_MAYBE_RAGGED) + tab_win->Pen(ScrollView::YELLOW); + else if (tabtype == TT_CONFIRMED) + tab_win->Pen(ScrollView::GREEN); + else + tab_win->Pen(ScrollView::GREY); + tab_win->Line(left_x, top_y, left_x, bottom_y); + } + tabtype = bbox->right_tab_type(); + if (tabtype != TT_NONE) { + if (tabtype == TT_MAYBE_ALIGNED) + tab_win->Pen(ScrollView::MAGENTA); + else if (tabtype == TT_MAYBE_RAGGED) + tab_win->Pen(ScrollView::ORANGE); + else if (tabtype == TT_CONFIRMED) + tab_win->Pen(ScrollView::RED); + else + tab_win->Pen(ScrollView::GREY); + tab_win->Line(right_x, top_y, right_x, bottom_y); + } + } + tab_win->Update(); + return tab_win; +} + +#endif // !GRAPHICS_DISABLED + +// Helper returns true if the total number of line_crossings of all the blobs +// in the list is at least 2. +static bool AtLeast2LineCrossings(BLOBNBOX_CLIST* blobs) { + BLOBNBOX_C_IT it(blobs); + int total_crossings = 0; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + total_crossings += it.data()->line_crossings(); + } + return total_crossings >= 2; +} + +// Destructor. +// It is defined here, so the compiler can create a single vtable +// instead of weak vtables in every compilation unit. +AlignedBlob::~AlignedBlob() = default; + +// Finds a vector corresponding to a set of vertically aligned blob edges +// running through the given box. The type of vector returned and the +// search parameters are determined by the AlignedBlobParams. 
+// vertical_x and y are updated with an estimate of the real +// vertical direction. (skew finding.) +// Returns nullptr if no decent vector can be found. +TabVector* AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params, + BLOBNBOX* bbox, + int* vertical_x, + int* vertical_y) { + int ext_start_y, ext_end_y; + BLOBNBOX_CLIST good_points; + // Search up and then down from the starting bbox. + TBOX box = bbox->bounding_box(); + bool debug = WithinTestRegion(2, box.left(), box.bottom()); + int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y); + pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y); + BLOBNBOX_C_IT it(&good_points); + // NOTE(review): it.data() is used below without checking that good_points + // is non-empty; presumably at least one blob is always collected -- confirm. + it.move_to_last(); + box = it.data()->bounding_box(); + int end_y = box.top(); + int end_x = align_params.right_tab ? box.right() : box.left(); + it.move_to_first(); + box = it.data()->bounding_box(); + int start_x = align_params.right_tab ? box.right() : box.left(); + int start_y = box.bottom(); + // Acceptable tab vectors must have a minimum number of points, + // have a minimum acceptable length, and have a minimum gradient. + // The gradient corresponds to the skew angle. + // Ragged tabs don't need to satisfy the gradient condition, as they + // will always end up parallel to the vertical direction. + // A list whose blobs total at least 2 line crossings is accepted regardless + // of the three tests above. + bool at_least_2_crossings = AtLeast2LineCrossings(&good_points); + if ((pt_count >= align_params.min_points && + end_y - start_y >= align_params.min_length && + (align_params.ragged || + end_y - start_y >= abs(end_x - start_x) * kMinTabGradient)) || + at_least_2_crossings) { + int confirmed_points = 0; + // Count existing confirmed points to see if vector is acceptable. 
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + bbox = it.data(); + if (align_params.right_tab) { + if (bbox->right_tab_type() == align_params.confirmed_type) + ++confirmed_points; + } else { + if (bbox->left_tab_type() == align_params.confirmed_type) + ++confirmed_points; + } + } + // Ragged vectors are not allowed to use too many already used points. + // (Fewer than half of the points may already carry the confirmed type.) + if (!align_params.ragged || + confirmed_points + confirmed_points < pt_count) { + const TBOX& box = bbox->bounding_box(); + if (debug) { + tprintf("Confirming tab vector of %d pts starting at %d,%d\n", + pt_count, box.left(), box.bottom()); + } + // Flag all the aligned neighbours as confirmed. + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + bbox = it.data(); + if (align_params.right_tab) { + bbox->set_right_tab_type(align_params.confirmed_type); + } else { + bbox->set_left_tab_type(align_params.confirmed_type); + } + if (debug) { + bbox->bounding_box().print(); + } + } + // Now make the vector and return it. + TabVector* result = TabVector::FitVector(align_params.alignment, + align_params.vertical, + ext_start_y, ext_end_y, + &good_points, + vertical_x, vertical_y); + result->set_intersects_other_lines(at_least_2_crossings); + if (debug) { + tprintf("Box was %d, %d\n", box.left(), box.bottom()); + result->Print("After fitting"); + } + return result; + } else if (debug) { + tprintf("Ragged tab used too many used points: %d out of %d\n", + confirmed_points, pt_count); + } + } else if (debug) { + tprintf("Tab vector failed basic tests: pt count %d vs min %d, " + "length %d vs min %d, min grad %g\n", + pt_count, align_params.min_points, end_y - start_y, + align_params.min_length, abs(end_x - start_x) * kMinTabGradient); + } + return nullptr; +} + +// Find a set of blobs that are aligned in the given vertical +// direction with the given blob. Returns a list of aligned +// blobs and the number in the list. +// For other parameters see FindAlignedBlob below. 
+int AlignedBlob::AlignTabs(const AlignedBlobParams& params, + bool top_to_bottom, BLOBNBOX* bbox, + BLOBNBOX_CLIST* good_points, int* end_y) { + int ptcount = 0; + BLOBNBOX_C_IT it(good_points); + + TBOX box = bbox->bounding_box(); + bool debug = WithinTestRegion(2, box.left(), box.bottom()); + if (debug) { + tprintf("Starting alignment run at blob:"); + box.print(); + } + int x_start = params.right_tab ? box.right() : box.left(); + while (bbox != nullptr) { + // Add the blob to the list if the appropriate side is a tab candidate, + // or if we are working on a ragged tab. + // A blob that is already the element under the iterator is not added again. + TabType type = params.right_tab ? bbox->right_tab_type() + : bbox->left_tab_type(); + if (((type != TT_NONE && type != TT_MAYBE_RAGGED) || params.ragged) && + (it.empty() || it.data() != bbox)) { + if (top_to_bottom) + it.add_before_then_move(bbox); + else + it.add_after_then_move(bbox); + ++ptcount; + } + // Find the next blob that is aligned with the current one. + // FindAlignedBlob guarantees that forward progress will be made in the + // top_to_bottom direction, and therefore eventually it will return nullptr, + // making this while (bbox != nullptr) loop safe. + bbox = FindAlignedBlob(params, top_to_bottom, bbox, x_start, end_y); + if (bbox != nullptr) { + box = bbox->bounding_box(); + // Aligned searches follow the edge of the newest blob; ragged searches + // keep the x of the starting blob. + if (!params.ragged) + x_start = params.right_tab ? box.right() : box.left(); + } + } + if (debug) { + tprintf("Alignment run ended with %d pts at blob:", ptcount); + box.print(); + } + return ptcount; +} + +// Search vertically for a blob that is aligned with the input bbox. +// The search parameters are determined by AlignedBlobParams. +// top_to_bottom tells whether to search down or up. +// x_start is the x coordinate of the line being followed at the leading edge +// of bbox. +// The return value is nullptr if nothing was found in the search box +// or if a blob was found in the gutter. On a nullptr return, end_y +// is set to the edge of the search box or the leading edge of the +// gutter blob if one was found. 
+BLOBNBOX* AlignedBlob::FindAlignedBlob(const AlignedBlobParams& p, + bool top_to_bottom, BLOBNBOX* bbox, + int x_start, int* end_y) { + TBOX box = bbox->bounding_box(); + // If there are separator lines, get the column edges. + int left_column_edge = bbox->left_rule(); + int right_column_edge = bbox->right_rule(); + // start_y is used to guarantee that forward progress is made and the + // search does not go into an infinite loop. New blobs must extend the + // line beyond start_y. + int start_y = top_to_bottom ? box.bottom() : box.top(); + if (WithinTestRegion(2, x_start, start_y)) { + tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n", + box.left(), box.top(), box.right(), box.bottom(), + left_column_edge, right_column_edge); + } + // Compute skew tolerance. + int skew_tolerance = p.max_v_gap / kMaxSkewFactor; + // Calculate xmin and xmax of the search box so that it contains + // all possibly relevant boxes up to p.max_v_gap above or below according + // to top_to_bottom. + // Start with a notion of vertical with the current estimate. + // NOTE(review): divides by p.vertical.y(); assumes it is never 0 -- confirm. + int x2 = (p.max_v_gap * p.vertical.x() + p.vertical.y()/2) / p.vertical.y(); + if (top_to_bottom) { + x2 = x_start - x2; + *end_y = start_y - p.max_v_gap; + } else { + x2 = x_start + x2; + *end_y = start_y + p.max_v_gap; + } + // Expand the box by an additional skew tolerance. + int xmin = std::min(x_start, x2) - skew_tolerance; + int xmax = std::max(x_start, x2) + skew_tolerance; + // Now add direction-specific tolerances. + if (p.right_tab) { + xmax += p.min_gutter; + xmin -= p.l_align_tolerance; + } else { + xmax += p.r_align_tolerance; + xmin -= p.min_gutter; + } + // Setup a vertical search for an aligned blob. + GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(this); + if (WithinTestRegion(2, x_start, start_y)) + tprintf("Starting %s %s search at %d-%d,%d, search_size=%d, gutter=%d\n", + p.ragged ? "Ragged" : "Aligned", p.right_tab ? 
"Right" : "Left", + xmin, xmax, start_y, p.max_v_gap, p.min_gutter); + vsearch.StartVerticalSearch(xmin, xmax, start_y); + // result stores the best real return value. + BLOBNBOX* result = nullptr; + // The backup_result is not a tab candidate and can be used if no + // real tab candidate result is found. + BLOBNBOX* backup_result = nullptr; + // neighbour is the blob that is currently being investigated. + BLOBNBOX* neighbour = nullptr; + while ((neighbour = vsearch.NextVerticalSearch(top_to_bottom)) != nullptr) { + if (neighbour == bbox) + continue; + TBOX nbox = neighbour->bounding_box(); + int n_y = (nbox.top() + nbox.bottom()) / 2; + if ((!top_to_bottom && n_y > start_y + p.max_v_gap) || + (top_to_bottom && n_y < start_y - p.max_v_gap)) { + if (WithinTestRegion(2, x_start, start_y)) + tprintf("Neighbour too far at (%d,%d)->(%d,%d)\n", + nbox.left(), nbox.bottom(), nbox.right(), nbox.top()); + break; // Gone far enough. + } + // It is CRITICAL to ensure that forward progress is made, (strictly + // in/decreasing n_y) or the caller could loop infinitely, while + // waiting for a sequence of blobs in a line to end. + // NextVerticalSearch alone does not guarantee this, as there may be + // more than one blob in a grid cell. See comment in AlignTabs. + if ((n_y < start_y) != top_to_bottom || nbox.y_overlap(box)) + continue; // Only look in the required direction. + if (result != nullptr && result->bounding_box().y_gap(nbox) > gridsize()) + return result; // This result is clear. + if (backup_result != nullptr && p.ragged && result == nullptr && + backup_result->bounding_box().y_gap(nbox) > gridsize()) + return backup_result; // This result is clear. + + // If the neighbouring blob is the wrong side of a separator line, then it + // "doesn't exist" as far as we are concerned. 
+ int x_at_n_y = x_start + (n_y - start_y) * p.vertical.x() / p.vertical.y(); + if (x_at_n_y < neighbour->left_crossing_rule() || + x_at_n_y > neighbour->right_crossing_rule()) + continue; // Separator line in the way. + int n_left = nbox.left(); + int n_right = nbox.right(); + int n_x = p.right_tab ? n_right : n_left; + if (WithinTestRegion(2, x_start, start_y)) + tprintf("neighbour at (%d,%d)->(%d,%d), n_x=%d, n_y=%d, xatn=%d\n", + nbox.left(), nbox.bottom(), nbox.right(), nbox.top(), + n_x, n_y, x_at_n_y); + if (p.right_tab && + n_left < x_at_n_y + p.min_gutter && + n_right > x_at_n_y + p.r_align_tolerance && + (p.ragged || n_left < x_at_n_y + p.gutter_fraction * nbox.height())) { + // In the gutter so end of line. + if (bbox->right_tab_type() >= TT_MAYBE_ALIGNED) + bbox->set_right_tab_type(TT_DELETED); + *end_y = top_to_bottom ? nbox.top() : nbox.bottom(); + if (WithinTestRegion(2, x_start, start_y)) + tprintf("gutter\n"); + return nullptr; + } + if (!p.right_tab && + n_left < x_at_n_y - p.l_align_tolerance && + n_right > x_at_n_y - p.min_gutter && + (p.ragged || n_right > x_at_n_y - p.gutter_fraction * nbox.height())) { + // In the gutter so end of line. + if (bbox->left_tab_type() >= TT_MAYBE_ALIGNED) + bbox->set_left_tab_type(TT_DELETED); + *end_y = top_to_bottom ? nbox.top() : nbox.bottom(); + if (WithinTestRegion(2, x_start, start_y)) + tprintf("gutter\n"); + return nullptr; + } + if ((p.right_tab && neighbour->leader_on_right()) || + (!p.right_tab && neighbour->leader_on_left())) + continue; // Neighbours of leaders are not allowed to be used. + if (n_x <= x_at_n_y + p.r_align_tolerance && + n_x >= x_at_n_y - p.l_align_tolerance) { + // Aligned so keep it. If it is a marked tab save it as result, + // otherwise keep it as backup_result to return in case of later failure. 
+ if (WithinTestRegion(2, x_start, start_y)) + tprintf("aligned, seeking%d, l=%d, r=%d\n", + p.right_tab, neighbour->left_tab_type(), + neighbour->right_tab_type()); + TabType n_type = p.right_tab ? neighbour->right_tab_type() + : neighbour->left_tab_type(); + if (n_type != TT_NONE && (p.ragged || n_type != TT_MAYBE_RAGGED)) { + if (result == nullptr) { + result = neighbour; + } else { + // Keep the closest neighbour by Euclidean distance. + // This prevents it from picking a tab blob in another column. + // Squared distances are compared, which gives the same ordering. + const TBOX& old_box = result->bounding_box(); + int x_diff = p.right_tab ? old_box.right() : old_box.left(); + x_diff -= x_at_n_y; + int y_diff = (old_box.top() + old_box.bottom()) / 2 - start_y; + int old_dist = x_diff * x_diff + y_diff * y_diff; + x_diff = n_x - x_at_n_y; + y_diff = n_y - start_y; + int new_dist = x_diff * x_diff + y_diff * y_diff; + if (new_dist < old_dist) + result = neighbour; + } + } else if (backup_result == nullptr) { + if (WithinTestRegion(2, x_start, start_y)) + tprintf("Backup\n"); + backup_result = neighbour; + } else { + // Prefer the backup whose edge is further right (right tabs) or further + // left (left tabs). + TBOX backup_box = backup_result->bounding_box(); + if ((p.right_tab && backup_box.right() < nbox.right()) || + (!p.right_tab && backup_box.left() > nbox.left())) { + if (WithinTestRegion(2, x_start, start_y)) + tprintf("Better backup\n"); + backup_result = neighbour; + } + } + } + } + return result != nullptr ? result : backup_result; +} + +} // namespace tesseract. diff --git a/tesseract/src/textord/alignedblob.h b/tesseract/src/textord/alignedblob.h new file mode 100644 index 00000000..e69b3354 --- /dev/null +++ b/tesseract/src/textord/alignedblob.h @@ -0,0 +1,124 @@ +/////////////////////////////////////////////////////////////////////// +// File: alignedblob.h +// Description: A class to find vertically aligned blobs in a BBGrid, +// and a struct to hold control parameters. +// Author: Ray Smith +// +// (C) Copyright 2008, Google Inc. 
+// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_TEXTORD_ALIGNEDBLOB_H_ +#define TESSERACT_TEXTORD_ALIGNEDBLOB_H_ + +#include "bbgrid.h" +#include "blobbox.h" +#include "tabvector.h" + +namespace tesseract { + +// NOTE(review): textord_debug_tabfind is declared here with a default of 2 but +// defined in alignedblob.cpp with 0 -- confirm which default is intended. +extern INT_VAR_H(textord_debug_bugs, 0, + "Turn on output related to bugs in tab finding"); +extern INT_VAR_H(textord_debug_tabfind, 2, "Debug tab finding"); +extern BOOL_VAR_H(textord_debug_printable, false, + "Make debug windows printable"); + +// Simple structure to hold the search parameters for AlignedBlob. +// The members are mostly derived from constants, which are +// conditioned on the alignment parameter. +// For finding vertical lines, a different set of constants is +// used, conditioned on the different constructor. +struct AlignedBlobParams { + // Constructor to set the parameters for finding aligned and ragged tabs. + // Vertical_x and vertical_y are the current estimates of the true vertical + // direction (up) in the image. Height is the height of the starter blob. + // v_gap_multiple is the multiple of height that will be used as a limit + // on vertical gap before giving up and calling the line ended. + // resolution is the original image resolution, and alignment0 indicates the + // type of tab stop to be found. + // min_gutter_width is a lower bound on the min_gutter member. 
+ AlignedBlobParams(int vertical_x, int vertical_y, int height, + int v_gap_multiple, int min_gutter_width, int resolution, + TabAlignment alignment0); + // Constructor to set the parameters for finding vertical lines. + // Vertical_x and vertical_y are the current estimates of the true vertical + // direction (up) in the image. Width is the width of the starter blob. + AlignedBlobParams(int vertical_x, int vertical_y, int width); + + // Fit the vertical vector into an ICOORD, which is 16 bit. + void set_vertical(int vertical_x, int vertical_y); + + double gutter_fraction; // Multiple of height used for min_gutter. + bool right_tab; // We are looking at right edges. + bool ragged; // We are looking for a ragged (vs aligned) edge. + TabAlignment alignment; // The type we are trying to produce. + TabType confirmed_type; // Type to flag blobs if accepted. + int max_v_gap; // Max vertical gap to be tolerated. + int min_gutter; // Minimum gutter between columns. + // Tolerances allowed on horizontal alignment of aligned edges. + int l_align_tolerance; // Allowed deviation to the left of the line. + int r_align_tolerance; // Allowed deviation to the right of the line. + // Conditions for accepting a line. + int min_points; // Minimum number of points to be OK. + int min_length; // Min length of completed line. + + ICOORD vertical; // Current estimate of logical vertical. +}; + +// The AlignedBlob class contains code to find vertically aligned blobs. +// This is factored out into a separate class, so it can be used by both +// vertical line finding (LineFind) and tabstop finding (TabFind). +class TESS_API AlignedBlob : public BlobGrid { + public: + AlignedBlob(int gridsize, const ICOORD& bleft, const ICOORD& tright); + ~AlignedBlob() override; + + // Return true if the given coordinates are within the test rectangle + // and the debug level is at least the given detail level. + static bool WithinTestRegion(int detail_level, int x, int y); + + // Display the tab codes of the BLOBNBOXes in this grid. + // NOTE(review): the definition in alignedblob.cpp is compiled only when + // GRAPHICS_DISABLED is not defined. 
+ ScrollView* DisplayTabs(const char* window_name, ScrollView* tab_win); + + // Finds a vector corresponding to a set of vertically aligned blob edges + // running through the given box. The type of vector returned and the + // search parameters are determined by the AlignedBlobParams. + // vertical_x and y are updated with an estimate of the real + // vertical direction. (skew finding.) + // Returns nullptr if no decent vector can be found. + TabVector* FindVerticalAlignment(AlignedBlobParams align_params, + BLOBNBOX* bbox, + int* vertical_x, int* vertical_y); + + private: + // Find a set of blobs that are aligned in the given vertical + // direction with the given blob. Returns a list of aligned + // blobs and the number in the list. + // For other parameters see FindAlignedBlob below. + int AlignTabs(const AlignedBlobParams& params, + bool top_to_bottom, BLOBNBOX* bbox, + BLOBNBOX_CLIST* good_points, int* end_y); + + // Search vertically for a blob that is aligned with the input bbox. + // The search parameters are determined by AlignedBlobParams. + // top_to_bottom tells whether to search down or up. + // The return value is nullptr if nothing was found in the search box + // or if a blob was found in the gutter. On a nullptr return, end_y + // is set to the edge of the search box or the leading edge of the + // gutter blob if one was found. + BLOBNBOX* FindAlignedBlob(const AlignedBlobParams& p, + bool top_to_bottom, BLOBNBOX* bbox, + int x_start, int* end_y); +}; + +} // namespace tesseract. + +#endif // TESSERACT_TEXTORD_ALIGNEDBLOB_H_ diff --git a/tesseract/src/textord/baselinedetect.cpp b/tesseract/src/textord/baselinedetect.cpp new file mode 100644 index 00000000..ef3b91c8 --- /dev/null +++ b/tesseract/src/textord/baselinedetect.cpp @@ -0,0 +1,869 @@ +/////////////////////////////////////////////////////////////////////// +// File: baselinedetect.cpp +// Description: Initial Baseline Determination. +// Copyright 2012 Google Inc. All Rights Reserved. 
+// Author: rays@google.com (Ray Smith) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#define _USE_MATH_DEFINES // for M_PI + +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "baselinedetect.h" + +#include <algorithm> +#include <cfloat> // for FLT_MAX +#include <cmath> // for M_PI +#include "allheaders.h" +#include "blobbox.h" +#include "detlinefit.h" +#include "drawtord.h" +#include "helpers.h" +#include "linlsq.h" +#include "makerow.h" +#include "textord.h" +#include "tprintf.h" +#include "underlin.h" + +// Number of displacement modes kept in displacement_modes_. +const int kMaxDisplacementsModes = 3; +// Number of points to skip at each end when retrying initial fit. +const int kNumSkipPoints = 3; +// Max angle deviation (in radians) allowed to keep the independent baseline. +const double kMaxSkewDeviation = 1.0 / 64; +// Fraction of line spacing estimate for quantization of blob displacements. +const double kOffsetQuantizationFactor = 3.0 / 64; +// Fraction of line spacing estimate for computing blob fit error. +const double kFitHalfrangeFactor = 6.0 / 64; +// Max fraction of line spacing allowed before a baseline counts as badly fitting. +const double kMaxBaselineError = 3.0 / 64; +// Multiple of linespacing that sets max_blob_size in TO_BLOCK. +// Copied from textord_excess_blobsize. 
+const double kMaxBlobSizeMultiple = 1.3; +// Min fraction of linespacing gaps that should be close to the model before +// we will force the linespacing model on all the lines. +const double kMinFittingLinespacings = 0.25; +// A y-coordinate within a textline that is to be debugged. +//#define kDebugYCoord 1525 + +namespace tesseract { + +// Constructor: takes the blob list from to_row, computes the bounding box and +// scales the quantization, fit half-range and max error limits by line_spacing. +BaselineRow::BaselineRow(double line_spacing, TO_ROW* to_row) + : blobs_(to_row->blob_list()), + baseline_pt1_(0.0f, 0.0f), baseline_pt2_(0.0f, 0.0f), + baseline_error_(0.0), good_baseline_(false) { + ComputeBoundingBox(); + // Compute a scale factor for rounding to ints. + disp_quant_factor_ = kOffsetQuantizationFactor * line_spacing; + fit_halfrange_ = kFitHalfrangeFactor * line_spacing; + max_baseline_error_ = kMaxBaselineError * line_spacing; +} + +// Sets the TO_ROW with the output straight line. +void BaselineRow::SetupOldLineParameters(TO_ROW* row) const { + // TODO(rays) get rid of this when m and c are no longer used. + double gradient = tan(BaselineAngle()); + // para_c is the actual intercept of the baseline on the y-axis. + float para_c = StraightYAtX(0.0); + row->set_line(gradient, para_c, baseline_error_); + row->set_parallel_line(gradient, para_c, baseline_error_); +} + +// Outputs diagnostic information. +void BaselineRow::Print() const { + tprintf("Baseline (%g,%g)->(%g,%g), angle=%g, intercept=%g\n", + baseline_pt1_.x(), baseline_pt1_.y(), + baseline_pt2_.x(), baseline_pt2_.y(), + BaselineAngle(), StraightYAtX(0.0)); + tprintf("Quant factor=%g, error=%g, good=%d, box:", + disp_quant_factor_, baseline_error_, good_baseline_); + bounding_box_.print(); +} + +// Returns the skew angle (in radians) of the current baseline, constrained to +// [-pi/2, pi/2]. +double BaselineRow::BaselineAngle() const { + FCOORD baseline_dir(baseline_pt2_ - baseline_pt1_); + double angle = baseline_dir.angle(); + // Baseline directions are only unique in a range of pi so constrain to + // [-pi/2, pi/2]. 
+ return fmod(angle + M_PI * 1.5, M_PI) - M_PI * 0.5; +} + +// Computes and returns the linespacing at the middle of the overlap +// between this and other. +double BaselineRow::SpaceBetween(const BaselineRow& other) const { + // Find the x-centre of overlap of the lines. + float x = (std::max(bounding_box_.left(), other.bounding_box_.left()) + + std::min(bounding_box_.right(), other.bounding_box_.right())) / 2.0f; + // Find the vertical centre between them. + float y = (StraightYAtX(x) + other.StraightYAtX(x)) / 2.0f; + // Find the perpendicular distance of (x,y) from each line. + FCOORD pt(x, y); + return PerpDistanceFromBaseline(pt) + other.PerpDistanceFromBaseline(pt); +} + +// Computes and returns the displacement of the center of the line +// perpendicular to the given direction. +double BaselineRow::PerpDisp(const FCOORD& direction) const { + float middle_x = (bounding_box_.left() + bounding_box_.right()) / 2.0f; + FCOORD middle_pos(middle_x, StraightYAtX(middle_x)); + return direction * middle_pos / direction.length(); +} + +// Computes the y coordinate at the given x using the straight baseline +// defined by baseline_pt1_ and baseline_pt2_. +// If the two points have the same x, the mean of their y values is returned. +double BaselineRow::StraightYAtX(double x) const { + double denominator = baseline_pt2_.x() - baseline_pt1_.x(); + if (denominator == 0.0) + return (baseline_pt1_.y() + baseline_pt2_.y()) / 2.0; + return baseline_pt1_.y() + + (x - baseline_pt1_.x()) * (baseline_pt2_.y() - baseline_pt1_.y()) / + denominator; +} + +// Fits a straight baseline to the points. Returns true if it had enough +// points to be reasonably sure of the fitted baseline. +// If use_box_bottoms is false, baselines positions are formed by +// considering the outlines of the blobs. +bool BaselineRow::FitBaseline(bool use_box_bottoms) { + // Deterministic fitting is used wherever possible. + fitter_.Clear(); + // Linear least squares is a backup if the DetLineFit produces a bad line. 
+ LLSQ llsq; + BLOBNBOX_IT blob_it(blobs_); + + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + if (!use_box_bottoms) blob->EstimateBaselinePosition(); + const TBOX& box = blob->bounding_box(); + int x_middle = (box.left() + box.right()) / 2; +#ifdef kDebugYCoord + if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) { + tprintf("Box bottom = %d, baseline pos=%d for box at:", + box.bottom(), blob->baseline_position()); + box.print(); + } +#endif + fitter_.Add(ICOORD(x_middle, blob->baseline_position()), box.width() / 2); + llsq.add(x_middle, blob->baseline_position()); + } + // Fit the line. + ICOORD pt1, pt2; + baseline_error_ = fitter_.Fit(&pt1, &pt2); + baseline_pt1_ = pt1; + baseline_pt2_ = pt2; + if (baseline_error_ > max_baseline_error_ && + fitter_.SufficientPointsForIndependentFit()) { + // The fit was bad but there were plenty of points, so try skipping + // the first and last few, and use the new line if it dramatically improves + // the error of fit. + double error = fitter_.Fit(kNumSkipPoints, kNumSkipPoints, &pt1, &pt2); + if (error < baseline_error_ / 2.0) { + baseline_error_ = error; + baseline_pt1_ = pt1; + baseline_pt2_ = pt2; + } + } + int debug = 0; +#ifdef kDebugYCoord + Print(); + debug = bounding_box_.bottom() < kDebugYCoord && + bounding_box_.top() > kDebugYCoord + ? 3 : 2; +#endif + // Now we obtained a direction from that fit, see if we can improve the + // fit using the same direction and some other start point. + FCOORD direction(pt2 - pt1); + double target_offset = direction * pt1; + good_baseline_ = false; + FitConstrainedIfBetter(debug, direction, 0.0, target_offset); + // Wild lines can be produced because DetLineFit allows vertical lines, but + // vertical text has been rotated so angles over pi/4 should be disallowed. + // Near vertical lines can still be produced by vertically aligned components + // on very short lines. 
+ double angle = BaselineAngle(); + if (fabs(angle) > M_PI * 0.25) { + // Use the llsq fit as a backup. + baseline_pt1_ = llsq.mean_point(); + baseline_pt2_ = baseline_pt1_ + FCOORD(1.0f, llsq.m()); + // TODO(rays) get rid of this when m and c are no longer used. + double m = llsq.m(); + double c = llsq.c(m); + baseline_error_ = llsq.rms(m, c); + good_baseline_ = false; + } + return good_baseline_; +} + +// Modifies an existing result of FitBaseline to be parallel to the given +// direction vector if that produces a better result. +void BaselineRow::AdjustBaselineToParallel(int debug, + const FCOORD& direction) { + SetupBlobDisplacements(direction); + if (displacement_modes_.empty()) + return; +#ifdef kDebugYCoord + if (bounding_box_.bottom() < kDebugYCoord && + bounding_box_.top() > kDebugYCoord && debug < 3) + debug = 3; +#endif + FitConstrainedIfBetter(debug, direction, 0.0, displacement_modes_[0]); +} + +// Modifies the baseline to snap to the textline grid if the existing +// result is not good enough. +double BaselineRow::AdjustBaselineToGrid(int debug, + const FCOORD& direction, + double line_spacing, + double line_offset) { + if (blobs_->empty()) { + if (debug > 1) { + tprintf("Row empty at:"); + bounding_box_.print(); + } + return line_offset; + } + // Find the displacement_modes_ entry nearest to the grid. + double best_error = 0.0; + int best_index = -1; + for (int i = 0; i < displacement_modes_.size(); ++i) { + double blob_y = displacement_modes_[i]; + double error = BaselineBlock::SpacingModelError(blob_y, line_spacing, + line_offset); + if (debug > 1) { + tprintf("Mode at %g has error %g from model \n", blob_y, error); + } + if (best_index < 0 || error < best_error) { + best_error = error; + best_index = i; + } + } + // We will move the baseline only if the chosen mode is close enough to the + // model. 
+ double model_margin = max_baseline_error_ - best_error; + if (best_index >= 0 && model_margin > 0.0) { + // But if the current baseline is already close to the mode there is no + // point, and only the potential to damage accuracy by changing its angle. + double perp_disp = PerpDisp(direction); + double shift = displacement_modes_[best_index] - perp_disp; + if (fabs(shift) > max_baseline_error_) { + if (debug > 1) { + tprintf("Attempting linespacing model fit with mode %g to row at:", + displacement_modes_[best_index]); + bounding_box_.print(); + } + FitConstrainedIfBetter(debug, direction, model_margin, + displacement_modes_[best_index]); + } else if (debug > 1) { + tprintf("Linespacing model only moves current line by %g for row at:", + shift); + bounding_box_.print(); + } + } else if (debug > 1) { + tprintf("Linespacing model not close enough to any mode for row at:"); + bounding_box_.print(); + } + return fmod(PerpDisp(direction), line_spacing); +} + +// Sets up displacement_modes_ with the top few modes of the perpendicular +// distance of each blob from the given direction vector, after rounding. +void BaselineRow::SetupBlobDisplacements(const FCOORD& direction) { + // Set of perpendicular displacements of the blob bottoms from the required + // baseline direction. + GenericVector<double> perp_blob_dists; + displacement_modes_.truncate(0); + // Gather the skew-corrected position of every blob. 
+ double min_dist = FLT_MAX; + double max_dist = -FLT_MAX; + BLOBNBOX_IT blob_it(blobs_); +#ifdef kDebugYCoord + bool debug = false; +#endif + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + const TBOX& box = blob->bounding_box(); +#ifdef kDebugYCoord + if (box.bottom() < kDebugYCoord && box.top() > kDebugYCoord) debug = true; +#endif + FCOORD blob_pos((box.left() + box.right()) / 2.0f, + blob->baseline_position()); + double offset = direction * blob_pos; + perp_blob_dists.push_back(offset); +#ifdef kDebugYCoord + if (debug) { + tprintf("Displacement %g for blob at:", offset); + box.print(); + } +#endif + UpdateRange(offset, &min_dist, &max_dist); + } + // Set up a histogram using disp_quant_factor_ as the bucket size. + STATS dist_stats(IntCastRounded(min_dist / disp_quant_factor_), + IntCastRounded(max_dist / disp_quant_factor_) + 1); + for (int i = 0; i < perp_blob_dists.size(); ++i) { + dist_stats.add(IntCastRounded(perp_blob_dists[i] / disp_quant_factor_), 1); + } + GenericVector<KDPairInc<float, int> > scaled_modes; + dist_stats.top_n_modes(kMaxDisplacementsModes, &scaled_modes); +#ifdef kDebugYCoord + if (debug) { + for (int i = 0; i < scaled_modes.size(); ++i) { + tprintf("Top mode = %g * %d\n", + scaled_modes[i].key * disp_quant_factor_, scaled_modes[i].data()); + } + } +#endif + for (int i = 0; i < scaled_modes.size(); ++i) + displacement_modes_.push_back(disp_quant_factor_ * scaled_modes[i].key()); +} + +// Fits a line in the given direction to blobs that are close to the given +// target_offset perpendicular displacement from the direction. The fit +// error is allowed to be cheat_allowance worse than the existing fit, and +// will still be used. +// If cheat_allowance > 0, the new fit will be good and replace the current +// fit if it has better fit (with cheat) OR its error is below +// max_baseline_error_ and the old fit is marked bad. 
+// Otherwise the new fit will only replace the old if it is really better, +// or the old fit is marked bad and the new fit has sufficient points, as +// well as being within the max_baseline_error_. +void BaselineRow::FitConstrainedIfBetter(int debug, + const FCOORD& direction, + double cheat_allowance, + double target_offset) { + double halfrange = fit_halfrange_ * direction.length(); + double min_dist = target_offset - halfrange; + double max_dist = target_offset + halfrange; + ICOORD line_pt; + double new_error = fitter_.ConstrainedFit(direction, min_dist, max_dist, + debug > 2, &line_pt); + // Allow cheat_allowance off the new error + new_error -= cheat_allowance; + double old_angle = BaselineAngle(); + double new_angle = direction.angle(); + if (debug > 1) { + tprintf("Constrained error = %g, original = %g", + new_error, baseline_error_); + tprintf(" angles = %g, %g, delta=%g vs threshold %g\n", + old_angle, new_angle, + new_angle - old_angle, kMaxSkewDeviation); + } + bool new_good_baseline = new_error <= max_baseline_error_ && + (cheat_allowance > 0.0 || fitter_.SufficientPointsForIndependentFit()); + // The new will replace the old if any are true: + // 1. the new error is better + // 2. the old is NOT good, but the new is + // 3. there is a wild angular difference between them (assuming that the new + // is a better guess at the angle.) + if (new_error <= baseline_error_ || + (!good_baseline_ && new_good_baseline) || + fabs(new_angle - old_angle) > kMaxSkewDeviation) { + baseline_error_ = new_error; + baseline_pt1_ = line_pt; + baseline_pt2_ = baseline_pt1_ + direction; + good_baseline_ = new_good_baseline; + if (debug > 1) { + tprintf("Replacing with constrained baseline, good = %d\n", + good_baseline_); + } + } else if (debug > 1) { + tprintf("Keeping old baseline\n"); + } +} + +// Returns the perpendicular distance of the point from the straight +// baseline. 
+float BaselineRow::PerpDistanceFromBaseline(const FCOORD& pt) const { + FCOORD baseline_vector(baseline_pt2_ - baseline_pt1_); + FCOORD offset_vector(pt - baseline_pt1_); + float distance = baseline_vector * offset_vector; + float sqlength = baseline_vector.sqlength(); + if (sqlength == 0.0f) { + tprintf("unexpected baseline vector (0,0)\n"); + return 0.0f; + } + return std::sqrt(distance * distance / sqlength); +} + +// Computes the bounding box of the row. +void BaselineRow::ComputeBoundingBox() { + BLOBNBOX_IT it(blobs_); + TBOX box; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + box += it.data()->bounding_box(); + } + bounding_box_ = box; +} + + +BaselineBlock::BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block) + : block_(block), debug_level_(debug_level), non_text_block_(non_text), + good_skew_angle_(false), skew_angle_(0.0), + line_spacing_(block->line_spacing), line_offset_(0.0), model_error_(0.0) { + TO_ROW_IT row_it(block_->get_rows()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + // Sort the blobs on the rows. + row_it.data()->blob_list()->sort(blob_x_order); + rows_.push_back(new BaselineRow(block->line_spacing, row_it.data())); + } +} + +// Computes and returns the absolute error of the given perp_disp from the +// given linespacing model. +double BaselineBlock::SpacingModelError(double perp_disp, double line_spacing, + double line_offset) { + // Round to the nearest multiple of line_spacing + line offset. + int multiple = IntCastRounded((perp_disp - line_offset) / line_spacing); + double model_y = line_spacing * multiple + line_offset; + return fabs(perp_disp - model_y); +} + +// Fits straight line baselines and computes the skew angle from the +// median angle. Returns true if a good angle is found. +// If use_box_bottoms is false, baseline positions are formed by +// considering the outlines of the blobs. 
+bool BaselineBlock::FitBaselinesAndFindSkew(bool use_box_bottoms) { + if (non_text_block_) return false; + GenericVector<double> angles; + for (int r = 0; r < rows_.size(); ++r) { + BaselineRow* row = rows_[r]; + if (row->FitBaseline(use_box_bottoms)) { + double angle = row->BaselineAngle(); + angles.push_back(angle); + } + if (debug_level_ > 1) + row->Print(); + } + + if (!angles.empty()) { + skew_angle_ = MedianOfCircularValues(M_PI, &angles); + good_skew_angle_ = true; + } else { + skew_angle_ = 0.0f; + good_skew_angle_ = false; + } + if (debug_level_ > 0) { + tprintf("Initial block skew angle = %g, good = %d\n", + skew_angle_, good_skew_angle_); + } + return good_skew_angle_; +} + +// Refits the baseline to a constrained angle, using the stored block +// skew if good enough, otherwise the supplied default skew. +void BaselineBlock::ParallelizeBaselines(double default_block_skew) { + if (non_text_block_) return; + if (!good_skew_angle_) skew_angle_ = default_block_skew; + if (debug_level_ > 0) + tprintf("Adjusting block to skew angle %g\n", skew_angle_); + FCOORD direction(cos(skew_angle_), sin(skew_angle_)); + for (int r = 0; r < rows_.size(); ++r) { + BaselineRow* row = rows_[r]; + row->AdjustBaselineToParallel(debug_level_, direction); + if (debug_level_ > 1) + row->Print(); + } + if (rows_.size() < 3 || !ComputeLineSpacing()) + return; + // Enforce the line spacing model on all lines that don't yet have a good + // baseline. + // Start by finding the row that is best fitted to the model. + int best_row = 0; + double best_error = SpacingModelError(rows_[0]->PerpDisp(direction), + line_spacing_, line_offset_); + for (int r = 1; r < rows_.size(); ++r) { + double error = SpacingModelError(rows_[r]->PerpDisp(direction), + line_spacing_, line_offset_); + if (error < best_error) { + best_error = error; + best_row = r; + } + } + // Starting at the best fitting row, work outwards, syncing the offset. 
+ double offset = line_offset_; + for (int r = best_row + 1; r < rows_.size(); ++r) { + offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction, + line_spacing_, offset); + } + offset = line_offset_; + for (int r = best_row - 1; r >= 0; --r) { + offset = rows_[r]->AdjustBaselineToGrid(debug_level_, direction, + line_spacing_, offset); + } +} + +// Sets the parameters in TO_BLOCK that are needed by subsequent processes. +void BaselineBlock::SetupBlockParameters() const { + if (line_spacing_ > 0.0) { + // Where was block_line_spacing set before? + float min_spacing = std::min(block_->line_spacing, static_cast<float>(line_spacing_)); + if (min_spacing < block_->line_size) + block_->line_size = min_spacing; + block_->line_spacing = line_spacing_; + block_->baseline_offset = line_offset_; + block_->max_blob_size = line_spacing_ * kMaxBlobSizeMultiple; + } + // Setup the parameters on all the rows. + TO_ROW_IT row_it(block_->get_rows()); + for (int r = 0; r < rows_.size(); ++r, row_it.forward()) { + BaselineRow* row = rows_[r]; + TO_ROW* to_row = row_it.data(); + row->SetupOldLineParameters(to_row); + } +} + +// Processing that is required before fitting baseline splines, but requires +// linear baselines in order to be successful: +// Removes noise if required +// Separates out underlines +// Pre-associates blob fragments. +// TODO(rays/joeliu) This entire section of code is inherited from the past +// and could be improved/eliminated. +// page_tr is used to size a debug window. +void BaselineBlock::PrepareForSplineFitting(ICOORD page_tr, bool remove_noise) { + if (non_text_block_) return; + if (remove_noise) { + vigorous_noise_removal(block_); + } + FCOORD rotation(1.0f, 0.0f); + double gradient = tan(skew_angle_); + separate_underlines(block_, gradient, rotation, true); + pre_associate_blobs(page_tr, block_, rotation, true); +} + +// Fits splines to the textlines, or creates fake QSPLINES from the straight +// baselines that are already on the TO_ROWs. 
+// As a side-effect, computes the xheights of the rows and the block. +// Although x-height estimation is conceptually separate, it is part of +// detecting perspective distortion and therefore baseline fitting. +void BaselineBlock::FitBaselineSplines(bool enable_splines, + bool show_final_rows, + Textord* textord) { + double gradient = tan(skew_angle_); + FCOORD rotation(1.0f, 0.0f); + + if (enable_splines) { + textord->make_spline_rows(block_, gradient, show_final_rows); + } else { + // Make a fake spline from the existing line. + TBOX block_box= block_->block->pdblk.bounding_box(); + TO_ROW_IT row_it = block_->get_rows(); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + TO_ROW* row = row_it.data(); + int32_t xstarts[2] = { block_box.left(), block_box.right() }; + double coeffs[3] = { 0.0, row->line_m(), row->line_c() }; + row->baseline = QSPLINE(1, xstarts, coeffs); + textord->compute_row_xheight(row, block_->block->classify_rotation(), + row->line_m(), block_->line_size); + } + } + textord->compute_block_xheight(block_, gradient); + block_->block->set_xheight(block_->xheight); + if (textord_restore_underlines) // fix underlines + restore_underlined_blobs(block_); +} + +#ifndef GRAPHICS_DISABLED + +// Draws the (straight) baselines and final blobs colored according to +// what was discarded as noise and what is associated with each row. 
+void BaselineBlock::DrawFinalRows(const ICOORD& page_tr) { + if (non_text_block_) return; + double gradient = tan(skew_angle_); + FCOORD rotation(1.0f, 0.0f); + int left_edge = block_->block->pdblk.bounding_box().left(); + ScrollView* win = create_to_win(page_tr); + ScrollView::Color colour = ScrollView::RED; + TO_ROW_IT row_it = block_->get_rows(); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + plot_parallel_row(row_it.data(), gradient, left_edge, colour, rotation); + colour = static_cast<ScrollView::Color>(colour + 1); + if (colour > ScrollView::MAGENTA) + colour = ScrollView::RED; + } + plot_blob_list(win, &block_->blobs, ScrollView::MAGENTA, ScrollView::WHITE); + // Show discarded blobs. + plot_blob_list(win, &block_->underlines, + ScrollView::YELLOW, ScrollView::CORAL); + if (block_->blobs.length() > 0) + tprintf("%d blobs discarded as noise\n", block_->blobs.length()); + draw_meanlines(block_, gradient, left_edge, ScrollView::WHITE, rotation); +} + +#endif // !GRAPHICS_DISABLED + +void BaselineBlock::DrawPixSpline(Pix* pix_in) { + if (non_text_block_) return; + TO_ROW_IT row_it = block_->get_rows(); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row_it.data()->baseline.plot(pix_in); + } +} + +// Top-level line-spacing calculation. Computes an estimate of the line- +// spacing, using the current baselines in the TO_ROWS of the block, and +// then refines it by fitting a regression line to the baseline positions +// as a function of their integer index. +// Returns true if it seems that the model is a reasonable fit to the +// observations. +bool BaselineBlock::ComputeLineSpacing() { + FCOORD direction(cos(skew_angle_), sin(skew_angle_)); + GenericVector<double> row_positions; + ComputeBaselinePositions(direction, &row_positions); + if (row_positions.size() < 2) return false; + EstimateLineSpacing(); + RefineLineSpacing(row_positions); + // Verify that the model is reasonable. 
+ double max_baseline_error = kMaxBaselineError * line_spacing_; + int non_trivial_gaps = 0; + int fitting_gaps = 0; + for (int i = 1; i < row_positions.size(); ++i) { + double row_gap = fabs(row_positions[i - 1] - row_positions[i]); + if (row_gap > max_baseline_error) { + ++non_trivial_gaps; + if (fabs(row_gap - line_spacing_) <= max_baseline_error) + ++fitting_gaps; + } + } + if (debug_level_ > 0) { + tprintf("Spacing %g, in %d rows, %d gaps fitted out of %d non-trivial\n", + line_spacing_, row_positions.size(), fitting_gaps, + non_trivial_gaps); + } + return fitting_gaps > non_trivial_gaps * kMinFittingLinespacings; +} + +// Computes the deskewed vertical position of each baseline in the block and +// stores them in the given vector. +// This is calculated as the perpendicular distance of the middle of each +// baseline (in case it has a different skew angle) from the line passing +// through the origin parallel to the block baseline angle. +// NOTE that "distance" above is a signed quantity so we can tell which side +// of the block baseline a line sits, hence the function and argument name +// positions not distances. +void BaselineBlock::ComputeBaselinePositions(const FCOORD& direction, + GenericVector<double>* positions) { + positions->clear(); + for (int r = 0; r < rows_.size(); ++r) { + BaselineRow* row = rows_[r]; + const TBOX& row_box = row->bounding_box(); + float x_middle = (row_box.left() + row_box.right()) / 2.0f; + FCOORD row_pos(x_middle, static_cast<float>(row->StraightYAtX(x_middle))); + float offset = direction * row_pos; + positions->push_back(offset); + } +} + +// Computes an estimate of the line spacing of the block from the median +// of the spacings between adjacent overlapping textlines. +void BaselineBlock::EstimateLineSpacing() { + GenericVector<float> spacings; + for (int r = 0; r < rows_.size(); ++r) { + BaselineRow* row = rows_[r]; + // Exclude silly lines. 
+ if (fabs(row->BaselineAngle()) > M_PI * 0.25) continue; + // Find the first row after row that overlaps it significantly. + const TBOX& row_box = row->bounding_box(); + int r2; + for (r2 = r + 1; r2 < rows_.size() && + !row_box.major_x_overlap(rows_[r2]->bounding_box()); + ++r2); + if (r2 < rows_.size()) { + BaselineRow* row2 = rows_[r2]; + // Exclude silly lines. + if (fabs(row2->BaselineAngle()) > M_PI * 0.25) continue; + float spacing = row->SpaceBetween(*row2); + spacings.push_back(spacing); + } + } + // If we have at least one value, use it, otherwise leave the previous + // value unchanged. + if (!spacings.empty()) { + line_spacing_ = spacings[spacings.choose_nth_item(spacings.size() / 2)]; + if (debug_level_ > 1) + tprintf("Estimate of linespacing = %g\n", line_spacing_); + } +} + +// Refines the line spacing of the block by fitting a regression +// line to the deskewed y-position of each baseline as a function of its +// estimated line index, allowing for a small error in the initial linespacing +// and choosing the best available model. +void BaselineBlock::RefineLineSpacing(const GenericVector<double>& positions) { + double spacings[3], offsets[3], errors[3]; + int index_range; + errors[0] = FitLineSpacingModel(positions, line_spacing_, + &spacings[0], &offsets[0], &index_range); + if (index_range > 1) { + double spacing_plus = line_spacing_ / (1.0 + 1.0 / index_range); + // Try the hypotheses that there might be index_range +/- 1 line spaces. 
+ errors[1] = FitLineSpacingModel(positions, spacing_plus, + &spacings[1], &offsets[1], nullptr); + double spacing_minus = line_spacing_ / (1.0 - 1.0 / index_range); + errors[2] = FitLineSpacingModel(positions, spacing_minus, + &spacings[2], &offsets[2], nullptr); + for (int i = 1; i <= 2; ++i) { + if (errors[i] < errors[0]) { + spacings[0] = spacings[i]; + offsets[0] = offsets[i]; + errors[0] = errors[i]; + } + } + } + if (spacings[0] > 0.0) { + line_spacing_ = spacings[0]; + line_offset_ = offsets[0]; + model_error_ = errors[0]; + if (debug_level_ > 0) { + tprintf("Final linespacing model = %g + offset %g, error %g\n", + line_spacing_, line_offset_, model_error_); + } + } +} + +// Given an initial estimate of line spacing (m_in) and the positions of each +// baseline, computes the line spacing of the block more accurately in m_out, +// and the corresponding intercept in c_out, and the number of spacings seen +// in index_delta. Returns the error of fit to the line spacing model. +// Uses a simple linear regression, but optimized the offset using the median. +double BaselineBlock::FitLineSpacingModel( + const GenericVector<double>& positions, double m_in, + double* m_out, double* c_out, int* index_delta) { + if (m_in == 0.0f || positions.size() < 2) { + *m_out = m_in; + *c_out = 0.0; + if (index_delta != nullptr) *index_delta = 0; + return 0.0; + } + GenericVector<double> offsets; + // Get the offset (remainder) linespacing for each line and choose the median. + for (int i = 0; i < positions.size(); ++i) + offsets.push_back(fmod(positions[i], m_in)); + // Get the median offset. + double median_offset = MedianOfCircularValues(m_in, &offsets); + // Now fit a line to quantized line number and offset. 
+ LLSQ llsq; + int min_index = INT32_MAX; + int max_index = -INT32_MAX; + for (int i = 0; i < positions.size(); ++i) { + double y_pos = positions[i]; + int row_index = IntCastRounded((y_pos - median_offset) / m_in); + UpdateRange(row_index, &min_index, &max_index); + llsq.add(row_index, y_pos); + } + // Get the refined line spacing. + *m_out = llsq.m(); + // Use the median offset rather than the mean. + offsets.truncate(0); + for (int i = 0; i < positions.size(); ++i) + offsets.push_back(fmod(positions[i], *m_out)); + // Get the median offset. + if (debug_level_ > 2) { + for (int i = 0; i < offsets.size(); ++i) + tprintf("%d: %g\n", i, offsets[i]); + } + *c_out = MedianOfCircularValues(*m_out, &offsets); + if (debug_level_ > 1) { + tprintf("Median offset = %g, compared to mean of %g.\n", + *c_out, llsq.c(*m_out)); + } + // Index_delta is the number of hypothesized line gaps present. + if (index_delta != nullptr) + *index_delta = max_index - min_index; + // Use the regression model's intercept to compute the error, as it may be + // a full line-spacing in disagreement with the median. + double rms_error = llsq.rms(*m_out, llsq.c(*m_out)); + if (debug_level_ > 1) { + tprintf("Linespacing of y=%g x + %g improved to %g x + %g, rms=%g\n", + m_in, median_offset, *m_out, *c_out, rms_error); + } + return rms_error; +} + +BaselineDetect::BaselineDetect(int debug_level, const FCOORD& page_skew, + TO_BLOCK_LIST* blocks) + : page_skew_(page_skew), debug_level_(debug_level) { + TO_BLOCK_IT it(blocks); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + TO_BLOCK* to_block = it.data(); + BLOCK* block = to_block->block; + POLY_BLOCK* pb = block->pdblk.poly_block(); + // A note about non-text blocks. + // On output, non-text blocks are supposed to contain a single empty word + // in each incoming text line. These mark out the polygonal bounds of the + // block. 
Ideally no baselines should be required, but currently + // make_words crashes if a baseline and xheight are not provided, so we + // include non-text blocks here, but flag them for special treatment. + bool non_text = pb != nullptr && !pb->IsText(); + blocks_.push_back(new BaselineBlock(debug_level_, non_text, to_block)); + } +} + +// Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers +// block-wise and page-wise data to smooth small blocks/rows, and applies +// smoothing based on block/page-level skew and block-level linespacing. +void BaselineDetect::ComputeStraightBaselines(bool use_box_bottoms) { + GenericVector<double> block_skew_angles; + for (int i = 0; i < blocks_.size(); ++i) { + BaselineBlock* bl_block = blocks_[i]; + if (debug_level_ > 0) + tprintf("Fitting initial baselines...\n"); + if (bl_block->FitBaselinesAndFindSkew(use_box_bottoms)) { + block_skew_angles.push_back(bl_block->skew_angle()); + } + } + // Compute a page-wide default skew for blocks with too little information. + double default_block_skew = page_skew_.angle(); + if (!block_skew_angles.empty()) { + default_block_skew = MedianOfCircularValues(M_PI, &block_skew_angles); + } + if (debug_level_ > 0) { + tprintf("Page skew angle = %g\n", default_block_skew); + } + // Set bad lines in each block to the default block skew and then force fit + // a linespacing model where it makes sense to do so. + for (int i = 0; i < blocks_.size(); ++i) { + BaselineBlock* bl_block = blocks_[i]; + bl_block->ParallelizeBaselines(default_block_skew); + bl_block->SetupBlockParameters(); // This replaced compute_row_stats. + } +} + +// Computes the baseline splines for each TO_ROW in each TO_BLOCK and +// other associated side-effects, including pre-associating blobs, computing +// x-heights and displaying debug information. +// NOTE that ComputeStraightBaselines must have been called first as this +// sets up data in the TO_ROWs upon which this function depends. 
+void BaselineDetect::ComputeBaselineSplinesAndXheights(const ICOORD& page_tr, + bool enable_splines, + bool remove_noise, + bool show_final_rows, + Textord* textord) { + for (int i = 0; i < blocks_.size(); ++i) { + BaselineBlock* bl_block = blocks_[i]; + if (enable_splines) + bl_block->PrepareForSplineFitting(page_tr, remove_noise); + bl_block->FitBaselineSplines(enable_splines, show_final_rows, textord); +#ifndef GRAPHICS_DISABLED + if (show_final_rows) { + bl_block->DrawFinalRows(page_tr); + } +#endif + } +} + +} // namespace tesseract. diff --git a/tesseract/src/textord/baselinedetect.h b/tesseract/src/textord/baselinedetect.h new file mode 100644 index 00000000..579558ed --- /dev/null +++ b/tesseract/src/textord/baselinedetect.h @@ -0,0 +1,276 @@ +/////////////////////////////////////////////////////////////////////// +// File: baselinedetect.h +// Description: Initial Baseline Determination. +// Copyright 2012 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_TEXTORD_BASELINEDETECT_H_ +#define TESSERACT_TEXTORD_BASELINEDETECT_H_ + +#include "detlinefit.h" +#include "points.h" +#include "rect.h" + +#include "genericvector.h" + +struct Pix; + +namespace tesseract { + +class Textord; +class BLOBNBOX_LIST; +class TO_BLOCK; +class TO_BLOCK_LIST; +class TO_ROW; + +// Class to compute and hold baseline data for a TO_ROW. +class BaselineRow { + public: + BaselineRow(double line_size, TO_ROW* to_row); + + // Returns the stored bounding box of all the blobs in the row. + const TBOX& bounding_box() const { + return bounding_box_; + } + // Sets the TO_ROW with the output straight line. + void SetupOldLineParameters(TO_ROW* row) const; + + // Outputs diagnostic information. + void Print() const; + + // Returns the skew angle (in radians) of the current baseline in [-pi,pi]. + double BaselineAngle() const; + // Computes and returns the linespacing at the middle of the overlap + // between this and other. + double SpaceBetween(const BaselineRow& other) const; + // Computes and returns the displacement of the center of the line + // perpendicular to the given direction. + double PerpDisp(const FCOORD& direction) const; + // Computes the y coordinate at the given x using the straight baseline + // defined by baseline1_ and baseline2_. + double StraightYAtX(double x) const; + + // Fits a straight baseline to the points. Returns true if it had enough + // points to be reasonably sure of the fitted baseline. + // If use_box_bottoms is false, baseline positions are formed by + // considering the outlines of the blobs. + bool FitBaseline(bool use_box_bottoms); + // Modifies an existing result of FitBaseline to be parallel to the given + // vector if that produces a better result. + void AdjustBaselineToParallel(int debug, const FCOORD& direction); + // Modifies the baseline to snap to the textline grid if the existing + // result is not good enough.
+ double AdjustBaselineToGrid(int debug, const FCOORD& direction, + double line_spacing, double line_offset); + + private: + // Sets up displacement_modes_ with the top few modes of the perpendicular + // distance of each blob from the given direction vector, after rounding. + void SetupBlobDisplacements(const FCOORD& direction); + + // Fits a line in the given direction to blobs that are close to the given + // target_offset perpendicular displacement from the direction. The fit + // error is allowed to be cheat_allowance worse than the existing fit, and + // will still be used. + // If cheat_allowance > 0, the new fit will be good and replace the current + // fit if it has better fit (with cheat) OR its error is below + // max_baseline_error_ and the old fit is marked bad. + // Otherwise the new fit will only replace the old if it is really better, + // or the old fit is marked bad and the new fit has sufficient points, as + // well as being within the max_baseline_error_. + void FitConstrainedIfBetter(int debug, const FCOORD& direction, + double cheat_allowance, + double target_offset); + // Returns the perpendicular distance of the point from the straight + // baseline. + float PerpDistanceFromBaseline(const FCOORD& pt) const; + // Computes the bounding box of the row. + void ComputeBoundingBox(); + + // The blobs of the row to which this BaselineRow adds extra information + // during baseline fitting. Note that blobs_ could easily come from either + // a TO_ROW or a ColPartition. + BLOBNBOX_LIST* blobs_; + // Bounding box of all the blobs. + TBOX bounding_box_; + // Fitter used to fit lines to the blobs. + DetLineFit fitter_; + // 2 points on the straight baseline. + FCOORD baseline_pt1_; + FCOORD baseline_pt2_; + // Set of modes of displacements. They indicate preferable baseline positions. + GenericVector<double> displacement_modes_; + // Quantization factor used for displacement_modes_.
+ double disp_quant_factor_; + // Half the acceptance range of blob displacements for computing the + // error during a constrained fit. + double fit_halfrange_; + // Max baseline error before a line is regarded as fitting badly. + double max_baseline_error_; + // The error of fit of the baseline. + double baseline_error_; + // True if this row seems to have a good baseline. + bool good_baseline_; +}; + +// Class to compute and hold baseline data for a TO_BLOCK. +class BaselineBlock { + public: + BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block); + + // Returns the TO_BLOCK to which this class adds baseline information. + TO_BLOCK* block() const { + return block_; + } + // Returns the stored skew angle of the block, in radians. + double skew_angle() const { + return skew_angle_; + } + + // Computes and returns the absolute error of the given perp_disp from the + // given linespacing model. + static double SpacingModelError(double perp_disp, double line_spacing, + double line_offset); + + // Fits straight line baselines and computes the skew angle from the + // median angle. Returns true if a good angle is found. + // If use_box_bottoms is false, baseline positions are formed by + // considering the outlines of the blobs. + bool FitBaselinesAndFindSkew(bool use_box_bottoms); + + // Refits the baseline to a constrained angle, using the stored block + // skew if good enough, otherwise the supplied default skew. + void ParallelizeBaselines(double default_block_skew); + + // Sets the parameters in TO_BLOCK that are needed by subsequent processes. + void SetupBlockParameters() const; + + // Processing that is required before fitting baseline splines, but requires + // linear baselines in order to be successful: + // Removes noise if required + // Separates out underlines + // Pre-associates blob fragments. + // TODO(rays/joeliu) This entire section of code is inherited from the past + // and could be improved/eliminated. + // page_tr is used to size a debug window.
+ void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise); + + // Fits splines to the textlines, or creates fake QSPLINES from the straight + // baselines that are already on the TO_ROWs. + // As a side-effect, computes the xheights of the rows and the block. + // Although x-height estimation is conceptually separate, it is part of + // detecting perspective distortion and therefore baseline fitting. + void FitBaselineSplines(bool enable_splines, bool show_final_rows, + Textord* textord); + + // Draws the (straight) baselines and final blobs colored according to + // what was discarded as noise and what is associated with each row. + void DrawFinalRows(const ICOORD& page_tr); + + // Render the generated spline baselines for this block on pix_in. + void DrawPixSpline(Pix* pix_in); + + private: + // Top-level line-spacing calculation. Computes an estimate of the line- + // spacing, using the current baselines in the TO_ROWS of the block, and + // then refines it by fitting a regression line to the baseline positions + // as a function of their integer index. + // Returns true if it seems that the model is a reasonable fit to the + // observations. + bool ComputeLineSpacing(); + + // Computes the deskewed vertical position of each baseline in the block and + // stores them in the given vector. + void ComputeBaselinePositions(const FCOORD& direction, + GenericVector<double>* positions); + + // Computes an estimate of the line spacing of the block from the median + // of the spacings between adjacent overlapping textlines. + void EstimateLineSpacing(); + + // Refines the line spacing of the block by fitting a regression + // line to the deskewed y-position of each baseline as a function of its + // estimated line index, allowing for a small error in the initial linespacing + // and choosing the best available model.
+ void RefineLineSpacing(const GenericVector<double>& positions); + + // Given an initial estimate of line spacing (m_in) and the positions of each + // baseline, computes the line spacing of the block more accurately in m_out, + // and the corresponding intercept in c_out, and the number of spacings seen + // in index_delta. Returns the error of fit to the line spacing model. + double FitLineSpacingModel(const GenericVector<double>& positions, + double m_in, double* m_out, double* c_out, + int* index_delta); + + + // The block to which this class adds extra information used during baseline + // calculation. + TO_BLOCK* block_; + // The rows in the block that we will be working with. + PointerVector<BaselineRow> rows_; + // Amount of debugging output to provide. + int debug_level_; + // True if the block is non-text (graphic). + bool non_text_block_; + // True if the block has at least one good enough baseline to compute the + // skew angle and therefore skew_angle_ is valid. + bool good_skew_angle_; + // Angle of skew in radians using the conventional anticlockwise from x-axis. + double skew_angle_; + // Current best estimate line spacing in pixels perpendicular to skew_angle_. + double line_spacing_; + // Offset for baseline positions, in pixels. Each baseline is at + // line_spacing_ * n + line_offset_ for integer n, which represents + // [textline] line number in a line numbering system that has line 0 on or + // at least near the x-axis. Not equal to the actual line number of a line + // within a block as most blocks are not near the x-axis. + double line_offset_; + // The error of the line spacing model.
+ double model_error_; +}; + +class BaselineDetect { + public: + BaselineDetect(int debug_level, const FCOORD& page_skew, + TO_BLOCK_LIST* blocks); + + ~BaselineDetect() = default; + + // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers + // block-wise and page-wise data to smooth small blocks/rows, and applies + // smoothing based on block/page-level skew and block-level linespacing. + void ComputeStraightBaselines(bool use_box_bottoms); + + // Computes the baseline splines for each TO_ROW in each TO_BLOCK and + // other associated side-effects, including pre-associating blobs, computing + // x-heights and displaying debug information. + // NOTE that ComputeStraightBaselines must have been called first as this + // sets up data in the TO_ROWs upon which this function depends. + void ComputeBaselineSplinesAndXheights(const ICOORD& page_tr, + bool enable_splines, + bool remove_noise, + bool show_final_rows, + Textord* textord); + + private: + // Average (median) skew of the blocks on the page among those that have + // a good angle of their own. + FCOORD page_skew_; + // Amount of debug output to produce. + int debug_level_; + // The blocks that we are working with. + PointerVector<BaselineBlock> blocks_; +}; + +} // namespace tesseract + +#endif // TESSERACT_TEXTORD_BASELINEDETECT_H_ diff --git a/tesseract/src/textord/bbgrid.cpp b/tesseract/src/textord/bbgrid.cpp new file mode 100644 index 00000000..6e3e3346 --- /dev/null +++ b/tesseract/src/textord/bbgrid.cpp @@ -0,0 +1,285 @@ +/////////////////////////////////////////////////////////////////////// +// File: bbgrid.cpp +// Description: Class to hold BLOBNBOXs in a grid for fast access +// to neighbours. +// Author: Ray Smith +// +// (C) Copyright 2007, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "bbgrid.h" +#include "helpers.h" +#include "ocrblock.h" + +namespace tesseract { + +/////////////////////////////////////////////////////////////////////// +// BBGrid IMPLEMENTATION. +/////////////////////////////////////////////////////////////////////// +// Constructs the grid geometry by delegating to Init. +GridBase::GridBase(int gridsize, const ICOORD& bleft, const ICOORD& tright) { + Init(gridsize, bleft, tright); +} + +// Destructor. +// It is defined here, so the compiler can create a single vtable +// instead of weak vtables in every compilation unit. +GridBase::~GridBase() = default; + +// (Re)Initialize the grid. The gridsize is the size in pixels of each cell, +// and bleft, tright are the bounding box of everything to go in it. +void GridBase::Init(int gridsize, const ICOORD& bleft, const ICOORD& tright) { + // A cell size of zero would divide by zero below, so use 1 instead. + gridsize_ = (gridsize == 0) ? 1 : gridsize; + bleft_ = bleft; + tright_ = tright; + // Adding gridsize_ - 1 before dividing counts partial cells at the right + // and top edges as whole cells. + const int pixel_width = tright.x() - bleft.x(); + const int pixel_height = tright.y() - bleft.y(); + gridwidth_ = (pixel_width + gridsize_ - 1) / gridsize_; + gridheight_ = (pixel_height + gridsize_ - 1) / gridsize_; + gridbuckets_ = gridwidth_ * gridheight_; +} + +// Compute the given grid coordinates from image coords. +// The result is clipped to lie inside the grid. +void GridBase::GridCoords(int x, int y, int* grid_x, int* grid_y) const { + const int cell_x = (x - bleft_.x()) / gridsize_; + const int cell_y = (y - bleft_.y()) / gridsize_; + *grid_x = cell_x; + *grid_y = cell_y; + ClipGridCoords(grid_x, grid_y); +} + +// Clip the given grid coordinates to fit within the grid.
+void GridBase::ClipGridCoords(int* x, int* y) const { + *x = ClipToRange(*x, 0, gridwidth_ - 1); + *y = ClipToRange(*y, 0, gridheight_ - 1); +} + +IntGrid::IntGrid() { + grid_ = nullptr; +} + +IntGrid::IntGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright) + : grid_(nullptr) { + Init(gridsize, bleft, tright); +} + +IntGrid::~IntGrid() { + delete [] grid_; +} + +// (Re)Initialize the grid. The gridsize is the size in pixels of each cell, +// and bleft, tright are the bounding box of everything to go in it. +// Any previous cell array is deleted and the new one is cleared to zero. +void IntGrid::Init(int gridsize, const ICOORD& bleft, const ICOORD& tright) { + GridBase::Init(gridsize, bleft, tright); + delete [] grid_; + grid_ = new int[gridbuckets_]; + Clear(); +} + +// Clear all the ints in the grid to zero. +void IntGrid::Clear() { + for (int i = 0; i < gridbuckets_; ++i) { + grid_[i] = 0; + } +} + +// Rotate the grid by rotation, keeping cell contents. +// rotation must be a multiple of 90 degrees. +// NOTE: due to partial cells, cell coverage in the rotated grid will be +// inexact. This is why there is no Rotate for the generic BBGrid. +// TODO(rays) investigate fixing this inaccuracy by moving the origin after +// rotation. +void IntGrid::Rotate(const FCOORD& rotation) { + ASSERT_HOST(rotation.x() == 0.0f || rotation.y() == 0.0f); + ICOORD old_bleft(bleft()); + //ICOORD old_tright(tright()); + int old_width = gridwidth(); + int old_height = gridheight(); + TBOX box(bleft(), tright()); + box.rotate(rotation); + // Detach the current cell array so that Init() below allocates a new one + // without deleting the old data, which is still needed for the copy. + int* old_grid = grid_; + grid_ = nullptr; + Init(gridsize(), box.botleft(), box.topright()); + // Iterate over the old grid, copying data to the rotated position in the new.
+ int oldi = 0; + FCOORD x_step(rotation); + x_step *= gridsize(); + for (int oldy = 0; oldy < old_height; ++oldy) { + FCOORD line_pos(old_bleft.x(), old_bleft.y() + gridsize() * oldy); + line_pos.rotate(rotation); + for (int oldx = 0; oldx < old_width; ++oldx, line_pos += x_step, ++oldi) { + int grid_x, grid_y; + GridCoords(static_cast<int>(line_pos.x() + 0.5), + static_cast<int>(line_pos.y() + 0.5), + &grid_x, &grid_y); + grid_[grid_y * gridwidth() + grid_x] = old_grid[oldi]; + } + } + delete [] old_grid; +} + +// Returns a new IntGrid containing values equal to the sum of all the +// neighbouring cells. The returned grid must be deleted after use. +// For ease of implementation, edge cells are double counted, to make them +// have the same range as the non-edge cells. +// Only cells whose own value is greater than 1 receive a sum; every other +// cell of the returned grid stays at zero. +IntGrid* IntGrid::NeighbourhoodSum() const { + auto* sumgrid = new IntGrid(gridsize(), bleft(), tright()); + for (int y = 0; y < gridheight(); ++y) { + for (int x = 0; x < gridwidth(); ++x) { + int cell_count = 0; + for (int yoffset = -1; yoffset <= 1; ++yoffset) { + for (int xoffset = -1; xoffset <= 1; ++xoffset) { + int grid_x = x + xoffset; + int grid_y = y + yoffset; + // Out-of-range neighbours are clipped back onto the nearest edge cell. + ClipGridCoords(&grid_x, &grid_y); + cell_count += GridCellValue(grid_x, grid_y); + } + } + if (GridCellValue(x, y) > 1) + sumgrid->SetGridCell(x, y, cell_count); + } + } + return sumgrid; +} + +// Returns true if more than half the area of the rect is covered by grid +// cells that are over the threshold.
+bool IntGrid::RectMostlyOverThreshold(const TBOX& rect, int threshold) const { + int min_x, min_y, max_x, max_y; + GridCoords(rect.left(), rect.bottom(), &min_x, &min_y); + GridCoords(rect.right(), rect.top(), &max_x, &max_y); + int total_area = 0; + for (int y = min_y; y <= max_y; ++y) { + for (int x = min_x; x <= max_x; ++x) { + int value = GridCellValue(x, y); + if (value > threshold) { + // GridCoords subtracts bleft_ to get cell indices, so bleft_ has to be + // added back to get the cell box in the same image coords as rect. + const int cell_left = bleft_.x() + x * gridsize_; + const int cell_bottom = bleft_.y() + y * gridsize_; + TBOX cell_box(cell_left, cell_bottom, + cell_left + gridsize_, cell_bottom + gridsize_); + cell_box &= rect; // This is in-place box intersection. + total_area += cell_box.area(); + } + } + } + return total_area * 2 > rect.area(); +} + +// Returns true if any cell value in the given rectangle is zero. +bool IntGrid::AnyZeroInRect(const TBOX& rect) const { + int min_x, min_y, max_x, max_y; + GridCoords(rect.left(), rect.bottom(), &min_x, &min_y); + GridCoords(rect.right(), rect.top(), &max_x, &max_y); + for (int y = min_y; y <= max_y; ++y) { + for (int x = min_x; x <= max_x; ++x) { + if (GridCellValue(x, y) == 0) + return true; + } + } + return false; +} + +// Returns a full-resolution binary pix in which each cell over the given +// threshold is filled as a black square. pixDestroy after use. +// Edge cells, which have a zero 4-neighbour, are not marked. +// NOTE(review): the raster y origin uses tright().y() while x is relative to +// bleft(), which only matches the pix size when bleft().y() is 0 - confirm +// that callers always use such a grid before changing it. +Pix* IntGrid::ThresholdToPix(int threshold) const { + Pix* pix = pixCreate(tright().x() - bleft().x(), + tright().y() - bleft().y(), 1); + int cellsize = gridsize(); + for (int y = 0; y < gridheight(); ++y) { + for (int x = 0; x < gridwidth(); ++x) { + if (GridCellValue(x, y) > threshold && + GridCellValue(x - 1, y) > 0 && GridCellValue(x + 1, y) > 0 && + GridCellValue(x, y - 1) > 0 && GridCellValue(x, y + 1) > 0) { + pixRasterop(pix, x * cellsize, tright().y() - ((y + 1) * cellsize), + cellsize, cellsize, PIX_SET, nullptr, 0, 0); + } + } + } + return pix; +} + +// Make a Pix of the correct scaled size for the TraceOutline functions.
+static Pix* GridReducedPix(const TBOX& box, int gridsize, + ICOORD bleft, int* left, int* bottom) { + // Compute grid bounds of the outline and pad all round by 1. + int grid_left = (box.left() - bleft.x()) / gridsize - 1; + int grid_bottom = (box.bottom() - bleft.y()) / gridsize - 1; + int grid_right = (box.right() - bleft.x()) / gridsize + 1; + int grid_top = (box.top() - bleft.y()) / gridsize + 1; + // Report the padded bottom-left cell, which maps to (0, 0) on the pix. + *left = grid_left; + *bottom = grid_bottom; + return pixCreate(grid_right - grid_left + 1, + grid_top - grid_bottom + 1, + 1); +} + +// Helper function to return a scaled Pix with one pixel per grid cell, +// set (black) where the given outline enters the corresponding grid cell, +// and clear where the outline does not touch the grid cell. +// Also returns the grid coords of the bottom-left of the Pix, in *left +// and *bottom, which corresponds to (0, 0) on the Pix. +// Note that the Pix is used upside-down, with (0, 0) being the bottom-left. +Pix* TraceOutlineOnReducedPix(C_OUTLINE* outline, int gridsize, + ICOORD bleft, int* left, int* bottom) { + const TBOX& box = outline->bounding_box(); + Pix* pix = GridReducedPix(box, gridsize, bleft, left, bottom); + int wpl = pixGetWpl(pix); + l_uint32* data = pixGetData(pix); + int length = outline->pathlength(); + ICOORD pos = outline->start_pos(); + // Walk the outline step by step, setting the bit of the grid cell that + // contains each position, relative to the bottom-left cell of the pix. + for (int i = 0; i < length; ++i) { + int grid_x = (pos.x() - bleft.x()) / gridsize - *left; + int grid_y = (pos.y() - bleft.y()) / gridsize - *bottom; + SET_DATA_BIT(data + grid_y * wpl, grid_x); + pos += outline->step(i); + } + return pix; +} +#if 0 // Example code shows how to use TraceOutlineOnReducedPix. + C_OUTLINE_IT ol_it(blob->cblob()->out_list()); + int grid_left, grid_bottom; + Pix* pix = TraceOutlineOnReducedPix(ol_it.data(), gridsize_, bleft_, + &grid_left, &grid_bottom); + grid->InsertPixPtBBox(grid_left, grid_bottom, pix, blob); + pixDestroy(&pix); +#endif + +// As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE.
+Pix* TraceBlockOnReducedPix(BLOCK* block, int gridsize, + ICOORD bleft, int* left, int* bottom) { + const TBOX& block_box = block->pdblk.bounding_box(); + Pix* pix = GridReducedPix(block_box, gridsize, bleft, left, bottom); + int words_per_line = pixGetWpl(pix); + l_uint32* pix_data = pixGetData(pix); + ICOORDELT_IT vertex_it(block->pdblk.poly_block()->points()); + // Visit each polygon edge once: from the current vertex to the one after it. + for (vertex_it.mark_cycle_pt(); !vertex_it.cycled_list();) { + ICOORD pos = *vertex_it.data(); + vertex_it.forward(); + ICOORD next_pos = *vertex_it.data(); + ICOORD edge = next_pos - pos; + int major, minor; + ICOORD major_step, minor_step; + edge.setup_render(&major_step, &minor_step, &major, &minor); + int error_sum = major / 2; + // Step along the edge until the next vertex is reached, marking the grid + // cell that contains each position on the way. + while (pos != next_pos) { + int cell_x = (pos.x() - bleft.x()) / gridsize - *left; + int cell_y = (pos.y() - bleft.y()) / gridsize - *bottom; + SET_DATA_BIT(pix_data + cell_y * words_per_line, cell_x); + pos += major_step; + error_sum += minor; + if (error_sum >= major) { + error_sum -= major; + pos += minor_step; + } + } + } + return pix; +} + +} // namespace tesseract. diff --git a/tesseract/src/textord/bbgrid.h b/tesseract/src/textord/bbgrid.h new file mode 100644 index 00000000..5d75aa38 --- /dev/null +++ b/tesseract/src/textord/bbgrid.h @@ -0,0 +1,957 @@ +/////////////////////////////////////////////////////////////////////// +// File: bbgrid.h +// Description: Class to hold BLOBNBOXs in a grid for fast access +// to neighbours. +// Author: Ray Smith +// +// (C) Copyright 2007, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_TEXTORD_BBGRID_H_ +#define TESSERACT_TEXTORD_BBGRID_H_ + +#include <unordered_set> + +#include "clst.h" +#include "coutln.h" +#include "rect.h" +#include "scrollview.h" + +#include "allheaders.h" + +class BLOCK; + +namespace tesseract { + +// Helper function to return a scaled Pix with one pixel per grid cell, +// set (black) where the given outline enters the corresponding grid cell, +// and clear where the outline does not touch the grid cell. +// Also returns the grid coords of the bottom-left of the Pix, in *left +// and *bottom, which corresponds to (0, 0) on the Pix. +// Note that the Pix is used upside-down, with (0, 0) being the bottom-left. +Pix* TraceOutlineOnReducedPix(C_OUTLINE* outline, int gridsize, + ICOORD bleft, int* left, int* bottom); +// As TraceOutlineOnReducedPix above, but on a BLOCK instead of a C_OUTLINE. +Pix* TraceBlockOnReducedPix(BLOCK* block, int gridsize, + ICOORD bleft, int* left, int* bottom); + +template<class BBC, class BBC_CLIST, class BBC_C_IT> class GridSearch; + +// The GridBase class is the base class for BBGrid and IntGrid. +// It holds the geometry and scale of the grid. +class TESS_API GridBase { + public: + GridBase() = default; + GridBase(int gridsize, const ICOORD& bleft, const ICOORD& tright); + virtual ~GridBase(); + + // (Re)Initialize the grid. The gridsize is the size in pixels of each cell, + // and bleft, tright are the bounding box of everything to go in it. + void Init(int gridsize, const ICOORD& bleft, const ICOORD& tright); + + // Simple accessors.
+ int gridsize() const { + return gridsize_; + } + int gridwidth() const { + return gridwidth_; + } + int gridheight() const { + return gridheight_; + } + const ICOORD& bleft() const { + return bleft_; + } + const ICOORD& tright() const { + return tright_; + } + // Compute the given grid coordinates from image coords. + void GridCoords(int x, int y, int* grid_x, int* grid_y) const; + + // Clip the given grid coordinates to fit within the grid. + void ClipGridCoords(int* x, int* y) const; + + protected: + // TODO(rays) Make these private and migrate to the accessors in subclasses. + int gridsize_; // Pixel size of each grid cell. + int gridwidth_; // Size of the grid in cells. + int gridheight_; + int gridbuckets_; // Total cells in grid. + ICOORD bleft_; // Pixel coords of bottom-left of grid. + ICOORD tright_; // Pixel coords of top-right of grid. + + private: +}; + +// The IntGrid maintains a single int for each cell in a grid. +class IntGrid : public GridBase { + public: + IntGrid(); + IntGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright); + ~IntGrid() override; + + // (Re)Initialize the grid. The gridsize is the size in pixels of each cell, + // and bleft, tright are the bounding box of everything to go in it. + void Init(int gridsize, const ICOORD& bleft, const ICOORD& tright); + + // Clear all the ints in the grid to zero. + void Clear(); + + // Rotate the grid by rotation, keeping cell contents. + // rotation must be a multiple of 90 degrees. + // NOTE: due to partial cells, cell coverage in the rotated grid will be + // inexact. This is why there is no Rotate for the generic BBGrid. + void Rotate(const FCOORD& rotation); + + // Returns a new IntGrid containing values equal to the sum of all the + // neighbouring cells. The returned grid must be deleted after use.
+ IntGrid* NeighbourhoodSum() const; + + int GridCellValue(int grid_x, int grid_y) const { + ClipGridCoords(&grid_x, &grid_y); + return grid_[grid_y * gridwidth_ + grid_x]; + } + void SetGridCell(int grid_x, int grid_y, int value) { + ASSERT_HOST(grid_x >= 0 && grid_x < gridwidth()); + ASSERT_HOST(grid_y >= 0 && grid_y < gridheight()); + grid_[grid_y * gridwidth_ + grid_x] = value; + } + // Returns true if more than half the area of the rect is covered by grid + // cells that are over the threshold. + bool RectMostlyOverThreshold(const TBOX& rect, int threshold) const; + + // Returns true if any cell value in the given rectangle is zero. + bool AnyZeroInRect(const TBOX& rect) const; + + // Returns a full-resolution binary pix in which each cell over the given + // threshold is filled as a black square. pixDestroy after use. + Pix* ThresholdToPix(int threshold) const; + + private: + int* grid_; // 2-d array of ints. +}; + +// The BBGrid class holds C_LISTs of template classes BBC (bounding box class) +// in a grid for fast neighbour access. +// The BBC class must have a member const TBOX& bounding_box() const. +// The BBC class must have been CLISTIZEH'ed elsewhere to make the +// list class BBC_CLIST and the iterator BBC_C_IT. +// Use of C_LISTs enables BBCs to exist in multiple cells simultaneously. +// As a consequence, ownership of BBCs is assumed to be elsewhere and +// persistent for at least the life of the BBGrid, or at least until Clear is +// called which removes all references to inserted objects without actually +// deleting them. +// Most uses derive a class from a specific instantiation of BBGrid, +// thereby making most of the ugly template notation go away. +// The friend class GridSearch, with the same template arguments, is +// used to search a grid efficiently in one of several search patterns.
+template<class BBC, class BBC_CLIST, class BBC_C_IT> class BBGrid + : public GridBase { + friend class GridSearch<BBC, BBC_CLIST, BBC_C_IT>; + public: + BBGrid(); + BBGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright); + ~BBGrid() override; + + // (Re)Initialize the grid. The gridsize is the size in pixels of each cell, + // and bleft, tright are the bounding box of everything to go in it. + void Init(int gridsize, const ICOORD& bleft, const ICOORD& tright); + + // Empty all the lists but leave the grid itself intact. + void Clear(); + // Deallocate the data in the lists but otherwise leave the lists and the grid + // intact. + void ClearGridData(void (*free_method)(BBC*)); + + // Insert a bbox into the appropriate place in the grid. + // If h_spread, then all cells covered horizontally by the box are + // used, otherwise, just the bottom-left. Similarly for v_spread. + // WARNING: InsertBBox may invalidate an active GridSearch. Call + // RepositionIterator() on any GridSearches that are active on this grid. + void InsertBBox(bool h_spread, bool v_spread, BBC* bbox); + + // Using a pix from TraceOutlineOnReducedPix or TraceBlockOnReducedPix, in + // which each pixel corresponds to a grid cell, insert a bbox into every + // place in the grid where the corresponding pixel is 1. The Pix is handled + // upside-down to match the Tesseract coordinate system. (As created by + // TraceOutlineOnReducedPix or TraceBlockOnReducedPix.) + // (0, 0) in the pix corresponds to (left, bottom) in the + // grid (in grid coords), and the pix works up the grid from there. + // WARNING: InsertPixPtBBox may invalidate an active GridSearch. Call + // RepositionIterator() on any GridSearches that are active on this grid. + void InsertPixPtBBox(int left, int bottom, Pix* pix, BBC* bbox); + + // Remove the bbox from the grid. + // WARNING: Any GridSearch operating on this grid could be invalidated! + // If a GridSearch is operating, call GridSearch::RemoveBBox() instead.
+ void RemoveBBox(BBC* bbox); + + // Returns true if the given rectangle has no overlapping elements. + bool RectangleEmpty(const TBOX& rect); + + // Returns an IntGrid showing the number of elements in each cell. + // Returned IntGrid must be deleted after use. + IntGrid* CountCellElements(); + + // Make a window of an appropriate size to display things in the grid. + ScrollView* MakeWindow(int x, int y, const char* window_name); + + // Display the bounding boxes of the BLOBNBOXes in this grid. + // Use of this function requires an additional member of the BBC class: + // ScrollView::Color BBC::BoxColor() const. + void DisplayBoxes(ScrollView* window); + + // ASSERT_HOST that every cell contains no more than one copy of each entry. + void AssertNoDuplicates(); + + // Handle a click event in a display window. + virtual void HandleClick(int x, int y); + + protected: + BBC_CLIST* grid_; // 2-d array of CLISTS of BBC elements. + + private: +}; + +// Hash functor for generic pointers. +template<typename T> struct PtrHash { + size_t operator()(const T* ptr) const { + return reinterpret_cast<uintptr_t>(ptr) / sizeof(T); + } +}; + + +// The GridSearch class enables neighbourhood searching on a BBGrid. +template<class BBC, class BBC_CLIST, class BBC_C_IT> class GridSearch { + public: + // Creates a search over the given grid. Only the pointer is stored. + GridSearch(BBGrid<BBC, BBC_CLIST, BBC_C_IT>* grid) + : grid_(grid) { + } + + // Get the grid x, y coords of the most recently returned BBC. + int GridX() const { + return x_; + } + int GridY() const { + return y_; + } + + // Sets the search mode to return a box only once. + // Efficiency warning: Implementation currently uses a squared-order + // search in the number of returned elements. Use only where a small + // number of elements are spread over a wide area, eg ColPartitions. + void SetUniqueMode(bool mode) { + unique_mode_ = mode; + } + // TODO(rays) Replace calls to ReturnedSeedElement with SetUniqueMode. + // It only works if the search includes the bottom-left corner.
+ // Apart from full search, all other searches return a box several + // times if the box is inserted with h_spread or v_spread. + // This method will return true for only one occurrence of each box + // that was inserted with both h_spread and v_spread as true. + // It will usually return false for boxes that were not inserted with + // both h_spread=true and v_spread=true + // NOTE(review): the test below compares the current cell with the cell + // containing the centre of the previously returned box, and dereferences + // previous_return_ without a null check - presumably it must only be called + // after a Next* call has returned a box. + bool ReturnedSeedElement() const { + TBOX box = previous_return_->bounding_box(); + int x_center = (box.left()+box.right())/2; + int y_center = (box.top()+box.bottom())/2; + int grid_x, grid_y; + grid_->GridCoords(x_center, y_center, &grid_x, &grid_y); + return (x_ == grid_x) && (y_ == grid_y); + } + + // Various searching iterations... Note that these iterations + // all share data members, so you can't run more than one iteration + // in parallel in a single GridSearch instance, but multiple instances + // can search the same BBGrid in parallel. + // Note that all the searches can return blobs that may not exactly + // match the search conditions, since they return everything in the + // covered grid cells. It is up to the caller to check for + // appropriateness. + // TODO(rays) NextRectSearch only returns valid elements. Make the other + // searches test before return also and remove the tests from code + // that uses GridSearch. + + // Start a new full search. Will iterate all stored blobs, from the top. + // If the blobs have been inserted using InsertBBox, (not InsertPixPtBBox) + // then the full search guarantees to return each blob in the grid once. + // Other searches may return a blob more than once if they have been + // inserted using h_spread or v_spread. + void StartFullSearch(); + // Return the next bbox in the search or nullptr if done. + BBC* NextFullSearch(); + + // Start a new radius search. Will search in a spiral up to a + // given maximum radius in grid cells from the given center in pixels.
+ void StartRadSearch(int x, int y, int max_radius); + // Return the next bbox in the radius search or nullptr if the + // maximum radius has been reached. + BBC* NextRadSearch(); + + // Start a new left or right-looking search. Will search to the side + // for a box that vertically overlaps the given vertical line segment. + // CAVEAT: This search returns all blobs from the cells to the side + // of the start, and somewhat below, since there is no guarantee + // that there may not be a taller object in a lower cell. The + // blobs returned will include all those that vertically overlap and + // are no more than twice as high, but may also include some that do + // not overlap and some that are more than twice as high. + void StartSideSearch(int x, int ymin, int ymax); + // Return the next bbox in the side search or nullptr if the + // edge has been reached. Searches left to right or right to left + // according to the flag. + BBC* NextSideSearch(bool right_to_left); + + // Start a vertical-looking search. Will search up or down + // for a box that horizontally overlaps the given line segment. + void StartVerticalSearch(int xmin, int xmax, int y); + // Return the next bbox in the vertical search or nullptr if the + // edge has been reached. Searches top to bottom or bottom to top + // according to the flag. + BBC* NextVerticalSearch(bool top_to_bottom); + + // Start a rectangular search. Will search for a box that overlaps the + // given rectangle. + void StartRectSearch(const TBOX& rect); + // Return the next bbox in the rectangular search or nullptr if complete. + BBC* NextRectSearch(); + + // Remove the last returned BBC. Will not invalidate this. May invalidate + // any other concurrent GridSearch on the same grid. If any others are + // in use, call RepositionIterator on those, to continue without harm. + void RemoveBBox(); + void RepositionIterator(); + + private: + // Factored out helper to start a search.
+ void CommonStart(int x, int y); + // Factored out helper to complete a next search. + BBC* CommonNext(); + // Factored out final return when search is exhausted. + BBC* CommonEnd(); + // Factored out function to set the iterator to the current x_, y_ + // grid coords and mark the cycle pt. + void SetIterator(); + + private: + // The grid we are searching. + BBGrid<BBC, BBC_CLIST, BBC_C_IT>* grid_ = nullptr; + // For executing a search. The different search algorithms use these in + // different ways, but most use x_origin_ and y_origin_ as the start position. + int x_origin_ = 0; + int y_origin_ = 0; + int max_radius_ = 0; + int radius_ = 0; + int rad_index_ = 0; + int rad_dir_ = 0; + TBOX rect_; + int x_ = 0; // The current location in grid coords, of the current search. + int y_ = 0; + bool unique_mode_ = false; + BBC* previous_return_ = nullptr; // Previous return from Next*. + BBC* next_return_ = nullptr; // Current value of it_.data() used for repositioning. + // An iterator over the list at (x_, y_) in the grid_. + BBC_C_IT it_; + // Set of unique returned elements used when unique_mode_ is true. + std::unordered_set<BBC*, PtrHash<BBC> > returns_; +}; + +// Sort function to sort a BBC by bounding_box().left(). +template<class BBC> +int SortByBoxLeft(const void* void1, const void* void2) { + // The void*s are actually doubly indirected, so get rid of one level. + const BBC* p1 = *static_cast<const BBC* const*>(void1); + const BBC* p2 = *static_cast<const BBC* const*>(void2); + int result = p1->bounding_box().left() - p2->bounding_box().left(); + if (result != 0) + return result; + result = p1->bounding_box().right() - p2->bounding_box().right(); + if (result != 0) + return result; + result = p1->bounding_box().bottom() - p2->bounding_box().bottom(); + if (result != 0) + return result; + return p1->bounding_box().top() - p2->bounding_box().top(); +} + +// Sort function to sort a BBC by bounding_box().right() in right-to-left order.
// void1 and void2 each point at a BBC* (they are really BBC**).
// Boxes with a larger right() sort first, then boxes with a larger left();
// remaining ties are broken by ascending bottom() and then ascending top().
template<class BBC>
int SortRightToLeft(const void* void1, const void* void2) {
  // Strip the extra level of indirection to reach the elements.
  const BBC* first = *static_cast<const BBC* const*>(void1);
  const BBC* second = *static_cast<const BBC* const*>(void2);
  // The operands are swapped for the horizontal keys: larger x comes first.
  if (const int by_right =
          second->bounding_box().right() - first->bounding_box().right())
    return by_right;
  if (const int by_left =
          second->bounding_box().left() - first->bounding_box().left())
    return by_left;
  if (const int by_bottom =
          first->bounding_box().bottom() - second->bounding_box().bottom())
    return by_bottom;
  return first->bounding_box().top() - second->bounding_box().top();
}

// Comparison function that orders BBCs by bounding_box().bottom(), using
// top(), left() and right() in that order as tie-breakers.
// void1 and void2 each point at a BBC* (they are really BBC**).
template<class BBC>
int SortByBoxBottom(const void* void1, const void* void2) {
  // Strip the extra level of indirection to reach the elements.
  const BBC* first = *static_cast<const BBC* const*>(void1);
  const BBC* second = *static_cast<const BBC* const*>(void2);
  if (const int by_bottom =
          first->bounding_box().bottom() - second->bounding_box().bottom())
    return by_bottom;
  if (const int by_top =
          first->bounding_box().top() - second->bounding_box().top())
    return by_top;
  if (const int by_left =
          first->bounding_box().left() - second->bounding_box().left())
    return by_left;
  return first->bounding_box().right() - second->bounding_box().right();
}

///////////////////////////////////////////////////////////////////////
// BBGrid IMPLEMENTATION.
+/////////////////////////////////////////////////////////////////////// +template<class BBC, class BBC_CLIST, class BBC_C_IT> +BBGrid<BBC, BBC_CLIST, BBC_C_IT>::BBGrid() : grid_(nullptr) { +} + +template<class BBC, class BBC_CLIST, class BBC_C_IT> +BBGrid<BBC, BBC_CLIST, BBC_C_IT>::BBGrid( + int gridsize, const ICOORD& bleft, const ICOORD& tright) + : grid_(nullptr) { + Init(gridsize, bleft, tright); +} + +template<class BBC, class BBC_CLIST, class BBC_C_IT> +BBGrid<BBC, BBC_CLIST, BBC_C_IT>::~BBGrid() { + delete [] grid_; +} + +// (Re)Initialize the grid. The gridsize is the size in pixels of each cell, +// and bleft, tright are the bounding box of everything to go in it. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::Init(int gridsize, + const ICOORD& bleft, + const ICOORD& tright) { + GridBase::Init(gridsize, bleft, tright); + delete [] grid_; + grid_ = new BBC_CLIST[gridbuckets_]; +} + +// Clear all lists, but leave the array of lists present. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::Clear() { + for (int i = 0; i < gridbuckets_; ++i) { + grid_[i].shallow_clear(); + } +} + +// Deallocate the data in the lists but otherwise leave the lists and the grid +// intact. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::ClearGridData( + void (*free_method)(BBC*)) { + if (grid_ == nullptr) return; + GridSearch<BBC, BBC_CLIST, BBC_C_IT> search(this); + search.StartFullSearch(); + BBC* bb; + BBC_CLIST bb_list; + BBC_C_IT it(&bb_list); + while ((bb = search.NextFullSearch()) != nullptr) { + it.add_after_then_move(bb); + } + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + free_method(it.data()); + } +} + +// Insert a bbox into the appropriate place in the grid. +// If h_spread, then all cells covered horizontally by the box are +// used, otherwise, just the bottom-left. Similarly for v_spread. 
+// WARNING: InsertBBox may invalidate an active GridSearch. Call +// RepositionIterator() on any GridSearches that are active on this grid. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::InsertBBox(bool h_spread, bool v_spread, + BBC* bbox) { + TBOX box = bbox->bounding_box(); + int start_x, start_y, end_x, end_y; + GridCoords(box.left(), box.bottom(), &start_x, &start_y); + GridCoords(box.right(), box.top(), &end_x, &end_y); + if (!h_spread) + end_x = start_x; + if (!v_spread) + end_y = start_y; + int grid_index = start_y * gridwidth_; + for (int y = start_y; y <= end_y; ++y, grid_index += gridwidth_) { + for (int x = start_x; x <= end_x; ++x) { + grid_[grid_index + x].add_sorted(SortByBoxLeft<BBC>, true, bbox); + } + } +} + +// Using a pix from TraceOutlineOnReducedPix or TraceBlockOnReducedPix, in +// which each pixel corresponds to a grid cell, insert a bbox into every +// place in the grid where the corresponding pixel is 1. The Pix is handled +// upside-down to match the Tesseract coordinate system. (As created by +// TraceOutlineOnReducedPix or TraceBlockOnReducedPix.) +// (0, 0) in the pix corresponds to (left, bottom) in the +// grid (in grid coords), and the pix works up the grid from there. +// WARNING: InsertPixPtBBox may invalidate an active GridSearch. Call +// RepositionIterator() on any GridSearches that are active on this grid. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::InsertPixPtBBox(int left, int bottom, + Pix* pix, BBC* bbox) { + int width = pixGetWidth(pix); + int height = pixGetHeight(pix); + for (int y = 0; y < height; ++y) { + l_uint32* data = pixGetData(pix) + y * pixGetWpl(pix); + for (int x = 0; x < width; ++x) { + if (GET_DATA_BIT(data, x)) { + grid_[(bottom + y) * gridwidth_ + x + left]. + add_sorted(SortByBoxLeft<BBC>, true, bbox); + } + } + } +} + +// Remove the bbox from the grid. 
+// WARNING: Any GridSearch operating on this grid could be invalidated! +// If a GridSearch is operating, call GridSearch::RemoveBBox() instead. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::RemoveBBox(BBC* bbox) { + TBOX box = bbox->bounding_box(); + int start_x, start_y, end_x, end_y; + GridCoords(box.left(), box.bottom(), &start_x, &start_y); + GridCoords(box.right(), box.top(), &end_x, &end_y); + int grid_index = start_y * gridwidth_; + for (int y = start_y; y <= end_y; ++y, grid_index += gridwidth_) { + for (int x = start_x; x <= end_x; ++x) { + BBC_C_IT it(&grid_[grid_index + x]); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + if (it.data() == bbox) + it.extract(); + } + } + } +} + +// Returns true if the given rectangle has no overlapping elements. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +bool BBGrid<BBC, BBC_CLIST, BBC_C_IT>::RectangleEmpty(const TBOX& rect) { + GridSearch<BBC, BBC_CLIST, BBC_C_IT> rsearch(this); + rsearch.StartRectSearch(rect); + return rsearch.NextRectSearch() == nullptr; +} + +// Returns an IntGrid showing the number of elements in each cell. +// Returned IntGrid must be deleted after use. 
+template<class BBC, class BBC_CLIST, class BBC_C_IT> +IntGrid* BBGrid<BBC, BBC_CLIST, BBC_C_IT>::CountCellElements() { + auto* intgrid = new IntGrid(gridsize(), bleft(), tright()); + for (int y = 0; y < gridheight(); ++y) { + for (int x = 0; x < gridwidth(); ++x) { + int cell_count = grid_[y * gridwidth() + x].length(); + intgrid->SetGridCell(x, y, cell_count); + } + } + return intgrid; +} + + +template<class G> class TabEventHandler : public SVEventHandler { + public: + explicit TabEventHandler(G* grid) : grid_(grid) { + } + void Notify(const SVEvent* sv_event) override { + if (sv_event->type == SVET_CLICK) { + grid_->HandleClick(sv_event->x, sv_event->y); + } + } + private: + G* grid_; +}; + +#ifndef GRAPHICS_DISABLED + +// Make a window of an appropriate size to display things in the grid. +// Position the window at the given x,y. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +ScrollView* BBGrid<BBC, BBC_CLIST, BBC_C_IT>::MakeWindow( + int x, int y, const char* window_name) { + auto tab_win = new ScrollView(window_name, x, y, + tright_.x() - bleft_.x(), + tright_.y() - bleft_.y(), + tright_.x() - bleft_.x(), + tright_.y() - bleft_.y(), + true); + auto* handler = + new TabEventHandler<BBGrid<BBC, BBC_CLIST, BBC_C_IT> >(this); + tab_win->AddEventHandler(handler); + tab_win->Pen(ScrollView::GREY); + tab_win->Rectangle(0, 0, tright_.x() - bleft_.x(), tright_.y() - bleft_.y()); + return tab_win; +} + +// Create a window at (x,y) and display the bounding boxes of the +// BLOBNBOXes in this grid. +// Use of this function requires an additional member of the BBC class: +// ScrollView::Color BBC::BoxColor() const. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::DisplayBoxes(ScrollView* tab_win) { + tab_win->Pen(ScrollView::BLUE); + tab_win->Brush(ScrollView::NONE); + + // For every bbox in the grid, display it. 
+ GridSearch<BBC, BBC_CLIST, BBC_C_IT> gsearch(this); + gsearch.StartFullSearch(); + BBC* bbox; + while ((bbox = gsearch.NextFullSearch()) != nullptr) { + const TBOX& box = bbox->bounding_box(); + int left_x = box.left(); + int right_x = box.right(); + int top_y = box.top(); + int bottom_y = box.bottom(); + ScrollView::Color box_color = bbox->BoxColor(); + tab_win->Pen(box_color); + tab_win->Rectangle(left_x, bottom_y, right_x, top_y); + } + tab_win->Update(); +} + +#endif // !GRAPHICS_DISABLED + +// ASSERT_HOST that every cell contains no more than one copy of each entry. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::AssertNoDuplicates() { + // Process all grid cells. + for (int i = gridwidth_ * gridheight_ - 1; i >= 0; --i) { + // Iterate over all elements excent the last. + for (BBC_C_IT it(&grid_[i]); !it.at_last(); it.forward()) { + BBC* ptr = it.data(); + BBC_C_IT it2(it); + // None of the rest of the elements in the list should equal ptr. + for (it2.forward(); !it2.at_first(); it2.forward()) { + ASSERT_HOST(it2.data() != ptr); + } + } + } +} + +// Handle a click event in a display window. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void BBGrid<BBC, BBC_CLIST, BBC_C_IT>::HandleClick(int x, int y) { + tprintf("Click at (%d, %d)\n", x, y); +} + +/////////////////////////////////////////////////////////////////////// +// GridSearch IMPLEMENTATION. +/////////////////////////////////////////////////////////////////////// + +// Start a new full search. Will iterate all stored blobs. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::StartFullSearch() { + // Full search uses x_ and y_ as the current grid + // cell being searched. + CommonStart(grid_->bleft_.x(), grid_->tright_.y()); +} + +// Return the next bbox in the search or nullptr if done. 
+// The other searches will return a box that overlaps the grid cell +// thereby duplicating boxes, but NextFullSearch only returns each box once. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::NextFullSearch() { + int x; + int y; + do { + while (it_.cycled_list()) { + ++x_; + if (x_ >= grid_->gridwidth_) { + --y_; + if (y_ < 0) + return CommonEnd(); + x_ = 0; + } + SetIterator(); + } + CommonNext(); + TBOX box = previous_return_->bounding_box(); + grid_->GridCoords(box.left(), box.bottom(), &x, &y); + } while (x != x_ || y != y_); + return previous_return_; +} + +// Start a new radius search. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::StartRadSearch(int x, int y, + int max_radius) { + // Rad search uses x_origin_ and y_origin_ as the center of the circle. + // The radius_ is the radius of the (diamond-shaped) circle and + // rad_index/rad_dir_ combine to determine the position around it. + max_radius_ = max_radius; + radius_ = 0; + rad_index_ = 0; + rad_dir_ = 3; + CommonStart(x, y); +} + +// Return the next bbox in the radius search or nullptr if the +// maximum radius has been reached. 
+template<class BBC, class BBC_CLIST, class BBC_C_IT> +BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::NextRadSearch() { + do { + while (it_.cycled_list()) { + ++rad_index_; + if (rad_index_ >= radius_) { + ++rad_dir_; + rad_index_ = 0; + if (rad_dir_ >= 4) { + ++radius_; + if (radius_ > max_radius_) + return CommonEnd(); + rad_dir_ = 0; + } + } + ICOORD offset = C_OUTLINE::chain_step(rad_dir_); + offset *= radius_ - rad_index_; + offset += C_OUTLINE::chain_step(rad_dir_ + 1) * rad_index_; + x_ = x_origin_ + offset.x(); + y_ = y_origin_ + offset.y(); + if (x_ >= 0 && x_ < grid_->gridwidth_ && + y_ >= 0 && y_ < grid_->gridheight_) + SetIterator(); + } + CommonNext(); + } while (unique_mode_ && returns_.find(previous_return_) != returns_.end()); + if (unique_mode_) + returns_.insert(previous_return_); + return previous_return_; +} + +// Start a new left or right-looking search. Will search to the side +// for a box that vertically overlaps the given vertical line segment. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::StartSideSearch(int x, + int ymin, int ymax) { + // Right search records the x in x_origin_, the ymax in y_origin_ + // and the size of the vertical strip to search in radius_. + // To guarantee finding overlapping objects of up to twice the + // given size, double the height. + radius_ = ((ymax - ymin) * 2 + grid_->gridsize_ - 1) / grid_->gridsize_; + rad_index_ = 0; + CommonStart(x, ymax); +} + +// Return the next bbox in the side search or nullptr if the +// edge has been reached. Searches left to right or right to left +// according to the flag. 
+template<class BBC, class BBC_CLIST, class BBC_C_IT> +BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::NextSideSearch(bool right_to_left) { + do { + while (it_.cycled_list()) { + ++rad_index_; + if (rad_index_ > radius_) { + if (right_to_left) + --x_; + else + ++x_; + rad_index_ = 0; + if (x_ < 0 || x_ >= grid_->gridwidth_) + return CommonEnd(); + } + y_ = y_origin_ - rad_index_; + if (y_ >= 0 && y_ < grid_->gridheight_) + SetIterator(); + } + CommonNext(); + } while (unique_mode_ && returns_.find(previous_return_) != returns_.end()); + if (unique_mode_) + returns_.insert(previous_return_); + return previous_return_; +} + +// Start a vertical-looking search. Will search up or down +// for a box that horizontally overlaps the given line segment. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::StartVerticalSearch(int xmin, + int xmax, + int y) { + // Right search records the xmin in x_origin_, the y in y_origin_ + // and the size of the horizontal strip to search in radius_. + radius_ = (xmax - xmin + grid_->gridsize_ - 1) / grid_->gridsize_; + rad_index_ = 0; + CommonStart(xmin, y); +} + +// Return the next bbox in the vertical search or nullptr if the +// edge has been reached. Searches top to bottom or bottom to top +// according to the flag. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::NextVerticalSearch( + bool top_to_bottom) { + do { + while (it_.cycled_list()) { + ++rad_index_; + if (rad_index_ > radius_) { + if (top_to_bottom) + --y_; + else + ++y_; + rad_index_ = 0; + if (y_ < 0 || y_ >= grid_->gridheight_) + return CommonEnd(); + } + x_ = x_origin_ + rad_index_; + if (x_ >= 0 && x_ < grid_->gridwidth_) + SetIterator(); + } + CommonNext(); + } while (unique_mode_ && returns_.find(previous_return_) != returns_.end()); + if (unique_mode_) + returns_.insert(previous_return_); + return previous_return_; +} + +// Start a rectangular search. 
Will search for a box that overlaps the +// given rectangle. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::StartRectSearch(const TBOX& rect) { + // Rect search records the xmin in x_origin_, the ymin in y_origin_ + // and the xmax in max_radius_. + // The search proceeds left to right, top to bottom. + rect_ = rect; + CommonStart(rect.left(), rect.top()); + grid_->GridCoords(rect.right(), rect.bottom(), // - rect.height(), + &max_radius_, &y_origin_); +} + +// Return the next bbox in the rectangular search or nullptr if complete. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::NextRectSearch() { + do { + while (it_.cycled_list()) { + ++x_; + if (x_ > max_radius_) { + --y_; + x_ = x_origin_; + if (y_ < y_origin_) + return CommonEnd(); + } + SetIterator(); + } + CommonNext(); + } while (!rect_.overlap(previous_return_->bounding_box()) || + (unique_mode_ && returns_.find(previous_return_) != returns_.end())); + if (unique_mode_) + returns_.insert(previous_return_); + return previous_return_; +} + +// Remove the last returned BBC. Will not invalidate this. May invalidate +// any other concurrent GridSearch on the same grid. If any others are +// in use, call RepositionIterator on those, to continue without harm. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::RemoveBBox() { + if (previous_return_ != nullptr) { + // Remove all instances of previous_return_ from the list, so the iterator + // remains valid after removal from the rest of the grid cells. + // if previous_return_ is not on the list, then it has been removed already. + BBC* prev_data = nullptr; + BBC* new_previous_return = nullptr; + it_.move_to_first(); + for (it_.mark_cycle_pt(); !it_.cycled_list();) { + if (it_.data() == previous_return_) { + new_previous_return = prev_data; + it_.extract(); + it_.forward(); + next_return_ = it_.cycled_list() ? 
nullptr : it_.data(); + } else { + prev_data = it_.data(); + it_.forward(); + } + } + grid_->RemoveBBox(previous_return_); + previous_return_ = new_previous_return; + RepositionIterator(); + } +} + +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::RepositionIterator() { + // Something was deleted, so we have little choice but to clear the + // returns list. + returns_.clear(); + // Reset the iterator back to one past the previous return. + // If the previous_return_ is no longer in the list, then + // next_return_ serves as a backup. + it_.move_to_first(); + // Special case, the first element was removed and reposition + // iterator was called. In this case, the data is fine, but the + // cycle point is not. Detect it and return. + if (!it_.empty() && it_.data() == next_return_) { + it_.mark_cycle_pt(); + return; + } + for (it_.mark_cycle_pt(); !it_.cycled_list(); it_.forward()) { + if (it_.data() == previous_return_ || + it_.data_relative(1) == next_return_) { + CommonNext(); + return; + } + } + // We ran off the end of the list. Move to a new cell next time. + previous_return_ = nullptr; + next_return_ = nullptr; +} + +// Factored out helper to start a search. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::CommonStart(int x, int y) { + grid_->GridCoords(x, y, &x_origin_, &y_origin_); + x_ = x_origin_; + y_ = y_origin_; + SetIterator(); + previous_return_ = nullptr; + next_return_ = it_.empty() ? nullptr : it_.data(); + returns_.clear(); +} + +// Factored out helper to complete a next search. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::CommonNext() { + previous_return_ = it_.data(); + it_.forward(); + next_return_ = it_.cycled_list() ? nullptr : it_.data(); + return previous_return_; +} + +// Factored out final return when search is exhausted. 
+template<class BBC, class BBC_CLIST, class BBC_C_IT> +BBC* GridSearch<BBC, BBC_CLIST, BBC_C_IT>::CommonEnd() { + previous_return_ = nullptr; + next_return_ = nullptr; + return nullptr; +} + +// Factored out function to set the iterator to the current x_, y_ +// grid coords and mark the cycle pt. +template<class BBC, class BBC_CLIST, class BBC_C_IT> +void GridSearch<BBC, BBC_CLIST, BBC_C_IT>::SetIterator() { + it_= &(grid_->grid_[y_ * grid_->gridwidth_ + x_]); + it_.mark_cycle_pt(); +} + +} // namespace tesseract. + +#endif // TESSERACT_TEXTORD_BBGRID_H_ diff --git a/tesseract/src/textord/blkocc.cpp b/tesseract/src/textord/blkocc.cpp new file mode 100644 index 00000000..f63b8ef9 --- /dev/null +++ b/tesseract/src/textord/blkocc.cpp @@ -0,0 +1,165 @@ +/***************************************************************************** + * + * File: blkocc.cpp (Formerly blockocc.c) + * Description: Block Occupancy routines + * Author: Chris Newton + * + * (c) Copyright 1991, Hewlett-Packard Company. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + ******************************************************************************/ + +#include "blkocc.h" + +#include "drawtord.h" +#include "errcode.h" + +#include <cctype> +#include <cmath> + +#include "helpers.h" + +namespace tesseract { + +double_VAR(textord_underline_threshold, 0.5, "Fraction of width occupied"); + +// Forward declarations of static functions +static void horizontal_cblob_projection(C_BLOB *blob, // blob to project + STATS *stats); // output +static void horizontal_coutline_projection(C_OUTLINE *outline, + STATS *stats); // output + +/** + * test_underline + * + * Check to see if the blob is an underline. + * Return true if it is. + */ + +bool test_underline( //look for underlines + bool testing_on, ///< drawing blob + C_BLOB* blob, ///< blob to test + int16_t baseline, ///< coords of baseline + int16_t xheight ///< height of line +) { + int16_t occ; + int16_t blob_width; //width of blob + TBOX blob_box; //bounding box + int32_t desc_occ; + int32_t x_occ; + int32_t asc_occ; + STATS projection; + + blob_box = blob->bounding_box (); + blob_width = blob->bounding_box ().width (); + projection.set_range (blob_box.bottom (), blob_box.top () + 1); + if (testing_on) { + // blob->plot(to_win,GOLDENROD,GOLDENROD); + // line_color_index(to_win,GOLDENROD); + // move2d(to_win,blob_box.left(),baseline); + // draw2d(to_win,blob_box.right(),baseline); + // move2d(to_win,blob_box.left(),baseline+xheight); + // draw2d(to_win,blob_box.right(),baseline+xheight); + tprintf + ("Testing underline on blob at (%d,%d)->(%d,%d), base=%d\nOccs:", + blob->bounding_box ().left (), blob->bounding_box ().bottom (), + blob->bounding_box ().right (), blob->bounding_box ().top (), + baseline); + } + horizontal_cblob_projection(blob, &projection); + desc_occ = 0; + for (occ = blob_box.bottom (); occ < baseline; occ++) + if (occ <= blob_box.top () && projection.pile_count (occ) > desc_occ) + //max in region + desc_occ = projection.pile_count (occ); + x_occ = 0; + for (occ = 
baseline; occ <= baseline + xheight; occ++) + if (occ >= blob_box.bottom () && occ <= blob_box.top () + && projection.pile_count (occ) > x_occ) + //max in region + x_occ = projection.pile_count (occ); + asc_occ = 0; + for (occ = baseline + xheight + 1; occ <= blob_box.top (); occ++) + if (occ >= blob_box.bottom () && projection.pile_count (occ) > asc_occ) + asc_occ = projection.pile_count (occ); + if (testing_on) { + tprintf ("%d %d %d\n", desc_occ, x_occ, asc_occ); + } + if (desc_occ == 0 && x_occ == 0 && asc_occ == 0) { + tprintf ("Bottom=%d, top=%d, base=%d, x=%d\n", + blob_box.bottom (), blob_box.top (), baseline, xheight); + projection.print(); + } + if (desc_occ > x_occ + x_occ + && desc_occ > blob_width * textord_underline_threshold) + return true; //real underline + return asc_occ > x_occ + x_occ && + asc_occ > blob_width * textord_underline_threshold; //overline + //neither +} + + +/** + * horizontal_cblob_projection + * + * Compute the horizontal projection of a cblob from its outlines + * and add to the given STATS. + */ + +static void horizontal_cblob_projection( //project outlines + C_BLOB *blob, ///< blob to project + STATS *stats ///< output + ) { + //outlines of blob + C_OUTLINE_IT out_it = blob->out_list (); + + for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { + horizontal_coutline_projection (out_it.data (), stats); + } +} + + +/** + * horizontal_coutline_projection + * + * Compute the horizontal projection of a outline from its outlines + * and add to the given STATS. 
+ */ + +static void horizontal_coutline_projection( //project outlines + C_OUTLINE *outline, ///< outline to project + STATS *stats ///< output + ) { + ICOORD pos; //current point + ICOORD step; //edge step + int32_t length; //of outline + int16_t stepindex; //current step + C_OUTLINE_IT out_it = outline->child (); + + pos = outline->start_pos (); + length = outline->pathlength (); + for (stepindex = 0; stepindex < length; stepindex++) { + step = outline->step (stepindex); + if (step.y () > 0) { + stats->add (pos.y (), pos.x ()); + } + else if (step.y () < 0) { + stats->add (pos.y () - 1, -pos.x ()); + } + pos += step; + } + + for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { + horizontal_coutline_projection (out_it.data (), stats); + } +} + +} // namespace tesseract diff --git a/tesseract/src/textord/blkocc.h b/tesseract/src/textord/blkocc.h new file mode 100644 index 00000000..4dee5cd8 --- /dev/null +++ b/tesseract/src/textord/blkocc.h @@ -0,0 +1,253 @@ +/****************************************************************************** + * + * File: blkocc.h (Formerly blockocc.h) + * Description: Block Occupancy routines + * Author: Chris Newton + * + * (c) Copyright 1991, Hewlett-Packard Company. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + ******************************************************************************/ + +#ifndef BLKOCC_H +#define BLKOCC_H + +#include "params.h" +#include "elst.h" + +namespace tesseract { + +class C_BLOB; + +/*************************************************************************** +CLASS REGION_OCC + + The class REGION_OCC defines a section of outline which exists entirely + within a single region. The only data held is the min and max x limits of + the outline within the region. + + REGION_OCCs are held on lists, one list for each region. The lists are + built in sorted order of min x. Overlapping REGION_OCCs are not permitted on + a single list. An overlapping region to be added causes the existing region + to be extended. This extension may result in the following REGION_OCC on the + list overlapping the amended one. In this case the amended REGION_OCC is + further extended to include the range of the following one, so that the + following one can be deleted. + +****************************************************************************/ + +class REGION_OCC : public ELIST_LINK +{ + public: + float min_x; //Lowest x in region + float max_x; //Highest x in region + int16_t region_type; //Type of crossing + + REGION_OCC() = default; // constructor used + // only in COPIER etc + REGION_OCC( //constructor + float min, + float max, + int16_t region) { + min_x = min; + max_x = max; + region_type = region; + } +}; + +ELISTIZEH (REGION_OCC) +#define RANGE_IN_BAND(band_max, band_min, range_max, range_min) \ +(((range_min) >= (band_min)) && ((range_max) < (band_max))) +/************************************************************************ +Adapted from the following procedure so that it can be used in the bands +class in an include file... + +bool range_in_band[ + range within band? 
+int16_t band_max, +int16_t band_min, +int16_t range_max, +int16_t range_min] +{ + if ((range_min >= band_min) && (range_max < band_max)) + return true; + else + return false; +} +***********************************************************************/ +#define RANGE_OVERLAPS_BAND(band_max, band_min, range_max, range_min) \ +(((range_max) >= (band_min)) && ((range_min) < (band_max))) +/************************************************************************ +Adapted from the following procedure so that it can be used in the bands +class in an include file... + +bool range_overlaps_band[ + range crosses band? +int16_t band_max, +int16_t band_min, +int16_t range_max, +int16_t range_min] +{ + if ((range_max >= band_min) && (range_min < band_max)) + return true; + else + return false; +} +***********************************************************************/ +/********************************************************************** + Bands + ----- + + BAND 4 +-------------------------------- + BAND 3 +-------------------------------- + + BAND 2 + +-------------------------------- + + BAND 1 + +Band 0 is the dot band + +Each band has an error margin above and below. An outline is not considered to +have significantly changed bands until it has moved out of the error margin. 
+*************************************************************************/ +class BAND +{ + public: + int16_t max_max; //upper max + int16_t max; //nominal max + int16_t min_max; //lower max + int16_t max_min; //upper min + int16_t min; //nominal min + int16_t min_min; //lower min + + BAND() = default; // constructor + + void set( // initialise a band + int16_t new_max_max, // upper max + int16_t new_max, // new nominal max + int16_t new_min_max, // new lower max + int16_t new_max_min, // new upper min + int16_t new_min, // new nominal min + int16_t new_min_min) { // new lower min + max_max = new_max_max; + max = new_max; + min_max = new_min_max; + max_min = new_max_min; + min = new_min; + min_min = new_min_min; + } + + bool in_minimal( //in minimal limits? + float y) { //y value + return (y >= max_min) && (y < min_max); + } + + bool in_nominal( //in nominal limits? + float y) { //y value + return (y >= min) && (y < max); + } + + bool in_maximal( //in maximal limits? + float y) { //y value + return (y >= min_min) && (y < max_max); + } + + //overlaps min limits? + bool range_overlaps_minimal(float y1, //one range limit + float y2) { //other range limit + if (y1 > y2) + return RANGE_OVERLAPS_BAND (min_max, max_min, y1, y2); + else + return RANGE_OVERLAPS_BAND (min_max, max_min, y2, y1); + } + + //overlaps nom limits? + bool range_overlaps_nominal(float y1, //one range limit + float y2) { //other range limit + if (y1 > y2) + return RANGE_OVERLAPS_BAND (max, min, y1, y2); + else + return RANGE_OVERLAPS_BAND (max, min, y2, y1); + } + + //overlaps max limits? + bool range_overlaps_maximal(float y1, //one range limit + float y2) { //other range limit + if (y1 > y2) + return RANGE_OVERLAPS_BAND (max_max, min_min, y1, y2); + else + return RANGE_OVERLAPS_BAND (max_max, min_min, y2, y1); + } + + bool range_in_minimal( //within min limits? 
+ float y1, //one range limit + float y2) { //other range limit + if (y1 > y2) + return RANGE_IN_BAND (min_max, max_min, y1, y2); + else + return RANGE_IN_BAND (min_max, max_min, y2, y1); + } + + bool range_in_nominal( //within nom limits? + float y1, //one range limit + float y2) { //other range limit + if (y1 > y2) + return RANGE_IN_BAND (max, min, y1, y2); + else + return RANGE_IN_BAND (max, min, y2, y1); + } + + bool range_in_maximal( //within max limits? + float y1, //one range limit + float y2) { //other range limit + if (y1 > y2) + return RANGE_IN_BAND (max_max, min_min, y1, y2); + else + return RANGE_IN_BAND (max_max, min_min, y2, y1); + } +}; + +/* Standard positions */ + +#define MAX_NUM_BANDS 5 +#define UNDEFINED_BAND 99 +#define NO_LOWER_LIMIT -9999 +#define NO_UPPER_LIMIT 9999 + +#define DOT_BAND 0 + +/* Special occupancy code emitted for the 0 region at the end of a word */ + +#define END_OF_WERD_CODE 255 + +extern BOOL_VAR_H (blockocc_show_result, false, "Show intermediate results"); +extern INT_VAR_H (blockocc_desc_height, 0, +"Descender height after normalisation"); +extern INT_VAR_H (blockocc_asc_height, 255, +"Ascender height after normalisation"); +extern INT_VAR_H (blockocc_band_count, 4, "Number of bands used"); +extern double_VAR_H (textord_underline_threshold, 0.9, +"Fraction of width occupied"); + +bool test_underline( //look for underlines + bool testing_on, //drawing blob + C_BLOB* blob, //blob to test + int16_t baseline, //coords of baseline + int16_t xheight //height of line +); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/textord/blobgrid.cpp b/tesseract/src/textord/blobgrid.cpp new file mode 100644 index 00000000..53a1d7b3 --- /dev/null +++ b/tesseract/src/textord/blobgrid.cpp @@ -0,0 +1,45 @@ +/////////////////////////////////////////////////////////////////////// +// File: blobgrid.cpp +// Description: BBGrid of BLOBNBOX with useful BLOBNBOX-specific methods. +// Copyright 2011 Google Inc. All Rights Reserved. 
+// Author: rays@google.com (Ray Smith) +// Created: Sat Jun 11 10:30:01 PST 2011 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "blobgrid.h" + +namespace tesseract { + +BlobGrid::BlobGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright) + : BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>(gridsize, bleft, tright) { +} + +// Destructor. +// It is defined here, so the compiler can create a single vtable +// instead of weak vtables in every compilation unit. +BlobGrid::~BlobGrid() = default; + +// Inserts all the blobs from the given list, with x and y spreading, +// without removing from the source list, so ownership remains with the +// source list. +void BlobGrid::InsertBlobList(BLOBNBOX_LIST* blobs) { + BLOBNBOX_IT blob_it(blobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + if (!blob->joined_to_prev()) + InsertBBox(true, true, blob); + } +} + +} // namespace tesseract. diff --git a/tesseract/src/textord/blobgrid.h b/tesseract/src/textord/blobgrid.h new file mode 100644 index 00000000..54b19aeb --- /dev/null +++ b/tesseract/src/textord/blobgrid.h @@ -0,0 +1,46 @@ +/////////////////////////////////////////////////////////////////////// +// File: blobgrid.h +// Description: BBGrid of BLOBNBOX with useful BLOBNBOX-specific methods. +// Copyright 2011 Google Inc. All Rights Reserved. 
// Author: rays@google.com (Ray Smith)
// Created: Sat Jun 11 10:26:01 PST 2011
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////


#ifndef TESSERACT_TEXTORD_BLOBGRID_H_
#define TESSERACT_TEXTORD_BLOBGRID_H_

#include "bbgrid.h"
#include "blobbox.h"

namespace tesseract {

CLISTIZEH(BLOBNBOX)

// Grid search specialised for BLOBNBOX elements.
using BlobGridSearch = GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>;

// A BBGrid holding BLOBNBOX elements, with a bulk-insert helper.
class TESS_API BlobGrid : public BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> {
 public:
  // All arguments are passed through to the BBGrid base class.
  BlobGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright);
  ~BlobGrid() override;

  // Inserts all the blobs from the given list, with x and y spreading,
  // without removing from the source list, so ownership remains with the
  // source list.
  void InsertBlobList(BLOBNBOX_LIST* blobs);
};

}  // namespace tesseract.

#endif  // TESSERACT_TEXTORD_BLOBGRID_H_
diff --git a/tesseract/src/textord/ccnontextdetect.cpp b/tesseract/src/textord/ccnontextdetect.cpp
new file mode 100644
index 00000000..cfbbb95a
--- /dev/null
+++ b/tesseract/src/textord/ccnontextdetect.cpp
@@ -0,0 +1,323 @@
///////////////////////////////////////////////////////////////////////
// File:        ccnontextdetect.cpp
// Description: Connected-Component-based photo (non-text) detection.
// Author:      rays@google.com (Ray Smith)
//
// Copyright 2011 Google Inc. All Rights Reserved.
+// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "ccnontextdetect.h" +#include "imagefind.h" +#include "strokewidth.h" + +namespace tesseract { + +// Max number of neighbour small objects per squared gridsize before a grid +// cell becomes image. +const double kMaxSmallNeighboursPerPix = 1.0 / 32; +// Max number of small blobs a large blob may overlap before it is rejected +// and determined to be image. +const int kMaxLargeOverlapsWithSmall = 3; +// Max number of small blobs a medium blob may overlap before it is rejected +// and determined to be image. Larger than for large blobs as medium blobs +// may be complex Chinese characters. Very large Chinese characters are going +// to overlap more medium blobs than small. +const int kMaxMediumOverlapsWithSmall = 12; +// Max number of normal blobs a large blob may overlap before it is rejected +// and determined to be image. This is set higher to allow for drop caps, which +// may overlap a lot of good text blobs. +const int kMaxLargeOverlapsWithMedium = 12; +// Multiplier of original noise_count used to test for the case of spreading +// noise beyond where it should really be. +const int kOriginalNoiseMultiple = 8; +// Pixel padding for noise blobs when rendering on the image +// mask to encourage them to join together. 
Make it too big and images +// will fatten out too much and have to be clipped to text. +const int kNoisePadding = 4; +// Fraction of max_noise_count_ to be added to the noise count if there is +// photo mask in the background. +const double kPhotoOffsetFraction = 0.375; +// Min ratio of perimeter^2/16area for a "good" blob in estimating noise +// density. Good blobs are supposed to be highly likely real text. +// We consider a square to have unit ratio, where A=(p/4)^2, hence the factor +// of 16. Digital circles are weird and have a minimum ratio of pi/64, not +// the 1/(4pi) that you would expect. +const double kMinGoodTextPARatio = 1.5; + +CCNonTextDetect::CCNonTextDetect(int gridsize, + const ICOORD& bleft, const ICOORD& tright) + : BlobGrid(gridsize, bleft, tright), + max_noise_count_(static_cast<int>(kMaxSmallNeighboursPerPix * + gridsize * gridsize)), + noise_density_(nullptr) { + // TODO(rays) break max_noise_count_ out into an area-proportional + // value, as now plus an additive constant for the number of text blobs + // in the 3x3 neighbourhood - maybe 9. +} + +CCNonTextDetect::~CCNonTextDetect() { + delete noise_density_; +} + +// Creates and returns a Pix with the same resolution as the original +// in which 1 (black) pixels represent likely non text (photo, line drawing) +// areas of the page, deleting from the blob_block the blobs that were +// determined to be non-text. +// The photo_map is used to bias the decision towards non-text, rather than +// supplying definite decision. +// The blob_block is the usual result of connected component analysis, +// holding the detected blobs. +// The returned Pix should be PixDestroyed after use. +Pix* CCNonTextDetect::ComputeNonTextMask(bool debug, Pix* photo_map, + TO_BLOCK* blob_block) { + // Insert the smallest blobs into the grid. + InsertBlobList(&blob_block->small_blobs); + InsertBlobList(&blob_block->noise_blobs); + // Add the medium blobs that don't have a good strokewidth neighbour. 
+ // Those that do go into good_grid as an antidote to spreading beyond the + // real reaches of a noise region. + BlobGrid good_grid(gridsize(), bleft(), tright()); + BLOBNBOX_IT blob_it(&blob_block->blobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + double perimeter_area_ratio = blob->cblob()->perimeter() / 4.0; + perimeter_area_ratio *= perimeter_area_ratio / blob->enclosed_area(); + if (blob->GoodTextBlob() == 0 || perimeter_area_ratio < kMinGoodTextPARatio) + InsertBBox(true, true, blob); + else + good_grid.InsertBBox(true, true, blob); + } + noise_density_ = ComputeNoiseDensity(debug, photo_map, &good_grid); + good_grid.Clear(); // Not needed any more. + Pix* pix = noise_density_->ThresholdToPix(max_noise_count_); + if (debug) { + pixWrite("junknoisemask.png", pix, IFF_PNG); + } + ScrollView* win = nullptr; + #ifndef GRAPHICS_DISABLED + if (debug) { + win = MakeWindow(0, 400, "Photo Mask Blobs"); + } + #endif // !GRAPHICS_DISABLED + // Large and medium blobs are not text if they overlap with "a lot" of small + // blobs. + MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, + kMaxLargeOverlapsWithSmall, + win, ScrollView::DARK_GREEN, pix); + MarkAndDeleteNonTextBlobs(&blob_block->blobs, kMaxMediumOverlapsWithSmall, + win, ScrollView::WHITE, pix); + // Clear the grid of small blobs and insert the medium blobs. + Clear(); + InsertBlobList(&blob_block->blobs); + MarkAndDeleteNonTextBlobs(&blob_block->large_blobs, + kMaxLargeOverlapsWithMedium, + win, ScrollView::DARK_GREEN, pix); + // Clear again before we start deleting the blobs in the grid. 
+ Clear(); + MarkAndDeleteNonTextBlobs(&blob_block->noise_blobs, -1, + win, ScrollView::CORAL, pix); + MarkAndDeleteNonTextBlobs(&blob_block->small_blobs, -1, + win, ScrollView::GOLDENROD, pix); + MarkAndDeleteNonTextBlobs(&blob_block->blobs, -1, + win, ScrollView::WHITE, pix); + if (debug) { + #ifndef GRAPHICS_DISABLED + win->Update(); + #endif // !GRAPHICS_DISABLED + pixWrite("junkccphotomask.png", pix, IFF_PNG); + #ifndef GRAPHICS_DISABLED + delete win->AwaitEvent(SVET_DESTROY); + delete win; + #endif // !GRAPHICS_DISABLED + } + return pix; +} + +// Computes and returns the noise_density IntGrid, at the same gridsize as +// this by summing the number of small elements in a 3x3 neighbourhood of +// each grid cell. good_grid is filled with blobs that are considered most +// likely good text, and this is filled with small and medium blobs that are +// more likely non-text. +// The photo_map is used to bias the decision towards non-text, rather than +// supplying definite decision. +IntGrid* CCNonTextDetect::ComputeNoiseDensity(bool debug, Pix* photo_map, + BlobGrid* good_grid) { + IntGrid* noise_counts = CountCellElements(); + IntGrid* noise_density = noise_counts->NeighbourhoodSum(); + IntGrid* good_counts = good_grid->CountCellElements(); + // Now increase noise density in photo areas, to bias the decision and + // minimize hallucinated text on image, but trim the noise_density where + // there are good blobs and the original count is low in non-photo areas, + // indicating that most of the result came from neighbouring cells. + int height = pixGetHeight(photo_map); + int photo_offset = IntCastRounded(max_noise_count_ * kPhotoOffsetFraction); + for (int y = 0; y < gridheight(); ++y) { + for (int x = 0; x < gridwidth(); ++x) { + int noise = noise_density->GridCellValue(x, y); + if (max_noise_count_ < noise + photo_offset && + noise <= max_noise_count_) { + // Test for photo. 
+ int left = x * gridsize(); + int right = left + gridsize(); + int bottom = height - y * gridsize(); + int top = bottom - gridsize(); + if (ImageFind::BoundsWithinRect(photo_map, &left, &top, &right, + &bottom)) { + noise_density->SetGridCell(x, y, noise + photo_offset); + } + } + if (debug && noise > max_noise_count_ && + good_counts->GridCellValue(x, y) > 0) { + tprintf("At %d, %d, noise = %d, good=%d, orig=%d, thr=%d\n", + x * gridsize(), y * gridsize(), + noise_density->GridCellValue(x, y), + good_counts->GridCellValue(x, y), + noise_counts->GridCellValue(x, y), max_noise_count_); + } + if (noise > max_noise_count_ && + good_counts->GridCellValue(x, y) > 0 && + noise_counts->GridCellValue(x, y) * kOriginalNoiseMultiple <= + max_noise_count_) { + noise_density->SetGridCell(x, y, 0); + } + } + } + delete noise_counts; + delete good_counts; + return noise_density; +} + +// Helper to expand a box in one of the 4 directions by the given pad, +// provided it does not expand into any cell with a zero noise density. +// If that is not possible, try expanding all round by a small constant. 
+static TBOX AttemptBoxExpansion(const TBOX& box, const IntGrid& noise_density, + int pad) { + TBOX expanded_box(box); + expanded_box.set_right(box.right() + pad); + if (!noise_density.AnyZeroInRect(expanded_box)) + return expanded_box; + expanded_box = box; + expanded_box.set_left(box.left() - pad); + if (!noise_density.AnyZeroInRect(expanded_box)) + return expanded_box; + expanded_box = box; + expanded_box.set_top(box.top() + pad); + if (!noise_density.AnyZeroInRect(expanded_box)) + return expanded_box; + expanded_box = box; + expanded_box.set_bottom(box.bottom() + pad); + if (!noise_density.AnyZeroInRect(expanded_box)) + return expanded_box; + expanded_box = box; + expanded_box.pad(kNoisePadding, kNoisePadding); + if (!noise_density.AnyZeroInRect(expanded_box)) + return expanded_box; + return box; +} + +// Tests each blob in the list to see if it is certain non-text using 2 +// conditions: +// 1. blob overlaps a cell with high value in noise_density_ (previously set +// by ComputeNoiseDensity). +// OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This +// condition is disabled with max_blob_overlaps == -1. +// If it does, the blob is declared non-text, and is used to mark up the +// nontext_mask. Such blobs are fully deleted, and non-noise blobs have their +// neighbours reset, as they may now point to deleted data. +// WARNING: The blobs list blobs may be in the *this grid, but they are +// not removed. If any deleted blobs might be in *this, then this must be +// Clear()ed immediately after MarkAndDeleteNonTextBlobs is called. +// If the win is not nullptr, deleted blobs are drawn on it in red, and kept +// blobs are drawn on it in ok_color. 
+void CCNonTextDetect::MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs, + int max_blob_overlaps, + ScrollView* win, + ScrollView::Color ok_color, + Pix* nontext_mask) { + int imageheight = tright().y() - bleft().x(); + BLOBNBOX_IT blob_it(blobs); + BLOBNBOX_LIST dead_blobs; + BLOBNBOX_IT dead_it(&dead_blobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + TBOX box = blob->bounding_box(); + if (!noise_density_->RectMostlyOverThreshold(box, max_noise_count_) && + (max_blob_overlaps < 0 || + !BlobOverlapsTooMuch(blob, max_blob_overlaps))) { + blob->ClearNeighbours(); + #ifndef GRAPHICS_DISABLED + if (win != nullptr) + blob->plot(win, ok_color, ok_color); + #endif // !GRAPHICS_DISABLED + } else { + if (noise_density_->AnyZeroInRect(box)) { + // There is a danger that the bounding box may overlap real text, so + // we need to render the outline. + Pix* blob_pix = blob->cblob()->render_outline(); + pixRasterop(nontext_mask, box.left(), imageheight - box.top(), + box.width(), box.height(), PIX_SRC | PIX_DST, + blob_pix, 0, 0); + pixDestroy(&blob_pix); + } else { + if (box.area() < gridsize() * gridsize()) { + // It is a really bad idea to make lots of small components in the + // photo mask, so try to join it to a bigger area by expanding the + // box in a way that does not touch any zero noise density cell. + box = AttemptBoxExpansion(box, *noise_density_, gridsize()); + } + // All overlapped cells are non-zero, so just mark the rectangle. + pixRasterop(nontext_mask, box.left(), imageheight - box.top(), + box.width(), box.height(), PIX_SET, nullptr, 0, 0); + } + #ifndef GRAPHICS_DISABLED + if (win != nullptr) + blob->plot(win, ScrollView::RED, ScrollView::RED); + #endif // !GRAPHICS_DISABLED + // It is safe to delete the cblob now, as it isn't used by the grid + // or BlobOverlapsTooMuch, and the BLOBNBOXes will go away with the + // dead_blobs list. 
+ // TODO(rays) delete the delete when the BLOBNBOX destructor deletes + // the cblob. + delete blob->cblob(); + dead_it.add_to_end(blob_it.extract()); + } + } +} + +// Returns true if the given blob overlaps more than max_overlaps blobs +// in the current grid. +bool CCNonTextDetect::BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps) { + // Search the grid to see what intersects it. + // Setup a Rectangle search for overlapping this blob. + BlobGridSearch rsearch(this); + const TBOX& box = blob->bounding_box(); + rsearch.StartRectSearch(box); + rsearch.SetUniqueMode(true); + BLOBNBOX* neighbour; + int overlap_count = 0; + while (overlap_count <= max_overlaps && + (neighbour = rsearch.NextRectSearch()) != nullptr) { + if (box.major_overlap(neighbour->bounding_box())) { + ++overlap_count; + if (overlap_count > max_overlaps) + return true; + } + } + return false; +} + +} // namespace tesseract. diff --git a/tesseract/src/textord/ccnontextdetect.h b/tesseract/src/textord/ccnontextdetect.h new file mode 100644 index 00000000..e1f1ca2d --- /dev/null +++ b/tesseract/src/textord/ccnontextdetect.h @@ -0,0 +1,87 @@ +/////////////////////////////////////////////////////////////////////// +// File: ccnontextdetect.h +// Description: Connected-Component-based non-text detection. +// Copyright 2011 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +// Created: Sat Jun 11 09:52:01 PST 2011 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_TEXTORD_CCPHOTODETECT_H_
#define TESSERACT_TEXTORD_CCPHOTODETECT_H_

#include "blobgrid.h"
#include "scrollview.h"

namespace tesseract {

// The CCNonTextDetect class contains grid-based operations on blobs to create
// a full-resolution image mask analogous yet complementary to
// pixGenHalftoneMask as it is better at line-drawings, graphs and charts.
class CCNonTextDetect : public BlobGrid {
 public:
  CCNonTextDetect(int gridsize, const ICOORD& bleft, const ICOORD& tright);
  ~CCNonTextDetect() override;

  // Creates and returns a Pix with the same resolution as the original
  // in which 1 (black) pixels represent likely non text (photo, line drawing)
  // areas of the page, deleting from the blob_block the blobs that were
  // determined to be non-text.
  // The photo_map (binary image mask) is used to bias the decision towards
  // non-text, rather than supplying a definite decision.
  // The blob_block is the usual result of connected component analysis,
  // holding the detected blobs.
  // The returned Pix should be PixDestroyed after use.
  Pix* ComputeNonTextMask(bool debug, Pix* photo_map, TO_BLOCK* blob_block);

 private:
  // Computes and returns the noise_density IntGrid, at the same gridsize as
  // this by summing the number of small elements in a 3x3 neighbourhood of
  // each grid cell. good_grid is filled with blobs that are considered most
  // likely good text, and this is filled with small and medium blobs that are
  // more likely non-text.
  // The photo_map is used to bias the decision towards non-text, rather than
  // supplying a definite decision.
  IntGrid* ComputeNoiseDensity(bool debug, Pix* photo_map, BlobGrid* good_grid);

  // Tests each blob in the list to see if it is certain non-text using 2
  // conditions:
  // 1. blob overlaps a cell with high value in noise_density_ (previously set
  // by ComputeNoiseDensity).
  // OR 2. The blob overlaps more than max_blob_overlaps in *this grid. This
  // condition is disabled with max_blob_overlaps == -1.
  // If it does, the blob is declared non-text, and is used to mark up the
  // nontext_mask. Such blobs are fully deleted, and non-noise blobs have their
  // neighbours reset, as they may now point to deleted data.
  // WARNING: The blobs list blobs may be in the *this grid, but they are
  // not removed. If any deleted blobs might be in *this, then this must be
  // Clear()ed immediately after MarkAndDeleteNonTextBlobs is called.
  // If the win is not nullptr, deleted blobs are drawn on it in red, and kept
  // blobs are drawn on it in ok_color.
  void MarkAndDeleteNonTextBlobs(BLOBNBOX_LIST* blobs,
                                 int max_blob_overlaps,
                                 ScrollView* win, ScrollView::Color ok_color,
                                 Pix* nontext_mask);
  // Returns true if the given blob overlaps more than max_overlaps blobs
  // in the current grid.
  bool BlobOverlapsTooMuch(BLOBNBOX* blob, int max_overlaps);

  // Max entry in noise_density_ before the cell is declared noisy.
  int max_noise_count_;
  // Completed noise density map, which we keep around to use for secondary
  // noise detection.
  IntGrid* noise_density_;
};

}  // namespace tesseract.

#endif  // TESSERACT_TEXTORD_CCPHOTODETECT_H_
diff --git a/tesseract/src/textord/cjkpitch.cpp b/tesseract/src/textord/cjkpitch.cpp
new file mode 100644
index 00000000..3d547396
--- /dev/null
+++ b/tesseract/src/textord/cjkpitch.cpp
@@ -0,0 +1,1070 @@
///////////////////////////////////////////////////////////////////////
// File:        cjkpitch.cpp
// Description: Code to determine fixed pitchness and the pitch if fixed,
//              for CJK text.
// Author:      takenaka@google.com (Hiroshi Takenaka)
//
// Copyright 2011 Google Inc. All Rights Reserved.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#include "cjkpitch.h"
#include "genericvector.h"
#include "topitch.h"
#include "tovars.h"

#include <algorithm>   // for std::sort
#include <vector>      // for std::vector

namespace tesseract {

static BOOL_VAR(textord_space_size_is_variable, false,
                "If true, word delimiter spaces are assumed to have "
                "variable width, even though characters have fixed pitch.");

// Allow +/-10% error for character pitch / body size.
// Applied as a fraction of the pitch being tested.
static const float kFPTolerance = 0.1f;

// Minimum ratio of "good" character pitch for a row to be considered
// to be fixed-pitch. Compared against good pitches / all pitches of a row.
static const float kFixedPitchThreshold = 0.35f;

// rank statistics for a small collection of float values.
+class SimpleStats { + public: + SimpleStats() = default; + ~SimpleStats() = default; + + void Clear() { + values_.clear(); + finalized_ = false; + } + + void Add(float value) { + values_.push_back(value); + finalized_ = false; + } + + void Finish() { + std::sort(values_.begin(), values_.end()); + finalized_ = true; + } + + float ile(double frac) { + if (!finalized_) Finish(); + if (values_.empty()) return 0.0f; + if (frac >= 1.0) return values_.back(); + if (frac <= 0.0 || values_.size() == 1) return values_[0]; + int index = static_cast<int>((values_.size() - 1) * frac); + float reminder = (values_.size() - 1) * frac - index; + + return values_[index] * (1.0f - reminder) + + values_[index + 1] * reminder; + } + + float median() { + return ile(0.5); + } + + float minimum() { + if (!finalized_) Finish(); + if (values_.empty()) return 0.0f; + return values_[0]; + } + + int size() const { + return values_.size(); + } + + private: + bool finalized_ = false; + std::vector<float> values_; +}; + +// statistics for a small collection of float pairs (x, y). +// EstimateYFor(x, r) returns the estimated y at x, based on +// existing samples between x*(1-r) ~ x*(1+r). +class LocalCorrelation { + public: + struct float_pair { + float x, y; + int vote; + }; + + LocalCorrelation(): finalized_(false) { } + ~LocalCorrelation() { } + + void Finish() { + values_.sort(float_pair_compare); + finalized_ = true; + } + + void Clear() { + finalized_ = false; + } + + void Add(float x, float y, int v) { + struct float_pair value; + value.x = x; + value.y = y; + value.vote = v; + values_.push_back(value); + finalized_ = false; + } + + float EstimateYFor(float x, float r) { + ASSERT_HOST(finalized_); + int start = 0, end = values_.size(); + // Because the number of samples (used_) is assumed to be small, + // just use linear search to find values within the range. 
+ while (start < values_.size() && values_[start].x < x * (1.0 - r)) start++; + while (end - 1 >= 0 && values_[end - 1].x > x * (1.0 + r)) end--; + + // Fall back to the global average if there are no data within r + // of x. + if (start >= end) { + start = 0; + end = values_.size(); + } + + // Compute weighted average of the values. + float rc = 0; + int vote = 0; + for (int i = start; i < end; i++) { + rc += values_[i].vote * x * values_[i].y / values_[i].x; + vote += values_[i].vote; + } + + return rc / vote; + } + + private: + static int float_pair_compare(const void* a, const void* b) { + const auto* f_a = static_cast<const float_pair*>(a); + const auto* f_b = static_cast<const float_pair*>(b); + return (f_a->x > f_b->x) ? 1 : ((f_a->x < f_b->x) ? -1 : 0); + } + + bool finalized_; + GenericVector<struct float_pair> values_; +}; + +// Class to represent a character on a fixed pitch row. A FPChar may +// consist of multiple blobs (BLOBNBOX's). +class FPChar { + public: + enum Alignment { + ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD + }; + + FPChar(): box_(), real_body_(), + from_(nullptr), to_(nullptr), num_blobs_(0), max_gap_(0), + final_(false), alignment_(ALIGN_UNKNOWN), + merge_to_prev_(false), delete_flag_(false) { + } + + // Initialize from blob. + void Init(BLOBNBOX *blob) { + box_ = blob->bounding_box(); + real_body_ = box_; + from_ = to_ = blob; + num_blobs_ = 1; + } + + // Merge this character with "next". The "next" character should + // consist of succeeding blobs on the same row. + void Merge(const FPChar &next) { + int gap = real_body_.x_gap(next.real_body_); + if (gap > max_gap_) max_gap_ = gap; + + box_ += next.box_; + real_body_ += next.real_body_; + to_ = next.to_; + num_blobs_ += next.num_blobs_; + } + + // Accessors. 
+ const TBOX &box() const { return box_; } + void set_box(const TBOX &box) { + box_ = box; + } + const TBOX &real_body() const { return real_body_; } + + bool is_final() const { return final_; } + void set_final(bool flag) { + final_ = flag; + } + + const Alignment& alignment() const { + return alignment_; + } + void set_alignment(Alignment alignment) { + alignment_ = alignment; + } + + bool merge_to_prev() const { + return merge_to_prev_; + } + void set_merge_to_prev(bool flag) { + merge_to_prev_ = flag; + } + + bool delete_flag() const { + return delete_flag_; + } + void set_delete_flag(bool flag) { + delete_flag_ = flag; + } + + int max_gap() const { + return max_gap_; + } + + int num_blobs() const { + return num_blobs_; + } + + private: + TBOX box_; // Rectangle region considered to be occupied by this + // character. It could be bigger than the bounding box. + TBOX real_body_; // Real bounding box of this character. + BLOBNBOX *from_; // The first blob of this character. + BLOBNBOX *to_; // The last blob of this character. + int num_blobs_; // Number of blobs that belong to this character. + int max_gap_; // Maximum x gap between the blobs. + + bool final_; // True if alignment/fragmentation decision for this + // character is finalized. + + Alignment alignment_; // Alignment status. + bool merge_to_prev_; // True if this is a fragmented blob that + // needs to be merged to the previous + // character. + + int delete_flag_; // True if this character is merged to another + // one and needs to be deleted. +}; + +// Class to represent a fixed pitch row, as a linear collection of +// FPChar's. +class FPRow { + public: + FPRow() : all_pitches_(), all_gaps_(), good_pitches_(), good_gaps_(), + heights_(), characters_() { + } + + ~FPRow() { } + + // Initialize from TD_ROW. + void Init(TO_ROW *row); + + // Estimate character pitch of this row, based on current alignment + // status of underlying FPChar's. 
The argument pass1 can be set to + // true if the function is called after Pass1Analyze(), to eliminate + // some redundant computation. + void EstimatePitch(bool pass1); + + // Check each character if it has good character pitches between its + // predecessor and its successor and set its alignment status. If + // we already calculated the estimated pitch for this row, the value + // is used. If we didn't, a character is considered to be good, if + // the pitches between its predecessor and its successor are almost + // equal. + void Pass1Analyze(); + + // Find characters that fit nicely into one imaginary body next to a + // character which is already finalized. Then mark them as character + // fragments. + bool Pass2Analyze(); + + // Merge FPChars marked as character fragments into one. + void MergeFragments(); + + // Finalize characters that are already large enough and cannot be + // merged with others any more. + void FinalizeLargeChars(); + + // Output pitch estimation results to attributes of TD_ROW. 
+ void OutputEstimations(); + + void DebugOutputResult(int row_index); + + int good_pitches() { + return good_pitches_.size(); + } + + float pitch() { + return pitch_; + } + + float estimated_pitch() { + return estimated_pitch_; + } + + void set_estimated_pitch(float v) { + estimated_pitch_ = v; + } + + float height() { + return height_; + } + + float height_pitch_ratio() { + if (good_pitches_.size() < 2) return -1.0; + return height_ / good_pitches_.median(); + } + + float gap() { + return gap_; + } + + size_t num_chars() { + return characters_.size(); + } + FPChar *character(int i) { + return &characters_[i]; + } + + const TBOX &box(int i) { + return characters_[i].box(); + } + + const TBOX &real_body(int i) { + return characters_[i].real_body(); + } + + bool is_box_modified(int i) { + return !(characters_[i].box() == characters_[i].real_body()); + } + + float center_x(int i) { + return (characters_[i].box().left() + characters_[i].box().right()) / 2.0; + } + + bool is_final(int i) { + return characters_[i].is_final(); + } + + void finalize(int i) { + characters_[i].set_final(true); + } + + bool is_good(int i) { + return characters_[i].alignment() == FPChar::ALIGN_GOOD; + } + + void mark_good(int i) { + characters_[i].set_alignment(FPChar::ALIGN_GOOD); + } + + void mark_bad(int i) { + characters_[i].set_alignment(FPChar::ALIGN_BAD); + } + + void clear_alignment(int i) { + characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN); + } + + private: + static float x_overlap_fraction(const TBOX& box1, const TBOX& box2) { + if (std::min(box1.width(), box2.width()) == 0) return 0.0; + return -box1.x_gap(box2) / static_cast<float>(std::min(box1.width(), box2.width())); + } + + static bool mostly_overlap(const TBOX& box1, const TBOX& box2) { + return x_overlap_fraction(box1, box2) > 0.9; + } + + static bool significant_overlap(const TBOX& box1, const TBOX& box2) { + if (std::min(box1.width(), box2.width()) == 0) return false; + int overlap = -box1.x_gap(box2); + return overlap
> 1 || x_overlap_fraction(box1, box2) > 0.1; + } + + static float box_pitch(const TBOX& ref, const TBOX& box) { + return abs(ref.left() + ref.right() - box.left() - box.right()) / 2.0; + } + + // Check if two neighboring characters satisfy the fixed pitch model. + // Fails when either box is at least pitch * (1 + kFPTolerance) wide or + // tall; succeeds when the distance between the box centers differs from + // pitch by less than pitch * kFPTolerance. + static bool is_good_pitch(float pitch, const TBOX& box1, const TBOX& box2) { + // Character box shouldn't exceed pitch. + if (box1.width() >= pitch * (1.0 + kFPTolerance) || + box2.width() >= pitch * (1.0 + kFPTolerance) || + box1.height() >= pitch * (1.0 + kFPTolerance) || + box2.height() >= pitch * (1.0 + kFPTolerance)) return false; + + const float real_pitch = box_pitch(box1, box2); + if (fabs(real_pitch - pitch) < pitch * kFPTolerance) return true; + + if (textord_space_size_is_variable) { + // Hangul characters usually have fixed pitch, but words are + // delimited by space which can be narrower than characters. + // So also accept a pitch between 1x and 2x of the expected one if it + // drops below one pitch once the gap between the boxes is subtracted. + if (real_pitch > pitch && real_pitch < pitch * 2.0 && + real_pitch - box1.x_gap(box2) < pitch) { + return true; + } + } + return false; + } + + // Returns true unless the blob is joined to its predecessor or its + // flow() is BTFT_LEADER. + static bool is_interesting_blob(const BLOBNBOX *blob) { + return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER; + } + + // Cleanup chars that are already merged to others. + // Compacts characters_ in place, keeping the entries whose + // delete_flag() is not set in their original order. + void DeleteChars() { + int index = 0; + for (int i = 0; i < characters_.size(); ++i) { + if (!characters_[i].delete_flag()) { + if (index != i) characters_[index] = characters_[i]; + index++; + } + } + characters_.truncate(index); + } + + float pitch_ = 0.0f; // Character pitch. + float estimated_pitch_ = 0.0f; // equal to pitch_ if pitch_ is considered + // to be good enough. + float height_ = 0.0f; // Character height. + float gap_ = 0.0f; // Minimum gap between characters. + + // Pitches between any two successive characters. + SimpleStats all_pitches_; + // Gaps between any two successive characters. + SimpleStats all_gaps_; + // Pitches between any two successive characters that are consistent + // with the fixed pitch model.
+ SimpleStats good_pitches_; + // Gaps between any two successive characters that are consistent + // with the fixed pitch model. + SimpleStats good_gaps_; + + // Heights fed into the height_ estimate (see Init() and EstimatePitch()). + SimpleStats heights_; + + // Characters of this row; each one may hold several merged blobs. + GenericVector<FPChar> characters_; + TO_ROW *real_row_ = nullptr; // Underlying TO_ROW for this row. +}; + +// Attaches this FPRow to row, defaulting its pitch_decision to +// PITCH_CORR_PROP, builds characters_ from the interesting blobs of the +// row and computes the initial height_ estimate. +void FPRow::Init(TO_ROW *row) { + ASSERT_HOST(row != nullptr); + ASSERT_HOST(row->xheight > 0); + real_row_ = row; + real_row_->pitch_decision = PITCH_CORR_PROP; // Default decision. + + BLOBNBOX_IT blob_it = row->blob_list(); + // Initialize characters_ and compute the initial estimation of + // character height. + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + if (is_interesting_blob(blob_it.data())) { + FPChar fp_char; + fp_char.Init(blob_it.data()); + // Merge unconditionally if two blobs overlap. + if (!characters_.empty() && + significant_overlap(fp_char.box(), characters_.back().box())) { + characters_.back().Merge(fp_char); + } else { + characters_.push_back(fp_char); + } + TBOX bound = blob_it.data()->bounding_box(); + // Blobs at least three times as wide as they are tall are left out + // of the height statistics. + if (bound.height() * 3.0 > bound.width()) { + heights_.Add(bound.height()); + } + } + } + heights_.Finish(); + height_ = heights_.ile(0.875); +} + +// Stores the estimation results of this row in real_row_: the pitch +// decision, pitch and spacing parameters and, for fixed pitch rows, the +// char_cells. Rows without any good pitch are marked PITCH_CORR_PROP. +void FPRow::OutputEstimations() { + if (good_pitches_.size() == 0) { + pitch_ = 0.0f; + real_row_->pitch_decision = PITCH_CORR_PROP; + return; + } + + pitch_ = good_pitches_.median(); + real_row_->fixed_pitch = pitch_; + // good_gaps_.ile(0.125) can be large if most characters on the row + // are skinny. Use pitch_ - height_ instead if it's smaller, but + // positive. + real_row_->kern_size = real_row_->pr_nonsp = + std::min(good_gaps_.ile(0.125), std::max(pitch_ - height_, 0.0f)); + real_row_->body_size = pitch_ - real_row_->kern_size; + + if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) { + // If more than half of the characters of a line don't fit to the + // fixed pitch model, consider the line to be proportional.
50% + // seems to be a good threshold in practice as well. + // Anyway we store estimated values (fixed_pitch, kern_size, etc.) in + // real_row_ as a partial estimation result and try to use them in the + // normalization process. + real_row_->pitch_decision = PITCH_CORR_PROP; + return; + } else if (good_pitches_.size() > all_pitches_.size() * 0.75) { + real_row_->pitch_decision = PITCH_DEF_FIXED; + } else { + real_row_->pitch_decision = PITCH_CORR_FIXED; + } + + real_row_->space_size = real_row_->pr_space = pitch_; + // Set min_space to 50% of character pitch so that we can break CJK + // text at a half-width space after punctuation. + real_row_->min_space = (pitch_ + good_gaps_.minimum()) * 0.5; + + // Don't consider a quarter space as a real space, because it's used + // for line justification in traditional Japanese books. + real_row_->max_nonspace = std::max(pitch_ * 0.25 + good_gaps_.minimum(), + static_cast<double>(good_gaps_.ile(0.875))); + + // Threshold used for the word breaks in char_cells below. It is taken + // before max_nonspace is widened by the intra-character gaps. + int space_threshold = + std::min((real_row_->max_nonspace + real_row_->min_space) / 2, + static_cast<int>(real_row_->xheight)); + + // Make max_nonspace larger than any intra-character gap so that + // make_prop_words() won't break a row at the middle of a character. + for (size_t i = 0; i < num_chars(); ++i) { + if (characters_[i].max_gap() > real_row_->max_nonspace) { + real_row_->max_nonspace = characters_[i].max_gap(); + } + } + real_row_->space_threshold = + std::min((real_row_->max_nonspace + real_row_->min_space) / 2, + static_cast<int>(real_row_->xheight)); + real_row_->used_dm_model = false; + + // Setup char_cells. + ICOORDELT_IT cell_it = &real_row_->char_cells; + auto *cell = new ICOORDELT(real_body(0).left(), 0); + cell_it.add_after_then_move(cell); + + int right = real_body(0).right(); + for (size_t i = 1; i < num_chars(); ++i) { + // Put a word break if gap between two characters is bigger than + // space_threshold.
Don't break if none of two characters + // could be "finalized", because maybe they need to be merged + // to one character. + if ((is_final(i - 1) || is_final(i)) && + real_body(i - 1).x_gap(real_body(i)) > space_threshold) { + cell = new ICOORDELT(right + 1, 0); + cell_it.add_after_then_move(cell); + // Add one cell boundary per pitch_ of whitespace before character i. + while (right + pitch_ < box(i).left()) { + right += pitch_; + cell = new ICOORDELT(right + 1, 0); + cell_it.add_after_then_move(cell); + } + right = box(i).left(); + } + cell = new ICOORDELT((right + real_body(i).left()) / 2, 0); + cell_it.add_after_then_move(cell); + right = real_body(i).right(); + } + + cell = new ICOORDELT(right + 1, 0); + cell_it.add_after_then_move(cell); + + // TODO(takenaka): add code to store alignment/fragmentation + // information to blobs so that it can be reused later, e.g. in + // recognition phase. +} + +// Recomputes the pitch, gap and height statistics of this row and, from +// them, height_, pitch_ and gap_. Pass pass1 = true when called after +// Pass1Analyze(); with pass1 = false only pitches between two good +// characters that are within tolerance of estimated_pitch_ count as good. +void FPRow::EstimatePitch(bool pass1) { + good_pitches_.Clear(); + all_pitches_.Clear(); + good_gaps_.Clear(); + all_gaps_.Clear(); + heights_.Clear(); + if (num_chars() == 0) return; + + int32_t cx0, cx1; + bool prev_was_good = is_good(0); + cx0 = center_x(0); + + heights_.Add(box(0).height()); + for (size_t i = 1; i < num_chars(); i++) { + cx1 = center_x(i); + int32_t pitch = cx1 - cx0; + int32_t gap = std::max(0, real_body(i - 1).x_gap(real_body(i))); + + heights_.Add(box(i).height()); + // Ignore if the pitch is too close. But don't ignore a wide pitch, + // which may be the result of large tracking. + if (pitch > height_ * 0.5) { + all_pitches_.Add(pitch); + all_gaps_.Add(gap); + if (is_good(i)) { + // In pass1 (after Pass1Analyze()), all characters marked as + // "good" have a good consistent pitch with their previous + // characters. However, it's not true in pass2 and a good + // character may have a good pitch only with its successor. + // So we collect only pitch values between two good + // characters, and within tolerance, in pass2.
+ if (pass1 || (prev_was_good && + fabs(estimated_pitch_ - pitch) < + kFPTolerance * estimated_pitch_)) { + good_pitches_.Add(pitch); + if (!is_box_modified(i - 1) && !is_box_modified(i)) { + good_gaps_.Add(gap); + } + } + prev_was_good = true; + } else { + prev_was_good = false; + } + } + cx0 = cx1; + } + + good_pitches_.Finish(); + all_pitches_.Finish(); + good_gaps_.Finish(); + all_gaps_.Finish(); + heights_.Finish(); + + height_ = heights_.ile(0.875); + if (all_pitches_.size() == 0) { + pitch_ = 0.0f; + gap_ = 0.0f; + } else if (good_pitches_.size() < 2) { + // We don't have enough data to estimate the pitch of this row yet. + // Use median of all pitches as the initial guess. + pitch_ = all_pitches_.median(); + ASSERT_HOST(pitch_ > 0.0f); + gap_ = all_gaps_.ile(0.125); + } else { + pitch_ = good_pitches_.median(); + ASSERT_HOST(pitch_ > 0.0f); + gap_ = good_gaps_.ile(0.125); + } +} + +// Prints the estimation stored in real_row_ and the state of every +// character of this row with tprintf. Prints nothing for an empty row. +void FPRow::DebugOutputResult(int row_index) { + if (num_chars() > 0) { + tprintf("Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, " + "space_size=%f, space_threshold=%d, xheight=%f\n", + row_index, static_cast<int>(real_row_->pitch_decision), + real_row_->fixed_pitch, real_row_->max_nonspace, + real_row_->space_size, real_row_->space_threshold, + real_row_->xheight); + + for (unsigned i = 0; i < num_chars(); i++) { + tprintf("Char %u: is_final=%d is_good=%d num_blobs=%d: ", + i, is_final(i), is_good(i), character(i)->num_blobs()); + box(i).print(); + } + } +} + +// Pass 1: marks character i - 1 as good when the pitches on both of its +// sides are good. If estimated_pitch_ is not available yet, the pitch on +// the left side serves as the reference for the right side. The first +// and last characters take the alignment of their only neighbour. +void FPRow::Pass1Analyze() { + if (num_chars() < 2) return; + + if (estimated_pitch_ > 0.0f) { + for (size_t i = 2; i < num_chars(); i++) { + if (is_good_pitch(estimated_pitch_, box(i - 2), box(i-1)) && + is_good_pitch(estimated_pitch_, box(i - 1), box(i))) { + mark_good(i - 1); + } + } + } else { + for (size_t i = 2; i < num_chars(); i++) { + if (is_good_pitch(box_pitch(box(i-2), box(i-1)), box(i - 1), box(i))) { + mark_good(i - 1); + } + } + } + character(0)->set_alignment(character(1)->alignment()); +
character(num_chars() - 1)->set_alignment( + character(num_chars() - 2)->alignment()); +} + +bool FPRow::Pass2Analyze() { + bool changed = false; + if (num_chars() <= 1 || estimated_pitch_ == 0.0f) { + return false; + } + for (size_t i = 0; i < num_chars(); i++) { + if (is_final(i)) continue; + + FPChar::Alignment alignment = character(i)->alignment(); + bool intersecting = false; + bool not_intersecting = false; + + if (i < num_chars() - 1 && is_final(i + 1)) { + // Next character is already finalized. Estimate the imaginary + // body including this character based on that character. Skip + // whitespace if necessary. + bool skipped_whitespaces = false; + float c1 = center_x(i + 1) - 1.5 * estimated_pitch_; + while (c1 > box(i).right()) { + skipped_whitespaces = true; + c1 -= estimated_pitch_; + } + TBOX ibody(c1, box(i).bottom(), c1 + estimated_pitch_, box(i).top()); + + // Collect all characters that mostly fit in the region. + // Also, their union height shouldn't be too big. + // j is signed so that the scan can step below index 0; in the + // i - j tests further down it is converted to size_t, which still + // yields the number of collected characters. + int j = i; + TBOX merged; + while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) && + merged.bounding_union(box(j)).height() < + estimated_pitch_ * (1 + kFPTolerance)) { + merged += box(j); + j--; + } + + if (j >= 0 && significant_overlap(ibody, box(j))) { + // character(j) lies on the character boundary and doesn't fit + // well into the imaginary body. + if (!is_final(j)) intersecting = true; + } else { + not_intersecting = true; + if (i - j > 0) { + // Merge character(j+1) ... character(i) because they fit + // into the body nicely. + if (i - j == 1) { + // Only one char in the imaginary body. + if (!skipped_whitespaces) mark_good(i); + // Set ibody as bounding box of this character to get + // better pitch analysis result for halfwidth glyphs + // followed by a halfwidth space.
+ if (box(i).width() <= estimated_pitch_ * 0.5) { + ibody += box(i); + character(i)->set_box(ibody); + } + character(i)->set_merge_to_prev(false); + finalize(i); + } else { + for (int k = i; k > j + 1; k--) { + character(k)->set_merge_to_prev(true); + } + } + } + } + } + if (i > 0 && is_final(i - 1)) { + // Now we repeat everything from the opposite side. Previous + // character is already finalized. Estimate the imaginary body + // including this character based on that character. + bool skipped_whitespaces = false; + float c1 = center_x(i - 1) + 1.5 * estimated_pitch_; + while (c1 < box(i).left()) { + skipped_whitespaces = true; + c1 += estimated_pitch_; + } + TBOX ibody(c1 - estimated_pitch_, box(i).bottom(), c1, box(i).top()); + + size_t j = i; + TBOX merged; + while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) && + merged.bounding_union(box(j)).height() < + estimated_pitch_ * (1 + kFPTolerance)) { + merged += box(j); + j++; + } + + if (j < num_chars() && significant_overlap(ibody, box(j))) { + if (!is_final(j)) intersecting = true; + } else { + not_intersecting = true; + if (j - i > 0) { + if (j - i == 1) { + if (!skipped_whitespaces) mark_good(i); + if (box(i).width() <= estimated_pitch_ * 0.5) { + ibody += box(i); + character(i)->set_box(ibody); + } + character(i)->set_merge_to_prev(false); + finalize(i); + } else { + // Several characters fit into the body: flag all but the + // first to be merged into their predecessor by + // MergeFragments(). + for (size_t k = i + 1; k < j; k++) { + character(k)->set_merge_to_prev(true); + } + } + } + } + } + + // This character doesn't fit well into the estimated imaginary + // bodies. Mark it as bad.
+ if (intersecting && !not_intersecting) mark_bad(i); + if (character(i)->alignment() != alignment || + character(i)->merge_to_prev()) { + changed = true; + } + } + + return changed; +} + +void FPRow::MergeFragments() { + int last_char = 0; + + for (size_t j = 0; j < num_chars(); ++j) { + if (character(j)->merge_to_prev()) { + character(last_char)->Merge(*character(j)); + character(j)->set_delete_flag(true); + clear_alignment(last_char); + character(j-1)->set_merge_to_prev(false); + } else { + last_char = j; + } + } + DeleteChars(); +} + +void FPRow::FinalizeLargeChars() { + float row_pitch = estimated_pitch(); + for (size_t i = 0; i < num_chars(); i++) { + if (is_final(i)) continue; + + // Finalize if both neighbors are finalized. We have no other choice. + if (i > 0 && is_final(i - 1) && i < num_chars() - 1 && is_final(i + 1)) { + finalize(i); + continue; + } + + float cx = center_x(i); + // Imaginary body centered on this character. Only its horizontal + // extent is used below; the vertical extent 0..1 is a placeholder. + TBOX ibody(cx - 0.5 * row_pitch, 0, cx + 0.5 * row_pitch, 1); + if (i > 0) { + // The preceding character significantly intersects with the + // imaginary body of this character. Let Pass2Analyze() handle + // this case. + if (x_overlap_fraction(ibody, box(i - 1)) > 0.1) continue; + if (!is_final(i - 1)) { + TBOX merged = box(i); + merged += box(i - 1); + if (merged.width() < row_pitch) continue; + // This character cannot be finalized yet because it can be + // merged with the previous one. Again, let Pass2Analyze() + // handle this case. + } + } + // Same checks against the following character. + if (i < num_chars() - 1) { + if (x_overlap_fraction(ibody, box(i + 1)) > 0.1) continue; + if (!is_final(i + 1)) { + TBOX merged = box(i); + merged += box(i + 1); + if (merged.width() < row_pitch) continue; + } + } + finalize(i); + } + + // Update alignment decision. We only consider finalized characters + // in pass2. E.g.
if a finalized character C has another finalized + // character L on its left and a not-finalized character R on its + // right, we mark C as good if the pitch between C and L is good, + // regardless of the pitch between C and R. + for (size_t i = 0; i < num_chars(); i++) { + if (!is_final(i)) continue; + bool good_pitch = false; + bool bad_pitch = false; + if (i > 0 && is_final(i - 1)) { + if (is_good_pitch(row_pitch, box(i - 1), box(i))) { + good_pitch = true; + } else { + bad_pitch = true; + } + } + if (i < num_chars() - 1 && is_final(i + 1)) { + if (is_good_pitch(row_pitch, box(i), box(i + 1))) { + good_pitch = true; + } else { + bad_pitch = true; + } + } + // With mixed or no evidence the current alignment is kept. + if (good_pitch && !bad_pitch) mark_good(i); + else if (!good_pitch && bad_pitch) mark_bad(i); + } +} + +// Page-level driver of the analysis. Holds one FPRow for every TO_ROW of +// the given blocks and applies each analysis step to all of them. +class FPAnalyzer { + public: + FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks); + ~FPAnalyzer() { } + + // Runs pass 1 on every row. + void Pass1Analyze() { + for (auto & row : rows_) row.Pass1Analyze(); + } + + // Estimate character pitch for each row. The argument pass1 can be + // set to true if the function is called after Pass1Analyze(), to + // eliminate some redundant computation. + void EstimatePitch(bool pass1); + + // Returns false if there are no rows, or if at most one row is left + // once the bad and tall rows counted by EstimatePitch() are set aside. + bool maybe_fixed_pitch() { + if (rows_.empty() || + rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) return false; + return true; + } + + // Merges the fragments flagged by Pass2Analyze() in every row. + void MergeFragments() { + for (auto & row : rows_) row.MergeFragments(); + } + + // Runs FinalizeLargeChars() on every row. + void FinalizeLargeChars() { + for (auto & row : rows_) row.FinalizeLargeChars(); + } + + // Runs pass 2 on every row. Returns true if any row reported a change. + bool Pass2Analyze() { + bool changed = false; + for (auto & row : rows_) { + if (row.Pass2Analyze()) { + changed = true; + } + } + return changed; + } + + // Writes the estimation results of every row to its TO_ROW. + void OutputEstimations() { + for (auto & row : rows_) row.OutputEstimations(); + // Don't we need page-level estimation of gaps/spaces?
+ } + + void DebugOutputResult() { + tprintf("FPAnalyzer: final result\n"); + for (size_t i = 0; i < rows_.size(); i++) rows_[i].DebugOutputResult(i); + } + + size_t num_rows() { + return rows_.size(); + } + + // Returns the upper limit for pass2 loop iteration. + unsigned max_iteration() { + // We're fixing at least one character per iteration. So basically + // we shouldn't require more than max_chars_per_row_ iterations. + return max_chars_per_row_ + 100; + } + + private: + ICOORD page_tr_; + std::vector<FPRow> rows_; + // Rows whose height_pitch_ratio() exceeds 1.1 and rows without any good + // pitch, respectively. Both are recounted by every EstimatePitch(). + unsigned num_tall_rows_; + unsigned num_bad_rows_; + // TODO: num_empty_rows_ is incremented, but never used otherwise. + // Counts the rows that have at most one character. + unsigned num_empty_rows_; + // Largest number of characters in any row; bounds the pass 2 loop. + unsigned max_chars_per_row_; +}; + +// Runs find_repeated_chars() on every block that has rows, then builds +// an FPRow for each row of every block. +FPAnalyzer::FPAnalyzer(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) +: page_tr_(page_tr), + num_tall_rows_(0), + num_bad_rows_(0), + num_empty_rows_(0), + max_chars_per_row_(0) +{ + TO_BLOCK_IT block_it(port_blocks); + + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) { + TO_BLOCK *block = block_it.data(); + if (!block->get_rows()->empty()) { + ASSERT_HOST(block->xheight > 0); + find_repeated_chars(block, false); + } + } + + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) { + TO_ROW_IT row_it = block_it.data()->get_rows(); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + FPRow row; + row.Init(row_it.data()); + rows_.push_back(row); + size_t num_chars = rows_.back().num_chars(); + if (num_chars <= 1) num_empty_rows_++; + if (num_chars > max_chars_per_row_) max_chars_per_row_ = num_chars; + } + } +} + +// Estimates the pitch of every row and collects page-level statistics +// relating height + gap to pitch, which may then supply the estimated +// pitch of rows that have fewer than five good pitches. +void FPAnalyzer::EstimatePitch(bool pass1) { + LocalCorrelation pitch_height_stats; + + num_tall_rows_ = 0; + num_bad_rows_ = 0; + pitch_height_stats.Clear(); + for (auto & row : rows_) { + row.EstimatePitch(pass1); + if (row.good_pitches()) { + pitch_height_stats.Add(row.height() + row.gap(), + row.pitch(), row.good_pitches()); + if (row.height_pitch_ratio() > 1.1) num_tall_rows_++;
+ } else { + num_bad_rows_++; + } + } + + pitch_height_stats.Finish(); + for (auto & row : rows_) { + if (row.good_pitches() >= 5) { + // We have enough evidence. Just use the pitch estimation + // from this row. + row.set_estimated_pitch(row.pitch()); + } else if (row.num_chars() > 1) { + // (Rows with at most one character are left untouched.) + float estimated_pitch = + pitch_height_stats.EstimateYFor(row.height() + row.gap(), + 0.1f); + // CJK characters are more likely to be fragmented than poorly + // chopped. So trust the page-level estimation of character + // pitch only if it's larger than row-level estimation or + // row-level estimation is too large (2x bigger than row height). + if (estimated_pitch > row.pitch() || + row.pitch() > row.height() * 2.0) { + row.set_estimated_pitch(estimated_pitch); + } else { + row.set_estimated_pitch(row.pitch()); + } + } + } +} + +// Entry point of the CJK fixed pitch analysis. Runs pass 1 twice, returns +// early if the page does not look fixed pitch, and otherwise repeats +// pass 2 until no row changes or the iteration limit is reached, then +// writes the estimations to the rows. +void compute_fixed_pitch_cjk(ICOORD page_tr, + TO_BLOCK_LIST *port_blocks) { + FPAnalyzer analyzer(page_tr, port_blocks); + if (analyzer.num_rows() == 0) return; + + analyzer.Pass1Analyze(); + analyzer.EstimatePitch(true); + + // Perform pass1 analysis again with the initial estimation of row + // pitches, for better estimation. + analyzer.Pass1Analyze(); + analyzer.EstimatePitch(true); + + // Early exit if the page doesn't seem to contain fixed pitch rows.
+ if (!analyzer.maybe_fixed_pitch()) { + if (textord_debug_pitch_test) { + tprintf("Page doesn't seem to contain fixed pitch rows\n"); + } + return; + } + + unsigned iteration = 0; + do { + analyzer.MergeFragments(); + analyzer.FinalizeLargeChars(); + analyzer.EstimatePitch(false); + iteration++; + } while (analyzer.Pass2Analyze() && iteration < analyzer.max_iteration()); + + if (textord_debug_pitch_test) { + tprintf("compute_fixed_pitch_cjk finished after %u iteration (limit=%u)\n", + iteration, analyzer.max_iteration()); + } + + analyzer.OutputEstimations(); + if (textord_debug_pitch_test) analyzer.DebugOutputResult(); +} + +} // namespace tesseract diff --git a/tesseract/src/textord/cjkpitch.h b/tesseract/src/textord/cjkpitch.h new file mode 100644 index 00000000..d42ab79f --- /dev/null +++ b/tesseract/src/textord/cjkpitch.h @@ -0,0 +1,75 @@ +/////////////////////////////////////////////////////////////////////// +// File: cjkpitch.h +// Description: Code to determine fixed pitchness and the pitch if fixed, +// for CJK text. +// Copyright 2011 Google Inc. All Rights Reserved. +// Author: takenaka@google.com (Hiroshi Takenaka) +// Created: Mon Jun 27 12:48:35 JST 2011 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +/////////////////////////////////////////////////////////////////////// +#ifndef CJKPITCH_H_ +#define CJKPITCH_H_ + +#include "blobbox.h" + +namespace tesseract { + +// Function to test "fixed-pitchness" of the input text and to estimate +// character pitch parameters for it, based on the CJK fixed-pitch layout +// model. +// +// This function assumes that a fixed-pitch CJK text has the following +// characteristics: +// +// - Most glyphs are designed to fit within the same sized square +// (imaginary body). Also they are aligned to the center of their +// imaginary bodies. +// - The imaginary body is always a regular rectangle. +// - There may be some extra space between character bodies +// (tracking). +// - There may be some extra space after punctuation. +// - The text is *not* space-delimited. Thus spaces are rare. +// - A character may consist of multiple unconnected blobs. +// +// The function works in two passes. On pass 1, it looks for "good" +// blobs that have the same pitch on both sides and look like complete +// CJK characters. Then it estimates the character pitch for every row, +// based on those good blobs. If we couldn't find enough good blobs for +// a row, then the pitch is estimated from other rows with similar +// character height instead. +// +// Pass 2 is an iterative process to fit the blobs into fixed-pitch +// character cells. Once we have estimated the character pitch, blobs +// that are almost as large as the pitch can be considered to be +// complete characters. And once we know that some characters are +// complete characters, we can estimate the region occupied by their +// neighbors. And so on. +// +// We repeat the process until all ambiguities are resolved. Then we make +// the final decision about fixed-pitchness of each row and compute +// pitch and spacing parameters. +// +// (If a row is considered to be proportional, pitch_decision for the +// row is set to PITCH_CORR_PROP and the later phase +// (i.e.
Textord::to_spacing()) should determine its spacing +// parameters) +// +// This function doesn't provide all information required by +// fixed_pitch_words() and the rows need to be processed with +// make_prop_words() even if they are fixed pitched. +void compute_fixed_pitch_cjk(ICOORD page_tr, // top right + TO_BLOCK_LIST *port_blocks); // input list + +} // namespace tesseract + +#endif // CJKPITCH_H_ diff --git a/tesseract/src/textord/colfind.cpp b/tesseract/src/textord/colfind.cpp new file mode 100644 index 00000000..e305a2c3 --- /dev/null +++ b/tesseract/src/textord/colfind.cpp @@ -0,0 +1,1642 @@ +/////////////////////////////////////////////////////////////////////// +// File: colfind.cpp +// Description: Class to hold BLOBNBOXs in a grid for fast access +// to neighbours. +// Author: Ray Smith +// +// (C) Copyright 2007, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +// Include automatically generated configuration file if running autoconf. 
+#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "colfind.h" + +#include "ccnontextdetect.h" +#include "colpartition.h" +#include "colpartitionset.h" +#ifndef DISABLED_LEGACY_ENGINE +#include "equationdetectbase.h" +#endif +#include "linefind.h" +#include "normalis.h" +#include "strokewidth.h" +#include "blobbox.h" +#include "scrollview.h" +#include "tablefind.h" +#include "params.h" +#include "workingpartset.h" + +#include <algorithm> + +namespace tesseract { + +// When assigning columns, the max number of misfit grid rows/ColPartitionSets +// that can be ignored. +const int kMaxIncompatibleColumnCount = 2; +// Max fraction of mean_column_gap_ for the gap between two partitions within a +// column to allow them to merge. +const double kHorizontalGapMergeFraction = 0.5; +// Minimum gutter width as a fraction of gridsize +const double kMinGutterWidthGrid = 0.5; +// Max multiple of a partition's median size as a distance threshold for +// adding noise blobs. +const double kMaxDistToPartSizeRatio = 1.5; + +#ifndef GRAPHICS_DISABLED +static BOOL_VAR(textord_tabfind_show_initial_partitions, + false, "Show partition bounds"); +static BOOL_VAR(textord_tabfind_show_reject_blobs, + false, "Show blobs rejected as noise"); +static INT_VAR(textord_tabfind_show_partitions, 0, + "Show partition bounds, waiting if >1 (ScrollView)"); +static BOOL_VAR(textord_tabfind_show_columns, false, "Show column bounds (ScrollView)"); +static BOOL_VAR(textord_tabfind_show_blocks, false, "Show final block bounds (ScrollView)"); +#endif +static BOOL_VAR(textord_tabfind_find_tables, true, "run table detection"); + +#ifndef GRAPHICS_DISABLED +ScrollView* ColumnFinder::blocks_win_ = nullptr; +#endif + +// Gridsize is an estimate of the text size in the image. A suitable value +// is in TO_BLOCK::line_size after find_components has been used to make +// the blobs. +// bleft and tright are the bounds of the image (or rectangle) being processed. 
+// vlines is a (possibly empty) list of TabVector and vertical_x and y are +// the sum logical vertical vector produced by LineFinder::FindVerticalLines. +// The TabVectors in hlines are added to horizontal_lines_. +// mean_column_gap_ starts out as the full width tright.x() - bleft.x(). +ColumnFinder::ColumnFinder(int gridsize, + const ICOORD& bleft, const ICOORD& tright, + int resolution, bool cjk_script, + double aligned_gap_fraction, + TabVector_LIST* vlines, TabVector_LIST* hlines, + int vertical_x, int vertical_y) + : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y, + resolution), + cjk_script_(cjk_script), + min_gutter_width_(static_cast<int>(kMinGutterWidthGrid * gridsize)), + mean_column_gap_(tright.x() - bleft.x()), + tabfind_aligned_gap_fraction_(aligned_gap_fraction), + deskew_(0.0f, 0.0f), + reskew_(1.0f, 0.0f), rotation_(1.0f, 0.0f), rerotate_(1.0f, 0.0f), + text_rotation_(0.0f, 0.0f), + best_columns_(nullptr), stroke_width_(nullptr), + part_grid_(gridsize, bleft, tright), nontext_map_(nullptr), + projection_(resolution), + denorm_(nullptr), input_blobs_win_(nullptr), equation_detect_(nullptr) { + TabVector_IT h_it(&horizontal_lines_); + h_it.add_list_after(hlines); +} + +// Releases what the finder still owns: the column sets, best_columns_, +// stroke_width_, the input blobs window, nontext_map_, the chain of +// DENORMs, and any boxes/cblobs left in the partition and blob lists. +ColumnFinder::~ColumnFinder() { + column_sets_.delete_data_pointers(); + delete [] best_columns_; + delete stroke_width_; + delete input_blobs_win_; + pixDestroy(&nontext_map_); + // Delete the whole chain of DENORMs by following predecessor(). + while (denorm_ != nullptr) { + DENORM* dead_denorm = denorm_; + denorm_ = const_cast<DENORM*>(denorm_->predecessor()); + delete dead_denorm; + } + + // The ColPartitions are destroyed automatically, but any boxes in + // the noise_parts_ list are owned and need to be deleted explicitly. + ColPartition_IT part_it(&noise_parts_); + for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { + ColPartition* part = part_it.data(); + part->DeleteBoxes(); + } + // Likewise any boxes in the good_parts_ list need to be deleted. + // These are just the image parts. Text parts have already given their + // boxes on to the TO_BLOCK, and have empty lists.
+ part_it.set_to_list(&good_parts_); + for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { + ColPartition* part = part_it.data(); + part->DeleteBoxes(); + } + // Also, any blobs on the image_bblobs_ list need to have their cblobs + // deleted. This only happens if there has been an early return from + // FindColumns, as in a normal return, the blobs go into the grid and + // end up in noise_parts_, good_parts_ or the output blocks. + BLOBNBOX_IT bb_it(&image_bblobs_); + for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { + BLOBNBOX* bblob = bb_it.data(); + delete bblob->cblob(); + } +} + +// Performs initial processing on the blobs in the input_block: +// Sets up the part_grid_, stroke_width_ and nontext_map_. +// Obvious noise blobs are filtered out and used to mark the nontext_map_. +// Initial stroke-width analysis is used to get local text alignment +// direction, so the textline projection_ map can be set up. +// On return, IsVerticallyAlignedText may be called (now optionally) to +// determine the gross textline alignment of the page. +// pageseg_mode is passed on to FindTextlineDirectionAndFixBrokenCJK and +// photo_mask_pix to ComputeNonTextMask. +void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode, + Pix* photo_mask_pix, + TO_BLOCK* input_block) { + part_grid_.Init(gridsize(), bleft(), tright()); + // Any previously allocated StrokeWidth is replaced by a fresh one. + delete stroke_width_; + stroke_width_ = new StrokeWidth(gridsize(), bleft(), tright()); + min_gutter_width_ = static_cast<int>(kMinGutterWidthGrid * gridsize()); + input_block->ReSetAndReFilterBlobs(); + #ifndef GRAPHICS_DISABLED + if (textord_tabfind_show_blocks) { + input_blobs_win_ = MakeWindow(0, 0, "Filtered Input Blobs"); + input_block->plot_graded_blobs(input_blobs_win_); + } + #endif // !GRAPHICS_DISABLED + SetBlockRuleEdges(input_block); + pixDestroy(&nontext_map_); + // Run a preliminary strokewidth neighbour detection on the medium blobs. + stroke_width_->SetNeighboursOnMediumBlobs(input_block); + CCNonTextDetect nontext_detect(gridsize(), bleft(), tright()); + // Remove obvious noise and make the initial non-text map.
+ nontext_map_ = nontext_detect.ComputeNonTextMask(textord_debug_tabfind, + photo_mask_pix, input_block); + stroke_width_->FindTextlineDirectionAndFixBrokenCJK(pageseg_mode, cjk_script_, + input_block); + // Clear the strokewidth grid ready for rotation or leader finding. + stroke_width_->Clear(); +} + +// Tests for vertical alignment of text (returning true if so), and generates +// a list of blobs of moderate aspect ratio, in the most frequent writing +// direction (in osd_blobs) for orientation and script detection to test +// the character orientation. +// block is the single block for the whole page or rectangle to be OCRed. +// Note that the vertical alignment may be due to text whose writing direction +// is vertical, like say Japanese, or due to text whose writing direction is +// horizontal but whose text appears vertically aligned because the image is +// not the right way up. +// This is a thin wrapper around StrokeWidth::TestVerticalTextDirection, so +// stroke_width_ must already exist (it is created by SetupAndFilterNoise). +bool ColumnFinder::IsVerticallyAlignedText(double find_vertical_text_ratio, + TO_BLOCK* block, + BLOBNBOX_CLIST* osd_blobs) { + return stroke_width_->TestVerticalTextDirection(find_vertical_text_ratio, + block, osd_blobs); +} + +// Rotates the blobs and the TabVectors so that the gross writing direction +// (text lines) is horizontal and lines are read down the page. +// Applied rotation stored in rotation_. +// A second rotation is calculated for application during recognition to +// make the rotated blobs upright for recognition. +// Subsequent rotation stored in text_rotation_. +// +// Arguments: +// block the block whose blob lists are rotated. +// vertical_text_lines true if the text lines are vertical. +// recognition_rotation [0..3] is the number of anti-clockwise 90 degree +// rotations from osd required for the text to be upright and readable.
+void ColumnFinder::CorrectOrientation(TO_BLOCK* block, + bool vertical_text_lines, + int recognition_rotation) { + const FCOORD anticlockwise90(0.0f, 1.0f); + const FCOORD clockwise90(0.0f, -1.0f); + const FCOORD rotation180(-1.0f, 0.0f); + const FCOORD norotation(1.0f, 0.0f); + + text_rotation_ = norotation; + // Rotate the page to make the text upright, as implied by + // recognition_rotation. + rotation_ = norotation; + if (recognition_rotation == 1) { + rotation_ = anticlockwise90; + } else if (recognition_rotation == 2) { + rotation_ = rotation180; + } else if (recognition_rotation == 3) { + rotation_ = clockwise90; + } + // We infer text writing direction to be vertical if there are several + // vertical text lines detected, and horizontal if not. But if the page + // orientation was determined to be 90 or 270 degrees, the true writing + // direction is the opposite of what we inferred. + if (recognition_rotation & 1) { + vertical_text_lines = !vertical_text_lines; + } + // If we still believe the writing direction is vertical, we use the + // convention of rotating the page ccw 90 degrees to make the text lines + // horizontal, and mark the blobs for rotation cw 90 degrees for + // classification so that the text order is correct after recognition. + if (vertical_text_lines) { + rotation_.rotate(anticlockwise90); + text_rotation_.rotate(clockwise90); + } + // Set rerotate_ to the inverse of rotation_. + rerotate_ = FCOORD(rotation_.x(), -rotation_.y()); + if (rotation_.x() != 1.0f || rotation_.y() != 0.0f) { + // Rotate all the blobs and tab vectors. + RotateBlobList(rotation_, &block->large_blobs); + RotateBlobList(rotation_, &block->blobs); + RotateBlobList(rotation_, &block->small_blobs); + RotateBlobList(rotation_, &block->noise_blobs); + TabFind::ResetForVerticalText(rotation_, rerotate_, &horizontal_lines_, + &min_gutter_width_); + part_grid_.Init(gridsize(), bleft(), tright()); + // Reset all blobs to initial state and filter by size. 
+ // Since they have rotated, the list they belong on could have changed. + block->ReSetAndReFilterBlobs(); + SetBlockRuleEdges(block); + stroke_width_->CorrectForRotation(rerotate_, &part_grid_); + } + if (textord_debug_tabfind) { + tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n", + vertical_text_lines, recognition_rotation, + rotation_.x(), rotation_.y(), + text_rotation_.x(), text_rotation_.y()); + } + // Setup the denormalization. + ASSERT_HOST(denorm_ == nullptr); + denorm_ = new DENORM; + denorm_->SetupNormalization(nullptr, &rotation_, nullptr, + 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f); +} + +// Finds blocks of text, image, rule line, table etc, returning them in the +// blocks and to_blocks +// (Each TO_BLOCK points to the basic BLOCK and adds more information.) +// Image blocks are generated by a combination of photo_mask_pix (which may +// NOT be nullptr) and the rejected text found during preliminary textline +// finding. +// The input_block is the result of a call to find_components, and contains +// the blobs found in the image or rectangle to be OCRed. These blobs will be +// removed and placed in the output blocks, while unused ones will be deleted. +// If single_column is true, the input is treated as single column, but +// it is still divided into blocks of equal line spacing/text size. +// scaled_color is scaled down by scaled_factor from the input color image, +// and may be nullptr if the input was not color. +// grey_pix is optional, but if present must match the photo_mask_pix in size, +// and must be a *real* grey image instead of binary_pix * 255. +// thresholds_pix is expected to be present iff grey_pix is present and +// can be an integer factor reduction of the grey_pix. It represents the +// thresholds that were used to create the binary_pix from the grey_pix. 
+// If diacritic_blobs is non-null, then diacritics/noise blobs, that would +// confuse layout analysis by causing textline overlap, are placed there, +// with the expectation that they will be reassigned to words later and +// noise/diacriticness determined via classification. +// Returns -1 if the user hits the 'd' key in the blocks window while running +// in debug mode, which requests a retry with more debug info. +int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, + int scaled_factor, TO_BLOCK* input_block, + Pix* photo_mask_pix, Pix* thresholds_pix, + Pix* grey_pix, DebugPixa* pixa_debug, + BLOCK_LIST* blocks, BLOBNBOX_LIST* diacritic_blobs, + TO_BLOCK_LIST* to_blocks) { + pixOr(photo_mask_pix, photo_mask_pix, nontext_map_); + stroke_width_->FindLeaderPartitions(input_block, &part_grid_); + stroke_width_->RemoveLineResidue(&big_parts_); + FindInitialTabVectors(nullptr, min_gutter_width_, tabfind_aligned_gap_fraction_, + input_block); + SetBlockRuleEdges(input_block); + stroke_width_->GradeBlobsIntoPartitions( + pageseg_mode, rerotate_, input_block, nontext_map_, denorm_, cjk_script_, + &projection_, diacritic_blobs, &part_grid_, &big_parts_); + if (!PSM_SPARSE(pageseg_mode)) { + ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, + input_block, this, pixa_debug, &part_grid_, + &big_parts_); + ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_, + photo_mask_pix); + ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, + input_block, this, pixa_debug, &part_grid_, + &big_parts_); + } + part_grid_.ReTypeBlobs(&image_bblobs_); + TidyBlobs(input_block); + Reset(); + // TODO(rays) need to properly handle big_parts_. + ColPartition_IT p_it(&big_parts_); + for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) + p_it.data()->DisownBoxesNoAssert(); + big_parts_.clear(); + delete stroke_width_; + stroke_width_ = nullptr; + // Compute the edge offsets whether or not there is a grey_pix. 
It is done + // here as the c_blobs haven't been touched by rotation or anything yet, + // so no denorm is required, yet the text has been separated from image, so + // no time is wasted running it on image blobs. + input_block->ComputeEdgeOffsets(thresholds_pix, grey_pix); + + // A note about handling right-to-left scripts (Hebrew/Arabic): + // The columns must be reversed and come out in right-to-left instead of + // the normal left-to-right order. Because the left-to-right ordering + // is implicit in many data structures, it is simpler to fool the algorithms + // into thinking they are dealing with left-to-right text. + // To do this, we reflect the needed data in the y-axis and then reflect + // the blocks back after they have been created. This is a temporary + // arrangement that is confined to this function only, so the reflection + // is completely invisible in the output blocks. + // The only objects reflected are: + // The vertical separator lines that have already been found; + // The bounding boxes of all BLOBNBOXES on all lists on the input_block + // plus the image_bblobs. The outlines are not touched, since they are + // not looked at. + bool input_is_rtl = input_block->block->right_to_left(); + if (input_is_rtl) { + // Reflect the vertical separator lines (member of TabFind). + ReflectInYAxis(); + // Reflect the blob boxes. + ReflectForRtl(input_block, &image_bblobs_); + part_grid_.ReflectInYAxis(); + } + + if (!PSM_SPARSE(pageseg_mode)) { + if (!PSM_COL_FIND_ENABLED(pageseg_mode)) { + // No tab stops needed. Just the grid that FindTabVectors makes. + DontFindTabVectors(&image_bblobs_, input_block, &deskew_, &reskew_); + } else { + SetBlockRuleEdges(input_block); + // Find the tab stops, estimate skew, and deskew the tabs, blobs and + // part_grid_. + FindTabVectors(&horizontal_lines_, &image_bblobs_, input_block, + min_gutter_width_, tabfind_aligned_gap_fraction_, + &part_grid_, &deskew_, &reskew_); + // Add the deskew to the denorm_. 
+ auto* new_denorm = new DENORM; + new_denorm->SetupNormalization(nullptr, &deskew_, denorm_, + 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f); + denorm_ = new_denorm; + } + SetBlockRuleEdges(input_block); + part_grid_.SetTabStops(this); + + // Make the column_sets_. + if (!MakeColumns(false)) { + tprintf("Empty page!!\n"); + part_grid_.DeleteParts(); + return 0; // This is an empty page. + } + + // Refill the grid using rectangular spreading, and get the benefit + // of the completed tab vectors marking the rule edges of each blob. + Clear(); + #ifndef GRAPHICS_DISABLED + if (textord_tabfind_show_reject_blobs) { + ScrollView* rej_win = MakeWindow(500, 300, "Rejected blobs"); + input_block->plot_graded_blobs(rej_win); + } + #endif // !GRAPHICS_DISABLED + InsertBlobsToGrid(false, false, &image_bblobs_, this); + InsertBlobsToGrid(true, true, &input_block->blobs, this); + + part_grid_.GridFindMargins(best_columns_); + // Split and merge the partitions by looking at local neighbours. + GridSplitPartitions(); + // Resolve unknown partitions by adding to an existing partition, fixing + // the type, or declaring them noise. + part_grid_.GridFindMargins(best_columns_); + GridMergePartitions(); + // Insert any unused noise blobs that are close enough to an appropriate + // partition. + InsertRemainingNoise(input_block); + // Add horizontal line separators as partitions. + GridInsertHLinePartitions(); + GridInsertVLinePartitions(); + // Recompute margins based on a local neighbourhood search. 
+ part_grid_.GridFindMargins(best_columns_); + SetPartitionTypes(); + } +#ifndef GRAPHICS_DISABLED + if (textord_tabfind_show_initial_partitions) { + ScrollView* part_win = MakeWindow(100, 300, "InitialPartitions"); + part_grid_.DisplayBoxes(part_win); + DisplayTabVectors(part_win); + } +#endif + if (!PSM_SPARSE(pageseg_mode)) { + #ifndef DISABLED_LEGACY_ENGINE + if (equation_detect_) { + equation_detect_->FindEquationParts(&part_grid_, best_columns_); + } + #endif + if (textord_tabfind_find_tables) { + TableFinder table_finder; + table_finder.Init(gridsize(), bleft(), tright()); + table_finder.set_resolution(resolution_); + table_finder.set_left_to_right_language( + !input_block->block->right_to_left()); + // Copy cleaned partitions from part_grid_ to clean_part_grid_ and + // insert dot-like noise into period_grid_ + table_finder.InsertCleanPartitions(&part_grid_, input_block); + // Get Table Regions + table_finder.LocateTables(&part_grid_, best_columns_, WidthCB(), reskew_); + } + GridRemoveUnderlinePartitions(); + part_grid_.DeleteUnknownParts(input_block); + + // Build the partitions into chains that belong in the same block and + // refine into one-to-one links, then smooth the types within each chain. + part_grid_.FindPartitionPartners(); + part_grid_.FindFigureCaptions(); + part_grid_.RefinePartitionPartners(true); + SmoothPartnerRuns(); + + #ifndef GRAPHICS_DISABLED + if (textord_tabfind_show_partitions) { + ScrollView* window = MakeWindow(400, 300, "Partitions"); + if (window != nullptr) { + part_grid_.DisplayBoxes(window); + if (!textord_debug_printable) + DisplayTabVectors(window); + if (window != nullptr && textord_tabfind_show_partitions > 1) { + delete window->AwaitEvent(SVET_DESTROY); + } + } + } + #endif // !GRAPHICS_DISABLED + part_grid_.AssertNoDuplicates(); + } + // Ownership of the ColPartitions moves from part_sets_ to part_grid_ here, + // and ownership of the BLOBNBOXes moves to the ColPartitions. 
+ // (They were previously owned by the block or the image_bblobs list.) + ReleaseBlobsAndCleanupUnused(input_block); + // Ownership of the ColPartitions moves from part_grid_ to good_parts_ and + // noise_parts_ here. In text blocks, ownership of the BLOBNBOXes moves + // from the ColPartitions to the output TO_BLOCK. In non-text, the + // BLOBNBOXes stay with the ColPartitions and get deleted in the destructor. + if (PSM_SPARSE(pageseg_mode)) + part_grid_.ExtractPartitionsAsBlocks(blocks, to_blocks); + else + TransformToBlocks(blocks, to_blocks); + if (textord_debug_tabfind) { + tprintf("Found %d blocks, %d to_blocks\n", + blocks->length(), to_blocks->length()); + } + +#ifndef GRAPHICS_DISABLED + DisplayBlocks(blocks); +#endif + RotateAndReskewBlocks(input_is_rtl, to_blocks); + int result = 0; + #ifndef GRAPHICS_DISABLED + if (blocks_win_ != nullptr) { + bool waiting = false; + do { + waiting = false; + SVEvent* event = blocks_win_->AwaitEvent(SVET_ANY); + if (event->type == SVET_INPUT && event->parameter != nullptr) { + if (*event->parameter == 'd') + result = -1; + else + blocks->clear(); + } else if (event->type == SVET_DESTROY) { + blocks_win_ = nullptr; + } else { + waiting = true; + } + delete event; + } while (waiting); + } + #endif // !GRAPHICS_DISABLED + return result; +} + +// Get the rotation required to deskew, and its inverse rotation. +void ColumnFinder::GetDeskewVectors(FCOORD* deskew, FCOORD* reskew) { + *reskew = reskew_; + *deskew = reskew_; + deskew->set_y(-deskew->y()); +} + +#ifndef DISABLED_LEGACY_ENGINE +void ColumnFinder::SetEquationDetect(EquationDetectBase* detect) { + equation_detect_ = detect; +} +#endif + +//////////////// PRIVATE CODE ///////////////////////// + +#ifndef GRAPHICS_DISABLED + +// Displays the blob and block bounding boxes in a window called Blocks. 
+void ColumnFinder::DisplayBlocks(BLOCK_LIST* blocks) { + if (textord_tabfind_show_blocks) { + if (blocks_win_ == nullptr) + blocks_win_ = MakeWindow(700, 300, "Blocks"); + else + blocks_win_->Clear(); + DisplayBoxes(blocks_win_); + BLOCK_IT block_it(blocks); + int serial = 1; + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) { + BLOCK* block = block_it.data(); + block->pdblk.plot(blocks_win_, serial++, + textord_debug_printable ? ScrollView::BLUE + : ScrollView::GREEN); + } + blocks_win_->Update(); + } +} + +// Displays the column edges at each grid y coordinate defined by +// best_columns_. +void ColumnFinder::DisplayColumnBounds(PartSetVector* sets) { + ScrollView* col_win = MakeWindow(50, 300, "Columns"); + DisplayBoxes(col_win); + col_win->Pen(textord_debug_printable ? ScrollView::BLUE : ScrollView::GREEN); + for (int i = 0; i < gridheight_; ++i) { + ColPartitionSet* columns = best_columns_[i]; + if (columns != nullptr) + columns->DisplayColumnEdges(i * gridsize_, (i + 1) * gridsize_, col_win); + } +} + +#endif // !GRAPHICS_DISABLED + +// Sets up column_sets_ (the determined column layout at each horizontal +// slice). Returns false if the page is empty. +bool ColumnFinder::MakeColumns(bool single_column) { + // The part_sets_ are a temporary structure used during column creation, + // and is a vector of ColPartitionSets, representing ColPartitions found + // at horizontal slices through the page. + PartSetVector part_sets; + if (!single_column) { + if (!part_grid_.MakeColPartSets(&part_sets)) + return false; // Empty page. + ASSERT_HOST(part_grid_.gridheight() == gridheight_); + // Try using only the good parts first. 
+ bool good_only = true; + do { + for (int i = 0; i < gridheight_; ++i) { + ColPartitionSet* line_set = part_sets.get(i); + if (line_set != nullptr && line_set->LegalColumnCandidate()) { + ColPartitionSet* column_candidate = line_set->Copy(good_only); + if (column_candidate != nullptr) + column_candidate->AddToColumnSetsIfUnique(&column_sets_, WidthCB()); + } + } + good_only = !good_only; + } while (column_sets_.empty() && !good_only); + if (textord_debug_tabfind) + PrintColumnCandidates("Column candidates"); + // Improve the column candidates against themselves. + ImproveColumnCandidates(&column_sets_, &column_sets_); + if (textord_debug_tabfind) + PrintColumnCandidates("Improved columns"); + // Improve the column candidates using the part_sets_. + ImproveColumnCandidates(&part_sets, &column_sets_); + } + ColPartitionSet* single_column_set = + part_grid_.MakeSingleColumnSet(WidthCB()); + if (single_column_set != nullptr) { + // Always add the single column set as a backup even if not in + // single column mode. + single_column_set->AddToColumnSetsIfUnique(&column_sets_, WidthCB()); + } + if (textord_debug_tabfind) + PrintColumnCandidates("Final Columns"); + bool has_columns = !column_sets_.empty(); + if (has_columns) { + // Divide the page into sections of uniform column layout. + bool any_multi_column = AssignColumns(part_sets); +#ifndef GRAPHICS_DISABLED + if (textord_tabfind_show_columns) { + DisplayColumnBounds(&part_sets); + } +#endif + ComputeMeanColumnGap(any_multi_column); + } + for (int i = 0; i < part_sets.size(); ++i) { + ColPartitionSet* line_set = part_sets.get(i); + if (line_set != nullptr) { + line_set->RelinquishParts(); + delete line_set; + } + } + return has_columns; +} + +// Attempt to improve the column_candidates by expanding the columns +// and adding new partitions from the partition sets in src_sets. +// Src_sets may be equal to column_candidates, in which case it will +// use them as a source to improve themselves. 
+void ColumnFinder::ImproveColumnCandidates(PartSetVector* src_sets, + PartSetVector* column_sets) { + PartSetVector temp_cols; + temp_cols.move(column_sets); + if (src_sets == column_sets) + src_sets = &temp_cols; + int set_size = temp_cols.size(); + // Try using only the good parts first. + bool good_only = true; + do { + for (int i = 0; i < set_size; ++i) { + ColPartitionSet* column_candidate = temp_cols.get(i); + ASSERT_HOST(column_candidate != nullptr); + ColPartitionSet* improved = column_candidate->Copy(good_only); + if (improved != nullptr) { + improved->ImproveColumnCandidate(WidthCB(), src_sets); + improved->AddToColumnSetsIfUnique(column_sets, WidthCB()); + } + } + good_only = !good_only; + } while (column_sets->empty() && !good_only); + if (column_sets->empty()) + column_sets->move(&temp_cols); + else + temp_cols.delete_data_pointers(); +} + +// Prints debug information on the column candidates. +void ColumnFinder::PrintColumnCandidates(const char* title) { + int set_size = column_sets_.size(); + tprintf("Found %d %s:\n", set_size, title); + if (textord_debug_tabfind >= 3) { + for (int i = 0; i < set_size; ++i) { + ColPartitionSet* column_set = column_sets_.get(i); + column_set->Print(); + } + } +} + +// Finds the optimal set of columns that cover the entire image with as +// few changes in column partition as possible. +// NOTE: this could be thought of as an optimization problem, but a simple +// greedy algorithm is used instead. The algorithm repeatedly finds the modal +// compatible column in an unassigned region and uses that with the extra +// tweak of extending the modal region over small breaks in compatibility. +// Where modal regions overlap, the boundary is chosen so as to minimize +// the cost in terms of ColPartitions not fitting an approved column. +// Returns true if any part of the page is multi-column. 
+bool ColumnFinder::AssignColumns(const PartSetVector& part_sets) { + int set_count = part_sets.size(); + ASSERT_HOST(set_count == gridheight()); + // Allocate and init the best_columns_. + best_columns_ = new ColPartitionSet*[set_count]; + for (int y = 0; y < set_count; ++y) + best_columns_[y] = nullptr; + int column_count = column_sets_.size(); + // column_set_costs[part_sets_ index][column_sets_ index] is + // < INT32_MAX if the partition set is compatible with the column set, + // in which case its value is the cost for that set used in deciding + // which competing set to assign. + // any_columns_possible[part_sets_ index] is true if any of + // possible_column_sets[part_sets_ index][*] is < INT32_MAX. + // assigned_costs[part_sets_ index] is set to the column_set_costs + // of the assigned column_sets_ index or INT32_MAX if none is set. + // On return the best_columns_ member is set. + bool* any_columns_possible = new bool[set_count]; + int* assigned_costs = new int[set_count]; + int** column_set_costs = new int*[set_count]; + // Set possible column_sets to indicate whether each set is compatible + // with each column. 
+ for (int part_i = 0; part_i < set_count; ++part_i) { + ColPartitionSet* line_set = part_sets.get(part_i); + bool debug = line_set != nullptr && + WithinTestRegion(2, line_set->bounding_box().left(), + line_set->bounding_box().bottom()); + column_set_costs[part_i] = new int[column_count]; + any_columns_possible[part_i] = false; + assigned_costs[part_i] = INT32_MAX; + for (int col_i = 0; col_i < column_count; ++col_i) { + if (line_set != nullptr && + column_sets_.get(col_i)->CompatibleColumns(debug, line_set, + WidthCB())) { + column_set_costs[part_i][col_i] = + column_sets_.get(col_i)->UnmatchedWidth(line_set); + any_columns_possible[part_i] = true; + } else { + column_set_costs[part_i][col_i] = INT32_MAX; + if (debug) + tprintf("Set id %d did not match at y=%d, lineset =%p\n", + col_i, part_i, line_set); + } + } + } + bool any_multi_column = false; + // Assign a column set to each vertical grid position. + // While there is an unassigned range, find its mode. + int start, end; + while (BiggestUnassignedRange(set_count, any_columns_possible, + &start, &end)) { + if (textord_debug_tabfind >= 2) + tprintf("Biggest unassigned range = %d- %d\n", start, end); + // Find the modal column_set_id in the range. + int column_set_id = RangeModalColumnSet(column_set_costs, + assigned_costs, start, end); + if (textord_debug_tabfind >= 2) { + tprintf("Range modal column id = %d\n", column_set_id); + column_sets_.get(column_set_id)->Print(); + } + // Now find the longest run of the column_set_id in the range. + ShrinkRangeToLongestRun(column_set_costs, assigned_costs, + any_columns_possible, + column_set_id, &start, &end); + if (textord_debug_tabfind >= 2) + tprintf("Shrunk range = %d- %d\n", start, end); + // Extend the start and end past the longest run, while there are + // only small gaps in compatibility that can be overcome by larger + // regions of compatibility beyond. 
+ ExtendRangePastSmallGaps(column_set_costs, assigned_costs, + any_columns_possible, + column_set_id, -1, -1, &start); + --end; + ExtendRangePastSmallGaps(column_set_costs, assigned_costs, + any_columns_possible, + column_set_id, 1, set_count, &end); + ++end; + if (textord_debug_tabfind) + tprintf("Column id %d applies to range = %d - %d\n", + column_set_id, start, end); + // Assign the column to the range, which now may overlap with other ranges. + AssignColumnToRange(column_set_id, start, end, column_set_costs, + assigned_costs); + if (column_sets_.get(column_set_id)->GoodColumnCount() > 1) + any_multi_column = true; + } + // If anything remains unassigned, the whole lot is unassigned, so + // arbitrarily assign id 0. + if (best_columns_[0] == nullptr) { + AssignColumnToRange(0, 0, gridheight_, column_set_costs, assigned_costs); + } + // Free memory. + for (int i = 0; i < set_count; ++i) { + delete [] column_set_costs[i]; + } + delete [] assigned_costs; + delete [] any_columns_possible; + delete [] column_set_costs; + return any_multi_column; +} + +// Finds the biggest range in part_sets_ that has no assigned column, but +// column assignment is possible. +bool ColumnFinder::BiggestUnassignedRange(int set_count, + const bool* any_columns_possible, + int* best_start, int* best_end) { + int best_range_size = 0; + *best_start = set_count; + *best_end = set_count; + int end = set_count; + for (int start = 0; start < gridheight_; start = end) { + // Find the first unassigned index in start. + while (start < set_count) { + if (best_columns_[start] == nullptr && any_columns_possible[start]) + break; + ++start; + } + // Find the first past the end and count the good ones in between. + int range_size = 1; // Number of non-null, but unassigned line sets. 
+ end = start + 1; + while (end < set_count) { + if (best_columns_[end] != nullptr) + break; + if (any_columns_possible[end]) + ++range_size; + ++end; + } + if (start < set_count && range_size > best_range_size) { + best_range_size = range_size; + *best_start = start; + *best_end = end; + } + } + return *best_start < *best_end; +} + +// Finds the modal compatible column_set_ index within the given range. +int ColumnFinder::RangeModalColumnSet(int** column_set_costs, + const int* assigned_costs, + int start, int end) { + int column_count = column_sets_.size(); + STATS column_stats(0, column_count); + for (int part_i = start; part_i < end; ++part_i) { + for (int col_j = 0; col_j < column_count; ++col_j) { + if (column_set_costs[part_i][col_j] < assigned_costs[part_i]) + column_stats.add(col_j, 1); + } + } + ASSERT_HOST(column_stats.get_total() > 0); + return column_stats.mode(); +} + +// Given that there are many column_set_id compatible columns in the range, +// shrinks the range to the longest contiguous run of compatibility, allowing +// gaps where no columns are possible, but not where competing columns are +// possible. +void ColumnFinder::ShrinkRangeToLongestRun(int** column_set_costs, + const int* assigned_costs, + const bool* any_columns_possible, + int column_set_id, + int* best_start, int* best_end) { + // orig_start and orig_end are the maximum range we will look at. + int orig_start = *best_start; + int orig_end = *best_end; + int best_range_size = 0; + *best_start = orig_end; + *best_end = orig_end; + int end = orig_end; + for (int start = orig_start; start < orig_end; start = end) { + // Find the first possible + while (start < orig_end) { + if (column_set_costs[start][column_set_id] < assigned_costs[start] || + !any_columns_possible[start]) + break; + ++start; + } + // Find the first past the end. 
+ end = start + 1; + while (end < orig_end) { + if (column_set_costs[end][column_set_id] >= assigned_costs[start] && + any_columns_possible[end]) + break; + ++end; + } + if (start < orig_end && end - start > best_range_size) { + best_range_size = end - start; + *best_start = start; + *best_end = end; + } + } +} + +// Moves start in the direction of step, up to, but not including end while +// the only incompatible regions are no more than kMaxIncompatibleColumnCount +// in size, and the compatible regions beyond are bigger. +void ColumnFinder::ExtendRangePastSmallGaps(int** column_set_costs, + const int* assigned_costs, + const bool* any_columns_possible, + int column_set_id, + int step, int end, int* start) { + if (textord_debug_tabfind > 2) + tprintf("Starting expansion at %d, step=%d, limit=%d\n", + *start, step, end); + if (*start == end) + return; // Cannot be expanded. + + int barrier_size = 0; + int good_size = 0; + do { + // Find the size of the incompatible barrier. + barrier_size = 0; + int i; + for (i = *start + step; i != end; i += step) { + if (column_set_costs[i][column_set_id] < assigned_costs[i]) + break; // We are back on. + // Locations where none are possible don't count. + if (any_columns_possible[i]) + ++barrier_size; + } + if (textord_debug_tabfind > 2) + tprintf("At %d, Barrier size=%d\n", i, barrier_size); + if (barrier_size > kMaxIncompatibleColumnCount) + return; // Barrier too big. + if (i == end) { + // We can't go any further, but the barrier was small, so go to the end. + *start = i - step; + return; + } + // Now find the size of the good region on the other side. + good_size = 1; + for (i += step; i != end; i += step) { + if (column_set_costs[i][column_set_id] < assigned_costs[i]) + ++good_size; + else if (any_columns_possible[i]) + break; + } + if (textord_debug_tabfind > 2) + tprintf("At %d, good size = %d\n", i, good_size); + // If we had enough good ones we can extend the start and keep looking. 
+ if (good_size >= barrier_size) + *start = i - step; + } while (good_size >= barrier_size); +} + +// Assigns the given column_set_id to the given range. +void ColumnFinder::AssignColumnToRange(int column_set_id, int start, int end, + int** column_set_costs, + int* assigned_costs) { + ColPartitionSet* column_set = column_sets_.get(column_set_id); + for (int i = start; i < end; ++i) { + assigned_costs[i] = column_set_costs[i][column_set_id]; + best_columns_[i] = column_set; + } +} + +// Computes the mean_column_gap_. +void ColumnFinder::ComputeMeanColumnGap(bool any_multi_column) { + int total_gap = 0; + int total_width = 0; + int gap_samples = 0; + int width_samples = 0; + for (int i = 0; i < gridheight_; ++i) { + ASSERT_HOST(best_columns_[i] != nullptr); + best_columns_[i]->AccumulateColumnWidthsAndGaps(&total_width, + &width_samples, + &total_gap, + &gap_samples); + } + mean_column_gap_ = any_multi_column && gap_samples > 0 + ? total_gap / gap_samples : width_samples > 0 + ? total_width / width_samples : 0; +} + +//////// Functions that manipulate ColPartitions in the part_grid_ ///// +//////// to split, merge, find margins, and find types. ////////////// + +// Helper to delete all the deletable blobs on the list. Owned blobs are +// extracted from the list, but not deleted, leaving them owned by the owner(). +static void ReleaseAllBlobsAndDeleteUnused(BLOBNBOX_LIST* blobs) { + for (BLOBNBOX_IT blob_it(blobs); !blob_it.empty(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.extract(); + if (blob->owner() == nullptr) { + delete blob->cblob(); + delete blob; + } + } +} + +// Hoovers up all un-owned blobs and deletes them. +// The rest get released from the block so the ColPartitions can pass +// ownership to the output blocks. 
+void ColumnFinder::ReleaseBlobsAndCleanupUnused(TO_BLOCK* block) { + ReleaseAllBlobsAndDeleteUnused(&block->blobs); + ReleaseAllBlobsAndDeleteUnused(&block->small_blobs); + ReleaseAllBlobsAndDeleteUnused(&block->noise_blobs); + ReleaseAllBlobsAndDeleteUnused(&block->large_blobs); + ReleaseAllBlobsAndDeleteUnused(&image_bblobs_); +} + +// Splits partitions that cross columns where they have nothing in the gap. +void ColumnFinder::GridSplitPartitions() { + // Iterate the ColPartitions in the grid. + GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> + gsearch(&part_grid_); + gsearch.StartFullSearch(); + ColPartition* dont_repeat = nullptr; + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + if (part->blob_type() < BRT_UNKNOWN || part == dont_repeat) + continue; // Only applies to text partitions. + ColPartitionSet* column_set = best_columns_[gsearch.GridY()]; + int first_col = -1; + int last_col = -1; + // Find which columns the partition spans. + part->ColumnRange(resolution_, column_set, &first_col, &last_col); + if (first_col > 0) + --first_col; + // Convert output column indices to physical column indices. + first_col /= 2; + last_col /= 2; + // We will only consider cases where a partition spans two columns, + // since a heading that spans more columns than that is most likely + // genuine. + if (last_col != first_col + 1) + continue; + // Set up a rectangle search x-bounded by the column gap and y by the part. 
+ int y = part->MidY(); + TBOX margin_box = part->bounding_box(); + bool debug = AlignedBlob::WithinTestRegion(2, margin_box.left(), + margin_box.bottom()); + if (debug) { + tprintf("Considering partition for GridSplit:"); + part->Print(); + } + ColPartition* column = column_set->GetColumnByIndex(first_col); + if (column == nullptr) + continue; + margin_box.set_left(column->RightAtY(y) + 2); + column = column_set->GetColumnByIndex(last_col); + if (column == nullptr) + continue; + margin_box.set_right(column->LeftAtY(y) - 2); + // TODO(rays) Decide whether to keep rectangular filling or not in the + // main grid and therefore whether we need a fancier search here. + // Now run the rect search on the main blob grid. + GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> rectsearch(this); + if (debug) { + tprintf("Searching box (%d,%d)->(%d,%d)\n", + margin_box.left(), margin_box.bottom(), + margin_box.right(), margin_box.top()); + part->Print(); + } + rectsearch.StartRectSearch(margin_box); + BLOBNBOX* bbox; + while ((bbox = rectsearch.NextRectSearch()) != nullptr) { + if (bbox->bounding_box().overlap(margin_box)) + break; + } + if (bbox == nullptr) { + // There seems to be nothing in the hole, so split the partition. 
+ gsearch.RemoveBBox(); + int x_middle = (margin_box.left() + margin_box.right()) / 2; + if (debug) { + tprintf("Splitting part at %d:", x_middle); + part->Print(); + } + ColPartition* split_part = part->SplitAt(x_middle); + if (split_part != nullptr) { + if (debug) { + tprintf("Split result:"); + part->Print(); + split_part->Print(); + } + part_grid_.InsertBBox(true, true, split_part); + } else { + // Split had no effect + if (debug) + tprintf("Split had no effect\n"); + dont_repeat = part; + } + part_grid_.InsertBBox(true, true, part); + gsearch.RepositionIterator(); + } else if (debug) { + tprintf("Part cannot be split: blob (%d,%d)->(%d,%d) in column gap\n", + bbox->bounding_box().left(), bbox->bounding_box().bottom(), + bbox->bounding_box().right(), bbox->bounding_box().top()); + } + } +} + +// Merges partitions where there is vertical overlap, within a single column, +// and the horizontal gap is small enough. +void ColumnFinder::GridMergePartitions() { + // Iterate the ColPartitions in the grid. + GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> + gsearch(&part_grid_); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + if (part->IsUnMergeableType()) + continue; + // Set up a rectangle search x-bounded by the column and y by the part. 
+ ColPartitionSet* columns = best_columns_[gsearch.GridY()]; + TBOX box = part->bounding_box(); + bool debug = AlignedBlob::WithinTestRegion(1, box.left(), box.bottom()); + if (debug) { + tprintf("Considering part for merge at:"); + part->Print(); + } + int y = part->MidY(); + ColPartition* left_column = columns->ColumnContaining(box.left(), y); + ColPartition* right_column = columns->ColumnContaining(box.right(), y); + if (left_column == nullptr || right_column != left_column) { + if (debug) + tprintf("In different columns\n"); + continue; + } + box.set_left(left_column->LeftAtY(y)); + box.set_right(right_column->RightAtY(y)); + // Now run the rect search. + bool modified_box = false; + GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> + rsearch(&part_grid_); + rsearch.SetUniqueMode(true); + rsearch.StartRectSearch(box); + ColPartition* neighbour; + + while ((neighbour = rsearch.NextRectSearch()) != nullptr) { + if (neighbour == part || neighbour->IsUnMergeableType()) + continue; + const TBOX& neighbour_box = neighbour->bounding_box(); + if (debug) { + tprintf("Considering merge with neighbour at:"); + neighbour->Print(); + } + if (neighbour_box.right() < box.left() || + neighbour_box.left() > box.right()) + continue; // Not within the same column. + if (part->VSignificantCoreOverlap(*neighbour) && + part->TypesMatch(*neighbour)) { + // There is vertical overlap and the gross types match, but only + // merge if the horizontal gap is small enough, as one of the + // partitions may be a figure caption within a column. + // If there is only one column, then the mean_column_gap_ is large + // enough to allow almost any merge, by being the mean column width. + const TBOX& part_box = part->bounding_box(); + // Don't merge if there is something else in the way. Use the margin + // to decide, and check both to allow a bit of overlap. 
+ if (neighbour_box.left() > part->right_margin() && + part_box.right() < neighbour->left_margin()) + continue; // Neighbour is too far to the right. + if (neighbour_box.right() < part->left_margin() && + part_box.left() > neighbour->right_margin()) + continue; // Neighbour is too far to the left. + int h_gap = std::max(part_box.left(), neighbour_box.left()) - + std::min(part_box.right(), neighbour_box.right()); + if (h_gap < mean_column_gap_ * kHorizontalGapMergeFraction || + part_box.width() < mean_column_gap_ || + neighbour_box.width() < mean_column_gap_) { + if (debug) { + tprintf("Running grid-based merge between:\n"); + part->Print(); + neighbour->Print(); + } + rsearch.RemoveBBox(); + if (!modified_box) { + // We are going to modify part, so remove it and re-insert it after. + gsearch.RemoveBBox(); + rsearch.RepositionIterator(); + modified_box = true; + } + part->Absorb(neighbour, WidthCB()); + } else if (debug) { + tprintf("Neighbour failed hgap test\n"); + } + } else if (debug) { + tprintf("Neighbour failed overlap or typesmatch test\n"); + } + } + if (modified_box) { + // We modified the box of part, so re-insert it into the grid. + // This does no harm in the current cell, as it already exists there, + // but it needs to exist in all the cells covered by its bounding box, + // or it will never be found by a full search. + // Because the box has changed, it has to be removed first, otherwise + // add_sorted may fail to keep a single copy of the pointer. + part_grid_.InsertBBox(true, true, part); + gsearch.RepositionIterator(); + } + } +} + +// Inserts remaining noise blobs into the most applicable partition if any. +// If there is no applicable partition, then the blobs are deleted. 
+void ColumnFinder::InsertRemainingNoise(TO_BLOCK* block) { + BLOBNBOX_IT blob_it(&block->noise_blobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + if (blob->owner() != nullptr) continue; + TBOX search_box(blob->bounding_box()); + bool debug = WithinTestRegion(2, search_box.left(), search_box.bottom()); + search_box.pad(gridsize(), gridsize()); + // Setup a rectangle search to find the best partition to merge with. + ColPartitionGridSearch rsearch(&part_grid_); + rsearch.SetUniqueMode(true); + rsearch.StartRectSearch(search_box); + ColPartition* part; + ColPartition* best_part = nullptr; + int best_distance = 0; + while ((part = rsearch.NextRectSearch()) != nullptr) { + if (part->IsUnMergeableType()) + continue; + int distance = projection_.DistanceOfBoxFromPartition( + blob->bounding_box(), *part, denorm_, debug); + if (best_part == nullptr || distance < best_distance) { + best_part = part; + best_distance = distance; + } + } + if (best_part != nullptr && + best_distance < kMaxDistToPartSizeRatio * best_part->median_height()) { + // Close enough to merge. + if (debug) { + tprintf("Adding noise blob with distance %d, thr=%g:box:", + best_distance, + kMaxDistToPartSizeRatio * best_part->median_height()); + blob->bounding_box().print(); + tprintf("To partition:"); + best_part->Print(); + } + part_grid_.RemoveBBox(best_part); + best_part->AddBox(blob); + part_grid_.InsertBBox(true, true, best_part); + blob->set_owner(best_part); + blob->set_flow(best_part->flow()); + blob->set_region_type(best_part->blob_type()); + } else { + // Mark the blob for deletion. + blob->set_region_type(BRT_NOISE); + } + } + // Delete the marked blobs, clearing neighbour references. + block->DeleteUnownedNoise(); +} + +// Helper makes a box from a horizontal line. 
+static TBOX BoxFromHLine(const TabVector* hline) { + int top = std::max(hline->startpt().y(), hline->endpt().y()); + int bottom = std::min(hline->startpt().y(), hline->endpt().y()); + top += hline->mean_width(); + if (top == bottom) { + if (bottom > 0) + --bottom; + else + ++top; + } + return TBOX(hline->startpt().x(), bottom, hline->endpt().x(), top); +} + +// Remove partitions that come from horizontal lines that look like +// underlines, but are not part of a table. +void ColumnFinder::GridRemoveUnderlinePartitions() { + TabVector_IT hline_it(&horizontal_lines_); + for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) { + TabVector* hline = hline_it.data(); + if (hline->intersects_other_lines()) + continue; + TBOX line_box = BoxFromHLine(hline); + TBOX search_box = line_box; + search_box.pad(0, line_box.height()); + ColPartitionGridSearch part_search(&part_grid_); + part_search.SetUniqueMode(true); + part_search.StartRectSearch(search_box); + ColPartition* covered; + bool touched_table = false; + bool touched_text = false; + ColPartition* line_part = nullptr; + while ((covered = part_search.NextRectSearch()) != nullptr) { + if (covered->type() == PT_TABLE) { + touched_table = true; + break; + } else if (covered->IsTextType()) { + // TODO(rays) Add a list of underline sections to ColPartition. + int text_bottom = covered->median_bottom(); + if (line_box.bottom() <= text_bottom && text_bottom <= search_box.top()) + touched_text = true; + } else if (covered->blob_type() == BRT_HLINE && + line_box.contains(covered->bounding_box()) && + // not if same instance (identical to hline) + !TBOX(covered->bounding_box()).contains(line_box)) { + line_part = covered; + } + } + if (line_part != nullptr && !touched_table && touched_text) { + part_grid_.RemoveBBox(line_part); + delete line_part; + } + } +} + +// Add horizontal line separators as partitions. 
+void ColumnFinder::GridInsertHLinePartitions() { + TabVector_IT hline_it(&horizontal_lines_); + for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) { + TabVector* hline = hline_it.data(); + TBOX line_box = BoxFromHLine(hline); + ColPartition* part = ColPartition::MakeLinePartition( + BRT_HLINE, vertical_skew_, + line_box.left(), line_box.bottom(), line_box.right(), line_box.top()); + part->set_type(PT_HORZ_LINE); + bool any_image = false; + ColPartitionGridSearch part_search(&part_grid_); + part_search.SetUniqueMode(true); + part_search.StartRectSearch(line_box); + ColPartition* covered; + while ((covered = part_search.NextRectSearch()) != nullptr) { + if (covered->IsImageType()) { + any_image = true; + break; + } + } + if (!any_image) + part_grid_.InsertBBox(true, true, part); + else + delete part; + } +} + +// Add horizontal line separators as partitions. +void ColumnFinder::GridInsertVLinePartitions() { + TabVector_IT vline_it(dead_vectors()); + for (vline_it.mark_cycle_pt(); !vline_it.cycled_list(); vline_it.forward()) { + TabVector* vline = vline_it.data(); + if (!vline->IsSeparator()) + continue; + int left = std::min(vline->startpt().x(), vline->endpt().x()); + int right = std::max(vline->startpt().x(), vline->endpt().x()); + right += vline->mean_width(); + if (left == right) { + if (left > 0) + --left; + else + ++right; + } + ColPartition* part = ColPartition::MakeLinePartition( + BRT_VLINE, vertical_skew_, + left, vline->startpt().y(), right, vline->endpt().y()); + part->set_type(PT_VERT_LINE); + bool any_image = false; + ColPartitionGridSearch part_search(&part_grid_); + part_search.SetUniqueMode(true); + part_search.StartRectSearch(part->bounding_box()); + ColPartition* covered; + while ((covered = part_search.NextRectSearch()) != nullptr) { + if (covered->IsImageType()) { + any_image = true; + break; + } + } + if (!any_image) + part_grid_.InsertBBox(true, true, part); + else + delete part; + } +} + +// For every ColPartition in 
the grid, sets its type based on position +// in the columns. +void ColumnFinder::SetPartitionTypes() { + GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> + gsearch(&part_grid_); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + part->SetPartitionType(resolution_, best_columns_[gsearch.GridY()]); + } +} + +// Only images remain with multiple types in a run of partners. +// Sets the type of all in the group to the maximum of the group. +void ColumnFinder::SmoothPartnerRuns() { + // Iterate the ColPartitions in the grid. + GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> + gsearch(&part_grid_); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + ColPartition* partner = part->SingletonPartner(true); + if (partner != nullptr) { + if (partner->SingletonPartner(false) != part) { + tprintf("Ooops! Partition:(%d partners)", + part->upper_partners()->length()); + part->Print(); + tprintf("has singleton partner:(%d partners", + partner->lower_partners()->length()); + partner->Print(); + tprintf("but its singleton partner is:"); + if (partner->SingletonPartner(false) == nullptr) + tprintf("NULL\n"); + else + partner->SingletonPartner(false)->Print(); + } + ASSERT_HOST(partner->SingletonPartner(false) == part); + } else if (part->SingletonPartner(false) != nullptr) { + ColPartitionSet* column_set = best_columns_[gsearch.GridY()]; + int column_count = column_set->ColumnCount(); + part->SmoothPartnerRun(column_count * 2 + 1); + } + } +} + +// Helper functions for TransformToBlocks. +// Add the part to the temp list in the correct order. 
+void ColumnFinder::AddToTempPartList(ColPartition* part, + ColPartition_CLIST* temp_list) { + int mid_y = part->MidY(); + ColPartition_C_IT it(temp_list); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* test_part = it.data(); + if (part->type() == PT_NOISE || test_part->type() == PT_NOISE) + continue; // Noise stays in sequence. + if (test_part == part->SingletonPartner(false)) + break; // Insert before its lower partner. + int neighbour_bottom = test_part->median_bottom(); + int neighbour_top = test_part->median_top(); + int neighbour_y = (neighbour_bottom + neighbour_top) / 2; + if (neighbour_y < mid_y) + break; // part is above test_part so insert it. + if (!part->HOverlaps(*test_part) && !part->WithinSameMargins(*test_part)) + continue; // Incompatibles stay in order + } + if (it.cycled_list()) { + it.add_to_end(part); + } else { + it.add_before_stay_put(part); + } +} + +// Add everything from the temp list to the work_set assuming correct order. +void ColumnFinder::EmptyTempPartList(ColPartition_CLIST* temp_list, + WorkingPartSet_LIST* work_set) { + ColPartition_C_IT it(temp_list); + while (!it.empty()) { + it.extract()->AddToWorkingSet(bleft_, tright_, resolution_, + &good_parts_, work_set); + it.forward(); + } +} + +// Transform the grid of partitions to the output blocks. +void ColumnFinder::TransformToBlocks(BLOCK_LIST* blocks, + TO_BLOCK_LIST* to_blocks) { + WorkingPartSet_LIST work_set; + ColPartitionSet* column_set = nullptr; + ColPartition_IT noise_it(&noise_parts_); + // The temp_part_list holds a list of parts at the same grid y coord + // so they can be added in the correct order. This prevents thin objects + // like horizontal lines going before the text lines above them. + ColPartition_CLIST temp_part_list; + // Iterate the ColPartitions in the grid. 
It starts at the top + GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> + gsearch(&part_grid_); + gsearch.StartFullSearch(); + int prev_grid_y = -1; + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + int grid_y = gsearch.GridY(); + if (grid_y != prev_grid_y) { + EmptyTempPartList(&temp_part_list, &work_set); + prev_grid_y = grid_y; + } + if (best_columns_[grid_y] != column_set) { + column_set = best_columns_[grid_y]; + // Every line should have a non-null best column. + ASSERT_HOST(column_set != nullptr); + column_set->ChangeWorkColumns(bleft_, tright_, resolution_, + &good_parts_, &work_set); + if (textord_debug_tabfind) + tprintf("Changed column groups at grid index %d, y=%d\n", + gsearch.GridY(), gsearch.GridY() * gridsize()); + } + if (part->type() == PT_NOISE) { + noise_it.add_to_end(part); + } else { + AddToTempPartList(part, &temp_part_list); + } + } + EmptyTempPartList(&temp_part_list, &work_set); + // Now finish all working sets and transfer ColPartitionSets to block_sets. + WorkingPartSet_IT work_it(&work_set); + while (!work_it.empty()) { + WorkingPartSet* working_set = work_it.extract(); + working_set->ExtractCompletedBlocks(bleft_, tright_, resolution_, + &good_parts_, blocks, to_blocks); + delete working_set; + work_it.forward(); + } +} + +// Helper reflects a list of blobs in the y-axis. +// Only reflects the BLOBNBOX bounding box. Not the blobs or outlines below. +static void ReflectBlobList(BLOBNBOX_LIST* bblobs) { + BLOBNBOX_IT it(bblobs); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + it.data()->reflect_box_in_y_axis(); + } +} + +// Reflect the blob boxes (but not the outlines) in the y-axis so that +// the blocks get created in the correct RTL order. Reflects the blobs +// in the input_block and the bblobs list. +// The reflection is undone in RotateAndReskewBlocks by +// reflecting the blocks themselves, and then recomputing the blob bounding +// boxes. 
+void ColumnFinder::ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs) { + ReflectBlobList(bblobs); + ReflectBlobList(&input_block->blobs); + ReflectBlobList(&input_block->small_blobs); + ReflectBlobList(&input_block->noise_blobs); + ReflectBlobList(&input_block->large_blobs); + // Update the denorm with the reflection. + auto* new_denorm = new DENORM; + new_denorm->SetupNormalization(nullptr, nullptr, denorm_, + 0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f); + denorm_ = new_denorm; +} + +// Helper fixes up blobs and cblobs to match the desired rotation, +// exploding multi-outline blobs back to single blobs and accumulating +// the bounding box widths and heights. +static void RotateAndExplodeBlobList(const FCOORD& blob_rotation, + BLOBNBOX_LIST* bblobs, + STATS* widths, + STATS* heights) { + BLOBNBOX_IT it(bblobs); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + C_BLOB* cblob = blob->cblob(); + C_OUTLINE_LIST* outlines = cblob->out_list(); + C_OUTLINE_IT ol_it(outlines); + if (!outlines->singleton()) { + // This blob has multiple outlines from CJK repair. + // Explode the blob back into individual outlines. + for (;!ol_it.empty(); ol_it.forward()) { + C_OUTLINE* outline = ol_it.extract(); + BLOBNBOX* new_blob = BLOBNBOX::RealBlob(outline); + // This blob will be revisited later since we add_after_stay_put here. + // This means it will get rotated and have its width/height added to + // the stats below. + it.add_after_stay_put(new_blob); + } + it.extract(); + delete cblob; + delete blob; + } else { + if (blob_rotation.x() != 1.0f || blob_rotation.y() != 0.0f) { + cblob->rotate(blob_rotation); + } + blob->compute_bounding_box(); + widths->add(blob->bounding_box().width(), 1); + heights->add(blob->bounding_box().height(), 1); + } + } +} + +// Undo the deskew that was done in FindTabVectors, as recognition is done +// without correcting blobs or blob outlines for skew. 
+// Reskew the completed blocks to put them back to the original rotated coords +// that were created by CorrectOrientation. +// If the input_is_rtl, then reflect the blocks in the y-axis to undo the +// reflection that was done before FindTabVectors. +// Blocks that were identified as vertical text (relative to the rotated +// coordinates) are further rotated so the text lines are horizontal. +// blob polygonal outlines are rotated to match the position of the blocks +// that they are in, and their bounding boxes are recalculated to be accurate. +// Record appropriate inverse transformations and required +// classifier transformation in the blocks. +void ColumnFinder::RotateAndReskewBlocks(bool input_is_rtl, + TO_BLOCK_LIST* blocks) { + if (input_is_rtl) { + // The skew is backwards because of the reflection. + FCOORD tmp = deskew_; + deskew_ = reskew_; + reskew_ = tmp; + } + TO_BLOCK_IT it(blocks); + int block_index = 1; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + TO_BLOCK* to_block = it.data(); + BLOCK* block = to_block->block; + // Blocks are created on the deskewed blob outlines in TransformToBlocks() + // so we need to reskew them back to page coordinates. + if (input_is_rtl) { + block->reflect_polygon_in_y_axis(); + } + block->rotate(reskew_); + // Copy the right_to_left flag to the created block. + block->set_right_to_left(input_is_rtl); + // Save the skew angle in the block for baseline computations. + block->set_skew(reskew_); + block->pdblk.set_index(block_index++); + FCOORD blob_rotation = ComputeBlockAndClassifyRotation(block); + // Rotate all the blobs if needed and recompute the bounding boxes. + // Compute the block median blob width and height as we go. 
+ STATS widths(0, block->pdblk.bounding_box().width()); + STATS heights(0, block->pdblk.bounding_box().height()); + RotateAndExplodeBlobList(blob_rotation, &to_block->blobs, + &widths, &heights); + TO_ROW_IT row_it(to_block->get_rows()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + TO_ROW* row = row_it.data(); + RotateAndExplodeBlobList(blob_rotation, row->blob_list(), + &widths, &heights); + } + block->set_median_size(static_cast<int>(widths.median() + 0.5), + static_cast<int>(heights.median() + 0.5)); + if (textord_debug_tabfind >= 2) + tprintf("Block median size = (%d, %d)\n", + block->median_size().x(), block->median_size().y()); + } +} + +// Computes the rotations for the block (to make textlines horizontal) and +// for the blobs (for classification) and sets the appropriate members +// of the given block. +// Returns the rotation that needs to be applied to the blobs to make +// them sit in the rotated block. +FCOORD ColumnFinder::ComputeBlockAndClassifyRotation(BLOCK* block) { + // The text_rotation_ tells us the gross page text rotation that needs + // to be applied for classification + // TODO(rays) find block-level classify rotation by orientation detection. + // In the mean time, assume that "up" for text printed in the minority + // direction (PT_VERTICAL_TEXT) is perpendicular to the line of reading. + // Accomplish this by zero-ing out the text rotation. This covers the + // common cases of image credits in documents written in Latin scripts + // and page headings for predominantly vertically written CJK books. + FCOORD classify_rotation(text_rotation_); + FCOORD block_rotation(1.0f, 0.0f); + if (block->pdblk.poly_block()->isA() == PT_VERTICAL_TEXT) { + // Vertical text needs to be 90 degrees rotated relative to the rest. + // If the rest has a 90 degree rotation already, use the inverse, making + // the vertical text the original way up. Otherwise use 90 degrees + // clockwise. 
+ if (rerotate_.x() == 0.0f) + block_rotation = rerotate_; + else + block_rotation = FCOORD(0.0f, -1.0f); + block->rotate(block_rotation); + classify_rotation = FCOORD(1.0f, 0.0f); + } + block_rotation.rotate(rotation_); + // block_rotation is now what we have done to the blocks. Now do the same + // thing to the blobs, but save the inverse rotation in the block, as that + // is what we need to DENORM back to the image coordinates. + FCOORD blob_rotation(block_rotation); + block_rotation.set_y(-block_rotation.y()); + block->set_re_rotation(block_rotation); + block->set_classify_rotation(classify_rotation); + if (textord_debug_tabfind) { + tprintf("Blk %d, type %d rerotation(%.2f, %.2f), char(%.2f,%.2f), box:", + block->pdblk.index(), block->pdblk.poly_block()->isA(), + block->re_rotation().x(), block->re_rotation().y(), + classify_rotation.x(), classify_rotation.y()); + block->pdblk.bounding_box().print(); + } + return blob_rotation; +} + +} // namespace tesseract. diff --git a/tesseract/src/textord/colfind.h b/tesseract/src/textord/colfind.h new file mode 100644 index 00000000..b7d5b672 --- /dev/null +++ b/tesseract/src/textord/colfind.h @@ -0,0 +1,366 @@ +/////////////////////////////////////////////////////////////////////// +// File: colfind.h +// Description: Class to find columns in the grid of BLOBNBOXes. +// Author: Ray Smith +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_TEXTORD_COLFIND_H_ +#define TESSERACT_TEXTORD_COLFIND_H_ + +#include "colpartitiongrid.h" +#include "colpartitionset.h" +#include "debugpixa.h" +#include "imagefind.h" +#include "ocrblock.h" +#include "tabfind.h" +#include "textlineprojection.h" + +class BLOCK_LIST; +struct Boxa; +struct Pixa; +class DENORM; +class ScrollView; +class STATS; +class TO_BLOCK; + +namespace tesseract { + +class ColPartitionSet; +class ColPartitionSet_LIST; +class ColSegment_LIST; +class ColumnGroup_LIST; +class LineSpacing; +class StrokeWidth; +class TempColumn_LIST; +class EquationDetectBase; + +// The ColumnFinder class finds columns in the grid. +class TESS_API ColumnFinder : public TabFind { + public: + // Gridsize is an estimate of the text size in the image. A suitable value + // is in TO_BLOCK::line_size after find_components has been used to make + // the blobs. + // bleft and tright are the bounds of the image (rectangle) being processed. + // vlines is a (possibly empty) list of TabVector and vertical_x and y are + // the sum logical vertical vector produced by LineFinder::FindVerticalLines. + // If cjk_script is true, then broken CJK characters are fixed during + // layout analysis to assist in detecting horizontal vs vertically written + // textlines. 
+ ColumnFinder(int gridsize, const ICOORD& bleft, const ICOORD& tright, + int resolution, bool cjk_script, double aligned_gap_fraction, + TabVector_LIST* vlines, TabVector_LIST* hlines, + int vertical_x, int vertical_y); + ~ColumnFinder() override; + + // Accessors for testing + const DENORM* denorm() const { + return denorm_; + } + const TextlineProjection* projection() const { + return &projection_; + } + void set_cjk_script(bool is_cjk) { + cjk_script_ = is_cjk; + } + + // ====================================================================== + // The main function of ColumnFinder is broken into pieces to facilitate + // optional insertion of orientation and script detection in an efficient + // way. The calling sequence IS MANDATORY however, whether or not + // OSD is being used: + // 1. Construction. + // 2. SetupAndFilterNoise. + // 3. IsVerticallyAlignedText. + // 4. CorrectOrientation. + // 5. FindBlocks. + // 6. Destruction. Use of a single column finder for multiple images does not + // make sense. + // Throughout these steps, the ColPartitions are owned by part_grid_, which + // means that it must be kept correct. Exception: big_parts_ owns its + // own ColPartitions. + // The BLOBNBOXes are owned by the input TO_BLOCK for the whole time, except + // for a phase in FindBlocks before TransformToBlocks, when they become + // owned by the ColPartitions. The owner() ColPartition of a BLOBNBOX + // indicates more of a betrothal for the majority of layout analysis, ie + // which ColPartition will take ownership when the blobs are released from + // the input TO_BLOCK. Exception: image_bblobs_ owns the fake blobs that + // are part of the image regions, as they are not on any TO_BLOCK list. + // TODO(rays) break up column finder further into smaller classes, as + // there is a lot more to it than column finding now. 
+ // ====================================================================== + + // Performs initial processing on the blobs in the input_block: + // Setup the part_grid, stroke_width_, nontext_map_. + // Obvious noise blobs are filtered out and used to mark the nontext_map_. + // Initial stroke-width analysis is used to get local text alignment + // direction, so the textline projection_ map can be setup. + // On return, IsVerticallyAlignedText may be called (now optionally) to + // determine the gross textline alignment of the page. + void SetupAndFilterNoise(PageSegMode pageseg_mode, Pix* photo_mask_pix, + TO_BLOCK* input_block); + + // Tests for vertical alignment of text (returning true if so), and generates + // a list of blobs (in osd_blobs) for orientation and script detection. + // block is the single block for the whole page or rectangle to be OCRed. + // Note that the vertical alignment may be due to text whose writing direction + // is vertical, like say Japanese, or due to text whose writing direction is + // horizontal but whose text appears vertically aligned because the image is + // not the right way up. + // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio. + bool IsVerticallyAlignedText(double find_vertical_text_ratio, + TO_BLOCK* block, BLOBNBOX_CLIST* osd_blobs); + + // Rotates the blobs and the TabVectors so that the gross writing direction + // (text lines) are horizontal and lines are read down the page. + // Applied rotation stored in rotation_. + // A second rotation is calculated for application during recognition to + // make the rotated blobs upright for recognition. + // Subsequent rotation stored in text_rotation_. + // + // Arguments: + // vertical_text_lines is true if the text lines are vertical. + // recognition_rotation [0..3] is the number of anti-clockwise 90 degree + // rotations from osd required for the text to be upright and readable. 
+ void CorrectOrientation(TO_BLOCK* block, bool vertical_text_lines, + int recognition_rotation); + + // Finds blocks of text, image, rule line, table etc, returning them in the + // blocks and to_blocks + // (Each TO_BLOCK points to the basic BLOCK and adds more information.) + // Image blocks are generated by a combination of photo_mask_pix (which may + // NOT be nullptr) and the rejected text found during preliminary textline + // finding. + // The input_block is the result of a call to find_components, and contains + // the blobs found in the image or rectangle to be OCRed. These blobs will be + // removed and placed in the output blocks, while unused ones will be deleted. + // If single_column is true, the input is treated as single column, but + // it is still divided into blocks of equal line spacing/text size. + // scaled_color is scaled down by scaled_factor from the input color image, + // and may be nullptr if the input was not color. + // grey_pix is optional, but if present must match the photo_mask_pix in size, + // and must be a *real* grey image instead of binary_pix * 255. + // thresholds_pix is expected to be present iff grey_pix is present and + // can be an integer factor reduction of the grey_pix. It represents the + // thresholds that were used to create the binary_pix from the grey_pix. + // Small blobs that confuse the segmentation into lines are placed into + // diacritic_blobs, with the intention that they be put into the most + // appropriate word after the rest of layout analysis. + // Returns -1 if the user hits the 'd' key in the blocks window while running + // in debug mode, which requests a retry with more debug info. 
+ int FindBlocks(PageSegMode pageseg_mode, Pix* scaled_color, int scaled_factor, + TO_BLOCK* block, Pix* photo_mask_pix, Pix* thresholds_pix, + Pix* grey_pix, DebugPixa* pixa_debug, BLOCK_LIST* blocks, + BLOBNBOX_LIST* diacritic_blobs, TO_BLOCK_LIST* to_blocks); + + // Get the rotation required to deskew, and its inverse rotation. + void GetDeskewVectors(FCOORD* deskew, FCOORD* reskew); + + // Set the equation detection pointer. + void SetEquationDetect(EquationDetectBase* detect); + + private: + // Displays the blob and block bounding boxes in a window called Blocks. + void DisplayBlocks(BLOCK_LIST* blocks); + // Displays the column edges at each grid y coordinate defined by + // best_columns_. + void DisplayColumnBounds(PartSetVector* sets); + + ////// Functions involved in determining the columns used on the page. ///// + + // Sets up column_sets_ (the determined column layout at each horizontal + // slice). Returns false if the page is empty. + bool MakeColumns(bool single_column); + // Attempt to improve the column_candidates by expanding the columns + // and adding new partitions from the partition sets in src_sets. + // Src_sets may be equal to column_candidates, in which case it will + // use them as a source to improve themselves. + void ImproveColumnCandidates(PartSetVector* src_sets, + PartSetVector* column_sets); + // Prints debug information on the column candidates. + void PrintColumnCandidates(const char* title); + // Finds the optimal set of columns that cover the entire image with as + // few changes in column partition as possible. + // Returns true if any part of the page is multi-column. + bool AssignColumns(const PartSetVector& part_sets); + // Finds the biggest range in part_sets_ that has no assigned column, but + // column assignment is possible. + bool BiggestUnassignedRange(int set_count, const bool* any_columns_possible, + int* start, int* end); + // Finds the modal compatible column_set_ index within the given range. 
+ int RangeModalColumnSet(int** column_set_costs, const int* assigned_costs, + int start, int end); + // Given that there are many column_set_id compatible columns in the range, + // shrinks the range to the longest contiguous run of compatibility, allowing + // gaps where no columns are possible, but not where competing columns are + // possible. + void ShrinkRangeToLongestRun(int** column_set_costs, + const int* assigned_costs, + const bool* any_columns_possible, + int column_set_id, + int* best_start, int* best_end); + // Moves start in the direction of step, up to, but not including end while + // the only incompatible regions are no more than kMaxIncompatibleColumnCount + // in size, and the compatible regions beyond are bigger. + void ExtendRangePastSmallGaps(int** column_set_costs, + const int* assigned_costs, + const bool* any_columns_possible, + int column_set_id, + int step, int end, int* start); + // Assigns the given column_set_id to the part_sets_ in the given range. + void AssignColumnToRange(int column_set_id, int start, int end, + int** column_set_costs, int* assigned_costs); + + // Computes the mean_column_gap_. + void ComputeMeanColumnGap(bool any_multi_column); + + //////// Functions that manipulate ColPartitions in the part_grid_ ///// + //////// to split, merge, find margins, and find types. ////////////// + + // Hoovers up all un-owned blobs and deletes them. + // The rest get released from the block so the ColPartitions can pass + // ownership to the output blocks. + void ReleaseBlobsAndCleanupUnused(TO_BLOCK* block); + // Splits partitions that cross columns where they have nothing in the gap. + void GridSplitPartitions(); + // Merges partitions where there is vertical overlap, within a single column, + // and the horizontal gap is small enough. + void GridMergePartitions(); + // Inserts remaining noise blobs into the most applicable partition if any. + // If there is no applicable partition, then the blobs are deleted. 
+ void InsertRemainingNoise(TO_BLOCK* block); + // Remove partitions that come from horizontal lines that look like + // underlines, but are not part of a table. + void GridRemoveUnderlinePartitions(); + // Add horizontal line separators as partitions. + void GridInsertHLinePartitions(); + // Add vertical line separators as partitions. + void GridInsertVLinePartitions(); + // For every ColPartition in the grid, sets its type based on position + // in the columns. + void SetPartitionTypes(); + // Only images remain with multiple types in a run of partners. + // Sets the type of all in the group to the maximum of the group. + void SmoothPartnerRuns(); + + //////// Functions that make the final output blocks /////// + + // Helper functions for TransformToBlocks. + // Add the part to the temp list in the correct order. + void AddToTempPartList(ColPartition* part, ColPartition_CLIST* temp_list); + // Add everything from the temp list to the work_set assuming correct order. + void EmptyTempPartList(ColPartition_CLIST* temp_list, + WorkingPartSet_LIST* work_set); + + // Transform the grid of partitions to the output blocks. + void TransformToBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); + + // Reflect the blob boxes (but not the outlines) in the y-axis so that + // the blocks get created in the correct RTL order. Rotates the blobs + // in the input_block and the bblobs list. + // The reflection is undone in RotateAndReskewBlocks by + // reflecting the blocks themselves, and then recomputing the blob bounding + // boxes. + void ReflectForRtl(TO_BLOCK* input_block, BLOBNBOX_LIST* bblobs); + + // Undo the deskew that was done in FindTabVectors, as recognition is done + // without correcting blobs or blob outlines for skew. + // Reskew the completed blocks to put them back to the original rotated coords + // that were created by CorrectOrientation. 
+ // If the input_is_rtl, then reflect the blocks in the y-axis to undo the + // reflection that was done before FindTabVectors. + // Blocks that were identified as vertical text (relative to the rotated + // coordinates) are further rotated so the text lines are horizontal. + // blob polygonal outlines are rotated to match the position of the blocks + // that they are in, and their bounding boxes are recalculated to be accurate. + // Record appropriate inverse transformations and required + // classifier transformation in the blocks. + void RotateAndReskewBlocks(bool input_is_rtl, TO_BLOCK_LIST* to_blocks); + + // Computes the rotations for the block (to make textlines horizontal) and + // for the blobs (for classification) and sets the appropriate members + // of the given block. + // Returns the rotation that needs to be applied to the blobs to make + // them sit in the rotated block. + FCOORD ComputeBlockAndClassifyRotation(BLOCK* block); + + // If true then the page language is cjk, so it is safe to perform + // FixBrokenCJK. + bool cjk_script_; + // The minimum gutter width to apply for finding columns. + // Modified when vertical text is detected to prevent detection of + // vertical text lines as columns. + int min_gutter_width_; + // The mean gap between columns over the page. + int mean_column_gap_; + // Config param saved at construction time. Modifies min_gutter_width_ with + // vertical text to prevent detection of vertical text as columns. + double tabfind_aligned_gap_fraction_; + // The rotation vector needed to convert original coords to deskewed. + FCOORD deskew_; + // The rotation vector needed to convert deskewed back to original coords. + FCOORD reskew_; + // The rotation vector used to rotate vertically oriented pages. + FCOORD rotation_; + // The rotation vector needed to convert the rotated back to original coords. + FCOORD rerotate_; + // The additional rotation vector needed to rotate text for recognition. 
+ FCOORD text_rotation_; + // The column_sets_ contain the ordered candidate ColPartitionSets that + // define the possible divisions of the page into columns. + PartSetVector column_sets_; + // A simple array of pointers to the best assigned column division at + // each grid y coordinate. + ColPartitionSet** best_columns_; + // The grid used for creating initial partitions with strokewidth. + StrokeWidth* stroke_width_; + // The grid used to hold ColPartitions after the columns have been determined. + ColPartitionGrid part_grid_; + // List of ColPartitions that are no longer needed after they have been + // turned into regions, but are kept around because they are referenced + // by the part_grid_. + ColPartition_LIST good_parts_; + // List of ColPartitions that are big and might be dropcap or vertically + // joined. + ColPartition_LIST big_parts_; + // List of ColPartitions that have been declared noise. + ColPartition_LIST noise_parts_; + // The fake blobs that are made from the images. + BLOBNBOX_LIST image_bblobs_; + // Horizontal line separators. + TabVector_LIST horizontal_lines_; + // Image map of photo/noise areas on the page. + Pix* nontext_map_; + // Textline projection map. + TextlineProjection projection_; + // Sequence of DENORMS that indicate how to get back to the original image + // coordinate space. The destructor must delete all the DENORMs in the chain. + DENORM* denorm_; + + // Various debug windows that automatically go away on completion. + ScrollView* input_blobs_win_; + + // The equation region detector pointer. Note: This pointer is passed in by + // member function SetEquationDetect, and releasing it is NOT owned by this + // class. + EquationDetectBase* equation_detect_; + + // Allow a subsequent instance to reuse the blocks window. + // Not thread-safe, but multiple threads shouldn't be using windows anyway. + static ScrollView* blocks_win_; +}; + +} // namespace tesseract. 
+ +#endif // TESSERACT_TEXTORD_COLFIND_H_ diff --git a/tesseract/src/textord/colpartition.cpp b/tesseract/src/textord/colpartition.cpp new file mode 100644 index 00000000..6dcdda74 --- /dev/null +++ b/tesseract/src/textord/colpartition.cpp @@ -0,0 +1,2597 @@ +/////////////////////////////////////////////////////////////////////// +// File: colpartition.cpp +// Description: Class to hold partitions of the page that correspond +// roughly to text lines. +// Author: Ray Smith +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "colpartition.h" +#include "colpartitiongrid.h" +#include "colpartitionset.h" +#include "detlinefit.h" +#include "dppoint.h" +#include "imagefind.h" +#include "workingpartset.h" +#include "host.h" // for NearlyEqual + +#include <algorithm> + +namespace tesseract { + +ELIST2IZE(ColPartition) +CLISTIZE(ColPartition) + +//////////////// ColPartition Implementation //////////////// + +// enum to refer to the entries in a neighbourhood of lines. +// Used by SmoothSpacings to test for blips with OKSpacingBlip. +enum SpacingNeighbourhood { + PN_ABOVE2, + PN_ABOVE1, + PN_UPPER, + PN_LOWER, + PN_BELOW1, + PN_BELOW2, + PN_COUNT +}; + +// Maximum change in spacing (in inches) to ignore. +const double kMaxSpacingDrift = 1.0 / 72; // 1/72 is one point. 
// Extra allowance for top spacing, expressed as the maximum fraction of
// the line height.
constexpr double kMaxTopSpacingFraction = 0.25;
// Upper bound, as a multiple of the largest line height, on the spacing
// between lines that may still belong to the same text block.
constexpr double kMaxSameBlockLineSpacing = 3.0;
// Largest size ratio at which two lines still count as the same size.
constexpr double kMaxSizeRatio = 1.5;
// Max IQR of gaps as a fraction of the max of leader width and gap.
constexpr double kMaxLeaderGapFractionOfMax = 0.25;
// Max IQR of gaps as a fraction of the min of leader width and gap.
constexpr double kMaxLeaderGapFractionOfMin = 0.5;
// Fewest blobs that can be considered a leader.
constexpr int kMinLeaderCount = 5;
// Lowest score that qualifies as a STRONG_CHAIN textline.
constexpr int kMinStrongTextValue = 6;
// Lowest score that qualifies as a CHAIN textline.
constexpr int kMinChainTextValue = 3;
// Fewest blobs in a strong horizontal text line.
constexpr int kHorzStrongTextlineCount = 8;
// Smallest height (in image pixels) of a strong horizontal text line.
constexpr int kHorzStrongTextlineHeight = 10;
// Smallest aspect ratio of a strong horizontal text line.
constexpr int kHorzStrongTextlineAspect = 5;
// Largest upper-quartile error tolerated on a baseline fit, as a fraction
// of height.
constexpr double kMaxBaselineError = 0.4375;
// Smallest coverage for a good baseline between vectors.
constexpr double kMinBaselineCoverage = 0.5;
// Largest RMS color noise at which colors are still compared.
constexpr int kMaxRMSColorNoise = 128;
// Largest (squared) color distance at which a partition may still be used
// in smoothing neighbouring types.
constexpr int kMaxColorDistance = 900;

// blob_type is the blob_region_type_ of the blobs in this partition.
// Vertical is the direction of logical vertical on the possibly skewed image.
+ColPartition::ColPartition(BlobRegionType blob_type, const ICOORD& vertical) + : left_margin_(-INT32_MAX), right_margin_(INT32_MAX), + median_bottom_(INT32_MAX), median_top_(-INT32_MAX), + median_left_(INT32_MAX), median_right_(-INT32_MAX), + blob_type_(blob_type), + vertical_(vertical) { + memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_)); +} + +// Constructs a fake ColPartition with a single fake BLOBNBOX, all made +// from a single TBOX. +// WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and +// the ColPartition owns the BLOBNBOX!!! +// Call DeleteBoxes before deleting the ColPartition. +ColPartition* ColPartition::FakePartition(const TBOX& box, + PolyBlockType block_type, + BlobRegionType blob_type, + BlobTextFlowType flow) { + ColPartition* part = new ColPartition(blob_type, ICOORD(0, 1)); + part->set_type(block_type); + part->set_flow(flow); + part->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(box))); + part->set_left_margin(box.left()); + part->set_right_margin(box.right()); + part->SetBlobTypes(); + part->ComputeLimits(); + part->ClaimBoxes(); + return part; +} + +// Constructs and returns a ColPartition with the given real BLOBNBOX, +// and sets it up to be a "big" partition (single-blob partition bigger +// than the surrounding text that may be a dropcap, two or more vertically +// touching characters, or some graphic element. +// If the given list is not nullptr, the partition is also added to the list. 
+ColPartition* ColPartition::MakeBigPartition(BLOBNBOX* box, + ColPartition_LIST* big_part_list) { + box->set_owner(nullptr); + ColPartition* single = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1)); + single->set_flow(BTFT_NONE); + single->AddBox(box); + single->ComputeLimits(); + single->ClaimBoxes(); + single->SetBlobTypes(); + single->set_block_owned(true); + if (big_part_list != nullptr) { + ColPartition_IT part_it(big_part_list); + part_it.add_to_end(single); + } + return single; +} + +ColPartition::~ColPartition() { + // Remove this as a partner of all partners, as we don't want them + // referring to a deleted object. + ColPartition_C_IT it(&upper_partners_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + it.data()->RemovePartner(false, this); + } + it.set_to_list(&lower_partners_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + it.data()->RemovePartner(true, this); + } +} + +// Constructs a fake ColPartition with no BLOBNBOXes to represent a +// horizontal or vertical line, given a type and a bounding box. +ColPartition* ColPartition::MakeLinePartition(BlobRegionType blob_type, + const ICOORD& vertical, + int left, int bottom, + int right, int top) { + auto* part = new ColPartition(blob_type, vertical); + part->bounding_box_ = TBOX(left, bottom, right, top); + part->median_bottom_ = bottom; + part->median_top_ = top; + part->median_height_ = top - bottom; + part->median_left_ = left; + part->median_right_ = right; + part->median_width_ = right - left; + part->left_key_ = part->BoxLeftKey(); + part->right_key_ = part->BoxRightKey(); + return part; +} + + +// Adds the given box to the partition, updating the partition bounds. +// The list of boxes in the partition is updated, ensuring that no box is +// recorded twice, and the boxes are kept in increasing left position. +void ColPartition::AddBox(BLOBNBOX* bbox) { + TBOX box = bbox->bounding_box(); + // Update the partition limits. 
+ if (boxes_.length() == 0) { + bounding_box_ = box; + } else { + bounding_box_ += box; + } + + if (IsVerticalType()) { + if (!last_add_was_vertical_) { + boxes_.sort(SortByBoxBottom<BLOBNBOX>); + last_add_was_vertical_ = true; + } + boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>, true, bbox); + } else { + if (last_add_was_vertical_) { + boxes_.sort(SortByBoxLeft<BLOBNBOX>); + last_add_was_vertical_ = false; + } + boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>, true, bbox); + } + if (!left_key_tab_) + left_key_ = BoxLeftKey(); + if (!right_key_tab_) + right_key_ = BoxRightKey(); + if (TabFind::WithinTestRegion(2, box.left(), box.bottom())) + tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n", + box.left(), box.bottom(), box.right(), box.top(), + bounding_box_.left(), bounding_box_.right()); +} + +// Removes the given box from the partition, updating the bounds. +void ColPartition::RemoveBox(BLOBNBOX* box) { + BLOBNBOX_C_IT bb_it(&boxes_); + for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { + if (box == bb_it.data()) { + bb_it.extract(); + ComputeLimits(); + return; + } + } +} + +// Returns the tallest box in the partition, as measured perpendicular to the +// presumed flow of text. +BLOBNBOX* ColPartition::BiggestBox() { + BLOBNBOX* biggest = nullptr; + BLOBNBOX_C_IT bb_it(&boxes_); + for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { + BLOBNBOX* bbox = bb_it.data(); + if (IsVerticalType()) { + if (biggest == nullptr || + bbox->bounding_box().width() > biggest->bounding_box().width()) + biggest = bbox; + } else { + if (biggest == nullptr || + bbox->bounding_box().height() > biggest->bounding_box().height()) + biggest = bbox; + } + } + return biggest; +} + +// Returns the bounding box excluding the given box. 
+TBOX ColPartition::BoundsWithoutBox(BLOBNBOX* box) { + TBOX result; + BLOBNBOX_C_IT bb_it(&boxes_); + for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { + if (box != bb_it.data()) { + result += bb_it.data()->bounding_box(); + } + } + return result; +} + +// Claims the boxes in the boxes_list by marking them with a this owner +// pointer. If a box is already owned, then it must be owned by this. +void ColPartition::ClaimBoxes() { + BLOBNBOX_C_IT bb_it(&boxes_); + for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { + BLOBNBOX* bblob = bb_it.data(); + ColPartition* other = bblob->owner(); + if (other == nullptr) { + // Normal case: ownership is available. + bblob->set_owner(this); + } else { + ASSERT_HOST(other == this); + } + } +} + +// nullptr the owner of the blobs in this partition, so they can be deleted +// independently of the ColPartition. +void ColPartition::DisownBoxes() { + BLOBNBOX_C_IT bb_it(&boxes_); + for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { + BLOBNBOX* bblob = bb_it.data(); + ASSERT_HOST(bblob->owner() == this || bblob->owner() == nullptr); + bblob->set_owner(nullptr); + } +} + +// nullptr the owner of the blobs in this partition that are owned by this +// partition, so they can be deleted independently of the ColPartition. +// Any blobs that are not owned by this partition get to keep their owner +// without an assert failure. +void ColPartition::DisownBoxesNoAssert() { + BLOBNBOX_C_IT bb_it(&boxes_); + for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { + BLOBNBOX* bblob = bb_it.data(); + if (bblob->owner() == this) + bblob->set_owner(nullptr); + } +} + +// Nulls the owner of the blobs in this partition that are owned by this +// partition and not leader blobs, removing them from the boxes_ list, thus +// turning this partition back to a leader partition if it contains a leader, +// or otherwise leaving it empty. Returns true if any boxes remain. 
+bool ColPartition::ReleaseNonLeaderBoxes() { + BLOBNBOX_C_IT bb_it(&boxes_); + for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { + BLOBNBOX* bblob = bb_it.data(); + if (bblob->flow() != BTFT_LEADER) { + if (bblob->owner() == this) bblob->set_owner(nullptr); + bb_it.extract(); + } + } + if (bb_it.empty()) return false; + flow_ = BTFT_LEADER; + ComputeLimits(); + return true; +} + +// Delete the boxes that this partition owns. +void ColPartition::DeleteBoxes() { + // Although the boxes_ list is a C_LIST, in some cases it owns the + // BLOBNBOXes, as the ColPartition takes ownership from the grid, + // and the BLOBNBOXes own the underlying C_BLOBs. + for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) { + BLOBNBOX* bblob = bb_it.extract(); + delete bblob->cblob(); + delete bblob; + } +} + +// Reflects the partition in the y-axis, assuming that its blobs have +// already been done. Corrects only a limited part of the members, since +// this function is assumed to be used shortly after initial creation, which +// is before a lot of the members are used. +void ColPartition::ReflectInYAxis() { + BLOBNBOX_CLIST reversed_boxes; + BLOBNBOX_C_IT reversed_it(&reversed_boxes); + // Reverse the order of the boxes_. + BLOBNBOX_C_IT bb_it(&boxes_); + for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { + reversed_it.add_before_then_move(bb_it.extract()); + } + bb_it.add_list_after(&reversed_boxes); + ASSERT_HOST(!left_key_tab_ && !right_key_tab_); + int tmp = left_margin_; + left_margin_ = -right_margin_; + right_margin_ = -tmp; + ComputeLimits(); +} + +// Returns true if this is a legal partition - meaning that the conditions +// left_margin <= bounding_box left +// left_key <= bounding box left key +// bounding box left <= bounding box right +// and likewise for right margin and key +// are all met. 
+bool ColPartition::IsLegal() { + if (bounding_box_.left() > bounding_box_.right()) { + if (textord_debug_bugs) { + tprintf("Bounding box invalid\n"); + Print(); + } + return false; // Bounding box invalid. + } + if (left_margin_ > bounding_box_.left() || + right_margin_ < bounding_box_.right()) { + if (textord_debug_bugs) { + tprintf("Margins invalid\n"); + Print(); + } + return false; // Margins invalid. + } + if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) { + if (textord_debug_bugs) { + tprintf("Key inside box: %d v %d or %d v %d\n", + left_key_, BoxLeftKey(), right_key_, BoxRightKey()); + Print(); + } + return false; // Keys inside the box. + } + return true; +} + +// Returns true if the left and right edges are approximately equal. +bool ColPartition::MatchingColumns(const ColPartition& other) const { + int y = (MidY() + other.MidY()) / 2; + if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor, + LeftAtY(y) / kColumnWidthFactor, 1)) + return false; + if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor, + RightAtY(y) / kColumnWidthFactor, 1)) + return false; + return true; +} + +// Returns true if the colors match for two text partitions. +bool ColPartition::MatchingTextColor(const ColPartition& other) const { + if (color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise && + other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise) + return false; // Too noisy. + + // Colors must match for other to count. + double d_this1_o = ImageFind::ColorDistanceFromLine(other.color1_, + other.color2_, + color1_); + double d_this2_o = ImageFind::ColorDistanceFromLine(other.color1_, + other.color2_, + color2_); + double d_o1_this = ImageFind::ColorDistanceFromLine(color1_, color2_, + other.color1_); + double d_o2_this = ImageFind::ColorDistanceFromLine(color1_, color2_, + other.color2_); +// All 4 distances must be small enough. 
+ return d_this1_o < kMaxColorDistance && d_this2_o < kMaxColorDistance && + d_o1_this < kMaxColorDistance && d_o2_this < kMaxColorDistance; +} + +// Returns true if the sizes match for two text partitions, +// taking orientation into account. See also SizesSimilar. +bool ColPartition::MatchingSizes(const ColPartition& other) const { + if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT) + return !TabFind::DifferentSizes(median_width_, other.median_width_); + else + return !TabFind::DifferentSizes(median_height_, other.median_height_); +} + +// Returns true if there is no tabstop violation in merging this and other. +bool ColPartition::ConfirmNoTabViolation(const ColPartition& other) const { + if (bounding_box_.right() < other.bounding_box_.left() && + bounding_box_.right() < other.LeftBlobRule()) + return false; + if (other.bounding_box_.right() < bounding_box_.left() && + other.bounding_box_.right() < LeftBlobRule()) + return false; + if (bounding_box_.left() > other.bounding_box_.right() && + bounding_box_.left() > other.RightBlobRule()) + return false; + if (other.bounding_box_.left() > bounding_box_.right() && + other.bounding_box_.left() > RightBlobRule()) + return false; + return true; +} + +// Returns true if other has a similar stroke width to this. 
+bool ColPartition::MatchingStrokeWidth(const ColPartition& other, + double fractional_tolerance, + double constant_tolerance) const { + int match_count = 0; + int nonmatch_count = 0; + BLOBNBOX_C_IT box_it(const_cast<BLOBNBOX_CLIST*>(&boxes_)); + BLOBNBOX_C_IT other_it(const_cast<BLOBNBOX_CLIST*>(&other.boxes_)); + box_it.mark_cycle_pt(); + other_it.mark_cycle_pt(); + while (!box_it.cycled_list() && !other_it.cycled_list()) { + if (box_it.data()->MatchingStrokeWidth(*other_it.data(), + fractional_tolerance, + constant_tolerance)) + ++match_count; + else + ++nonmatch_count; + box_it.forward(); + other_it.forward(); + } + return match_count > nonmatch_count; +} + +// Returns true if base is an acceptable diacritic base char merge +// with this as the diacritic. +// Returns true if: +// (1) this is a ColPartition containing only diacritics, and +// (2) the base characters indicated on the diacritics all believably lie +// within the text line of the candidate ColPartition. +bool ColPartition::OKDiacriticMerge(const ColPartition& candidate, + bool debug) const { + BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_)); + int min_top = INT32_MAX; + int max_bottom = -INT32_MAX; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + if (!blob->IsDiacritic()) { + if (debug) { + tprintf("Blob is not a diacritic:"); + blob->bounding_box().print(); + } + return false; // All blobs must have diacritic bases. + } + if (blob->base_char_top() < min_top) + min_top = blob->base_char_top(); + if (blob->base_char_bottom() > max_bottom) + max_bottom = blob->base_char_bottom(); + } + // If the intersection of all vertical ranges of all base characters + // overlaps the median range of this, then it is OK. 
+ bool result = min_top > candidate.median_bottom_ && + max_bottom < candidate.median_top_; + if (debug) { + if (result) + tprintf("OKDiacritic!\n"); + else + tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n", + max_bottom, min_top, median_bottom_, median_top_); + } + return result; +} + +// Sets the sort key using either the tab vector, or the bounding box if +// the tab vector is nullptr. If the tab_vector lies inside the bounding_box, +// use the edge of the box as a key any way. +void ColPartition::SetLeftTab(const TabVector* tab_vector) { + if (tab_vector != nullptr) { + left_key_ = tab_vector->sort_key(); + left_key_tab_ = left_key_ <= BoxLeftKey(); + } else { + left_key_tab_ = false; + } + if (!left_key_tab_) + left_key_ = BoxLeftKey(); +} + +// As SetLeftTab, but with the right. +void ColPartition::SetRightTab(const TabVector* tab_vector) { + if (tab_vector != nullptr) { + right_key_ = tab_vector->sort_key(); + right_key_tab_ = right_key_ >= BoxRightKey(); + } else { + right_key_tab_ = false; + } + if (!right_key_tab_) + right_key_ = BoxRightKey(); +} + +// Copies the left/right tab from the src partition, but if take_box is +// true, copies the box instead and uses that as a key. +void ColPartition::CopyLeftTab(const ColPartition& src, bool take_box) { + left_key_tab_ = take_box ? false : src.left_key_tab_; + if (left_key_tab_) { + left_key_ = src.left_key_; + } else { + bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY())); + left_key_ = BoxLeftKey(); + } + if (left_margin_ > bounding_box_.left()) + left_margin_ = src.left_margin_; +} + +// As CopyLeftTab, but with the right. +void ColPartition::CopyRightTab(const ColPartition& src, bool take_box) { + right_key_tab_ = take_box ? 
false : src.right_key_tab_; + if (right_key_tab_) { + right_key_ = src.right_key_; + } else { + bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY())); + right_key_ = BoxRightKey(); + } + if (right_margin_ < bounding_box_.right()) + right_margin_ = src.right_margin_; +} + +// Returns the left rule line x coord of the leftmost blob. +int ColPartition::LeftBlobRule() const { + BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_)); + return it.data()->left_rule(); +} +// Returns the right rule line x coord of the rightmost blob. +int ColPartition::RightBlobRule() const { + BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_)); + it.move_to_last(); + return it.data()->right_rule(); +} + +float ColPartition::SpecialBlobsDensity(const BlobSpecialTextType type) const { + ASSERT_HOST(type < BSTT_COUNT); + return special_blobs_densities_[type]; +} + +int ColPartition::SpecialBlobsCount(const BlobSpecialTextType type) { + ASSERT_HOST(type < BSTT_COUNT); + BLOBNBOX_C_IT blob_it(&boxes_); + int count = 0; + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + BlobSpecialTextType blob_type = blob->special_text_type(); + if (blob_type == type) { + count++; + } + } + + return count; +} + +void ColPartition::SetSpecialBlobsDensity( + const BlobSpecialTextType type, const float density) { + ASSERT_HOST(type < BSTT_COUNT); + special_blobs_densities_[type] = density; +} + +void ColPartition::ComputeSpecialBlobsDensity() { + memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_)); + if (boxes_.empty()) { + return; + } + + BLOBNBOX_C_IT blob_it(&boxes_); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + BlobSpecialTextType type = blob->special_text_type(); + special_blobs_densities_[type]++; + } + + for (float& special_blobs_density : special_blobs_densities_) { + special_blobs_density /= boxes_.length(); + } +} + +// Add a partner above if upper, 
otherwise below. +// Add them uniquely and keep the list sorted by box left. +// Partnerships are added symmetrically to partner and this. +void ColPartition::AddPartner(bool upper, ColPartition* partner) { + if (upper) { + partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, + true, this); + upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner); + } else { + partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, + true, this); + lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner); + } +} + +// Removes the partner from this, but does not remove this from partner. +// This asymmetric removal is so as not to mess up the iterator that is +// working on partner's partner list. +void ColPartition::RemovePartner(bool upper, ColPartition* partner) { + ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + if (it.data() == partner) { + it.extract(); + break; + } + } +} + +// Returns the partner if the given partner is a singleton, otherwise nullptr. +ColPartition* ColPartition::SingletonPartner(bool upper) { + ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_; + if (!partners->singleton()) + return nullptr; + ColPartition_C_IT it(partners); + return it.data(); +} + +// Merge with the other partition and delete it. +void ColPartition::Absorb(ColPartition* other, WidthCallback cb) { + // The result has to either own all of the blobs or none of them. + // Verify the flag is consistent. + ASSERT_HOST(owns_blobs() == other->owns_blobs()); + // TODO(nbeato): check owns_blobs better. Right now owns_blobs + // should always be true when this is called. So there is no issues. 
+ if (TabFind::WithinTestRegion(2, bounding_box_.left(), + bounding_box_.bottom()) || + TabFind::WithinTestRegion(2, other->bounding_box_.left(), + other->bounding_box_.bottom())) { + tprintf("Merging:"); + Print(); + other->Print(); + } + + // Update the special_blobs_densities_. + memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_)); + for (int type = 0; type < BSTT_COUNT; ++type) { + unsigned w1 = boxes_.length(); + unsigned w2 = other->boxes_.length(); + float new_val = special_blobs_densities_[type] * w1 + + other->special_blobs_densities_[type] * w2; + if (!w1 || !w2) { + ASSERT_HOST((w1 + w2) > 0); + special_blobs_densities_[type] = new_val / (w1 + w2); + } + } + + // Merge the two sorted lists. + BLOBNBOX_C_IT it(&boxes_); + BLOBNBOX_C_IT it2(&other->boxes_); + for (; !it2.empty(); it2.forward()) { + BLOBNBOX* bbox2 = it2.extract(); + ColPartition* prev_owner = bbox2->owner(); + if (prev_owner != other && prev_owner != nullptr) { + // A blob on other's list is owned by someone else; let them have it. + continue; + } + ASSERT_HOST(prev_owner == other || prev_owner == nullptr); + if (prev_owner == other) + bbox2->set_owner(this); + it.add_to_end(bbox2); + } + left_margin_ = std::min(left_margin_, other->left_margin_); + right_margin_ = std::max(right_margin_, other->right_margin_); + if (other->left_key_ < left_key_) { + left_key_ = other->left_key_; + left_key_tab_ = other->left_key_tab_; + } + if (other->right_key_ > right_key_) { + right_key_ = other->right_key_; + right_key_tab_ = other->right_key_tab_; + } + // Combine the flow and blob_type in a sensible way. + // Dominant flows stay. 
+ if (!DominatesInMerge(flow_, other->flow_)) { + flow_ = other->flow_; + blob_type_ = other->blob_type_; + } + SetBlobTypes(); + if (IsVerticalType()) { + boxes_.sort(SortByBoxBottom<BLOBNBOX>); + last_add_was_vertical_ = true; + } else { + boxes_.sort(SortByBoxLeft<BLOBNBOX>); + last_add_was_vertical_ = false; + } + ComputeLimits(); + // Fix partner lists. other is going away, so remove it as a + // partner of all its partners and add this in its place. + for (int upper = 0; upper < 2; ++upper) { + ColPartition_CLIST partners; + ColPartition_C_IT part_it(&partners); + part_it.add_list_after(upper ? &other->upper_partners_ + : &other->lower_partners_); + for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { + ColPartition* partner = part_it.extract(); + partner->RemovePartner(!upper, other); + partner->RemovePartner(!upper, this); + partner->AddPartner(!upper, this); + } + } + delete other; + if (cb != nullptr) { + SetColumnGoodness(cb); + } +} + +// Merge1 and merge2 are candidates to be merged, yet their combined box +// overlaps this. Is that allowed? +// Returns true if the overlap between this and the merged pair of +// merge candidates is sufficiently trivial to be allowed. +// The merged box can graze the edge of this by the ok_box_overlap +// if that exceeds the margin to the median top and bottom. +// ok_box_overlap should be set by the caller appropriate to the sizes of +// the text involved, and is usually a fraction of the median size of merge1 +// and/or merge2, or this. +// TODO(rays) Determine whether vertical text needs to be considered. +bool ColPartition::OKMergeOverlap(const ColPartition& merge1, + const ColPartition& merge2, + int ok_box_overlap, bool debug) { + // Vertical partitions are not allowed to be involved. + if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) { + if (debug) + tprintf("Vertical partition\n"); + return false; + } + // The merging partitions must strongly overlap each other. 
+ if (!merge1.VSignificantCoreOverlap(merge2)) { + if (debug) + tprintf("Voverlap %d (%d)\n", + merge1.VCoreOverlap(merge2), + merge1.VSignificantCoreOverlap(merge2)); + return false; + } + // The merged box must not overlap the median bounds of this. + TBOX merged_box(merge1.bounding_box()); + merged_box += merge2.bounding_box(); + if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ && + merged_box.bottom() < bounding_box_.top() - ok_box_overlap && + merged_box.top() > bounding_box_.bottom() + ok_box_overlap) { + if (debug) + tprintf("Excessive box overlap\n"); + return false; + } + // Looks OK! + return true; +} + +// Find the blob at which to split this to minimize the overlap with the +// given box. Returns the first blob to go in the second partition. +BLOBNBOX* ColPartition::OverlapSplitBlob(const TBOX& box) { + if (boxes_.empty() || boxes_.singleton()) + return nullptr; + BLOBNBOX_C_IT it(&boxes_); + TBOX left_box(it.data()->bounding_box()); + for (it.forward(); !it.at_first(); it.forward()) { + BLOBNBOX* bbox = it.data(); + left_box += bbox->bounding_box(); + if (left_box.overlap(box)) + return bbox; + } + return nullptr; +} + +// Split this partition keeping the first half in this and returning +// the second half. +// Splits by putting the split_blob and the blobs that follow +// in the second half, and the rest in the first half. 
+ColPartition* ColPartition::SplitAtBlob(BLOBNBOX* split_blob) { + ColPartition* split_part = ShallowCopy(); + split_part->set_owns_blobs(owns_blobs()); + BLOBNBOX_C_IT it(&boxes_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* bbox = it.data(); + ColPartition* prev_owner = bbox->owner(); + ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr); + if (bbox == split_blob || !split_part->boxes_.empty()) { + split_part->AddBox(it.extract()); + if (owns_blobs() && prev_owner != nullptr) + bbox->set_owner(split_part); + } + } + ASSERT_HOST(!it.empty()); + if (split_part->IsEmpty()) { + // Split part ended up with nothing. Possible if split_blob is not + // in the list of blobs. + delete split_part; + return nullptr; + } + right_key_tab_ = false; + split_part->left_key_tab_ = false; + ComputeLimits(); + // TODO(nbeato) Merge Ray's CL like this: + // if (owns_blobs()) + // SetBlobTextlineGoodness(); + split_part->ComputeLimits(); + // TODO(nbeato) Merge Ray's CL like this: + // if (split_part->owns_blobs()) + // split_part->SetBlobTextlineGoodness(); + return split_part; +} + +// Split this partition at the given x coordinate, returning the right +// half and keeping the left half in this. +ColPartition* ColPartition::SplitAt(int split_x) { + if (split_x <= bounding_box_.left() || split_x >= bounding_box_.right()) + return nullptr; // There will be no change. 
+ ColPartition* split_part = ShallowCopy(); + split_part->set_owns_blobs(owns_blobs()); + BLOBNBOX_C_IT it(&boxes_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* bbox = it.data(); + ColPartition* prev_owner = bbox->owner(); + ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == nullptr); + const TBOX& box = bbox->bounding_box(); + if (box.left() >= split_x) { + split_part->AddBox(it.extract()); + if (owns_blobs() && prev_owner != nullptr) + bbox->set_owner(split_part); + } + } + if (it.empty()) { + // Possible if split-x passes through the first blob. + it.add_list_after(&split_part->boxes_); + } + ASSERT_HOST(!it.empty()); + if (split_part->IsEmpty()) { + // Split part ended up with nothing. Possible if split_x passes + // through the last blob. + delete split_part; + return nullptr; + } + right_key_tab_ = false; + split_part->left_key_tab_ = false; + right_margin_ = split_x; + split_part->left_margin_ = split_x; + ComputeLimits(); + split_part->ComputeLimits(); + return split_part; +} + +// Recalculates all the coordinate limits of the partition. +void ColPartition::ComputeLimits() { + bounding_box_ = TBOX(); // Clear it + BLOBNBOX_C_IT it(&boxes_); + BLOBNBOX* bbox = nullptr; + int non_leader_count = 0; + if (it.empty()) { + bounding_box_.set_left(left_margin_); + bounding_box_.set_right(right_margin_); + bounding_box_.set_bottom(0); + bounding_box_.set_top(0); + } else { + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + bbox = it.data(); + bounding_box_ += bbox->bounding_box(); + if (bbox->flow() != BTFT_LEADER) + ++non_leader_count; + } + } + if (!left_key_tab_) + left_key_ = BoxLeftKey(); + if (left_key_ > BoxLeftKey() && textord_debug_bugs) { + // TODO(rays) investigate the causes of these error messages, to find + // out if they are genuinely harmful, or just indicative of junk input. 
+ tprintf("Computed left-illegal partition\n"); + Print(); + } + if (!right_key_tab_) + right_key_ = BoxRightKey(); + if (right_key_ < BoxRightKey() && textord_debug_bugs) { + tprintf("Computed right-illegal partition\n"); + Print(); + } + if (it.empty()) + return; + if (IsImageType() || blob_type() == BRT_RECTIMAGE || + blob_type() == BRT_POLYIMAGE) { + median_top_ = bounding_box_.top(); + median_bottom_ = bounding_box_.bottom(); + median_height_ = bounding_box_.height(); + median_left_ = bounding_box_.left(); + median_right_ = bounding_box_.right(); + median_width_ = bounding_box_.width(); + } else { + STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1); + STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1); + STATS height_stats(0, bounding_box_.height() + 1); + STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1); + STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1); + STATS width_stats(0, bounding_box_.width() + 1); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + bbox = it.data(); + if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) { + const TBOX& box = bbox->bounding_box(); + int area = box.area(); + top_stats.add(box.top(), area); + bottom_stats.add(box.bottom(), area); + height_stats.add(box.height(), area); + left_stats.add(box.left(), area); + right_stats.add(box.right(), area); + width_stats.add(box.width(), area); + } + } + median_top_ = static_cast<int>(top_stats.median() + 0.5); + median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5); + median_height_ = static_cast<int>(height_stats.median() + 0.5); + median_left_ = static_cast<int>(left_stats.median() + 0.5); + median_right_ = static_cast<int>(right_stats.median() + 0.5); + median_width_ = static_cast<int>(width_stats.median() + 0.5); + } + + if (right_margin_ < bounding_box_.right() && textord_debug_bugs) { + tprintf("Made partition with bad right coords, %d < %d\n", + right_margin_, bounding_box_.right()); + 
Print(); + } + if (left_margin_ > bounding_box_.left() && textord_debug_bugs) { + tprintf("Made partition with bad left coords, %d > %d\n", + left_margin_, bounding_box_.left()); + Print(); + } + // Fix partner lists. The bounding box has changed and partners are stored + // in bounding box order, so remove and reinsert this as a partner + // of all its partners. + for (int upper = 0; upper < 2; ++upper) { + ColPartition_CLIST partners; + ColPartition_C_IT part_it(&partners); + part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_); + for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { + ColPartition* partner = part_it.extract(); + partner->RemovePartner(!upper, this); + partner->AddPartner(!upper, this); + } + } + if (TabFind::WithinTestRegion(2, bounding_box_.left(), + bounding_box_.bottom())) { + tprintf("Recomputed box for partition %p\n", this); + Print(); + } +} + +// Returns the number of boxes that overlap the given box. +int ColPartition::CountOverlappingBoxes(const TBOX& box) { + BLOBNBOX_C_IT it(&boxes_); + int overlap_count = 0; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* bbox = it.data(); + if (box.overlap(bbox->bounding_box())) + ++overlap_count; + } + return overlap_count; +} + +// Computes and sets the type_ and first_column_, last_column_ and column_set_. +// resolution refers to the ppi resolution of the image. 
+void ColPartition::SetPartitionType(int resolution, ColPartitionSet* columns) { + int first_spanned_col = -1; + ColumnSpanningType span_type = + columns->SpanningType(resolution, + bounding_box_.left(), bounding_box_.right(), + std::min(bounding_box_.height(), bounding_box_.width()), + MidY(), left_margin_, right_margin_, + &first_column_, &last_column_, + &first_spanned_col); + column_set_ = columns; + if (first_column_ < last_column_ && span_type == CST_PULLOUT && + !IsLineType()) { + // Unequal columns may indicate that the pullout spans one of the columns + // it lies in, so force it to be allocated to just that column. + if (first_spanned_col >= 0) { + first_column_ = first_spanned_col; + last_column_ = first_spanned_col; + } else { + if ((first_column_ & 1) == 0) + last_column_ = first_column_; + else if ((last_column_ & 1) == 0) + first_column_ = last_column_; + else + first_column_ = last_column_ = (first_column_ + last_column_) / 2; + } + } + type_ = PartitionType(span_type); +} + +// Returns the PartitionType from the current BlobRegionType and a column +// flow spanning type ColumnSpanningType, generated by +// ColPartitionSet::SpanningType, that indicates how the partition sits +// in the columns. 
+PolyBlockType ColPartition::PartitionType(ColumnSpanningType flow) const { + if (flow == CST_NOISE) { + if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE && + blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT) + return PT_NOISE; + flow = CST_FLOWING; + } + + switch (blob_type_) { + case BRT_NOISE: + return PT_NOISE; + case BRT_HLINE: + return PT_HORZ_LINE; + case BRT_VLINE: + return PT_VERT_LINE; + case BRT_RECTIMAGE: + case BRT_POLYIMAGE: + switch (flow) { + case CST_FLOWING: + return PT_FLOWING_IMAGE; + case CST_HEADING: + return PT_HEADING_IMAGE; + case CST_PULLOUT: + return PT_PULLOUT_IMAGE; + default: + ASSERT_HOST(!"Undefined flow type for image!"); + } + break; + case BRT_VERT_TEXT: + return PT_VERTICAL_TEXT; + case BRT_TEXT: + case BRT_UNKNOWN: + default: + switch (flow) { + case CST_FLOWING: + return PT_FLOWING_TEXT; + case CST_HEADING: + return PT_HEADING_TEXT; + case CST_PULLOUT: + return PT_PULLOUT_TEXT; + default: + ASSERT_HOST(!"Undefined flow type for text!"); + } + } + ASSERT_HOST(!"Should never get here!"); + return PT_NOISE; +} + +// Returns the first and last column touched by this partition. +// resolution refers to the ppi resolution of the image. +void ColPartition::ColumnRange(int resolution, ColPartitionSet* columns, + int* first_col, int* last_col) { + int first_spanned_col = -1; + ColumnSpanningType span_type = + columns->SpanningType(resolution, + bounding_box_.left(), bounding_box_.right(), + std::min(bounding_box_.height(), bounding_box_.width()), + MidY(), left_margin_, right_margin_, + first_col, last_col, + &first_spanned_col); + type_ = PartitionType(span_type); +} + +// Sets the internal flags good_width_ and good_column_. 
+void ColPartition::SetColumnGoodness(WidthCallback cb) { + int y = MidY(); + int width = RightAtY(y) - LeftAtY(y); + good_width_ = cb(width); + good_column_ = blob_type_ == BRT_TEXT && left_key_tab_ && right_key_tab_; +} + +// Determines whether the blobs in this partition mostly represent +// a leader (fixed pitch sequence) and sets the member blobs accordingly. +// Note that height is assumed to have been tested elsewhere, and that this +// function will find most fixed-pitch text as leader without a height filter. +// Leader detection is limited to sequences of identical width objects, +// such as .... or ----, so patterns, such as .-.-.-.-. will not be found. +bool ColPartition::MarkAsLeaderIfMonospaced() { + bool result = false; + // Gather statistics on the gaps between blobs and the widths of the blobs. + int part_width = bounding_box_.width(); + STATS gap_stats(0, part_width); + STATS width_stats(0, part_width); + BLOBNBOX_C_IT it(&boxes_); + BLOBNBOX* prev_blob = it.data(); + prev_blob->set_flow(BTFT_NEIGHBOURS); + width_stats.add(prev_blob->bounding_box().width(), 1); + int blob_count = 1; + for (it.forward(); !it.at_first(); it.forward()) { + BLOBNBOX* blob = it.data(); + int left = blob->bounding_box().left(); + int right = blob->bounding_box().right(); + gap_stats.add(left - prev_blob->bounding_box().right(), 1); + width_stats.add(right - left, 1); + blob->set_flow(BTFT_NEIGHBOURS); + prev_blob = blob; + ++blob_count; + } + double median_gap = gap_stats.median(); + double median_width = width_stats.median(); + double max_width = std::max(median_gap, median_width); + double min_width = std::min(median_gap, median_width); + double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f); + if (textord_debug_tabfind >= 4) { + tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n", + gap_iqr, blob_count, max_width * kMaxLeaderGapFractionOfMax, + min_width * kMaxLeaderGapFractionOfMin); + } + if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax && + gap_iqr < 
min_width * kMaxLeaderGapFractionOfMin && + blob_count >= kMinLeaderCount) { + // This is stable enough to be called a leader, so check the widths. + // Since leader dashes can join, run a dp cutting algorithm and go + // on the cost. + int offset = static_cast<int>(ceil(gap_iqr * 2)); + int min_step = static_cast<int>(median_gap + median_width + 0.5); + int max_step = min_step + offset; + min_step -= offset; + // Pad the buffer with min_step/2 on each end. + int part_left = bounding_box_.left() - min_step / 2; + part_width += min_step; + auto* projection = new DPPoint[part_width]; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + int left = blob->bounding_box().left(); + int right = blob->bounding_box().right(); + int height = blob->bounding_box().height(); + for (int x = left; x < right; ++x) { + projection[left - part_left].AddLocalCost(height); + } + } + DPPoint* best_end = DPPoint::Solve(min_step, max_step, false, + &DPPoint::CostWithVariance, + part_width, projection); + if (best_end != nullptr && best_end->total_cost() < blob_count) { + // Good enough. Call it a leader. + result = true; + bool modified_blob_list = false; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + // If the first or last blob is spaced too much, don't mark it. 
+ if (it.at_first()) { + int gap = it.data_relative(1)->bounding_box().left() - + blob->bounding_box().right(); + if (blob->bounding_box().width() + gap > max_step) { + it.extract(); + modified_blob_list = true; + continue; + } + } + if (it.at_last()) { + int gap = blob->bounding_box().left() - + it.data_relative(-1)->bounding_box().right(); + if (blob->bounding_box().width() + gap > max_step) { + it.extract(); + modified_blob_list = true; + break; + } + } + blob->set_region_type(BRT_TEXT); + blob->set_flow(BTFT_LEADER); + } + if (modified_blob_list) ComputeLimits(); + blob_type_ = BRT_TEXT; + flow_ = BTFT_LEADER; + } else if (textord_debug_tabfind) { + if (best_end == nullptr) { + tprintf("No path\n"); + } else { + tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(), + blob_count); + } + } + delete [] projection; + } + return result; +} + +// Given the result of TextlineProjection::EvaluateColPartition, (positive for +// horizontal text, negative for vertical text, and near zero for non-text), +// sets the blob_type_ and flow_ for this partition to indicate whether it +// is strongly or weakly vertical or horizontal text, or non-text. +// The function assumes that the blob neighbours are valid (from +// StrokeWidth::SetNeighbours) and that those neighbours have their +// region_type() set. +void ColPartition::SetRegionAndFlowTypesFromProjectionValue(int value) { + int blob_count = 0; // Total # blobs. + int good_blob_score_ = 0; // Total # good strokewidth neighbours. + int noisy_count = 0; // Total # neighbours marked as noise. 
+ int hline_count = 0; + int vline_count = 0; + BLOBNBOX_C_IT it(&boxes_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + ++blob_count; + noisy_count += blob->NoisyNeighbours(); + good_blob_score_ += blob->GoodTextBlob(); + if (blob->region_type() == BRT_HLINE) ++hline_count; + if (blob->region_type() == BRT_VLINE) ++vline_count; + } + flow_ = BTFT_NEIGHBOURS; + blob_type_ = BRT_UNKNOWN; + if (hline_count > vline_count) { + flow_ = BTFT_NONE; + blob_type_ = BRT_HLINE; + } else if (vline_count > hline_count) { + flow_ = BTFT_NONE; + blob_type_ = BRT_VLINE; + } else if (value < -1 || 1 < value) { + int long_side; + int short_side; + if (value > 0) { + long_side = bounding_box_.width(); + short_side = bounding_box_.height(); + blob_type_ = BRT_TEXT; + } else { + long_side = bounding_box_.height(); + short_side = bounding_box_.width(); + blob_type_ = BRT_VERT_TEXT; + } + // We will combine the old metrics using aspect ratio and blob counts + // with the input value by allowing a strong indication to flip the + // STRONG_CHAIN/CHAIN flow values. + int strong_score = blob_count >= kHorzStrongTextlineCount ? 1 : 0; + if (short_side > kHorzStrongTextlineHeight) ++strong_score; + if (short_side * kHorzStrongTextlineAspect < long_side) ++strong_score; + if (abs(value) >= kMinStrongTextValue) + flow_ = BTFT_STRONG_CHAIN; + else if (abs(value) >= kMinChainTextValue) + flow_ = BTFT_CHAIN; + else + flow_ = BTFT_NEIGHBOURS; + // Upgrade chain to strong chain if the other indicators are good + if (flow_ == BTFT_CHAIN && strong_score == 3) + flow_ = BTFT_STRONG_CHAIN; + // Downgrade strong vertical text to chain if the indicators are bad. + if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2) + flow_ = BTFT_CHAIN; + } + if (flow_ == BTFT_NEIGHBOURS) { + // Check for noisy neighbours. 
+ if (noisy_count >= blob_count) { + flow_ = BTFT_NONTEXT; + blob_type_= BRT_NOISE; + } + } + if (TabFind::WithinTestRegion(2, bounding_box_.left(), + bounding_box_.bottom())) { + tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,", + blob_count, noisy_count, good_blob_score_); + tprintf(" Projection value=%d, flow=%d, blob_type=%d\n", + value, flow_, blob_type_); + Print(); + } + SetBlobTypes(); +} + +// Sets all blobs with the partition blob type and flow, but never overwrite +// leader blobs, as we need to be able to identify them later. +void ColPartition::SetBlobTypes() { + if (!owns_blobs()) + return; + BLOBNBOX_C_IT it(&boxes_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + if (blob->flow() != BTFT_LEADER) + blob->set_flow(flow_); + blob->set_region_type(blob_type_); + ASSERT_HOST(blob->owner() == nullptr || blob->owner() == this); + } +} + +// Returns true if a decent baseline can be fitted through the blobs. +// Works for both horizontal and vertical text. +bool ColPartition::HasGoodBaseline() { + // Approximation of the baseline. + DetLineFit linepoints; + // Calculation of the mean height on this line segment. Note that these + // variable names apply to the context of a horizontal line, and work + // analogously, rather than literally in the case of a vertical line. + int total_height = 0; + int coverage = 0; + int height_count = 0; + int width = 0; + BLOBNBOX_C_IT it(&boxes_); + TBOX box(it.data()->bounding_box()); + // Accumulate points representing the baseline at the middle of each blob, + // but add an additional point for each end of the line. This makes it + // harder to fit a severe skew angle, as it is most likely not right. + if (IsVerticalType()) { + // For a vertical line, use the right side as the baseline. + ICOORD first_pt(box.right(), box.bottom()); + // Use the bottom-right of the first (bottom) box, the top-right of the + // last, and the middle-right of all others. 
+ linepoints.Add(first_pt); + for (it.forward(); !it.at_last(); it.forward()) { + BLOBNBOX* blob = it.data(); + box = blob->bounding_box(); + ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2); + linepoints.Add(box_pt); + total_height += box.width(); + coverage += box.height(); + ++height_count; + } + box = it.data()->bounding_box(); + ICOORD last_pt(box.right(), box.top()); + linepoints.Add(last_pt); + width = last_pt.y() - first_pt.y(); + + } else { + // Horizontal lines use the bottom as the baseline. + TBOX box(it.data()->bounding_box()); + // Use the bottom-left of the first box, the the bottom-right of the last, + // and the middle of all others. + ICOORD first_pt(box.left(), box.bottom()); + linepoints.Add(first_pt); + for (it.forward(); !it.at_last(); it.forward()) { + BLOBNBOX* blob = it.data(); + box = blob->bounding_box(); + ICOORD box_pt((box.left() + box.right()) / 2, box.bottom()); + linepoints.Add(box_pt); + total_height += box.height(); + coverage += box.width(); + ++height_count; + } + box = it.data()->bounding_box(); + ICOORD last_pt(box.right(), box.bottom()); + linepoints.Add(last_pt); + width = last_pt.x() - first_pt.x(); + } + // Maximum median error allowed to be a good text line. + if (height_count == 0) + return false; + double max_error = kMaxBaselineError * total_height / height_count; + ICOORD start_pt, end_pt; + double error = linepoints.Fit(&start_pt, &end_pt); + return error < max_error && coverage >= kMinBaselineCoverage * width; +} + +// Adds this ColPartition to a matching WorkingPartSet if one can be found, +// otherwise starts a new one in the appropriate column, ending the previous. +void ColPartition::AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright, + int resolution, + ColPartition_LIST* used_parts, + WorkingPartSet_LIST* working_sets) { + if (block_owned_) + return; // Done it already. + block_owned_ = true; + WorkingPartSet_IT it(working_sets); + // If there is an upper partner use its working_set_ directly. 
+ ColPartition* partner = SingletonPartner(true); + if (partner != nullptr && partner->working_set_ != nullptr) { + working_set_ = partner->working_set_; + working_set_->AddPartition(this); + return; + } + if (partner != nullptr && textord_debug_bugs) { + tprintf("Partition with partner has no working set!:"); + Print(); + partner->Print(); + } + // Search for the column that the left edge fits in. + WorkingPartSet* work_set = nullptr; + it.move_to_first(); + int col_index = 0; + for (it.mark_cycle_pt(); !it.cycled_list() && + col_index != first_column_; + it.forward(), ++col_index); + if (textord_debug_tabfind >= 2) { + tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between"); + Print(); + } + if (it.cycled_list() && textord_debug_bugs) { + tprintf("Target column=%d, only had %d\n", first_column_, col_index); + } + ASSERT_HOST(!it.cycled_list()); + work_set = it.data(); + // If last_column_ != first_column, then we need to scoop up all blocks + // between here and the last_column_ and put back in work_set. + if (!it.cycled_list() && last_column_ != first_column_ && !IsPulloutType()) { + // Find the column that the right edge falls in. + BLOCK_LIST completed_blocks; + TO_BLOCK_LIST to_blocks; + for (; !it.cycled_list() && col_index <= last_column_; + it.forward(), ++col_index) { + WorkingPartSet* end_set = it.data(); + end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts, + &completed_blocks, &to_blocks); + } + work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks); + } + working_set_ = work_set; + work_set->AddPartition(this); +} + +// From the given block_parts list, builds one or more BLOCKs and +// corresponding TO_BLOCKs, such that the line spacing is uniform in each. +// Created blocks are appended to the end of completed_blocks and to_blocks. +// The used partitions are put onto used_parts, as they may still be referred +// to in the partition grid. 
bleft, tright and resolution are the bounds +// and resolution of the original image. +void ColPartition::LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright, + int resolution, + ColPartition_LIST* block_parts, + ColPartition_LIST* used_parts, + BLOCK_LIST* completed_blocks, + TO_BLOCK_LIST* to_blocks) { + int page_height = tright.y() - bleft.y(); + // Compute the initial spacing stats. + ColPartition_IT it(block_parts); + int part_count = 0; + int max_line_height = 0; + + // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type + // because their line spacing with their neighbors maybe smaller and their + // height may be slightly larger. + + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* part = it.data(); + ASSERT_HOST(!part->boxes()->empty()); + STATS side_steps(0, part->bounding_box().height()); + if (part->bounding_box().height() > max_line_height) + max_line_height = part->bounding_box().height(); + BLOBNBOX_C_IT blob_it(part->boxes()); + int prev_bottom = blob_it.data()->bounding_box().bottom(); + for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + int bottom = blob->bounding_box().bottom(); + int step = bottom - prev_bottom; + if (step < 0) + step = -step; + side_steps.add(step, 1); + prev_bottom = bottom; + } + part->set_side_step(static_cast<int>(side_steps.median() + 0.5)); + if (!it.at_last()) { + ColPartition* next_part = it.data_relative(1); + part->set_bottom_spacing(part->median_bottom() - + next_part->median_bottom()); + part->set_top_spacing(part->median_top() - next_part->median_top()); + } else { + part->set_bottom_spacing(page_height); + part->set_top_spacing(page_height); + } + if (textord_debug_tabfind) { + part->Print(); + tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n", + side_steps.median(), part->top_spacing(), part->bottom_spacing()); + } + ++part_count; + } + if (part_count == 0) + return; + + 
SmoothSpacings(resolution, page_height, block_parts); + + // Move the partitions into individual block lists and make the blocks. + BLOCK_IT block_it(completed_blocks); + TO_BLOCK_IT to_block_it(to_blocks); + ColPartition_LIST spacing_parts; + ColPartition_IT sp_block_it(&spacing_parts); + int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing; + for (it.mark_cycle_pt(); !it.empty();) { + ColPartition* part = it.extract(); + sp_block_it.add_to_end(part); + it.forward(); + if (it.empty() || part->bottom_spacing() > same_block_threshold || + !part->SpacingsEqual(*it.data(), resolution)) { + // There is a spacing boundary. Check to see if it.data() belongs + // better in the current block or the next one. + if (!it.empty() && part->bottom_spacing() <= same_block_threshold) { + ColPartition* next_part = it.data(); + // If there is a size match one-way, then the middle line goes with + // its matched size, otherwise it goes with the smallest spacing. + ColPartition* third_part = it.at_last() ? nullptr : it.data_relative(1); + if (textord_debug_tabfind) { + tprintf("Spacings unequal: upper:%d/%d, lower:%d/%d," + " sizes %d %d %d\n", + part->top_spacing(), part->bottom_spacing(), + next_part->top_spacing(), next_part->bottom_spacing(), + part->median_height(), next_part->median_height(), + third_part != nullptr ? third_part->median_height() : 0); + } + // We can only consider adding the next line to the block if the sizes + // match and the lines are close enough for their size. + if (part->SizesSimilar(*next_part) && + next_part->median_height() * kMaxSameBlockLineSpacing > + part->bottom_spacing() && + part->median_height() * kMaxSameBlockLineSpacing > + part->top_spacing()) { + // Even now, we can only add it as long as the third line doesn't + // match in the same way and have a smaller bottom spacing. 
+ if (third_part == nullptr || + !next_part->SizesSimilar(*third_part) || + third_part->median_height() * kMaxSameBlockLineSpacing <= + next_part->bottom_spacing() || + next_part->median_height() * kMaxSameBlockLineSpacing <= + next_part->top_spacing() || + next_part->bottom_spacing() > part->bottom_spacing()) { + // Add to the current block. + sp_block_it.add_to_end(it.extract()); + it.forward(); + if (textord_debug_tabfind) { + tprintf("Added line to current block.\n"); + } + } + } + } + TO_BLOCK* to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts); + if (to_block != nullptr) { + to_block_it.add_to_end(to_block); + block_it.add_to_end(to_block->block); + } + sp_block_it.set_to_list(&spacing_parts); + } else { + if (textord_debug_tabfind && !it.empty()) { + ColPartition* next_part = it.data(); + tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n", + part->top_spacing(), part->bottom_spacing(), + next_part->top_spacing(), next_part->bottom_spacing(), + part->median_height(), next_part->median_height()); + } + } + } +} + +// Helper function to clip the input pos to the given bleft, tright bounds. +static void ClipCoord(const ICOORD& bleft, const ICOORD& tright, ICOORD* pos) { + if (pos->x() < bleft.x()) + pos->set_x(bleft.x()); + if (pos->x() > tright.x()) + pos->set_x(tright.x()); + if (pos->y() < bleft.y()) + pos->set_y(bleft.y()); + if (pos->y() > tright.y()) + pos->set_y(tright.y()); +} + +// Helper moves the blobs from the given list of block_parts into the block +// itself. Sets up the block for (old) textline formation correctly for +// vertical and horizontal text. The partitions are moved to used_parts +// afterwards, as they cannot be deleted yet. +static TO_BLOCK* MoveBlobsToBlock(bool vertical_text, int line_spacing, + BLOCK* block, + ColPartition_LIST* block_parts, + ColPartition_LIST* used_parts) { + // Make a matching TO_BLOCK and put all the BLOBNBOXes from the parts in it. 
+ // Move all the parts to a done list as they are no longer needed, except + // that have have to continue to exist until the part grid is deleted. + // Compute the median blob size as we go, as the block needs to know. + TBOX block_box(block->pdblk.bounding_box()); + STATS sizes(0, std::max(block_box.width(), block_box.height())); + bool text_type = block->pdblk.poly_block()->IsText(); + ColPartition_IT it(block_parts); + auto* to_block = new TO_BLOCK(block); + BLOBNBOX_IT blob_it(&to_block->blobs); + ColPartition_IT used_it(used_parts); + for (it.move_to_first(); !it.empty(); it.forward()) { + ColPartition* part = it.extract(); + // Transfer blobs from all regions to the output blocks. + // Blobs for non-text regions will be used to define the polygonal + // bounds of the region. + for (BLOBNBOX_C_IT bb_it(part->boxes()); !bb_it.empty(); + bb_it.forward()) { + BLOBNBOX* bblob = bb_it.extract(); + if (bblob->owner() != part) { + tprintf("Ownership incorrect for blob:"); + bblob->bounding_box().print(); + tprintf("Part="); + part->Print(); + if (bblob->owner() == nullptr) { + tprintf("Not owned\n"); + } else { + tprintf("Owner part:"); + bblob->owner()->Print(); + } + } + ASSERT_HOST(bblob->owner() == part); + // Assert failure here is caused by arbitrarily changing the partition + // type without also changing the blob type, such as in + // InsertSmallBlobsAsUnknowns. 
+ ASSERT_HOST(!text_type || bblob->region_type() >= BRT_UNKNOWN); + C_OUTLINE_LIST* outlines = bblob->cblob()->out_list(); + C_OUTLINE_IT ol_it(outlines); + ASSERT_HOST(!text_type || ol_it.data()->pathlength() > 0); + if (vertical_text) + sizes.add(bblob->bounding_box().width(), 1); + else + sizes.add(bblob->bounding_box().height(), 1); + blob_it.add_after_then_move(bblob); + } + used_it.add_to_end(part); + } + if (text_type && blob_it.empty()) { + delete block; + delete to_block; + return nullptr; + } + to_block->line_size = sizes.median(); + if (vertical_text) { + int block_width = block->pdblk.bounding_box().width(); + if (block_width < line_spacing) + line_spacing = block_width; + to_block->line_spacing = static_cast<float>(line_spacing); + to_block->max_blob_size = static_cast<float>(block_width + 1); + } else { + int block_height = block->pdblk.bounding_box().height(); + if (block_height < line_spacing) + line_spacing = block_height; + to_block->line_spacing = static_cast<float>(line_spacing); + to_block->max_blob_size = static_cast<float>(block_height + 1); + } + return to_block; +} + +// Constructs a block from the given list of partitions. +// Arguments are as LineSpacingBlocks above. +TO_BLOCK* ColPartition::MakeBlock(const ICOORD& bleft, const ICOORD& tright, + ColPartition_LIST* block_parts, + ColPartition_LIST* used_parts) { + if (block_parts->empty()) + return nullptr; // Nothing to do. + // If the block_parts are not in reading order, then it will make an invalid + // block polygon and bounding_box, so sort by bounding box now just to make + // sure. 
+ block_parts->sort(&ColPartition::SortByBBox); + ColPartition_IT it(block_parts); + ColPartition* part = it.data(); + PolyBlockType type = part->type(); + if (type == PT_VERTICAL_TEXT) + return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts); + // LineSpacingBlocks has handed us a collection of evenly spaced lines and + // put the average spacing in each partition, so we can just take the + // linespacing from the first partition. + int line_spacing = part->bottom_spacing(); + if (line_spacing < part->median_height()) + line_spacing = part->bounding_box().height(); + ICOORDELT_LIST vertices; + ICOORDELT_IT vert_it(&vertices); + ICOORD start, end; + int min_x = INT32_MAX; + int max_x = -INT32_MAX; + int min_y = INT32_MAX; + int max_y = -INT32_MAX; + int iteration = 0; + do { + if (iteration == 0) + ColPartition::LeftEdgeRun(&it, &start, &end); + else + ColPartition::RightEdgeRun(&it, &start, &end); + ClipCoord(bleft, tright, &start); + ClipCoord(bleft, tright, &end); + vert_it.add_after_then_move(new ICOORDELT(start)); + vert_it.add_after_then_move(new ICOORDELT(end)); + UpdateRange(start.x(), &min_x, &max_x); + UpdateRange(end.x(), &min_x, &max_x); + UpdateRange(start.y(), &min_y, &max_y); + UpdateRange(end.y(), &min_y, &max_y); + if ((iteration == 0 && it.at_first()) || + (iteration == 1 && it.at_last())) { + ++iteration; + it.move_to_last(); + } + } while (iteration < 2); + if (textord_debug_tabfind) + tprintf("Making block at (%d,%d)->(%d,%d)\n", + min_x, min_y, max_x, max_y); + auto* block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y); + block->pdblk.set_poly_block(new POLY_BLOCK(&vertices, type)); + return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts); +} + +// Constructs a block from the given list of vertical text partitions. +// Currently only creates rectangular blocks. 
+TO_BLOCK* ColPartition::MakeVerticalTextBlock(const ICOORD& bleft, + const ICOORD& tright, + ColPartition_LIST* block_parts, + ColPartition_LIST* used_parts) { + if (block_parts->empty()) + return nullptr; // Nothing to do. + ColPartition_IT it(block_parts); + ColPartition* part = it.data(); + TBOX block_box = part->bounding_box(); + int line_spacing = block_box.width(); + PolyBlockType type = it.data()->type(); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + block_box += it.data()->bounding_box(); + } + if (textord_debug_tabfind) { + tprintf("Making block at:"); + block_box.print(); + } + auto* block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(), + block_box.right(), block_box.top()); + block->pdblk.set_poly_block(new POLY_BLOCK(block_box, type)); + return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts); +} + +// Makes a TO_ROW matching this and moves all the blobs to it, transferring +// ownership to to returned TO_ROW. +TO_ROW* ColPartition::MakeToRow() { + BLOBNBOX_C_IT blob_it(&boxes_); + TO_ROW* row = nullptr; + int line_size = IsVerticalType() ? median_width_ : median_height_; + // Add all the blobs to a single TO_ROW. + for (; !blob_it.empty(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.extract(); +// blob->compute_bounding_box(); + int top = blob->bounding_box().top(); + int bottom = blob->bounding_box().bottom(); + if (row == nullptr) { + row = new TO_ROW(blob, static_cast<float>(top), + static_cast<float>(bottom), + static_cast<float>(line_size)); + } else { + row->add_blob(blob, static_cast<float>(top), + static_cast<float>(bottom), + static_cast<float>(line_size)); + } + } + return row; +} + +// Returns a copy of everything except the list of boxes. The resulting +// ColPartition is only suitable for keeping in a column candidate list. 
+ColPartition* ColPartition::ShallowCopy() const { + auto* part = new ColPartition(blob_type_, vertical_); + part->left_margin_ = left_margin_; + part->right_margin_ = right_margin_; + part->bounding_box_ = bounding_box_; + memcpy(part->special_blobs_densities_, special_blobs_densities_, + sizeof(special_blobs_densities_)); + part->median_bottom_ = median_bottom_; + part->median_top_ = median_top_; + part->median_height_ = median_height_; + part->median_left_ = median_left_; + part->median_right_ = median_right_; + part->median_width_ = median_width_; + part->good_width_ = good_width_; + part->good_column_ = good_column_; + part->left_key_tab_ = left_key_tab_; + part->right_key_tab_ = right_key_tab_; + part->type_ = type_; + part->flow_ = flow_; + part->left_key_ = left_key_; + part->right_key_ = right_key_; + part->first_column_ = first_column_; + part->last_column_ = last_column_; + part->owns_blobs_ = false; + return part; +} + +ColPartition* ColPartition::CopyButDontOwnBlobs() { + ColPartition* copy = ShallowCopy(); + copy->set_owns_blobs(false); + BLOBNBOX_C_IT inserter(copy->boxes()); + BLOBNBOX_C_IT traverser(boxes()); + for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward()) + inserter.add_after_then_move(traverser.data()); + return copy; +} + +#ifndef GRAPHICS_DISABLED +// Provides a color for BBGrid to draw the rectangle. +// Must be kept in sync with PolyBlockType. +ScrollView::Color ColPartition::BoxColor() const { + if (type_ == PT_UNKNOWN) + return BLOBNBOX::TextlineColor(blob_type_, flow_); + return POLY_BLOCK::ColorForPolyBlockType(type_); +} +#endif // !GRAPHICS_DISABLED + +// Keep in sync with BlobRegionType. +static char kBlobTypes[BRT_COUNT + 1] = "NHSRIUVT"; + +// Prints debug information on this. 
+void ColPartition::Print() const { + int y = MidY(); + tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)" + " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d" + " ts=%d bs=%d ls=%d rs=%d\n", + boxes_.empty() ? 'E' : ' ', + left_margin_, left_key_tab_ ? 'T' : 'B', LeftAtY(y), + bounding_box_.left(), median_left_, + bounding_box_.bottom(), median_bottom_, + bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B', + right_margin_, median_right_, bounding_box_.top(), median_top_, + good_width_, good_column_, type_, + kBlobTypes[blob_type_], flow_, + first_column_, last_column_, boxes_.length(), + space_above_, space_below_, space_to_left_, space_to_right_); +} + +// Prints debug information on the colors. +void ColPartition::PrintColors() { + tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n", + color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE], + color1_[L_ALPHA_CHANNEL], + color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]); +} + +// Sets the types of all partitions in the run to be the max of the types. +void ColPartition::SmoothPartnerRun(int working_set_count) { + STATS left_stats(0, working_set_count); + STATS right_stats(0, working_set_count); + PolyBlockType max_type = type_; + ColPartition* partner; + for (partner = SingletonPartner(false); partner != nullptr; + partner = partner->SingletonPartner(false)) { + if (partner->type_ > max_type) + max_type = partner->type_; + if (column_set_ == partner->column_set_) { + left_stats.add(partner->first_column_, 1); + right_stats.add(partner->last_column_, 1); + } + } + type_ = max_type; + // TODO(rays) Either establish that it isn't necessary to set the columns, + // or find a way to do it that does not cause an assert failure in + // AddToWorkingSet. 
+#if 0 + first_column_ = left_stats.mode(); + last_column_ = right_stats.mode(); + if (last_column_ < first_column_) + last_column_ = first_column_; +#endif + + for (partner = SingletonPartner(false); partner != nullptr; + partner = partner->SingletonPartner(false)) { + partner->type_ = max_type; +#if 0 // See TODO above + if (column_set_ == partner->column_set_) { + partner->first_column_ = first_column_; + partner->last_column_ = last_column_; + } +#endif + } +} + +// ======= Scenario common to all Refine*Partners* functions ======= +// ColPartitions are aiming to represent textlines, or horizontal slices +// of images, and we are trying to form bi-directional (upper/lower) chains +// of UNIQUE partner ColPartitions that can be made into blocks. +// The ColPartitions have previously been typed (see SetPartitionType) +// according to a combination of the content type and +// how they lie on the columns. We want to chain text into +// groups of a single type, but image ColPartitions may have been typed +// differently in different parts of the image, due to being non-rectangular. +// +// We previously ran a search for upper and lower partners, but there may +// be more than one, and they may be of mixed types, so now we wish to +// refine the partners down to at most one. +// A heading may have multiple partners: +// =============================== +// ======== ========== ========= +// ======== ========== ========= +// but it should be a different type. +// A regular flowing text line may have multiple partners: +// ================== =================== +// ======= ================= =========== +// This could be the start of a pull-out, or it might all be in a single +// column and might be caused by tightly spaced text, bold words, bullets, +// funny punctuation etc, all of which can cause textlines to be split into +// multiple ColPartitions. 
Pullouts and figure captions should now be different +// types so we can more aggressively merge groups of partners that all sit +// in a single column. +// +// Cleans up the partners of the given type so that there is at most +// one partner. This makes block creation simpler. +// If get_desperate is true, goes to more desperate merge methods +// to merge flowing text before breaking partnerships. +void ColPartition::RefinePartners(PolyBlockType type, bool get_desperate, + ColPartitionGrid* grid) { + if (TypesSimilar(type_, type)) { + RefinePartnersInternal(true, get_desperate, grid); + RefinePartnersInternal(false, get_desperate, grid); + } else if (type == PT_COUNT) { + // This is the final pass. Make sure only the correctly typed + // partners surivive, however many there are. + RefinePartnersByType(true, &upper_partners_); + RefinePartnersByType(false, &lower_partners_); + // It is possible for a merge to have given a partition multiple + // partners again, so the last resort is to use overlap which is + // guaranteed to leave at most one partner left. + if (!upper_partners_.empty() && !upper_partners_.singleton()) + RefinePartnersByOverlap(true, &upper_partners_); + if (!lower_partners_.empty() && !lower_partners_.singleton()) + RefinePartnersByOverlap(false, &lower_partners_); + } +} + +////////////////// PRIVATE CODE ///////////////////////////// + +// Cleans up the partners above if upper is true, else below. +// If get_desperate is true, goes to more desperate merge methods +// to merge flowing text before breaking partnerships. +void ColPartition::RefinePartnersInternal(bool upper, bool get_desperate, + ColPartitionGrid* grid) { + ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_; + if (!partners->empty() && !partners->singleton()) { + RefinePartnersByType(upper, partners); + if (!partners->empty() && !partners->singleton()) { + // Check for transitive partnerships and break the cycle. 
+ RefinePartnerShortcuts(upper, partners); + if (!partners->empty() && !partners->singleton()) { + // Types didn't fix it. Flowing text keeps the one with the longest + // sequence of singleton matching partners. All others max overlap. + if (TypesSimilar(type_, PT_FLOWING_TEXT) && get_desperate) { + RefineTextPartnersByMerge(upper, false, partners, grid); + if (!partners->empty() && !partners->singleton()) + RefineTextPartnersByMerge(upper, true, partners, grid); + } + // The last resort is to use overlap. + if (!partners->empty() && !partners->singleton()) + RefinePartnersByOverlap(upper, partners); + } + } + } +} + +// Cleans up the partners above if upper is true, else below. +// Restricts the partners to only desirable types. For text and BRT_HLINE this +// means the same type_ , and for image types it means any image type. +void ColPartition::RefinePartnersByType(bool upper, + ColPartition_CLIST* partners) { + bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), + bounding_box_.bottom()); + if (debug) { + tprintf("Refining %d %s partners by type for:\n", + partners->length(), upper ? "Upper" : "Lower"); + Print(); + } + ColPartition_C_IT it(partners); + // Purify text by type. + if (!IsImageType() && !IsLineType() && type() != PT_TABLE) { + // Keep only partners matching type_. + // Exception: PT_VERTICAL_TEXT is allowed to stay with the other + // text types if it is the only partner. + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* partner = it.data(); + if (!TypesSimilar(type_, partner->type_)) { + if (debug) { + tprintf("Removing partner:"); + partner->Print(); + } + partner->RemovePartner(!upper, this); + it.extract(); + } else if (debug) { + tprintf("Keeping partner:"); + partner->Print(); + } + } + } else { + // Only polyimages are allowed to have partners of any kind! 
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* partner = it.data(); + if (partner->blob_type() != BRT_POLYIMAGE || + blob_type() != BRT_POLYIMAGE) { + if (debug) { + tprintf("Removing partner:"); + partner->Print(); + } + partner->RemovePartner(!upper, this); + it.extract(); + } else if (debug) { + tprintf("Keeping partner:"); + partner->Print(); + } + } + } +} + +// Cleans up the partners above if upper is true, else below. +// Remove transitive partnerships: this<->a, and a<->b and this<->b. +// Gets rid of this<->b, leaving a clean chain. +// Also if we have this<->a and a<->this, then gets rid of this<->a, as +// this has multiple partners. +void ColPartition::RefinePartnerShortcuts(bool upper, + ColPartition_CLIST* partners) { + bool done_any = false; + do { + done_any = false; + ColPartition_C_IT it(partners); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* a = it.data(); + // Check for a match between all of a's partners (it1/b1) and all + // of this's partners (it2/b2). + ColPartition_C_IT it1(upper ? &a->upper_partners_ : &a->lower_partners_); + for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) { + ColPartition* b1 = it1.data(); + if (b1 == this) { + done_any = true; + it.extract(); + a->RemovePartner(!upper, this); + break; + } + ColPartition_C_IT it2(partners); + for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) { + ColPartition* b2 = it2.data(); + if (b1 == b2) { + // Jackpot! b2 should not be a partner of this. + it2.extract(); + b2->RemovePartner(!upper, this); + done_any = true; + // That potentially invalidated all the iterators, so break out + // and start again. + break; + } + } + if (done_any) + break; + } + if (done_any) + break; + } + } while (done_any && !partners->empty() && !partners->singleton()); +} + +// Cleans up the partners above if upper is true, else below. +// If multiple text partners can be merged, (with each other, NOT with this), +// then do so. 
+// If desperate is true, then an increase in overlap with the merge is +// allowed. If the overlap increases, then the desperately_merged_ flag +// is set, indicating that the textlines probably need to be regenerated +// by aggressive line fitting/splitting, as there are probably vertically +// joined blobs that cross textlines. +void ColPartition::RefineTextPartnersByMerge(bool upper, bool desperate, + ColPartition_CLIST* partners, + ColPartitionGrid* grid) { + bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), + bounding_box_.bottom()); + if (debug) { + tprintf("Refining %d %s partners by merge for:\n", + partners->length(), upper ? "Upper" : "Lower"); + Print(); + } + while (!partners->empty() && !partners->singleton()) { + // Absorb will mess up the iterators, so we have to merge one partition + // at a time and rebuild the iterators each time. + ColPartition_C_IT it(partners); + ColPartition* part = it.data(); + // Gather a list of merge candidates, from the list of partners, that + // are all in the same single column. See general scenario comment above. + ColPartition_CLIST candidates; + ColPartition_C_IT cand_it(&candidates); + for (it.forward(); !it.at_first(); it.forward()) { + ColPartition* candidate = it.data(); + if (part->first_column_ == candidate->last_column_ && + part->last_column_ == candidate->first_column_) + cand_it.add_after_then_move(it.data()); + } + int overlap_increase; + ColPartition* candidate = grid->BestMergeCandidate(part, &candidates, debug, + nullptr, &overlap_increase); + if (candidate != nullptr && (overlap_increase <= 0 || desperate)) { + if (debug) { + tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", + part->HCoreOverlap(*candidate), part->VCoreOverlap(*candidate), + overlap_increase); + } + // Remove before merge and re-insert to keep the integrity of the grid. 
+ grid->RemoveBBox(candidate); + grid->RemoveBBox(part); + part->Absorb(candidate, nullptr); + // We modified the box of part, so re-insert it into the grid. + grid->InsertBBox(true, true, part); + if (overlap_increase > 0) + part->desperately_merged_ = true; + } else { + break; // Can't merge. + } + } +} + +// Cleans up the partners above if upper is true, else below. +// Keep the partner with the biggest overlap. +void ColPartition::RefinePartnersByOverlap(bool upper, + ColPartition_CLIST* partners) { + bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), + bounding_box_.bottom()); + if (debug) { + tprintf("Refining %d %s partners by overlap for:\n", + partners->length(), upper ? "Upper" : "Lower"); + Print(); + } + ColPartition_C_IT it(partners); + ColPartition* best_partner = it.data(); + // Find the partner with the best overlap. + int best_overlap = 0; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* partner = it.data(); + int overlap = std::min(bounding_box_.right(), partner->bounding_box_.right()) + - std::max(bounding_box_.left(), partner->bounding_box_.left()); + if (overlap > best_overlap) { + best_overlap = overlap; + best_partner = partner; + } + } + // Keep only the best partner. + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* partner = it.data(); + if (partner != best_partner) { + if (debug) { + tprintf("Removing partner:"); + partner->Print(); + } + partner->RemovePartner(!upper, this); + it.extract(); + } + } +} + +// Return true if bbox belongs better in this than other. +bool ColPartition::ThisPartitionBetter(BLOBNBOX* bbox, + const ColPartition& other) { + const TBOX& box = bbox->bounding_box(); + // Margins take priority. 
+ int left = box.left(); + int right = box.right(); + if (left < left_margin_ || right > right_margin_) + return false; + if (left < other.left_margin_ || right > other.right_margin_) + return true; + int top = box.top(); + int bottom = box.bottom(); + int this_overlap = std::min(top, median_top_) - std::max(bottom, median_bottom_); + int other_overlap = std::min(top, other.median_top_) - + std::max(bottom, other.median_bottom_); + int this_miss = median_top_ - median_bottom_ - this_overlap; + int other_miss = other.median_top_ - other.median_bottom_ - other_overlap; + if (TabFind::WithinTestRegion(3, box.left(), box.bottom())) { + tprintf("Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n", + box.left(), box.bottom(), box.right(), box.top(), + this_overlap, other_overlap, this_miss, other_miss, + median_top_, other.median_top_); + } + if (this_miss < other_miss) + return true; + if (this_miss > other_miss) + return false; + if (this_overlap > other_overlap) + return true; + if (this_overlap < other_overlap) + return false; + return median_top_ >= other.median_top_; +} + +// Returns the median line-spacing between the current position and the end +// of the list. +// The iterator is passed by value so the iteration does not modify the +// caller's iterator. +static int MedianSpacing(int page_height, ColPartition_IT it) { + STATS stats(0, page_height); + while (!it.cycled_list()) { + ColPartition* part = it.data(); + it.forward(); + stats.add(part->bottom_spacing(), 1); + stats.add(part->top_spacing(), 1); + } + return static_cast<int>(stats.median() + 0.5); +} + +// Returns true if this column partition is in the same column as +// part. This function will only work after the SetPartitionType function +// has been called on both column partitions. This is useful for +// doing a SideSearch when you want things in the same page column. 
+// +// Currently called by the table detection code to identify if potential table +// partitions exist in the same column. +bool ColPartition::IsInSameColumnAs(const ColPartition& part) const { + // Overlap does not occur when last < part.first or first > part.last. + // In other words, one is completely to the side of the other. + // This is just DeMorgan's law applied to that so the function returns true. + return (last_column_ >= part.first_column_) && + (first_column_ <= part.last_column_); +} + +// Smoothes the spacings in the list into groups of equal linespacing. +// resolution is the resolution of the original image, used as a basis +// for thresholds in change of spacing. page_height is in pixels. +void ColPartition::SmoothSpacings(int resolution, int page_height, + ColPartition_LIST* parts) { + // The task would be trivial if we didn't have to allow for blips - + // occasional offsets in spacing caused by anomalous text, such as all + // caps, groups of descenders, joined words, Arabic etc. + // The neighbourhood stores a consecutive group of partitions so that + // blips can be detected correctly, yet conservatively enough to not + // mistake genuine spacing changes for blips. See example below. + ColPartition* neighbourhood[PN_COUNT]; + ColPartition_IT it(parts); + it.mark_cycle_pt(); + // Although we know nothing about the spacings is this list, the median is + // used as an approximation to allow blips. + // If parts of this block aren't spaced to the median, then we can't + // accept blips in those parts, but we'll recalculate it each time we + // split the block, so the median becomes more likely to match all the text. 
+ int median_space = MedianSpacing(page_height, it); + ColPartition_IT start_it(it); + ColPartition_IT end_it(it); + for (int i = 0; i < PN_COUNT; ++i) { + if (i < PN_UPPER || it.cycled_list()) { + neighbourhood[i] = nullptr; + } else { + if (i == PN_LOWER) + end_it = it; + neighbourhood[i] = it.data(); + it.forward(); + } + } + while (neighbourhood[PN_UPPER] != nullptr) { + // Test for end of a group. Normally SpacingsEqual is true within a group, + // but in the case of a blip, it will be false. Here is an example: + // Line enum Spacing below (spacing between tops of lines) + // 1 ABOVE2 20 + // 2 ABOVE1 20 + // 3 UPPER 15 + // 4 LOWER 25 + // 5 BELOW1 20 + // 6 BELOW2 20 + // Line 4 is all in caps (regular caps), so the spacing between line 3 + // and line 4 (looking at the tops) is smaller than normal, and the + // spacing between line 4 and line 5 is larger than normal, but the + // two of them add to twice the normal spacing. + // The following if has to accept unequal spacings 3 times to pass the + // blip (20/15, 15/25 and 25/20) + // When the blip is in the middle, OKSpacingBlip tests that one of + // ABOVE1 and BELOW1 matches the median. + // The first time, everything is shifted down 1, so we present + // OKSpacingBlip with neighbourhood+1 and check that PN_UPPER is median. + // The last time, everything is shifted up 1, so we present OKSpacingBlip + // with neighbourhood-1 and check that PN_LOWER matches the median. 
+ if (neighbourhood[PN_LOWER] == nullptr || + (!neighbourhood[PN_UPPER]->SpacingsEqual(*neighbourhood[PN_LOWER], + resolution) && + (neighbourhood[PN_UPPER] == nullptr || + neighbourhood[PN_LOWER] == nullptr || + !OKSpacingBlip(resolution, median_space, neighbourhood, 0)) && + (neighbourhood[PN_UPPER - 1] == nullptr || + neighbourhood[PN_LOWER - 1] == nullptr || + !OKSpacingBlip(resolution, median_space, neighbourhood, -1) || + !neighbourhood[PN_LOWER]->SpacingEqual(median_space, resolution)) && + (neighbourhood[PN_UPPER + 1] == nullptr || + neighbourhood[PN_LOWER + 1] == nullptr || + !OKSpacingBlip(resolution, median_space, neighbourhood, 1) || + !neighbourhood[PN_UPPER]->SpacingEqual(median_space, resolution)))) { + // The group has ended. PN_UPPER is the last member. + // Compute the mean spacing over the group. + ColPartition_IT sum_it(start_it); + ColPartition* last_part = neighbourhood[PN_UPPER]; + double total_bottom = 0.0; + double total_top = 0.0; + int total_count = 0; + ColPartition* upper = sum_it.data(); + // We do not process last_part, as its spacing is different. + while (upper != last_part) { + total_bottom += upper->bottom_spacing(); + total_top += upper->top_spacing(); + ++total_count; + sum_it.forward(); + upper = sum_it.data(); + } + if (total_count > 0) { + // There were at least 2 lines, so set them all to the mean. + int top_spacing = static_cast<int>(total_top / total_count + 0.5); + int bottom_spacing = static_cast<int>(total_bottom / total_count + 0.5); + if (textord_debug_tabfind) { + tprintf("Spacing run ended. Cause:"); + if (neighbourhood[PN_LOWER] == nullptr) { + tprintf("No more lines\n"); + } else { + tprintf("Spacing change. 
Spacings:\n"); + for (int i = 0; i < PN_COUNT; ++i) { + if (neighbourhood[i] == nullptr) { + tprintf("NULL"); + if (i > 0 && neighbourhood[i - 1] != nullptr) { + if (neighbourhood[i - 1]->SingletonPartner(false) != nullptr) { + tprintf(" Lower partner:"); + neighbourhood[i - 1]->SingletonPartner(false)->Print(); + } else { + tprintf(" nullptr lower partner:\n"); + } + } else { + tprintf("\n"); + } + } else { + tprintf("Top = %d, bottom = %d\n", + neighbourhood[i]->top_spacing(), + neighbourhood[i]->bottom_spacing()); + } + } + } + tprintf("Mean spacing = %d/%d\n", top_spacing, bottom_spacing); + } + sum_it = start_it; + upper = sum_it.data(); + while (upper != last_part) { + upper->set_top_spacing(top_spacing); + upper->set_bottom_spacing(bottom_spacing); + if (textord_debug_tabfind) { + tprintf("Setting mean on:"); + upper->Print(); + } + sum_it.forward(); + upper = sum_it.data(); + } + } + // PN_LOWER starts the next group and end_it is the next start_it. + start_it = end_it; + // Recalculate the median spacing to maximize the chances of detecting + // spacing blips. + median_space = MedianSpacing(page_height, end_it); + } + // Shuffle pointers. + for (int j = 1; j < PN_COUNT; ++j) { + neighbourhood[j - 1] = neighbourhood[j]; + } + if (it.cycled_list()) { + neighbourhood[PN_COUNT - 1] = nullptr; + } else { + neighbourhood[PN_COUNT - 1] = it.data(); + it.forward(); + } + end_it.forward(); + } +} + +// Returns true if the parts array of pointers to partitions matches the +// condition for a spacing blip. See SmoothSpacings for what this means +// and how it is used. +bool ColPartition::OKSpacingBlip(int resolution, int median_spacing, + ColPartition** parts, int offset) { + // The blip is OK if upper and lower sum to an OK value and at least + // one of above1 and below1 is equal to the median. 
+ parts += offset; + return parts[PN_UPPER]->SummedSpacingOK(*parts[PN_LOWER], + median_spacing, resolution) && + ((parts[PN_ABOVE1] != nullptr && + parts[PN_ABOVE1]->SpacingEqual(median_spacing, resolution)) || + (parts[PN_BELOW1] != nullptr && + parts[PN_BELOW1]->SpacingEqual(median_spacing, resolution))); +} + +// Returns true if both the top and bottom spacings of this match the given +// spacing to within suitable margins dictated by the image resolution. +bool ColPartition::SpacingEqual(int spacing, int resolution) const { + int bottom_error = BottomSpacingMargin(resolution); + int top_error = TopSpacingMargin(resolution); + return NearlyEqual(bottom_spacing_, spacing, bottom_error) && + NearlyEqual(top_spacing_, spacing, top_error); +} + +// Returns true if both the top and bottom spacings of this and other +// match to within suitable margins dictated by the image resolution. +bool ColPartition::SpacingsEqual(const ColPartition& other, + int resolution) const { + int bottom_error = std::max(BottomSpacingMargin(resolution), + other.BottomSpacingMargin(resolution)); + int top_error = std::max(TopSpacingMargin(resolution), + other.TopSpacingMargin(resolution)); + return NearlyEqual(bottom_spacing_, other.bottom_spacing_, bottom_error) && + (NearlyEqual(top_spacing_, other.top_spacing_, top_error) || + NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2, + bottom_error)); +} + +// Returns true if the sum spacing of this and other match the given +// spacing (or twice the given spacing) to within a suitable margin dictated +// by the image resolution. 
+bool ColPartition::SummedSpacingOK(const ColPartition& other, + int spacing, int resolution) const { + int bottom_error = std::max(BottomSpacingMargin(resolution), + other.BottomSpacingMargin(resolution)); + int top_error = std::max(TopSpacingMargin(resolution), + other.TopSpacingMargin(resolution)); + int bottom_total = bottom_spacing_ + other.bottom_spacing_; + int top_total = top_spacing_ + other.top_spacing_; + return (NearlyEqual(spacing, bottom_total, bottom_error) && + NearlyEqual(spacing, top_total, top_error)) || + (NearlyEqual(spacing * 2, bottom_total, bottom_error) && + NearlyEqual(spacing * 2, top_total, top_error)); +} + +// Returns a suitable spacing margin that can be applied to bottoms of +// text lines, based on the resolution and the stored side_step_. +int ColPartition::BottomSpacingMargin(int resolution) const { + return static_cast<int>(kMaxSpacingDrift * resolution + 0.5) + side_step_; +} + +// Returns a suitable spacing margin that can be applied to tops of +// text lines, based on the resolution and the stored side_step_. +int ColPartition::TopSpacingMargin(int resolution) const { + return static_cast<int>(kMaxTopSpacingFraction * median_height_ + 0.5) + + BottomSpacingMargin(resolution); +} + +// Returns true if the median text sizes of this and other agree to within +// a reasonable multiplicative factor. +bool ColPartition::SizesSimilar(const ColPartition& other) const { + return median_height_ <= other.median_height_ * kMaxSizeRatio && + other.median_height_ <= median_height_ * kMaxSizeRatio; +} + +// Helper updates margin_left and margin_right, being the bounds of the left +// margin of part of a block. Returns false and does not update the bounds if +// this partition has a disjoint margin with the established margin. 
+static bool UpdateLeftMargin(const ColPartition& part, + int* margin_left, int* margin_right) { + const TBOX& part_box = part.bounding_box(); + int top = part_box.top(); + int bottom = part_box.bottom(); + int tl_key = part.SortKey(part.left_margin(), top); + int tr_key = part.SortKey(part_box.left(), top); + int bl_key = part.SortKey(part.left_margin(), bottom); + int br_key = part.SortKey(part_box.left(), bottom); + int left_key = std::max(tl_key, bl_key); + int right_key = std::min(tr_key, br_key); + if (left_key <= *margin_right && right_key >= *margin_left) { + // This part is good - let's keep it. + *margin_right = std::min(*margin_right, right_key); + *margin_left = std::max(*margin_left, left_key); + return true; + } + return false; +} + +// Computes and returns in start, end a line segment formed from a +// forwards-iterated group of left edges of partitions that satisfy the +// condition that the intersection of the left margins is non-empty, ie the +// rightmost left margin is to the left of the leftmost left bounding box edge. +// On return the iterator is set to the start of the next run. +void ColPartition::LeftEdgeRun(ColPartition_IT* part_it, + ICOORD* start, ICOORD* end) { + ColPartition* part = part_it->data(); + ColPartition* start_part = part; + int start_y = part->bounding_box_.top(); + if (!part_it->at_first()) { + int prev_bottom = part_it->data_relative(-1)->bounding_box_.bottom(); + if (prev_bottom < start_y) + start_y = prev_bottom; + else if (prev_bottom > start_y) + start_y = (start_y + prev_bottom) / 2; + } + int end_y = part->bounding_box_.bottom(); + int margin_right = INT32_MAX; + int margin_left = -INT32_MAX; + UpdateLeftMargin(*part, &margin_left, &margin_right); + do { + part_it->forward(); + part = part_it->data(); + } while (!part_it->at_first() && + UpdateLeftMargin(*part, &margin_left, &margin_right)); + // The run ended. 
If we were pushed inwards, compute the next run and + // extend it backwards into the run we just calculated to find the end of + // this run that provides a tight box. + int next_margin_right = INT32_MAX; + int next_margin_left = -INT32_MAX; + UpdateLeftMargin(*part, &next_margin_left, &next_margin_right); + if (next_margin_left > margin_right) { + ColPartition_IT next_it(*part_it); + do { + next_it.forward(); + part = next_it.data(); + } while (!next_it.at_first() && + UpdateLeftMargin(*part, &next_margin_left, &next_margin_right)); + // Now extend the next run backwards into the original run to get the + // tightest fit. + do { + part_it->backward(); + part = part_it->data(); + } while (part != start_part && + UpdateLeftMargin(*part, &next_margin_left, &next_margin_right)); + part_it->forward(); + } + // Now calculate the end_y. + part = part_it->data_relative(-1); + end_y = part->bounding_box_.bottom(); + if (!part_it->at_first() && part_it->data()->bounding_box_.top() < end_y) + end_y = (end_y + part_it->data()->bounding_box_.top()) / 2; + start->set_y(start_y); + start->set_x(part->XAtY(margin_right, start_y)); + end->set_y(end_y); + end->set_x(part->XAtY(margin_right, end_y)); + if (textord_debug_tabfind && !part_it->at_first()) + tprintf("Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n", + start_y, end_y, part->XAtY(margin_left, end_y), + end->x(), part->left_margin_, part->bounding_box_.left()); +} + +// Helper updates margin_left and margin_right, being the bounds of the right +// margin of part of a block. Returns false and does not update the bounds if +// this partition has a disjoint margin with the established margin. 
+static bool UpdateRightMargin(const ColPartition& part, + int* margin_left, int* margin_right) { + const TBOX& part_box = part.bounding_box(); + int top = part_box.top(); + int bottom = part_box.bottom(); + int tl_key = part.SortKey(part_box.right(), top); + int tr_key = part.SortKey(part.right_margin(), top); + int bl_key = part.SortKey(part_box.right(), bottom); + int br_key = part.SortKey(part.right_margin(), bottom); + int left_key = std::max(tl_key, bl_key); + int right_key = std::min(tr_key, br_key); + if (left_key <= *margin_right && right_key >= *margin_left) { + // This part is good - let's keep it. + *margin_right = std::min(*margin_right, right_key); + *margin_left = std::max(*margin_left, left_key); + return true; + } + return false; +} + +// Computes and returns in start, end a line segment formed from a +// backwards-iterated group of right edges of partitions that satisfy the +// condition that the intersection of the right margins is non-empty, ie the +// leftmost right margin is to the right of the rightmost right bounding box +// edge. +// On return the iterator is set to the start of the next run. +void ColPartition::RightEdgeRun(ColPartition_IT* part_it, + ICOORD* start, ICOORD* end) { + ColPartition* part = part_it->data(); + ColPartition* start_part = part; + int start_y = part->bounding_box_.bottom(); + if (!part_it->at_last()) { + int next_y = part_it->data_relative(1)->bounding_box_.top(); + if (next_y > start_y) + start_y = next_y; + else if (next_y < start_y) + start_y = (start_y + next_y) / 2; + } + int end_y = part->bounding_box_.top(); + int margin_right = INT32_MAX; + int margin_left = -INT32_MAX; + UpdateRightMargin(*part, &margin_left, &margin_right); + do { + part_it->backward(); + part = part_it->data(); + } while (!part_it->at_last() && + UpdateRightMargin(*part, &margin_left, &margin_right)); + // The run ended. 
If we were pushed inwards, compute the next run and + // extend it backwards to find the end of this run for a tight box. + int next_margin_right = INT32_MAX; + int next_margin_left = -INT32_MAX; + UpdateRightMargin(*part, &next_margin_left, &next_margin_right); + if (next_margin_right < margin_left) { + ColPartition_IT next_it(*part_it); + do { + next_it.backward(); + part = next_it.data(); + } while (!next_it.at_last() && + UpdateRightMargin(*part, &next_margin_left, + &next_margin_right)); + // Now extend the next run forwards into the original run to get the + // tightest fit. + do { + part_it->forward(); + part = part_it->data(); + } while (part != start_part && + UpdateRightMargin(*part, &next_margin_left, + &next_margin_right)); + part_it->backward(); + } + // Now calculate the end_y. + part = part_it->data_relative(1); + end_y = part->bounding_box().top(); + if (!part_it->at_last() && + part_it->data()->bounding_box_.bottom() > end_y) + end_y = (end_y + part_it->data()->bounding_box_.bottom()) / 2; + start->set_y(start_y); + start->set_x(part->XAtY(margin_left, start_y)); + end->set_y(end_y); + end->set_x(part->XAtY(margin_left, end_y)); + if (textord_debug_tabfind && !part_it->at_last()) + tprintf("Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n", + start_y, end_y, end->x(), part->XAtY(margin_right, end_y), + part->bounding_box_.right(), part->right_margin_); +} + +} // namespace tesseract. diff --git a/tesseract/src/textord/colpartition.h b/tesseract/src/textord/colpartition.h new file mode 100644 index 00000000..5c299b3e --- /dev/null +++ b/tesseract/src/textord/colpartition.h @@ -0,0 +1,927 @@ +/////////////////////////////////////////////////////////////////////// +// File: colpartition.h +// Description: Class to hold partitions of the page that correspond +// roughly to text lines. +// Author: Ray Smith +// +// (C) Copyright 2008, Google Inc. 
+// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_TEXTORD_COLPARTITION_H_ +#define TESSERACT_TEXTORD_COLPARTITION_H_ + +#include "bbgrid.h" +#include "blobbox.h" // For BlobRegionType. +#include "ocrblock.h" +#include "rect.h" // For TBOX. +#include "scrollview.h" +#include "tabfind.h" // For WidthCallback. +#include "tabvector.h" // For BLOBNBOX_CLIST. + +#include <algorithm> + +namespace tesseract { + +// Number of colors in the color1, color2 arrays. +const int kRGBRMSColors = 4; + +class ColPartition; +class ColPartitionSet; +class ColPartitionGrid; +class WorkingPartSet; +class WorkingPartSet_LIST; + +// An enum to indicate how a partition sits on the columns. +// The order of flowing/heading/pullout must be kept consistent with +// PolyBlockType. +enum ColumnSpanningType { + CST_NOISE, // Strictly between columns. + CST_FLOWING, // Strictly within a single column. + CST_HEADING, // Spans multiple columns. + CST_PULLOUT, // Touches multiple columns, but doesn't span them. + CST_COUNT // Number of entries. +}; + +ELIST2IZEH(ColPartition) +CLISTIZEH(ColPartition) + +/** + * ColPartition is a partition of a horizontal slice of the page. + * It starts out as a collection of blobs at a particular y-coord in the grid, + * but ends up (after merging and uniquing) as an approximate text line. 
+ * ColPartitions are also used to hold a partitioning of the page into + * columns, each representing one column. Although a ColPartition applies + * to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions + * emerges, which represents the columns over a wide y-coordinate range. + */ +class TESS_API ColPartition : public ELIST2_LINK { + public: + // This empty constructor is here only so that the class can be ELISTIZED. + // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier + // and eliminate CLASSNAME##_copier. + ColPartition() = default; + + /** + * @param blob_type is the blob_region_type_ of the blobs in this partition. + * @param vertical is the direction of logical vertical on the possibly skewed image. + */ + ColPartition(BlobRegionType blob_type, const ICOORD& vertical); + /** + * Constructs a fake ColPartition with no BLOBNBOXes to represent a + * horizontal or vertical line, given a type and a bounding box. + */ + static ColPartition* MakeLinePartition(BlobRegionType blob_type, + const ICOORD& vertical, + int left, int bottom, + int right, int top); + + // Constructs and returns a fake ColPartition with a single fake BLOBNBOX, + // all made from a single TBOX. + // WARNING: Despite being on C_LISTs, the BLOBNBOX owns the C_BLOB and + // the ColPartition owns the BLOBNBOX!!! + // Call DeleteBoxes before deleting the ColPartition. + static ColPartition* FakePartition(const TBOX& box, + PolyBlockType block_type, + BlobRegionType blob_type, + BlobTextFlowType flow); + + // Constructs and returns a ColPartition with the given real BLOBNBOX, + // and sets it up to be a "big" partition (single-blob partition bigger + // than the surrounding text that may be a dropcap, two or more vertically + // touching characters, or some graphic element. + // If the given list is not nullptr, the partition is also added to the list. 
+ static ColPartition* MakeBigPartition(BLOBNBOX* box, + ColPartition_LIST* big_part_list); + + ~ColPartition(); + + // Simple accessors. + const TBOX& bounding_box() const { + return bounding_box_; + } + int left_margin() const { + return left_margin_; + } + void set_left_margin(int margin) { + left_margin_ = margin; + } + int right_margin() const { + return right_margin_; + } + void set_right_margin(int margin) { + right_margin_ = margin; + } + int median_top() const { + return median_top_; + } + int median_bottom() const { + return median_bottom_; + } + int median_left() const { + return median_left_; + } + int median_right() const { + return median_right_; + } + int median_height() const { + return median_height_; + } + void set_median_height(int height) { + median_height_ = height; + } + int median_width() const { + return median_width_; + } + void set_median_width(int width) { + median_width_ = width; + } + BlobRegionType blob_type() const { + return blob_type_; + } + void set_blob_type(BlobRegionType t) { + blob_type_ = t; + } + BlobTextFlowType flow() const { + return flow_; + } + void set_flow(BlobTextFlowType f) { + flow_ = f; + } + int good_blob_score() const { + return good_blob_score_; + } + bool good_width() const { + return good_width_; + } + bool good_column() const { + return good_column_; + } + bool left_key_tab() const { + return left_key_tab_; + } + int left_key() const { + return left_key_; + } + bool right_key_tab() const { + return right_key_tab_; + } + int right_key() const { + return right_key_; + } + PolyBlockType type() const { + return type_; + } + void set_type(PolyBlockType t) { + type_ = t; + } + BLOBNBOX_CLIST* boxes() { + return &boxes_; + } + int boxes_count() const { + return boxes_.length(); + } + void set_vertical(const ICOORD& v) { + vertical_ = v; + } + ColPartition_CLIST* upper_partners() { + return &upper_partners_; + } + ColPartition_CLIST* lower_partners() { + return &lower_partners_; + } + void 
set_working_set(WorkingPartSet* working_set) { + working_set_ = working_set; + } + bool block_owned() const { + return block_owned_; + } + void set_block_owned(bool owned) { + block_owned_ = owned; + } + bool desperately_merged() const { + return desperately_merged_; + } + ColPartitionSet* column_set() const { + return column_set_; + } + void set_side_step(int step) { + side_step_ = step; + } + int bottom_spacing() const { + return bottom_spacing_; + } + void set_bottom_spacing(int spacing) { + bottom_spacing_ = spacing; + } + int top_spacing() const { + return top_spacing_; + } + void set_top_spacing(int spacing) { + top_spacing_ = spacing; + } + + void set_table_type() { + if (type_ != PT_TABLE) { + type_before_table_ = type_; + type_ = PT_TABLE; + } + } + void clear_table_type() { + if (type_ == PT_TABLE) + type_ = type_before_table_; + } + bool inside_table_column() { + return inside_table_column_; + } + void set_inside_table_column(bool val) { + inside_table_column_ = val; + } + ColPartition* nearest_neighbor_above() const { + return nearest_neighbor_above_; + } + void set_nearest_neighbor_above(ColPartition* part) { + nearest_neighbor_above_ = part; + } + ColPartition* nearest_neighbor_below() const { + return nearest_neighbor_below_; + } + void set_nearest_neighbor_below(ColPartition* part) { + nearest_neighbor_below_ = part; + } + int space_above() const { + return space_above_; + } + void set_space_above(int space) { + space_above_ = space; + } + int space_below() const { + return space_below_; + } + void set_space_below(int space) { + space_below_ = space; + } + int space_to_left() const { + return space_to_left_; + } + void set_space_to_left(int space) { + space_to_left_ = space; + } + int space_to_right() const { + return space_to_right_; + } + void set_space_to_right(int space) { + space_to_right_ = space; + } + uint8_t* color1() { + return color1_; + } + uint8_t* color2() { + return color2_; + } + bool owns_blobs() const { + return owns_blobs_; + } + 
void set_owns_blobs(bool owns_blobs) { + // Do NOT change ownership flag when there are blobs in the list. + // Immediately set the ownership flag when creating copies. + ASSERT_HOST(boxes_.empty()); + owns_blobs_ = owns_blobs; + } + + // Inline quasi-accessors that require some computation. + + // Returns the middle y-coord of the bounding box. + int MidY() const { + return (bounding_box_.top() + bounding_box_.bottom()) / 2; + } + // Returns the middle y-coord of the median top and bottom. + int MedianY() const { + return (median_top_ + median_bottom_) / 2; + } + // Returns the middle x-coord of the bounding box. + int MidX() const { + return (bounding_box_.left() + bounding_box_.right()) / 2; + } + // Returns the sort key at any given x,y. + int SortKey(int x, int y) const { + return TabVector::SortKey(vertical_, x, y); + } + // Returns the x corresponding to the sortkey, y pair. + int XAtY(int sort_key, int y) const { + return TabVector::XAtY(vertical_, sort_key, y); + } + // Returns the x difference between the two sort keys. + int KeyWidth(int left_key, int right_key) const { + return (right_key - left_key) / vertical_.y(); + } + // Returns the column width between the left and right keys. + int ColumnWidth() const { + return KeyWidth(left_key_, right_key_); + } + // Returns the sort key of the box left edge. + int BoxLeftKey() const { + return SortKey(bounding_box_.left(), MidY()); + } + // Returns the sort key of the box right edge. + int BoxRightKey() const { + return SortKey(bounding_box_.right(), MidY()); + } + // Returns the left edge at the given y, using the sort key. + int LeftAtY(int y) const { + return XAtY(left_key_, y); + } + // Returns the right edge at the given y, using the sort key. + int RightAtY(int y) const { + return XAtY(right_key_, y); + } + // Returns true if the right edge of this is to the left of the right + // edge of other. 
+ bool IsLeftOf(const ColPartition& other) const { + return bounding_box_.right() < other.bounding_box_.right(); + } + // Returns true if the partition contains the given x coordinate at the y. + bool ColumnContains(int x, int y) const { + return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1; + } + // Returns true if there are no blobs in the list. + bool IsEmpty() const { + return boxes_.empty(); + } + // Returns true if there is a single blob in the list. + bool IsSingleton() const { + return boxes_.singleton(); + } + // Returns true if this and other overlap horizontally by bounding box. + bool HOverlaps(const ColPartition& other) const { + return bounding_box_.x_overlap(other.bounding_box_); + } + // Returns true if this and other's bounding boxes overlap vertically. + // TODO(rays) Make HOverlaps and VOverlaps truly symmetric. + bool VOverlaps(const ColPartition& other) const { + return bounding_box_.y_gap(other.bounding_box_) < 0; + } + // Returns the vertical overlap (by median) of this and other. + // WARNING! Only makes sense on horizontal partitions! + int VCoreOverlap(const ColPartition& other) const { + if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) { + return 0; + } + return std::min(median_top_, other.median_top_) - + std::max(median_bottom_, other.median_bottom_); + } + // Returns the horizontal overlap (by median) of this and other. + // WARNING! Only makes sense on vertical partitions! + int HCoreOverlap(const ColPartition& other) const { + return std::min(median_right_, other.median_right_) - + std::max(median_left_, other.median_left_); + } + // Returns true if this and other overlap significantly vertically. + // WARNING! Only makes sense on horizontal partitions! 
+ bool VSignificantCoreOverlap(const ColPartition& other) const { + if (median_bottom_ == INT32_MAX || other.median_bottom_ == INT32_MAX) { + return false; + } + int overlap = VCoreOverlap(other); + int height = std::min(median_top_ - median_bottom_, + other.median_top_ - other.median_bottom_); + return overlap * 3 > height; + } + // Returns true if this and other can be combined without putting a + // horizontal step in either left or right edge of the resulting block. + bool WithinSameMargins(const ColPartition& other) const { + return left_margin_ <= other.bounding_box_.left() && + bounding_box_.left() >= other.left_margin_ && + bounding_box_.right() <= other.right_margin_ && + right_margin_ >= other.bounding_box_.right(); + } + // Returns true if the region types (aligned_text_) match. + // Lines never match anything, as they should never be merged or chained. + bool TypesMatch(const ColPartition& other) const { + return TypesMatch(blob_type_, other.blob_type_); + } + static bool TypesMatch(BlobRegionType type1, BlobRegionType type2) { + return (type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN) && + !BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2); + } + + // Returns true if the types are similar to each other. 
+ static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2) { + return (type1 == type2 || + (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) || + (type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION)); + } + + // Returns true if partitions is of horizontal line type + bool IsLineType() const { + return PTIsLineType(type_); + } + // Returns true if partitions is of image type + bool IsImageType() const { + return PTIsImageType(type_); + } + // Returns true if partitions is of text type + bool IsTextType() const { + return PTIsTextType(type_); + } + // Returns true if partitions is of pullout(inter-column) type + bool IsPulloutType() const { + return PTIsPulloutType(type_); + } + // Returns true if the partition is of an exclusively vertical type. + bool IsVerticalType() const { + return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE; + } + // Returns true if the partition is of a definite horizontal type. + bool IsHorizontalType() const { + return blob_type_ == BRT_TEXT || blob_type_ == BRT_HLINE; + } + // Returns true is the partition is of a type that cannot be merged. + bool IsUnMergeableType() const { + return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE; + } + // Returns true if this partition is a vertical line + // TODO(nbeato): Use PartitionType enum when Ray's code is submitted. + bool IsVerticalLine() const { + return IsVerticalType() && IsLineType(); + } + // Returns true if this partition is a horizontal line + // TODO(nbeato): Use PartitionType enum when Ray's code is submitted. + bool IsHorizontalLine() const { + return IsHorizontalType() && IsLineType(); + } + + // Adds the given box to the partition, updating the partition bounds. + // The list of boxes in the partition is updated, ensuring that no box is + // recorded twice, and the boxes are kept in increasing left position. + void AddBox(BLOBNBOX* box); + + // Removes the given box from the partition, updating the bounds. 
+ void RemoveBox(BLOBNBOX* box); + + // Returns the tallest box in the partition, as measured perpendicular to the + // presumed flow of text. + BLOBNBOX* BiggestBox(); + + // Returns the bounding box excluding the given box. + TBOX BoundsWithoutBox(BLOBNBOX* box); + + // Claims the boxes in the boxes_list by marking them with a this owner + // pointer. + void ClaimBoxes(); + + // nullptr the owner of the blobs in this partition, so they can be deleted + // independently of the ColPartition. + void DisownBoxes(); + // nullptr the owner of the blobs in this partition that are owned by this + // partition, so they can be deleted independently of the ColPartition. + // Any blobs that are not owned by this partition get to keep their owner + // without an assert failure. + void DisownBoxesNoAssert(); + // Nulls the owner of the blobs in this partition that are owned by this + // partition and not leader blobs, removing them from the boxes_ list, thus + // turning this partition back to a leader partition if it contains a leader, + // or otherwise leaving it empty. Returns true if any boxes remain. + bool ReleaseNonLeaderBoxes(); + + // Delete the boxes that this partition owns. + void DeleteBoxes(); + + // Reflects the partition in the y-axis, assuming that its blobs have + // already been done. Corrects only a limited part of the members, since + // this function is assumed to be used shortly after initial creation, which + // is before a lot of the members are used. + void ReflectInYAxis(); + + // Returns true if this is a legal partition - meaning that the conditions + // left_margin <= bounding_box left + // left_key <= bounding box left key + // bounding box left <= bounding box right + // and likewise for right margin and key + // are all met. + bool IsLegal(); + + // Returns true if the left and right edges are approximately equal. + bool MatchingColumns(const ColPartition& other) const; + + // Returns true if the colors match for two text partitions. 
+ bool MatchingTextColor(const ColPartition& other) const; + + // Returns true if the sizes match for two text partitions, + // taking orientation into account + bool MatchingSizes(const ColPartition& other) const; + + // Returns true if there is no tabstop violation in merging this and other. + bool ConfirmNoTabViolation(const ColPartition& other) const; + + // Returns true if other has a similar stroke width to this. + bool MatchingStrokeWidth(const ColPartition& other, + double fractional_tolerance, + double constant_tolerance) const; + // Returns true if candidate is an acceptable diacritic base char merge + // with this as the diacritic. + bool OKDiacriticMerge(const ColPartition& candidate, bool debug) const; + + // Sets the sort key using either the tab vector, or the bounding box if + // the tab vector is nullptr. If the tab_vector lies inside the bounding_box, + // use the edge of the box as a key any way. + void SetLeftTab(const TabVector* tab_vector); + void SetRightTab(const TabVector* tab_vector); + + // Copies the left/right tab from the src partition, but if take_box is + // true, copies the box instead and uses that as a key. + void CopyLeftTab(const ColPartition& src, bool take_box); + void CopyRightTab(const ColPartition& src, bool take_box); + + // Returns the left rule line x coord of the leftmost blob. + int LeftBlobRule() const; + // Returns the right rule line x coord of the rightmost blob. + int RightBlobRule() const; + + // Returns the density value for a particular BlobSpecialTextType. + float SpecialBlobsDensity(const BlobSpecialTextType type) const; + // Returns the number of blobs for a particular BlobSpecialTextType. + int SpecialBlobsCount(const BlobSpecialTextType type); + // Set the density value for a particular BlobSpecialTextType, should ONLY be + // used for debugging or testing. In production code, use + // ComputeSpecialBlobsDensity instead. 
+ void SetSpecialBlobsDensity( + const BlobSpecialTextType type, const float density); + // Compute the SpecialTextType density of blobs, where we assume + // that the SpecialTextType in the boxes_ has been set. + void ComputeSpecialBlobsDensity(); + + // Add a partner above if upper, otherwise below. + // Add them uniquely and keep the list sorted by box left. + // Partnerships are added symmetrically to partner and this. + void AddPartner(bool upper, ColPartition* partner); + // Removes the partner from this, but does not remove this from partner. + // This asymmetric removal is so as not to mess up the iterator that is + // working on partner's partner list. + void RemovePartner(bool upper, ColPartition* partner); + // Returns the partner if the given partner is a singleton, otherwise nullptr. + ColPartition* SingletonPartner(bool upper); + + // Merge with the other partition and delete it. + void Absorb(ColPartition* other, WidthCallback cb); + + // Returns true if the overlap between this and the merged pair of + // merge candidates is sufficiently trivial to be allowed. + // The merged box can graze the edge of this by the ok_box_overlap + // if that exceeds the margin to the median top and bottom. + bool OKMergeOverlap(const ColPartition& merge1, const ColPartition& merge2, + int ok_box_overlap, bool debug); + + // Find the blob at which to split this to minimize the overlap with the + // given box. Returns the first blob to go in the second partition. + BLOBNBOX* OverlapSplitBlob(const TBOX& box); + + // Split this partition keeping the first half in this and returning + // the second half. + // Splits by putting the split_blob and the blobs that follow + // in the second half, and the rest in the first half. + ColPartition* SplitAtBlob(BLOBNBOX* split_blob); + + // Splits this partition at the given x coordinate, returning the right + // half and keeping the left half in this. 
+ ColPartition* SplitAt(int split_x); + + // Recalculates all the coordinate limits of the partition. + void ComputeLimits(); + + // Returns the number of boxes that overlap the given box. + int CountOverlappingBoxes(const TBOX& box); + + // Computes and sets the type_, first_column_, last_column_ and column_set_. + // resolution refers to the ppi resolution of the image. + void SetPartitionType(int resolution, ColPartitionSet* columns); + + // Returns the PartitionType from the current BlobRegionType and a column + // flow spanning type ColumnSpanningType, generated by + // ColPartitionSet::SpanningType, that indicates how the partition sits + // in the columns. + PolyBlockType PartitionType(ColumnSpanningType flow) const; + + // Returns the first and last column touched by this partition. + // resolution refers to the ppi resolution of the image. + void ColumnRange(int resolution, ColPartitionSet* columns, + int* first_col, int* last_col); + + // Sets the internal flags good_width_ and good_column_. + void SetColumnGoodness(WidthCallback cb); + + // Determines whether the blobs in this partition mostly represent + // a leader (fixed pitch sequence) and sets the member blobs accordingly. + // Note that height is assumed to have been tested elsewhere, and that this + // function will find most fixed-pitch text as leader without a height filter. + // Leader detection is limited to sequences of identical width objects, + // such as .... or ----, so patterns, such as .-.-.-.-. will not be found. + bool MarkAsLeaderIfMonospaced(); + // Given the result of TextlineProjection::EvaluateColPartition, (positive for + // horizontal text, negative for vertical text, and near zero for non-text), + // sets the blob_type_ and flow_ for this partition to indicate whether it + // is strongly or weakly vertical or horizontal text, or non-text. 
+ void SetRegionAndFlowTypesFromProjectionValue(int value); + + // Sets all blobs with the partition blob type and flow, but never overwrite + // leader blobs, as we need to be able to identify them later. + void SetBlobTypes(); + + // Returns true if a decent baseline can be fitted through the blobs. + // Works for both horizontal and vertical text. + bool HasGoodBaseline(); + + // Adds this ColPartition to a matching WorkingPartSet if one can be found, + // otherwise starts a new one in the appropriate column, ending the previous. + void AddToWorkingSet(const ICOORD& bleft, const ICOORD& tright, + int resolution, ColPartition_LIST* used_parts, + WorkingPartSet_LIST* working_set); + + // From the given block_parts list, builds one or more BLOCKs and + // corresponding TO_BLOCKs, such that the line spacing is uniform in each. + // Created blocks are appended to the end of completed_blocks and to_blocks. + // The used partitions are put onto used_parts, as they may still be referred + // to in the partition grid. bleft, tright and resolution are the bounds + // and resolution of the original image. + static void LineSpacingBlocks(const ICOORD& bleft, const ICOORD& tright, + int resolution, + ColPartition_LIST* block_parts, + ColPartition_LIST* used_parts, + BLOCK_LIST* completed_blocks, + TO_BLOCK_LIST* to_blocks); + // Constructs a block from the given list of partitions. + // Arguments are as LineSpacingBlocks above. + static TO_BLOCK* MakeBlock(const ICOORD& bleft, const ICOORD& tright, + ColPartition_LIST* block_parts, + ColPartition_LIST* used_parts); + + // Constructs a block from the given list of vertical text partitions. + // Currently only creates rectangular blocks. + static TO_BLOCK* MakeVerticalTextBlock(const ICOORD& bleft, + const ICOORD& tright, + ColPartition_LIST* block_parts, + ColPartition_LIST* used_parts); + + // Makes a TO_ROW matching this and moves all the blobs to it, transferring + // ownership to to returned TO_ROW. 
+ TO_ROW* MakeToRow(); + + + // Returns a copy of everything except the list of boxes. The resulting + // ColPartition is only suitable for keeping in a column candidate list. + ColPartition* ShallowCopy() const; + // Returns a copy of everything with a shallow copy of the blobs. + // The blobs are still owned by their original parent, so they are + // treated as read-only. + ColPartition* CopyButDontOwnBlobs(); + + #ifndef GRAPHICS_DISABLED + // Provides a color for BBGrid to draw the rectangle. + ScrollView::Color BoxColor() const; + #endif // !GRAPHICS_DISABLED + + // Prints debug information on this. + void Print() const; + // Prints debug information on the colors. + void PrintColors(); + + // Sets the types of all partitions in the run to be the max of the types. + void SmoothPartnerRun(int working_set_count); + + // Cleans up the partners of the given type so that there is at most + // one partner. This makes block creation simpler. + // If get_desperate is true, goes to more desperate merge methods + // to merge flowing text before breaking partnerships. + void RefinePartners(PolyBlockType type, bool get_desperate, + ColPartitionGrid* grid); + + // Returns true if this column partition is in the same column as + // part. This function will only work after the SetPartitionType function + // has been called on both column partitions. This is useful for + // doing a SideSearch when you want things in the same page column. + bool IsInSameColumnAs(const ColPartition& part) const; + + // Sort function to sort by bounding box. 
+ static int SortByBBox(const void* p1, const void* p2) { + const ColPartition* part1 = *static_cast<const ColPartition* const*>(p1); + const ColPartition* part2 = *static_cast<const ColPartition* const*>(p2); + int mid_y1 = part1->bounding_box_.y_middle(); + int mid_y2 = part2->bounding_box_.y_middle(); + if ((part2->bounding_box_.bottom() <= mid_y1 && + mid_y1 <= part2->bounding_box_.top()) || + (part1->bounding_box_.bottom() <= mid_y2 && + mid_y2 <= part1->bounding_box_.top())) { + // Sort by increasing x. + return part1->bounding_box_.x_middle() - part2->bounding_box_.x_middle(); + } + // Sort by decreasing y. + return mid_y2 - mid_y1; + } + + // Sets the column bounds. Primarily used in testing. + void set_first_column(int column) { + first_column_ = column; + } + void set_last_column(int column) { + last_column_ = column; + } + + private: + // Cleans up the partners above if upper is true, else below. + // If get_desperate is true, goes to more desperate merge methods + // to merge flowing text before breaking partnerships. + void RefinePartnersInternal(bool upper, bool get_desperate, + ColPartitionGrid* grid); + // Restricts the partners to only desirable types. For text and BRT_HLINE this + // means the same type_ , and for image types it means any image type. + void RefinePartnersByType(bool upper, ColPartition_CLIST* partners); + // Remove transitive partnerships: this<->a, and a<->b and this<->b. + // Gets rid of this<->b, leaving a clean chain. + // Also if we have this<->a and a<->this, then gets rid of this<->a, as + // this has multiple partners. + void RefinePartnerShortcuts(bool upper, ColPartition_CLIST* partners); + // If multiple text partners can be merged, then do so. + // If desperate is true, then an increase in overlap with the merge is + // allowed. 
If the overlap increases, then the desperately_merged_ flag + // is set, indicating that the textlines probably need to be regenerated + // by aggressive line fitting/splitting, as there are probably vertically + // joined blobs that cross textlines. + void RefineTextPartnersByMerge(bool upper, bool desperate, + ColPartition_CLIST* partners, + ColPartitionGrid* grid); + // Keep the partner with the biggest overlap. + void RefinePartnersByOverlap(bool upper, ColPartition_CLIST* partners); + + // Return true if bbox belongs better in this than other. + bool ThisPartitionBetter(BLOBNBOX* bbox, const ColPartition& other); + + // Smoothes the spacings in the list into groups of equal linespacing. + // resolution is the resolution of the original image, used as a basis + // for thresholds in change of spacing. page_height is in pixels. + static void SmoothSpacings(int resolution, int page_height, + ColPartition_LIST* parts); + + // Returns true if the parts array of pointers to partitions matches the + // condition for a spacing blip. See SmoothSpacings for what this means + // and how it is used. + static bool OKSpacingBlip(int resolution, int median_spacing, + ColPartition** parts, int offset); + + // Returns true if both the top and bottom spacings of this match the given + // spacing to within suitable margins dictated by the image resolution. + bool SpacingEqual(int spacing, int resolution) const; + + // Returns true if both the top and bottom spacings of this and other + // match to within suitable margins dictated by the image resolution. + bool SpacingsEqual(const ColPartition& other, int resolution) const; + + // Returns true if the sum spacing of this and other match the given + // spacing (or twice the given spacing) to within a suitable margin dictated + // by the image resolution. 
+ bool SummedSpacingOK(const ColPartition& other, + int spacing, int resolution) const; + + // Returns a suitable spacing margin that can be applied to bottoms of + // text lines, based on the resolution and the stored side_step_. + int BottomSpacingMargin(int resolution) const; + + // Returns a suitable spacing margin that can be applied to tops of + // text lines, based on the resolution and the stored side_step_. + int TopSpacingMargin(int resolution) const; + + // Returns true if the median text sizes of this and other agree to within + // a reasonable multiplicative factor. + bool SizesSimilar(const ColPartition& other) const; + + // Computes and returns in start, end a line segment formed from a + // forwards-iterated group of left edges of partitions that satisfy the + // condition that the rightmost left margin is to the left of the + // leftmost left bounding box edge. + // TODO(rays) Not good enough. Needs improving to tightly wrap text in both + // directions, and to loosely wrap images. + static void LeftEdgeRun(ColPartition_IT* part_it, + ICOORD* start, ICOORD* end); + // Computes and returns in start, end a line segment formed from a + // backwards-iterated group of right edges of partitions that satisfy the + // condition that the leftmost right margin is to the right of the + // rightmost right bounding box edge. + // TODO(rays) Not good enough. Needs improving to tightly wrap text in both + // directions, and to loosely wrap images. + static void RightEdgeRun(ColPartition_IT* part_it, + ICOORD* start, ICOORD* end); + + // The margins are determined by the position of the nearest vertically + // overlapping neighbour to the side. They indicate the maximum extent + // that the block/column may be extended without touching something else. + // Leftmost coordinate that the region may occupy over the y limits. + int left_margin_ = 0; + // Rightmost coordinate that the region may occupy over the y limits. 
+ int right_margin_ = 0; + // Bounding box of all blobs in the partition. + TBOX bounding_box_; + // Median top and bottom of blobs in this partition. + int median_bottom_ = 0; + int median_top_ = 0; + // Median height of blobs in this partition. + int median_height_ = 0; + // Median left and right of blobs in this partition. + int median_left_ = 0; + int median_right_ = 0; + // Median width of blobs in this partition. + int median_width_ = 0; + // blob_region_type_ for the blobs in this partition. + BlobRegionType blob_type_ = BRT_UNKNOWN; + BlobTextFlowType flow_ = BTFT_NONE; // Quality of text flow. + // Total of GoodTextBlob results for all blobs in the partition. + int good_blob_score_ = 0; + // True if this partition has a common width. + bool good_width_ = false; + // True if this is a good column candidate. + bool good_column_ = false; + // True if the left_key_ is from a tab vector. + bool left_key_tab_ = false; + // True if the right_key_ is from a tab vector. + bool right_key_tab_ = false; + // Left and right sort keys for the edges of the partition. + // If the respective *_key_tab_ is true then this key came from a tab vector. + // If not, then the class promises to keep the key equal to the sort key + // for the respective edge of the bounding box at the MidY, so that + // LeftAtY and RightAtY always returns an x coordinate on the line parallel + // to vertical_ through the bounding box edge at MidY. + int left_key_ = 0; + int right_key_ = 0; + // Type of this partition after looking at its relation to the columns. + PolyBlockType type_ = PT_UNKNOWN; + // The global vertical skew direction. + ICOORD vertical_; + // All boxes in the partition stored in increasing left edge coordinate. + BLOBNBOX_CLIST boxes_; + // The partitions above that matched this. + ColPartition_CLIST upper_partners_; + // The partitions below that matched this. + ColPartition_CLIST lower_partners_; + // The WorkingPartSet it lives in while blocks are being made. 
+ WorkingPartSet* working_set_ = nullptr; + // Column_set_ is the column layout applicable to this ColPartition. + ColPartitionSet* column_set_ = nullptr; + // Flag is true when AddBox is sorting vertically, false otherwise. + bool last_add_was_vertical_ = false; + // True when the partition's ownership has been taken from the grid and + // placed in a working set, or, after that, in the good_parts_ list. + bool block_owned_ = false; + // Flag to indicate that this partition was subjected to a desperate merge, + // and therefore the textlines need rebuilding. + bool desperately_merged_ = false; + bool owns_blobs_ = true; // Does the partition own its blobs? + // The first and last column that this partition applies to. + // Flowing partitions (see type_) will have an equal first and last value + // of the form 2n + 1, where n is the zero-based index into the partitions + // in column_set_. (See ColPartitionSet::GetColumnByIndex). + // Heading partitions will have unequal values of the same form. + // Pullout partitions will have equal values, but may have even values, + // indicating placement between columns. + int first_column_ = -1; + int last_column_ = -1; + // Linespacing data. + int side_step_ = 0; // Median y-shift to next blob on same line. + int top_spacing_ = 0; // Line spacing from median_top_. + int bottom_spacing_ = 0; // Line spacing from median_bottom_. + + // Nearest neighbor above with major x-overlap + ColPartition* nearest_neighbor_above_ = nullptr; + // Nearest neighbor below with major x-overlap + ColPartition* nearest_neighbor_below_ = nullptr; + int space_above_ = 0; // Distance from nearest_neighbor_above + int space_below_ = 0; // Distance from nearest_neighbor_below + int space_to_left_ = 0; // Distance from the left edge of the column + int space_to_right_ = 0; // Distance from the right edge of the column + // Color foreground/background data. 
+ uint8_t color1_[kRGBRMSColors]; + uint8_t color2_[kRGBRMSColors]; + // The density of special blobs. + float special_blobs_densities_[BSTT_COUNT]; + // Type of this partition before considering it as a table cell. This is + // used to revert the type if a partition is first marked as a table cell but + // later filtering steps decide it does not belong to a table + PolyBlockType type_before_table_ = PT_UNKNOWN; + // Check whether the current partition has been assigned to a table column. + bool inside_table_column_ = false; +}; + +// Typedef it now in case it becomes a class later. +using ColPartitionGridSearch = GridSearch<ColPartition, + ColPartition_CLIST, + ColPartition_C_IT> ; + +} // namespace tesseract. + +#endif // TESSERACT_TEXTORD_COLPARTITION_H_ diff --git a/tesseract/src/textord/colpartitiongrid.cpp b/tesseract/src/textord/colpartitiongrid.cpp new file mode 100644 index 00000000..fcf9b000 --- /dev/null +++ b/tesseract/src/textord/colpartitiongrid.cpp @@ -0,0 +1,1743 @@ +/////////////////////////////////////////////////////////////////////// +// File: colpartitiongrid.cpp +// Description: Class collecting code that acts on a BBGrid of ColPartitions. +// Author: Ray Smith +// +// (C) Copyright 2009, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "colpartitiongrid.h" +#include "colpartitionset.h" +#include "imagefind.h" + +#include <algorithm> + +namespace tesseract { + +// Max pad factor used to search the neighbourhood of a partition to smooth +// partition types. +const int kMaxPadFactor = 6; +// Max multiple of size (min(height, width)) for the distance of the nearest +// neighbour for the change of type to be used. +const int kMaxNeighbourDistFactor = 4; +// Maximum number of lines in a credible figure caption. +const int kMaxCaptionLines = 7; +// Min ratio between biggest and smallest gap to bound a caption. +const double kMinCaptionGapRatio = 2.0; +// Min ratio between biggest gap and mean line height to bound a caption. +const double kMinCaptionGapHeightRatio = 0.5; +// Min fraction of ColPartition height to be overlapping for margin purposes. +const double kMarginOverlapFraction = 0.25; +// Size ratio required to consider an unmerged overlapping partition to be big. +const double kBigPartSizeRatio = 1.75; +// Fraction of gridsize to allow arbitrary overlap between partitions. +const double kTinyEnoughTextlineOverlapFraction = 0.25; +// Max vertical distance of neighbouring ColPartition as a multiple of +// partition height for it to be a partner. +// TODO(rays) fix the problem that causes a larger number to not work well. +// The value needs to be larger as sparse text blocks in a page that gets +// marked as single column will not find adjacent lines as partners, and +// will merge horizontally distant, but aligned lines. See rep.4B3 p5. +// The value needs to be small because double-spaced legal docs written +// in a single column, but justified courier have widely spaced lines +// that need to get merged before they partner-up with the lines above +// and below. See legal.3B5 p13/17. 
Neither of these should depend on +// the value of kMaxPartitionSpacing to be successful, and ColPartition +// merging needs attention to fix this problem. +const double kMaxPartitionSpacing = 1.75; +// Margin by which text has to beat image or vice-versa to make a firm +// decision in GridSmoothNeighbour. +const int kSmoothDecisionMargin = 4; + +ColPartitionGrid::ColPartitionGrid(int gridsize, + const ICOORD& bleft, const ICOORD& tright) + : BBGrid<ColPartition, ColPartition_CLIST, ColPartition_C_IT>(gridsize, + bleft, tright) { +} + +// Handles a click event in a display window. +void ColPartitionGrid::HandleClick(int x, int y) { + BBGrid<ColPartition, + ColPartition_CLIST, ColPartition_C_IT>::HandleClick(x, y); + // Run a radial search for partitions that overlap. + ColPartitionGridSearch radsearch(this); + radsearch.SetUniqueMode(true); + radsearch.StartRadSearch(x, y, 1); + ColPartition* neighbour; + FCOORD click(x, y); + while ((neighbour = radsearch.NextRadSearch()) != nullptr) { + const TBOX& nbox = neighbour->bounding_box(); + if (nbox.contains(click)) { + tprintf("Block box:"); + neighbour->bounding_box().print(); + neighbour->Print(); + } + } +} + +// Merges ColPartitions in the grid that look like they belong in the same +// textline. +// For all partitions in the grid, calls the box_cb permanent callback +// to compute the search box, searches the box, and if a candidate is found, +// calls the confirm_cb to check any more rules. If the confirm_cb returns +// true, then the partitions are merged. +// Both callbacks are deleted before returning. +void ColPartitionGrid::Merges( + std::function<bool(ColPartition*, TBOX*)> box_cb, + std::function<bool(const ColPartition*, const ColPartition*)> confirm_cb) { + // Iterate the ColPartitions in the grid. 
+ ColPartitionGridSearch gsearch(this); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + if (MergePart(box_cb, confirm_cb, part)) + gsearch.RepositionIterator(); + } +} + +// For the given partition, calls the box_cb permanent callback +// to compute the search box, searches the box, and if a candidate is found, +// calls the confirm_cb to check any more rules. If the confirm_cb returns +// true, then the partitions are merged. +// Returns true if the partition is consumed by one or more merges. +bool ColPartitionGrid::MergePart( + std::function<bool(ColPartition*, TBOX*)> box_cb, + std::function<bool(const ColPartition*, const ColPartition*)> confirm_cb, + ColPartition* part) { + if (part->IsUnMergeableType()) + return false; + bool any_done = false; + // Repeatedly merge part while we find a best merge candidate that works. + bool merge_done = false; + do { + merge_done = false; + TBOX box = part->bounding_box(); + bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom()); + if (debug) { + tprintf("Merge candidate:"); + box.print(); + } + // Set up a rectangle search bounded by the part. + if (!box_cb(part, &box)) + continue; + // Create a list of merge candidates. + ColPartition_CLIST merge_candidates; + FindMergeCandidates(part, box, debug, &merge_candidates); + // Find the best merge candidate based on minimal overlap increase. + int overlap_increase; + ColPartition* neighbour = BestMergeCandidate(part, &merge_candidates, debug, + confirm_cb, + &overlap_increase); + if (neighbour != nullptr && overlap_increase <= 0) { + if (debug) { + tprintf("Merging:hoverlap=%d, voverlap=%d, OLI=%d\n", + part->HCoreOverlap(*neighbour), part->VCoreOverlap(*neighbour), + overlap_increase); + } + // Looks like a good candidate so merge it. + RemoveBBox(neighbour); + // We will modify the box of part, so remove it from the grid, merge + // it and then re-insert it into the grid. 
+ RemoveBBox(part); + part->Absorb(neighbour, nullptr); + InsertBBox(true, true, part); + merge_done = true; + any_done = true; + } else if (neighbour != nullptr) { + if (debug) { + tprintf("Overlapped when merged with increase %d: ", overlap_increase); + neighbour->bounding_box().print(); + } + } else if (debug) { + tprintf("No candidate neighbour returned\n"); + } + } while (merge_done); + return any_done; +} + +// Returns true if the given part and merge candidate might believably +// be part of a single text line according to the default rules. +// In general we only want to merge partitions that look like they +// are on the same text line, ie their median limits overlap, but we have +// to make exceptions for diacritics and stray punctuation. +static bool OKMergeCandidate(const ColPartition* part, + const ColPartition* candidate, + bool debug) { + const TBOX& part_box = part->bounding_box(); + if (candidate == part) + return false; // Ignore itself. + if (!part->TypesMatch(*candidate) || candidate->IsUnMergeableType()) + return false; // Don't mix inappropriate types. + + const TBOX& c_box = candidate->bounding_box(); + if (debug) { + tprintf("Examining merge candidate:"); + c_box.print(); + } + // Candidates must be within a reasonable distance. + if (candidate->IsVerticalType() || part->IsVerticalType()) { + int h_dist = -part->HCoreOverlap(*candidate); + if (h_dist >= std::max(part_box.width(), c_box.width()) / 2) { + if (debug) + tprintf("Too far away: h_dist = %d\n", h_dist); + return false; + } + } else { + // Coarse filter by vertical distance between partitions. + int v_dist = -part->VCoreOverlap(*candidate); + if (v_dist >= std::max(part_box.height(), c_box.height()) / 2) { + if (debug) + tprintf("Too far away: v_dist = %d\n", v_dist); + return false; + } + // Candidates must either overlap in median y, + // or part or candidate must be an acceptable diacritic. 
+ if (!part->VSignificantCoreOverlap(*candidate) && + !part->OKDiacriticMerge(*candidate, debug) && + !candidate->OKDiacriticMerge(*part, debug)) { + if (debug) + tprintf("Candidate fails overlap and diacritic tests!\n"); + return false; + } + } + return true; +} + +// Helper function to compute the increase in overlap of the parts list of +// Colpartitions with the combination of merge1 and merge2, compared to +// the overlap with them uncombined. +// An overlap is not counted if passes the OKMergeOverlap test with ok_overlap +// as the pixel overlap limit. merge1 and merge2 must both be non-nullptr. +static int IncreaseInOverlap(const ColPartition* merge1, + const ColPartition* merge2, + int ok_overlap, + ColPartition_CLIST* parts) { + ASSERT_HOST(merge1 != nullptr && merge2 != nullptr); + int total_area = 0; + ColPartition_C_IT it(parts); + TBOX merged_box(merge1->bounding_box()); + merged_box += merge2->bounding_box(); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* part = it.data(); + if (part == merge1 || part == merge2) + continue; + TBOX part_box = part->bounding_box(); + // Compute the overlap of the merged box with part. + int overlap_area = part_box.intersection(merged_box).area(); + if (overlap_area > 0 && !part->OKMergeOverlap(*merge1, *merge2, + ok_overlap, false)) { + total_area += overlap_area; + // Subtract the overlap of merge1 and merge2 individually. + overlap_area = part_box.intersection(merge1->bounding_box()).area(); + if (overlap_area > 0) + total_area -= overlap_area; + TBOX intersection_box = part_box.intersection(merge2->bounding_box()); + overlap_area = intersection_box.area(); + if (overlap_area > 0) { + total_area -= overlap_area; + // Add back the 3-way area. + intersection_box &= merge1->bounding_box(); // In-place intersection. 
+ overlap_area = intersection_box.area(); + if (overlap_area > 0) + total_area += overlap_area; + } + } + } + return total_area; +} + +// Helper function to test that each partition in candidates is either a +// good diacritic merge with part or an OK merge candidate with all others +// in the candidates list. +// ASCII Art Scenario: +// We sometimes get text such as "join-this" where the - is actually a long +// dash culled from a standard set of extra characters that don't match the +// font of the text. This makes its strokewidth not match and forms a broken +// set of 3 partitions for "join", "-" and "this" and the dash may slightly +// overlap BOTH words. +// ------- ------- +// | ==== | +// ------- ------- +// The standard merge rule: "you can merge 2 partitions as long as there is +// no increase in overlap elsewhere" fails miserably here. Merge any pair +// of partitions and the combined box overlaps more with the third than +// before. To allow the merge, we need to consider whether it is safe to +// merge everything, without merging separate text lines. For that we need +// everything to be an OKMergeCandidate (which is supposed to prevent +// separate text lines merging), but this is hard for diacritics to satisfy, +// so an alternative to being OKMergeCandidate with everything is to be an +// OKDiacriticMerge with part as the base character. 
+static bool TestCompatibleCandidates(const ColPartition& part, bool debug, + ColPartition_CLIST* candidates) { + ColPartition_C_IT it(candidates); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* candidate = it.data(); + if (!candidate->OKDiacriticMerge(part, false)) { + ColPartition_C_IT it2(it); + for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) { + ColPartition* candidate2 = it2.data(); + if (candidate2 != candidate && + !OKMergeCandidate(candidate, candidate2, false)) { + if (debug) { + tprintf("NC overlap failed:Candidate:"); + candidate2->bounding_box().print(); + tprintf("fails to be a good merge with:"); + candidate->bounding_box().print(); + } + return false; + } + } + } + } + return true; +} + +// Computes and returns the total overlap of all partitions in the grid. +// If overlap_grid is non-null, it is filled with a grid that holds empty +// partitions representing the union of all overlapped partitions. +int ColPartitionGrid::ComputeTotalOverlap(ColPartitionGrid** overlap_grid) { + int total_overlap = 0; + // Iterate the ColPartitions in the grid. 
+ ColPartitionGridSearch gsearch(this); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + ColPartition_CLIST neighbors; + const TBOX& part_box = part->bounding_box(); + FindOverlappingPartitions(part_box, part, &neighbors); + ColPartition_C_IT n_it(&neighbors); + bool any_part_overlap = false; + for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) { + const TBOX& n_box = n_it.data()->bounding_box(); + int overlap = n_box.intersection(part_box).area(); + if (overlap > 0 && overlap_grid != nullptr) { + if (*overlap_grid == nullptr) { + *overlap_grid = new ColPartitionGrid(gridsize(), bleft(), tright()); + } + (*overlap_grid)->InsertBBox(true, true, n_it.data()->ShallowCopy()); + if (!any_part_overlap) { + (*overlap_grid)->InsertBBox(true, true, part->ShallowCopy()); + } + } + any_part_overlap = true; + total_overlap += overlap; + } + } + return total_overlap; +} + +// Finds all the ColPartitions in the grid that overlap with the given +// box and returns them SortByBoxLeft(ed) and uniqued in the given list. +// Any partition equal to not_this (may be nullptr) is excluded. +void ColPartitionGrid::FindOverlappingPartitions(const TBOX& box, + const ColPartition* not_this, + ColPartition_CLIST* parts) { + ColPartitionGridSearch rsearch(this); + rsearch.StartRectSearch(box); + ColPartition* part; + while ((part = rsearch.NextRectSearch()) != nullptr) { + if (part != not_this) + parts->add_sorted(SortByBoxLeft<ColPartition>, true, part); + } +} + +// Finds and returns the best candidate ColPartition to merge with part, +// selected from the candidates list, based on the minimum increase in +// pairwise overlap among all the partitions overlapped by the combined box. +// If overlap_increase is not nullptr then it returns the increase in overlap +// that would result from the merge. 
+// confirm_cb is a permanent callback that (if non-null) will be used to +// confirm the validity of a proposed merge candidate before selecting it. +// +// ======HOW MERGING WORKS====== +// The problem: +// We want to merge all the parts of a textline together, but avoid merging +// separate textlines. Diacritics, i dots, punctuation, and broken characters +// are examples of small bits that need merging with the main textline. +// Drop-caps and descenders in one line that touch ascenders in the one below +// are examples of cases where we don't want to merge. +// +// The solution: +// Merges that increase overlap among other partitions are generally bad. +// Those that don't increase overlap (much) and minimize the total area +// seem to be good. +// +// Ascii art example: +// The text: +// groggy descenders +// minimum ascenders +// The boxes: The === represents a small box near or overlapping the lower box. +// ----------------- +// | | +// ----------------- +// -===------------- +// | | +// ----------------- +// In considering what to do with the small === box, we find the 2 larger +// boxes as neighbours and possible merge candidates, but merging with the +// upper box increases overlap with the lower box, whereas merging with the +// lower box does not increase overlap. +// If the small === box didn't overlap either to start with, total area +// would be minimized by merging with the nearer (lower) box. +// +// This is a simple example. In reality, we have to allow some increase +// in overlap, or tightly spaced text would end up in bits. 
+ColPartition* ColPartitionGrid::BestMergeCandidate( + const ColPartition* part, ColPartition_CLIST* candidates, bool debug, + std::function<bool(const ColPartition*, const ColPartition*)> confirm_cb, + int* overlap_increase) { + if (overlap_increase != nullptr) + *overlap_increase = 0; + if (candidates->empty()) + return nullptr; + int ok_overlap = + static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5); + // The best neighbour to merge with is the one that causes least + // total pairwise overlap among all the neighbours. + // If more than one offers the same total overlap, choose the one + // with the least total area. + const TBOX& part_box = part->bounding_box(); + ColPartition_C_IT it(candidates); + ColPartition* best_candidate = nullptr; + // Find the total combined box of all candidates and the original. + TBOX full_box(part_box); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* candidate = it.data(); + full_box += candidate->bounding_box(); + } + // Keep valid neighbours in a list. + ColPartition_CLIST neighbours; + // Now run a rect search of the merged box for overlapping neighbours, as + // we need anything that might be overlapped by the merged box. + FindOverlappingPartitions(full_box, part, &neighbours); + if (debug) { + tprintf("Finding best merge candidate from %d, %d neighbours for box:", + candidates->length(), neighbours.length()); + part_box.print(); + } + // If the best increase in overlap is positive, then we also check the + // worst non-candidate overlap. This catches the case of multiple good + // candidates that overlap each other when merged. If the worst + // non-candidate overlap is better than the best overlap, then return + // the worst non-candidate overlap instead. 
+ ColPartition_CLIST non_candidate_neighbours; + non_candidate_neighbours.set_subtract(SortByBoxLeft<ColPartition>, true, + &neighbours, candidates); + int worst_nc_increase = 0; + int best_increase = INT32_MAX; + int best_area = 0; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* candidate = it.data(); + if (confirm_cb != nullptr && !confirm_cb(part, candidate)) { + if (debug) { + tprintf("Candidate not confirmed:"); + candidate->bounding_box().print(); + } + continue; + } + int increase = IncreaseInOverlap(part, candidate, ok_overlap, &neighbours); + const TBOX& cand_box = candidate->bounding_box(); + if (best_candidate == nullptr || increase < best_increase) { + best_candidate = candidate; + best_increase = increase; + best_area = cand_box.bounding_union(part_box).area() - cand_box.area(); + if (debug) { + tprintf("New best merge candidate has increase %d, area %d, over box:", + increase, best_area); + full_box.print(); + candidate->Print(); + } + } else if (increase == best_increase) { + int area = cand_box.bounding_union(part_box).area() - cand_box.area(); + if (area < best_area) { + best_area = area; + best_candidate = candidate; + } + } + increase = IncreaseInOverlap(part, candidate, ok_overlap, + &non_candidate_neighbours); + if (increase > worst_nc_increase) + worst_nc_increase = increase; + } + if (best_increase > 0) { + // If the worst non-candidate increase is less than the best increase + // including the candidates, then all the candidates can merge together + // and the increase in outside overlap would be less, so use that result, + // but only if each candidate is either a good diacritic merge with part, + // or an ok merge candidate with all the others. + // See TestCompatibleCandidates for more explanation and a picture. 
// NOTE(review): the statements down to the first closing brace in column 0
// are the tail of a definition that starts before this chunk. They are
// reproduced unchanged.
    if (worst_nc_increase < best_increase &&
        TestCompatibleCandidates(*part, debug, candidates)) {
      best_increase = worst_nc_increase;
    }
  }
  if (overlap_increase != nullptr)
    *overlap_increase = best_increase;
  return best_candidate;
}

// Helper to remove the given box from the given partition, put it in its
// own partition, and add to the partition list.
static void RemoveBadBox(BLOBNBOX* box, ColPartition* part,
                         ColPartition_LIST* part_list) {
  part->RemoveBox(box);
  ColPartition::MakeBigPartition(box, part_list);
}


// Split partitions where it reduces overlap between their bounding boxes.
// ColPartitions are after all supposed to be a partitioning of the blobs
// AND of the space on the page!
// Blobs that cause overlaps get removed, put in individual partitions
// and added to the big_parts list. They are most likely characters on
// 2 textlines that touch, or something big like a dropcap.
// Singleton partitions that contain more than 2 neighbours they cannot be
// split away from are themselves moved to big_parts.
void ColPartitionGrid::SplitOverlappingPartitions(
    ColPartition_LIST* big_parts) {
  // Overlap allowance passed to OKMergeOverlap, as a fraction of gridsize.
  int ok_overlap =
      static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != nullptr) {
    // Set up a rectangle search bounded by the part.
    const TBOX& box = part->bounding_box();
    ColPartitionGridSearch rsearch(this);
    rsearch.SetUniqueMode(true);
    rsearch.StartRectSearch(box);
    int unresolved_overlaps = 0;

    ColPartition* neighbour;
    while ((neighbour = rsearch.NextRectSearch()) != nullptr) {
      if (neighbour == part)
        continue;
      const TBOX& neighbour_box = neighbour->bounding_box();
      if (neighbour->OKMergeOverlap(*part, *part, ok_overlap, false) &&
          part->OKMergeOverlap(*neighbour, *neighbour, ok_overlap, false))
        continue;  // The overlap is OK both ways.

      // If removal of the biggest box from either partition eliminates the
      // overlap, and it is much bigger than the box left behind, then
      // it is either a drop-cap, an inter-line join, or some junk that
      // we don't want anyway, so put it in the big_parts list.
      if (!part->IsSingleton()) {
        BLOBNBOX* excluded = part->BiggestBox();
        TBOX shrunken = part->BoundsWithoutBox(excluded);
        if (!shrunken.overlap(neighbour_box) &&
            excluded->bounding_box().height() >
            kBigPartSizeRatio * shrunken.height()) {
          // Removing the biggest box fixes the overlap, so do it!
          // part is taken out of the grid before it changes and re-inserted
          // afterwards; the neighbour search for this part is abandoned.
          gsearch.RemoveBBox();
          RemoveBadBox(excluded, part, big_parts);
          InsertBBox(true, true, part);
          gsearch.RepositionIterator();
          break;
        }
      } else if (box.contains(neighbour_box)) {
        ++unresolved_overlaps;
        continue;  // No amount of splitting will fix it.
      }
      // Same test with the roles of part and neighbour swapped.
      if (!neighbour->IsSingleton()) {
        BLOBNBOX* excluded = neighbour->BiggestBox();
        TBOX shrunken = neighbour->BoundsWithoutBox(excluded);
        if (!shrunken.overlap(box) &&
            excluded->bounding_box().height() >
            kBigPartSizeRatio * shrunken.height()) {
          // Removing the biggest box fixes the overlap, so do it!
          rsearch.RemoveBBox();
          RemoveBadBox(excluded, neighbour, big_parts);
          InsertBBox(true, true, neighbour);
          gsearch.RepositionIterator();
          break;
        }
      }
      // Neither biggest-box removal applied, so try splitting one of the
      // two partitions at a blob instead.
      int part_overlap_count = part->CountOverlappingBoxes(neighbour_box);
      int neighbour_overlap_count = neighbour->CountOverlappingBoxes(box);
      ColPartition* right_part = nullptr;
      if (neighbour_overlap_count <= part_overlap_count ||
          part->IsSingleton()) {
        // Try to split the neighbour to reduce overlap.
        BLOBNBOX* split_blob = neighbour->OverlapSplitBlob(box);
        if (split_blob != nullptr) {
          rsearch.RemoveBBox();
          right_part = neighbour->SplitAtBlob(split_blob);
          InsertBBox(true, true, neighbour);
          ASSERT_HOST(right_part != nullptr);
        }
      } else {
        // Try to split part to reduce overlap.
        BLOBNBOX* split_blob = part->OverlapSplitBlob(neighbour_box);
        if (split_blob != nullptr) {
          gsearch.RemoveBBox();
          right_part = part->SplitAtBlob(split_blob);
          InsertBBox(true, true, part);
          ASSERT_HOST(right_part != nullptr);
        }
      }
      if (right_part != nullptr) {
        // A split happened: add the new piece to the grid and stop looking
        // at neighbours of this part.
        InsertBBox(true, true, right_part);
        gsearch.RepositionIterator();
        rsearch.RepositionIterator();
        break;
      }
    }
    if (unresolved_overlaps > 2 && part->IsSingleton()) {
      // This part is no good so just add to big_parts.
      RemoveBBox(part);
      ColPartition_IT big_it(big_parts);
      part->set_block_owned(true);
      big_it.add_to_end(part);
      gsearch.RepositionIterator();
    }
  }
}

// Filters partitions of source_type by looking at local neighbours.
// Where a majority of neighbours have a text type, the partitions are
// changed to text, where the neighbours have image type, they are changed
// to image, and partitions that have no definite neighbourhood type are
// left unchanged.
// im_box and rotation are used to map blob coordinates onto the
// nontext_map, which is used to prevent the spread of text neighbourhoods
// into images. They are passed straight through to SmoothRegionType.
// Partitions whose blob_type is a line type are skipped.
// Returns true if anything was changed.
bool ColPartitionGrid::GridSmoothNeighbours(BlobTextFlowType source_type,
                                            Pix* nontext_map,
                                            const TBOX& im_box,
                                            const FCOORD& rotation) {
  // Iterate the ColPartitions in the grid.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  bool any_changed = false;
  while ((part = gsearch.NextFullSearch()) != nullptr) {
    if (part->flow() != source_type || BLOBNBOX::IsLineType(part->blob_type()))
      continue;
    const TBOX& box = part->bounding_box();
    bool debug = AlignedBlob::WithinTestRegion(2, box.left(), box.bottom());
    if (SmoothRegionType(nontext_map, im_box, rotation, debug, part))
      any_changed = true;
  }
  return any_changed;
}

// Reflects the grid and its colpartitions in the y-axis, assuming that
// all blob boxes have already been done.
+void ColPartitionGrid::ReflectInYAxis() { + ColPartition_LIST parts; + ColPartition_IT part_it(&parts); + // Iterate the ColPartitions in the grid to extract them. + ColPartitionGridSearch gsearch(this); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + part_it.add_after_then_move(part); + } + ICOORD bot_left(-tright().x(), bleft().y()); + ICOORD top_right(-bleft().x(), tright().y()); + // Reinitializing the grid with reflected coords also clears all the + // pointers, so parts will now own the ColPartitions. (Briefly). + Init(gridsize(), bot_left, top_right); + for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { + part = part_it.extract(); + part->ReflectInYAxis(); + InsertBBox(true, true, part); + } +} + +// Transforms the grid of partitions to the output blocks, putting each +// partition into a separate block. We don't really care about the order, +// as we just want to get as much text as possible without trying to organize +// it into proper blocks or columns. +// TODO(rays) some kind of sort function would be useful and probably better +// than the default here, which is to sort by order of the grid search. +void ColPartitionGrid::ExtractPartitionsAsBlocks(BLOCK_LIST* blocks, + TO_BLOCK_LIST* to_blocks) { + TO_BLOCK_IT to_block_it(to_blocks); + BLOCK_IT block_it(blocks); + // All partitions will be put on this list and deleted on return. + ColPartition_LIST parts; + ColPartition_IT part_it(&parts); + // Iterate the ColPartitions in the grid to extract them. + ColPartitionGridSearch gsearch(this); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + part_it.add_after_then_move(part); + // The partition has to be at least vaguely like text. 
+ BlobRegionType blob_type = part->blob_type(); + if (BLOBNBOX::IsTextType(blob_type) || + (blob_type == BRT_UNKNOWN && part->boxes_count() > 1)) { + PolyBlockType type = blob_type == BRT_VERT_TEXT ? PT_VERTICAL_TEXT + : PT_FLOWING_TEXT; + // Get metrics from the row that will be used for the block. + TBOX box = part->bounding_box(); + int median_width = part->median_width(); + int median_height = part->median_height(); + // Turn the partition into a TO_ROW. + TO_ROW* row = part->MakeToRow(); + if (row == nullptr) { + // This partition is dead. + part->DeleteBoxes(); + continue; + } + auto* block = new BLOCK("", true, 0, 0, box.left(), box.bottom(), + box.right(), box.top()); + block->pdblk.set_poly_block(new POLY_BLOCK(box, type)); + auto* to_block = new TO_BLOCK(block); + TO_ROW_IT row_it(to_block->get_rows()); + row_it.add_after_then_move(row); + // We haven't differentially rotated vertical and horizontal text at + // this point, so use width or height as appropriate. + if (blob_type == BRT_VERT_TEXT) { + to_block->line_size = static_cast<float>(median_width); + to_block->line_spacing = static_cast<float>(box.width()); + to_block->max_blob_size = static_cast<float>(box.width() + 1); + } else { + to_block->line_size = static_cast<float>(median_height); + to_block->line_spacing = static_cast<float>(box.height()); + to_block->max_blob_size = static_cast<float>(box.height() + 1); + } + if (to_block->line_size == 0) to_block->line_size = 1; + block_it.add_to_end(block); + to_block_it.add_to_end(to_block); + } else { + // This partition is dead. + part->DeleteBoxes(); + } + } + Clear(); + // Now it is safe to delete the ColPartitions as parts goes out of scope. +} + +// Rotates the grid and its colpartitions by the given angle, assuming that +// all blob boxes have already been done. +void ColPartitionGrid::Deskew(const FCOORD& deskew) { + ColPartition_LIST parts; + ColPartition_IT part_it(&parts); + // Iterate the ColPartitions in the grid to extract them. 
+ ColPartitionGridSearch gsearch(this); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + part_it.add_after_then_move(part); + } + // Rebuild the grid to the new size. + TBOX grid_box(bleft_, tright_); + grid_box.rotate_large(deskew); + Init(gridsize(), grid_box.botleft(), grid_box.topright()); + // Reinitializing the grid with rotated coords also clears all the + // pointers, so parts will now own the ColPartitions. (Briefly). + for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { + part = part_it.extract(); + part->ComputeLimits(); + InsertBBox(true, true, part); + } +} + +// Sets the left and right tabs of the partitions in the grid. +void ColPartitionGrid::SetTabStops(TabFind* tabgrid) { + // Iterate the ColPartitions in the grid. + ColPartitionGridSearch gsearch(this); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + const TBOX& part_box = part->bounding_box(); + TabVector* left_line = tabgrid->LeftTabForBox(part_box, true, false); + // If the overlapping line is not a left tab, try for non-overlapping. + if (left_line != nullptr && !left_line->IsLeftTab()) + left_line = tabgrid->LeftTabForBox(part_box, false, false); + if (left_line != nullptr && left_line->IsLeftTab()) + part->SetLeftTab(left_line); + TabVector* right_line = tabgrid->RightTabForBox(part_box, true, false); + if (right_line != nullptr && !right_line->IsRightTab()) + right_line = tabgrid->RightTabForBox(part_box, false, false); + if (right_line != nullptr && right_line->IsRightTab()) + part->SetRightTab(right_line); + part->SetColumnGoodness(tabgrid->WidthCB()); + } +} + +// Makes the ColPartSets and puts them in the PartSetVector ready +// for finding column bounds. Returns false if no partitions were found. 
+bool ColPartitionGrid::MakeColPartSets(PartSetVector* part_sets) { + auto* part_lists = new ColPartition_LIST[gridheight()]; + part_sets->reserve(gridheight()); + // Iterate the ColPartitions in the grid to get parts onto lists for the + // y bottom of each. + ColPartitionGridSearch gsearch(this); + gsearch.StartFullSearch(); + ColPartition* part; + bool any_parts_found = false; + while ((part = gsearch.NextFullSearch()) != nullptr) { + BlobRegionType blob_type = part->blob_type(); + if (blob_type != BRT_NOISE && + (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) { + int grid_x, grid_y; + const TBOX& part_box = part->bounding_box(); + GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y); + ColPartition_IT part_it(&part_lists[grid_y]); + part_it.add_to_end(part); + any_parts_found = true; + } + } + if (any_parts_found) { + for (int grid_y = 0; grid_y < gridheight(); ++grid_y) { + ColPartitionSet* line_set = nullptr; + if (!part_lists[grid_y].empty()) { + line_set = new ColPartitionSet(&part_lists[grid_y]); + } + part_sets->push_back(line_set); + } + } + delete [] part_lists; + return any_parts_found; +} + +// Makes a single ColPartitionSet consisting of a single ColPartition that +// represents the total horizontal extent of the significant content on the +// page. Used for the single column setting in place of automatic detection. +// Returns nullptr if the page is empty of significant content. +ColPartitionSet* ColPartitionGrid::MakeSingleColumnSet(WidthCallback cb) { + ColPartition* single_column_part = nullptr; + // Iterate the ColPartitions in the grid to get parts onto lists for the + // y bottom of each. + ColPartitionGridSearch gsearch(this); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + BlobRegionType blob_type = part->blob_type(); + if (blob_type != BRT_NOISE && + (blob_type != BRT_UNKNOWN || !part->boxes()->singleton())) { + // Consider for single column. 
+ BlobTextFlowType flow = part->flow(); + if ((blob_type == BRT_TEXT && + (flow == BTFT_STRONG_CHAIN || flow == BTFT_CHAIN || + flow == BTFT_LEADER || flow == BTFT_TEXT_ON_IMAGE)) || + blob_type == BRT_RECTIMAGE || blob_type == BRT_POLYIMAGE) { + if (single_column_part == nullptr) { + single_column_part = part->ShallowCopy(); + single_column_part->set_blob_type(BRT_TEXT); + // Copy the tabs from itself to properly setup the margins. + single_column_part->CopyLeftTab(*single_column_part, false); + single_column_part->CopyRightTab(*single_column_part, false); + } else { + if (part->left_key() < single_column_part->left_key()) + single_column_part->CopyLeftTab(*part, false); + if (part->right_key() > single_column_part->right_key()) + single_column_part->CopyRightTab(*part, false); + } + } + } + } + if (single_column_part != nullptr) { + // Make a ColPartitionSet out of the single_column_part as a candidate + // for the single column case. + single_column_part->SetColumnGoodness(cb); + return new ColPartitionSet(single_column_part); + } + return nullptr; +} + +// Mark the BLOBNBOXes in each partition as being owned by that partition. +void ColPartitionGrid::ClaimBoxes() { + // Iterate the ColPartitions in the grid. + ColPartitionGridSearch gsearch(this); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + part->ClaimBoxes(); + } +} + +// Retypes all the blobs referenced by the partitions in the grid. +// Image blobs are found and returned in the im_blobs list, as they are not +// owned by the block. +void ColPartitionGrid::ReTypeBlobs(BLOBNBOX_LIST* im_blobs) { + BLOBNBOX_IT im_blob_it(im_blobs); + ColPartition_LIST dead_parts; + ColPartition_IT dead_part_it(&dead_parts); + // Iterate the ColPartitions in the grid. 
+ ColPartitionGridSearch gsearch(this); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + BlobRegionType blob_type = part->blob_type(); + BlobTextFlowType flow = part->flow(); + bool any_blobs_moved = false; + if (blob_type == BRT_POLYIMAGE || blob_type == BRT_RECTIMAGE) { + BLOBNBOX_C_IT blob_it(part->boxes()); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + im_blob_it.add_after_then_move(blob); + } + } else if (blob_type != BRT_NOISE) { + // Make sure the blobs are marked with the correct type and flow. + BLOBNBOX_C_IT blob_it(part->boxes()); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + if (blob->region_type() == BRT_NOISE) { + // TODO(rays) Deprecated. Change this section to an assert to verify + // and then delete. + ASSERT_HOST(blob->cblob()->area() != 0); + blob->set_owner(nullptr); + blob_it.extract(); + any_blobs_moved = true; + } else { + blob->set_region_type(blob_type); + if (blob->flow() != BTFT_LEADER) + blob->set_flow(flow); + } + } + } + if (blob_type == BRT_NOISE || part->boxes()->empty()) { + BLOBNBOX_C_IT blob_it(part->boxes()); + part->DisownBoxes(); + dead_part_it.add_to_end(part); + gsearch.RemoveBBox(); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + if (blob->cblob()->area() == 0) { + // Any blob with zero area is a fake image blob and should be deleted. + delete blob->cblob(); + delete blob; + } + } + } else if (any_blobs_moved) { + gsearch.RemoveBBox(); + part->ComputeLimits(); + InsertBBox(true, true, part); + gsearch.RepositionIterator(); + } + } +} + +// The boxes within the partitions have changed (by deskew) so recompute +// the bounds of all the partitions and reinsert them into the grid. 
+void ColPartitionGrid::RecomputeBounds(int gridsize, + const ICOORD& bleft, + const ICOORD& tright, + const ICOORD& vertical) { + ColPartition_LIST saved_parts; + ColPartition_IT part_it(&saved_parts); + // Iterate the ColPartitions in the grid to get parts onto a list. + ColPartitionGridSearch gsearch(this); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + part_it.add_to_end(part); + } + // Reinitialize grid to the new size. + Init(gridsize, bleft, tright); + // Recompute the bounds of the parts and put them back in the new grid. + for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { + part = part_it.extract(); + part->set_vertical(vertical); + part->ComputeLimits(); + InsertBBox(true, true, part); + } +} + +// Improves the margins of the ColPartitions in the grid by calling +// FindPartitionMargins on each. +// best_columns, which may be nullptr, is an array of pointers indicating the +// column set at each y-coordinate in the grid. +// best_columns is usually the best_columns_ member of ColumnFinder. +void ColPartitionGrid::GridFindMargins(ColPartitionSet** best_columns) { + // Iterate the ColPartitions in the grid. + ColPartitionGridSearch gsearch(this); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + // Set up a rectangle search x-bounded by the column and y by the part. + ColPartitionSet* columns = best_columns != nullptr + ? best_columns[gsearch.GridY()] + : nullptr; + FindPartitionMargins(columns, part); + const TBOX& box = part->bounding_box(); + if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) { + tprintf("Computed margins for part:"); + part->Print(); + } + } +} + +// Improves the margins of the ColPartitions in the list by calling +// FindPartitionMargins on each. +// best_columns, which may be nullptr, is an array of pointers indicating the +// column set at each y-coordinate in the grid. 
+// best_columns is usually the best_columns_ member of ColumnFinder. +void ColPartitionGrid::ListFindMargins(ColPartitionSet** best_columns, + ColPartition_LIST* parts) { + ColPartition_IT part_it(parts); + for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { + ColPartition* part = part_it.data(); + ColPartitionSet* columns = nullptr; + if (best_columns != nullptr) { + const TBOX& part_box = part->bounding_box(); + // Get the columns from the y grid coord. + int grid_x, grid_y; + GridCoords(part_box.left(), part_box.bottom(), &grid_x, &grid_y); + columns = best_columns[grid_y]; + } + FindPartitionMargins(columns, part); + } +} + +// Deletes all the partitions in the grid after disowning all the blobs. +void ColPartitionGrid::DeleteParts() { + ColPartition_LIST dead_parts; + ColPartition_IT dead_it(&dead_parts); + ColPartitionGridSearch gsearch(this); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + part->DisownBoxes(); + dead_it.add_to_end(part); // Parts will be deleted on return. + } + Clear(); +} + +// Deletes all the partitions in the grid that are of type BRT_UNKNOWN and +// all the blobs in them. +void ColPartitionGrid::DeleteUnknownParts(TO_BLOCK* block) { + ColPartitionGridSearch gsearch(this); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + if (part->blob_type() == BRT_UNKNOWN) { + gsearch.RemoveBBox(); + // Once marked, the blobs will be swept up by DeleteUnownedNoise. + part->set_flow(BTFT_NONTEXT); + part->set_blob_type(BRT_NOISE); + part->SetBlobTypes(); + part->DisownBoxes(); + delete part; + } + } + block->DeleteUnownedNoise(); +} + +// Deletes all the partitions in the grid that are NOT of flow type BTFT_LEADER. 
void ColPartitionGrid::DeleteNonLeaderParts() {
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != nullptr) {
    if (part->flow() != BTFT_LEADER) {
      gsearch.RemoveBBox();
      // A part for which ReleaseNonLeaderBoxes returns true is kept and
      // re-inserted; otherwise it is deleted.
      if (part->ReleaseNonLeaderBoxes()) {
        InsertBBox(true, true, part);
        gsearch.RepositionIterator();
      } else {
        delete part;
      }
    }
  }
}

// Finds and marks text partitions that represent figure captions.
void ColPartitionGrid::FindFigureCaptions() {
  // For each image region find its best candidate text caption region,
  // if any and mark it as such.
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != nullptr) {
    if (part->IsImageType()) {
      const TBOX& part_box = part->bounding_box();
      bool debug = AlignedBlob::WithinTestRegion(2, part_box.left(),
                                                 part_box.bottom());
      ColPartition* best_caption = nullptr;
      int best_dist = 0;   // Distance to best_caption.
      int best_upper = 0;  // Direction of best_caption.
      // Handle both lower and upper directions.
      for (int upper = 0; upper < 2; ++upper) {
        ColPartition_C_IT partner_it(upper ? part->upper_partners()
                                           : part->lower_partners());
        // If there are no image partners, then this direction is ok.
        for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
             partner_it.forward()) {
          ColPartition* partner = partner_it.data();
          if (partner->IsImageType()) {
            break;
          }
        }
        // The loop above stopped early, i.e. an image partner was found.
        if (!partner_it.cycled_list()) continue;
        // Find the nearest totally overlapping text partner.
        for (partner_it.mark_cycle_pt(); !partner_it.cycled_list();
             partner_it.forward()) {
          ColPartition* partner = partner_it.data();
          if (!partner->IsTextType() || partner->type() == PT_TABLE) continue;
          const TBOX& partner_box = partner->bounding_box();
          if (debug) {
            tprintf("Finding figure captions for image part:");
            part_box.print();
            tprintf("Considering partner:");
            partner_box.print();
          }
          // The partner must lie within the horizontal extent of the image.
          if (partner_box.left() >= part_box.left() &&
              partner_box.right() <= part_box.right()) {
            int dist = partner_box.y_gap(part_box);
            if (best_caption == nullptr || dist < best_dist) {
              best_dist = dist;
              best_caption = partner;
              best_upper = upper;
            }
          }
        }
      }
      if (best_caption != nullptr) {
        if (debug) {
          tprintf("Best caption candidate:");
          best_caption->bounding_box().print();
        }
        // We have a candidate caption. Qualify it as being separable from
        // any body text. We are looking for either a small number of lines
        // or a big gap that indicates a separation from the body text.
        int line_count = 0;
        int biggest_gap = 0;
        int smallest_gap = INT16_MAX;
        int total_height = 0;
        int mean_height = 0;
        ColPartition* end_partner = nullptr;
        ColPartition* next_partner = nullptr;
        // Follow the chain of singleton partners away from the image.
        for (ColPartition* partner = best_caption; partner != nullptr &&
             line_count <= kMaxCaptionLines;
             partner = next_partner) {
          if (!partner->IsTextType()) {
            end_partner = partner;
            break;
          }
          ++line_count;
          total_height += partner->bounding_box().height();
          next_partner = partner->SingletonPartner(best_upper);
          if (next_partner != nullptr) {
            int gap = partner->bounding_box().y_gap(
                next_partner->bounding_box());
            // NOTE(review): a gap that sets a new biggest_gap never updates
            // smallest_gap (else-if) - confirm that this is intended.
            if (gap > biggest_gap) {
              biggest_gap = gap;
              end_partner = next_partner;
              mean_height = total_height / line_count;
            } else if (gap < smallest_gap) {
              smallest_gap = gap;
            }
            // If the gap looks big compared to the text size and the smallest
            // gap seen so far, then we can stop.
            if (biggest_gap > mean_height * kMinCaptionGapHeightRatio &&
                biggest_gap > smallest_gap * kMinCaptionGapRatio)
              break;
          }
        }
        if (debug) {
          // NOTE(review): the format string has no space in "smallest%d".
          tprintf("Line count=%d, biggest gap %d, smallest%d, mean height %d\n",
                  line_count, biggest_gap, smallest_gap, mean_height);
          if (end_partner != nullptr) {
            tprintf("End partner:");
            end_partner->bounding_box().print();
          }
        }
        if (next_partner == nullptr && line_count <= kMaxCaptionLines)
          end_partner = nullptr;  // No gap, but line count is small.
        if (line_count <= kMaxCaptionLines) {
          // This is a qualified caption. Mark the text as caption.
          for (ColPartition* partner = best_caption; partner != nullptr &&
               partner != end_partner;
               partner = next_partner) {
            partner->set_type(PT_CAPTION_TEXT);
            partner->SetBlobTypes();
            if (debug) {
              tprintf("Set caption type for partition:");
              partner->bounding_box().print();
            }
            next_partner = partner->SingletonPartner(best_upper);
          }
        }
      }
    }
  }
}

//////// Functions that manipulate ColPartitions in the part_grid_ /////
//////// to find chains of partner partitions of the same type.  ///////

// For every ColPartition in the grid, finds its upper and lower neighbours.
// Vertical-type partitions are searched sideways instead.
void ColPartitionGrid::FindPartitionPartners() {
  ColPartitionGridSearch gsearch(this);
  gsearch.StartFullSearch();
  ColPartition* part;
  while ((part = gsearch.NextFullSearch()) != nullptr) {
    if (part->IsVerticalType()) {
      FindVPartitionPartners(true, part);
      FindVPartitionPartners(false, part);
    } else {
      FindPartitionPartners(true, part);
      FindPartitionPartners(false, part);
    }
  }
}

// Finds the best partner in the given direction for the given partition.
// Stores the result with AddPartner.
void ColPartitionGrid::FindPartitionPartners(bool upper, ColPartition* part) {
  if (part->type() == PT_NOISE)
    return;  // Noise is not allowed to partner anything.
  const TBOX& box = part->bounding_box();
  int top = part->median_top();
  int bottom = part->median_bottom();
  int height = top - bottom;
  int mid_y = (bottom + top) / 2;
  ColPartitionGridSearch vsearch(this);
  // Search vertically from the middle of the part, away from it in the
  // requested direction.
  vsearch.StartVerticalSearch(box.left(), box.right(), part->MidY());
  ColPartition* neighbour;
  ColPartition* best_neighbour = nullptr;
  int best_dist = INT32_MAX;
  while ((neighbour = vsearch.NextVerticalSearch(!upper)) != nullptr) {
    if (neighbour == part || neighbour->type() == PT_NOISE)
      continue;  // Noise is not allowed to partner anything.
    int neighbour_bottom = neighbour->median_bottom();
    int neighbour_top = neighbour->median_top();
    int neighbour_y = (neighbour_bottom + neighbour_top) / 2;
    // Skip neighbours whose middle is on the wrong side of the part's.
    if (upper != (neighbour_y > mid_y))
      continue;
    if (!part->HOverlaps(*neighbour) && !part->WithinSameMargins(*neighbour))
      continue;
    if (!part->TypesMatch(*neighbour)) {
      // A type mismatch is only remembered if nothing has been found yet.
      if (best_neighbour == nullptr)
        best_neighbour = neighbour;
      continue;
    }
    int dist = upper ? neighbour_bottom - top : bottom - neighbour_top;
    if (dist <= kMaxPartitionSpacing * height) {
      if (dist < best_dist) {
        best_dist = dist;
        best_neighbour = neighbour;
      }
    } else {
      break;
    }
  }
  if (best_neighbour != nullptr)
    part->AddPartner(upper, best_neighbour);
}

// Finds the best partner in the given direction for the given partition.
// Stores the result with AddPartner.
void ColPartitionGrid::FindVPartitionPartners(bool to_the_left,
                                              ColPartition* part) {
  if (part->type() == PT_NOISE)
    return;  // Noise is not allowed to partner anything.
  const TBOX& box = part->bounding_box();
  int left = part->median_left();
  int right = part->median_right();
  // width is -1 when the median edges are inverted.
  int width = right >= left ? right - left : -1;
  int mid_x = (left + right) / 2;
  ColPartitionGridSearch hsearch(this);
  // Search sideways from the middle of the part in the requested direction.
  hsearch.StartSideSearch(mid_x, box.bottom(), box.top());
  ColPartition* neighbour;
  ColPartition* best_neighbour = nullptr;
  int best_dist = INT32_MAX;
  while ((neighbour = hsearch.NextSideSearch(to_the_left)) != nullptr) {
    if (neighbour == part || neighbour->type() == PT_NOISE)
      continue;  // Noise is not allowed to partner anything.
    int neighbour_left = neighbour->median_left();
    int neighbour_right = neighbour->median_right();
    int neighbour_x = (neighbour_left + neighbour_right) / 2;
    // Skip neighbours whose middle is on the wrong side of the part's.
    if (to_the_left != (neighbour_x < mid_x))
      continue;
    if (!part->VOverlaps(*neighbour))
      continue;
    if (!part->TypesMatch(*neighbour))
      continue;  // Only match to other vertical text.
    int dist = to_the_left ? left - neighbour_right : neighbour_left - right;
    if (dist <= kMaxPartitionSpacing * width) {
      if (dist < best_dist || best_neighbour == nullptr) {
        best_dist = dist;
        best_neighbour = neighbour;
      }
    } else {
      break;
    }
  }
  // For vertical partitions, the upper partner is to the left, and lower is
  // to the right.
  if (best_neighbour != nullptr)
    part->AddPartner(to_the_left, best_neighbour);
}

// For every ColPartition with multiple partners in the grid, reduces the
// number of partners to 0 or 1. If get_desperate is true, goes to more
// desperate merge methods to merge flowing text before breaking partnerships.
void ColPartitionGrid::RefinePartitionPartners(bool get_desperate) {
  ColPartitionGridSearch gsearch(this);
  // Refine in type order so that chasing multiple partners can be done
  // before eliminating type mis-matching partners.
  // Note that the loop bound is inclusive: PT_COUNT itself is also passed
  // to RefinePartners.
  for (int type = PT_UNKNOWN + 1; type <= PT_COUNT; type++) {
    // Iterate the ColPartitions in the grid.
    gsearch.StartFullSearch();
    ColPartition* part;
    while ((part = gsearch.NextFullSearch()) != nullptr) {
      part->RefinePartners(static_cast<PolyBlockType>(type),
                           get_desperate, this);
      // Iterator may have been messed up by a merge.
      gsearch.RepositionIterator();
    }
  }
}


// ========================== PRIVATE CODE ========================

// Finds and returns a list of candidate ColPartitions to merge with part.
// The candidates must overlap search_box, and when merged must not
// overlap any other partitions that are not overlapped by each individually.
void ColPartitionGrid::FindMergeCandidates(const ColPartition* part,
                                           const TBOX& search_box, bool debug,
                                           ColPartition_CLIST* candidates) {
  // Overlap allowance passed to OKMergeOverlap, as a fraction of gridsize.
  int ok_overlap =
      static_cast<int>(kTinyEnoughTextlineOverlapFraction * gridsize() + 0.5);
  const TBOX& part_box = part->bounding_box();
  // Now run the rect search.
  ColPartitionGridSearch rsearch(this);
  rsearch.SetUniqueMode(true);
  rsearch.StartRectSearch(search_box);
  ColPartition* candidate;
  while ((candidate = rsearch.NextRectSearch()) != nullptr) {
    if (!OKMergeCandidate(part, candidate, debug))
      continue;
    const TBOX& c_box = candidate->bounding_box();
    // Candidate seems to be a potential merge with part. If one contains
    // the other, then the merge is a no-brainer. Otherwise, search the
    // combined box to see if anything else is inappropriately overlapped.
    if (!part_box.contains(c_box) && !c_box.contains(part_box)) {
      // Search the combined rectangle to see if anything new is overlapped.
      // This is a preliminary test designed to quickly weed-out poor
      // merge candidates that would create a big list of overlapped objects
      // for the squared-order overlap analysis. Eg. vertical and horizontal
      // line-like objects that overlap real text when merged:
      // || ==========================
      // ||
      // ||  r e a l  t e x t
      // ||
      // ||
      TBOX merged_box(part_box);
      merged_box += c_box;
      ColPartitionGridSearch msearch(this);
      msearch.SetUniqueMode(true);
      msearch.StartRectSearch(merged_box);
      ColPartition* neighbour;
      while ((neighbour = msearch.NextRectSearch()) != nullptr) {
        if (neighbour == part || neighbour == candidate)
          continue;  // Ignore itself.
        if (neighbour->OKMergeOverlap(*part, *candidate, ok_overlap, false))
          continue;  // This kind of merge overlap is OK.
        TBOX n_box = neighbour->bounding_box();
        // The overlap is OK if:
        // * the n_box already overlapped the part or the candidate OR
        // * the n_box is a suitable merge with either part or candidate
        if (!n_box.overlap(part_box) && !n_box.overlap(c_box) &&
            !OKMergeCandidate(part, neighbour, false) &&
            !OKMergeCandidate(candidate, neighbour, false))
          break;
      }
      // A non-null neighbour here means the search above stopped early on
      // an unacceptable overlap, so this candidate is rejected.
      if (neighbour != nullptr) {
        if (debug) {
          tprintf("Combined box overlaps another that is not OK despite"
                  " allowance of %d:", ok_overlap);
          neighbour->bounding_box().print();
          tprintf("Reason:");
          OKMergeCandidate(part, neighbour, true);
          tprintf("...and:");
          OKMergeCandidate(candidate, neighbour, true);
          tprintf("Overlap:");
          neighbour->OKMergeOverlap(*part, *candidate, ok_overlap, true);
        }
        continue;
      }
    }
    if (debug) {
      tprintf("Adding candidate:");
      candidate->bounding_box().print();
    }
    // Unique elements as they arrive.
    candidates->add_sorted(SortByBoxLeft<ColPartition>, true, candidate);
  }
}

// Smoothes the region type/flow type of the given part by looking at local
// neighbours and the given image mask. Searches a padded rectangle with the
// padding truncated on one side of the part's box in turn for each side,
// using the result (if any) that has the least distance to all neighbours
// that contribute to the decision.
// This biases in favor of rectangular
// regions without completely enforcing them.
// If a good decision cannot be reached, the part is left unchanged.
// im_box and rerotation are used to map blob coordinates onto the
// nontext_map, which is used to prevent the spread of text neighbourhoods
// into images.
// Returns true if the partition was changed.
bool ColPartitionGrid::SmoothRegionType(Pix* nontext_map,
                                        const TBOX& im_box,
                                        const FCOORD& rerotation,
                                        bool debug,
                                        ColPartition* part) {
  const TBOX& part_box = part->bounding_box();
  if (debug) {
    tprintf("Smooothing part at:");
    part_box.print();
  }
  BlobRegionType best_type = BRT_UNKNOWN;
  int best_dist = INT32_MAX;
  // Results further away than max_dist are not used; it is derived from
  // the smaller side of the part's box, with a floor of 2 * gridsize.
  int max_dist = std::min(part_box.width(), part_box.height());
  max_dist = std::max(max_dist * kMaxNeighbourDistFactor, gridsize() * 2);
  // Search with the pad truncated on each side of the box in turn.
  bool any_image = false;
  bool all_image = true;
  for (int d = 0; d < BND_COUNT; ++d) {
    int dist;
    auto dir = static_cast<BlobNeighbourDir>(d);
    BlobRegionType type = SmoothInOneDirection(dir, nontext_map, im_box,
                                               rerotation, debug, *part,
                                               &dist);
    if (debug) {
      tprintf("Result in dir %d = %d at dist %d\n", dir, type, dist);
    }
    // Keep the nearest definite result.
    if (type != BRT_UNKNOWN && dist < best_dist) {
      best_dist = dist;
      best_type = type;
    }
    if (type == BRT_POLYIMAGE)
      any_image = true;
    else
      all_image = false;
  }
  if (best_dist > max_dist)
    return false;  // Too far away to set the type with it.
  if (part->flow() == BTFT_STRONG_CHAIN && !all_image) {
    return false;  // We are not modifying it.
  }
  BlobRegionType new_type = part->blob_type();
  BlobTextFlowType new_flow = part->flow();
  // A text result is only applied when no direction reported image.
  if (best_type == BRT_TEXT && !any_image) {
    new_flow = BTFT_STRONG_CHAIN;
    new_type = BRT_TEXT;
  } else if (best_type == BRT_VERT_TEXT && !any_image) {
    new_flow = BTFT_STRONG_CHAIN;
    new_type = BRT_VERT_TEXT;
  } else if (best_type == BRT_POLYIMAGE) {
    new_flow = BTFT_NONTEXT;
    new_type = BRT_UNKNOWN;
  }
  if (new_type != part->blob_type() || new_flow != part->flow()) {
    part->set_flow(new_flow);
    part->set_blob_type(new_type);
    part->SetBlobTypes();
    if (debug) {
      tprintf("Modified part:");
      part->Print();
    }
    return true;
  } else {
    return false;
  }
}

// Sets up a search box based on the part_box, padded in all directions
// except direction. Also setup dist_scaling to weight x,y distances according
// to the given direction.
static void ComputeSearchBoxAndScaling(BlobNeighbourDir direction,
                                       const TBOX& part_box,
                                       int min_padding,
                                       TBOX* search_box,
                                       ICOORD* dist_scaling) {
  *search_box = part_box;
  // Generate a pad value based on the min dimension of part_box, but at least
  // min_padding and then scaled by kMaxPadFactor.
  int padding = std::min(part_box.height(), part_box.width());
  padding = std::max(padding, min_padding);
  padding *= kMaxPadFactor;
  search_box->pad(padding, padding);
  // Truncate the box in the appropriate direction and make the distance
  // metric slightly biased in the truncated direction.
  switch (direction) {
    case BND_LEFT:
      search_box->set_left(part_box.left());
      *dist_scaling = ICOORD(2, 1);
      break;
    case BND_BELOW:
      search_box->set_bottom(part_box.bottom());
      *dist_scaling = ICOORD(1, 2);
      break;
    case BND_RIGHT:
      search_box->set_right(part_box.right());
      *dist_scaling = ICOORD(2, 1);
      break;
    case BND_ABOVE:
      search_box->set_top(part_box.top());
      *dist_scaling = ICOORD(1, 2);
      break;
    default:
      // Any other direction value is a programming error.
      ASSERT_HOST(false);
  }
}

// Local enum used by SmoothInOneDirection and AccumulatePartDistances
// for the different types of partition neighbour.
enum NeighbourPartitionType {
  NPT_HTEXT,       // Definite horizontal text.
  NPT_VTEXT,       // Definite vertical text.
  NPT_WEAK_HTEXT,  // Weakly horizontal text. Counts as HTEXT for HTEXT, but
                   // image for image and VTEXT.
  NPT_WEAK_VTEXT,  // Weakly vertical text. Counts as VTEXT for VTEXT, but
                   // image for image and HTEXT.
  NPT_IMAGE,       // Definite non-text.
  NPT_COUNT        // Number of array elements.
};

// Executes the search for SmoothRegionType in a single direction.
// Creates a bounding box that is padded in all directions except direction,
// and searches it for other partitions. Finds the nearest collection of
// partitions that makes a decisive result (if any) and returns the type
// and the distance of the collection. If there are any pixels in the
// nontext_map, then the decision is biased towards image.
BlobRegionType ColPartitionGrid::SmoothInOneDirection(
    BlobNeighbourDir direction, Pix* nontext_map,
    const TBOX& im_box, const FCOORD& rerotation,
    bool debug, const ColPartition& part, int* best_distance) {
  // Set up a rectangle search bounded by the part.
+ const TBOX& part_box = part.bounding_box(); + TBOX search_box; + ICOORD dist_scaling; + ComputeSearchBoxAndScaling(direction, part_box, gridsize(), + &search_box, &dist_scaling); + bool image_region = ImageFind::CountPixelsInRotatedBox(search_box, im_box, + rerotation, + nontext_map) > 0; + GenericVector<int> dists[NPT_COUNT]; + AccumulatePartDistances(part, dist_scaling, search_box, + nontext_map, im_box, rerotation, debug, dists); + // By iteratively including the next smallest distance across the vectors, + // (as in a merge sort) we can use the vector indices as counts of each type + // and find the nearest set of objects that give us a definite decision. + int counts[NPT_COUNT]; + memset(counts, 0, sizeof(counts[0]) * NPT_COUNT); + // If there is image in the search box, tip the balance in image's favor. + int image_bias = image_region ? kSmoothDecisionMargin / 2 : 0; + BlobRegionType text_dir = part.blob_type(); + BlobTextFlowType flow_type = part.flow(); + int min_dist = 0; + do { + // Find the minimum new entry across the vectors + min_dist = INT32_MAX; + for (int i = 0; i < NPT_COUNT; ++i) { + if (counts[i] < dists[i].size() && dists[i][counts[i]] < min_dist) + min_dist = dists[i][counts[i]]; + } + // Step all the indices/counts forward to include min_dist. + for (int i = 0; i < NPT_COUNT; ++i) { + while (counts[i] < dists[i].size() && dists[i][counts[i]] <= min_dist) + ++counts[i]; + } + *best_distance = min_dist; + if (debug) { + tprintf("Totals: htext=%d+%d, vtext=%d+%d, image=%d+%d, at dist=%d\n", + counts[NPT_HTEXT], counts[NPT_WEAK_HTEXT], + counts[NPT_VTEXT], counts[NPT_WEAK_VTEXT], + counts[NPT_IMAGE], image_bias, min_dist); + } + // See if we have a decision yet. 
+ int image_count = counts[NPT_IMAGE]; + int htext_score = counts[NPT_HTEXT] + counts[NPT_WEAK_HTEXT] - + (image_count + counts[NPT_WEAK_VTEXT]); + int vtext_score = counts[NPT_VTEXT] + counts[NPT_WEAK_VTEXT] - + (image_count + counts[NPT_WEAK_HTEXT]); + if (image_count > 0 && + image_bias - htext_score >= kSmoothDecisionMargin && + image_bias - vtext_score >= kSmoothDecisionMargin) { + *best_distance = dists[NPT_IMAGE][0]; + if (!dists[NPT_WEAK_VTEXT].empty() && + *best_distance > dists[NPT_WEAK_VTEXT][0]) + *best_distance = dists[NPT_WEAK_VTEXT][0]; + if (!dists[NPT_WEAK_HTEXT].empty() && + *best_distance > dists[NPT_WEAK_HTEXT][0]) + *best_distance = dists[NPT_WEAK_HTEXT][0]; + return BRT_POLYIMAGE; + } + if ((text_dir != BRT_VERT_TEXT || flow_type != BTFT_CHAIN) && + counts[NPT_HTEXT] > 0 && htext_score >= kSmoothDecisionMargin) { + *best_distance = dists[NPT_HTEXT][0]; + return BRT_TEXT; + } else if ((text_dir != BRT_TEXT || flow_type != BTFT_CHAIN) && + counts[NPT_VTEXT] > 0 && vtext_score >= kSmoothDecisionMargin) { + *best_distance = dists[NPT_VTEXT][0]; + return BRT_VERT_TEXT; + } + } while (min_dist < INT32_MAX); + return BRT_UNKNOWN; +} + +// Counts the partitions in the given search_box by appending the gap +// distance (scaled by dist_scaling) of the part from the base_part to the +// vector of the appropriate type for the partition. Prior to return, the +// vectors in the dists array are sorted in increasing order. +// The nontext_map (+im_box, rerotation) is used to make text invisible if +// there is non-text in between. +// dists must be an array of GenericVectors of size NPT_COUNT. 
+void ColPartitionGrid::AccumulatePartDistances(const ColPartition& base_part, + const ICOORD& dist_scaling, + const TBOX& search_box, + Pix* nontext_map, + const TBOX& im_box, + const FCOORD& rerotation, + bool debug, + GenericVector<int>* dists) { + const TBOX& part_box = base_part.bounding_box(); + ColPartitionGridSearch rsearch(this); + rsearch.SetUniqueMode(true); + rsearch.StartRectSearch(search_box); + ColPartition* neighbour; + // Search for compatible neighbours with a similar strokewidth, but not + // on the other side of a tab vector. + while ((neighbour = rsearch.NextRectSearch()) != nullptr) { + if (neighbour->IsUnMergeableType() || + !base_part.ConfirmNoTabViolation(*neighbour) || + neighbour == &base_part) + continue; + TBOX nbox = neighbour->bounding_box(); + BlobRegionType n_type = neighbour->blob_type(); + if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) && + !ImageFind::BlankImageInBetween(part_box, nbox, im_box, rerotation, + nontext_map)) + continue; // Text not visible the other side of image. + if (BLOBNBOX::IsLineType(n_type)) + continue; // Don't use horizontal lines as neighbours. + int x_gap = std::max(part_box.x_gap(nbox), 0); + int y_gap = std::max(part_box.y_gap(nbox), 0); + int n_dist = x_gap * dist_scaling.x() + y_gap* dist_scaling.y(); + if (debug) { + tprintf("Part has x-gap=%d, y=%d, dist=%d at:", + x_gap, y_gap, n_dist); + nbox.print(); + } + // Truncate the number of boxes, so text doesn't get too much advantage. + int n_boxes = std::min(neighbour->boxes_count(), kSmoothDecisionMargin); + BlobTextFlowType n_flow = neighbour->flow(); + GenericVector<int>* count_vector = nullptr; + if (n_flow == BTFT_STRONG_CHAIN) { + if (n_type == BRT_TEXT) + count_vector = &dists[NPT_HTEXT]; + else + count_vector = &dists[NPT_VTEXT]; + if (debug) { + tprintf("%s %d\n", n_type == BRT_TEXT ? 
"Htext" : "Vtext", n_boxes); + } + } else if ((n_type == BRT_TEXT || n_type == BRT_VERT_TEXT) && + (n_flow == BTFT_CHAIN || n_flow == BTFT_NEIGHBOURS)) { + // Medium text counts as weak, and all else counts as image. + if (n_type == BRT_TEXT) + count_vector = &dists[NPT_WEAK_HTEXT]; + else + count_vector = &dists[NPT_WEAK_VTEXT]; + if (debug) tprintf("Weak %d\n", n_boxes); + } else { + count_vector = &dists[NPT_IMAGE]; + if (debug) tprintf("Image %d\n", n_boxes); + } + if (count_vector != nullptr) { + for (int i = 0; i < n_boxes; ++i) + count_vector->push_back(n_dist); + } + if (debug) { + neighbour->Print(); + } + } + for (int i = 0; i < NPT_COUNT; ++i) + dists[i].sort(); +} + +// Improves the margins of the part ColPartition by searching for +// neighbours that vertically overlap significantly. +// columns may be nullptr, and indicates the assigned column structure this +// is applicable to part. +void ColPartitionGrid::FindPartitionMargins(ColPartitionSet* columns, + ColPartition* part) { + // Set up a rectangle search x-bounded by the column and y by the part. + TBOX box = part->bounding_box(); + int y = part->MidY(); + // Initial left margin is based on the column, if there is one. + int left_margin = bleft().x(); + int right_margin = tright().x(); + if (columns != nullptr) { + ColPartition* column = columns->ColumnContaining(box.left(), y); + if (column != nullptr) + left_margin = column->LeftAtY(y); + column = columns->ColumnContaining(box.right(), y); + if (column != nullptr) + right_margin = column->RightAtY(y); + } + left_margin -= kColumnWidthFactor; + right_margin += kColumnWidthFactor; + // Search for ColPartitions that reduce the margin. + left_margin = FindMargin(box.left() + box.height(), true, left_margin, + box.bottom(), box.top(), part); + part->set_left_margin(left_margin); + // Search for ColPartitions that reduce the margin. 
+ right_margin = FindMargin(box.right() - box.height(), false, right_margin, + box.bottom(), box.top(), part); + part->set_right_margin(right_margin); +} + +// Starting at x, and going in the specified direction, up to x_limit, finds +// the margin for the given y range by searching sideways, +// and ignoring not_this. +int ColPartitionGrid::FindMargin(int x, bool right_to_left, int x_limit, + int y_bottom, int y_top, + const ColPartition* not_this) { + int height = y_top - y_bottom; + // Iterate the ColPartitions in the grid. + ColPartitionGridSearch side_search(this); + side_search.SetUniqueMode(true); + side_search.StartSideSearch(x, y_bottom, y_top); + ColPartition* part; + while ((part = side_search.NextSideSearch(right_to_left)) != nullptr) { + // Ignore itself. + if (part == not_this) // || part->IsLineType()) + continue; + // Must overlap by enough, based on the min of the heights, so + // large partitions can't smash through small ones. + TBOX box = part->bounding_box(); + int min_overlap = std::min(height, static_cast<int>(box.height())); + min_overlap = static_cast<int>(min_overlap * kMarginOverlapFraction + 0.5); + int y_overlap = std::min(y_top, static_cast<int>(box.top())) - std::max(y_bottom, static_cast<int>(box.bottom())); + if (y_overlap < min_overlap) + continue; + // Must be going the right way. + int x_edge = right_to_left ? box.right() : box.left(); + if ((x_edge < x) != right_to_left) + continue; + // If we have gone past x_limit, then x_limit will do. + if ((x_edge < x_limit) == right_to_left) + break; + // It reduces x limit, so save the new one. + x_limit = x_edge; + } + return x_limit; +} + + +} // namespace tesseract. 
diff --git a/tesseract/src/textord/colpartitiongrid.h b/tesseract/src/textord/colpartitiongrid.h new file mode 100644 index 00000000..85ab7f3d --- /dev/null +++ b/tesseract/src/textord/colpartitiongrid.h @@ -0,0 +1,252 @@ +/////////////////////////////////////////////////////////////////////// +// File: colpartitiongrid.h +// Description: Class collecting code that acts on a BBGrid of ColPartitions. +// Author: Ray Smith +// +// (C) Copyright 2009, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_TEXTORD_COLPARTITIONGRID_H_ +#define TESSERACT_TEXTORD_COLPARTITIONGRID_H_ + +#include "bbgrid.h" +#include "colpartition.h" +#include "colpartitionset.h" + +namespace tesseract { + +class TabFind; + +// ColPartitionGrid is a BBGrid of ColPartition. +// It collects functions that work on the grid. +class TESS_API ColPartitionGrid : public BBGrid<ColPartition, + ColPartition_CLIST, + ColPartition_C_IT> { + public: + ColPartitionGrid() = default; + ColPartitionGrid(int gridsize, const ICOORD& bleft, const ICOORD& tright); + + ~ColPartitionGrid() override = default; + + // Handles a click event in a display window. + void HandleClick(int x, int y) override; + + // Merges ColPartitions in the grid that look like they belong in the same + // textline. 
+ // For all partitions in the grid, calls the box_cb permanent callback + // to compute the search box, searches the box, and if a candidate is found, + // calls the confirm_cb to check any more rules. If the confirm_cb returns + // true, then the partitions are merged. + // Both callbacks are deleted before returning. + void Merges(std::function<bool(ColPartition*, TBOX*)> box_cb, + std::function<bool(const ColPartition*, + const ColPartition*)> confirm_cb); + + // For the given partition, calls the box_cb permanent callback + // to compute the search box, searches the box, and if a candidate is found, + // calls the confirm_cb to check any more rules. If the confirm_cb returns + // true, then the partitions are merged. + // Returns true if the partition is consumed by one or more merges. + bool MergePart(std::function<bool(ColPartition*, TBOX*)> box_cb, + std::function<bool(const ColPartition*, + const ColPartition*)> confirm_cb, + ColPartition* part); + + // Computes and returns the total overlap of all partitions in the grid. + // If overlap_grid is non-null, it is filled with a grid that holds empty + // partitions representing the union of all overlapped partitions. + int ComputeTotalOverlap(ColPartitionGrid** overlap_grid); + + // Finds all the ColPartitions in the grid that overlap with the given + // box and returns them SortByBoxLeft(ed) and uniqued in the given list. + // Any partition equal to not_this (may be nullptr) is excluded. + void FindOverlappingPartitions(const TBOX& box, const ColPartition* not_this, + ColPartition_CLIST* parts); + + // Finds and returns the best candidate ColPartition to merge with part, + // selected from the candidates list, based on the minimum increase in + // pairwise overlap among all the partitions overlapped by the combined box. + // If overlap_increase is not nullptr then it returns the increase in overlap + // that would result from the merge. + // See colpartitiongrid.cpp for a diagram. 
+ ColPartition* BestMergeCandidate( + const ColPartition* part, ColPartition_CLIST* candidates, bool debug, + std::function<bool(const ColPartition*, + const ColPartition*)> confirm_cb, + int* overlap_increase); + + // Split partitions where it reduces overlap between their bounding boxes. + // ColPartitions are after all supposed to be a partitioning of the blobs + // AND of the space on the page! + // Blobs that cause overlaps get removed, put in individual partitions + // and added to the big_parts list. They are most likely characters on + // 2 textlines that touch, or something big like a dropcap. + void SplitOverlappingPartitions(ColPartition_LIST* big_parts); + + // Filters partitions of source_type by looking at local neighbours. + // Where a majority of neighbours have a text type, the partitions are + // changed to text, where the neighbours have image type, they are changed + // to image, and partitions that have no definite neighbourhood type are + // left unchanged. + // im_box and rerotation are used to map blob coordinates onto the + // nontext_map, which is used to prevent the spread of text neighbourhoods + // into images. + // Returns true if anything was changed. + bool GridSmoothNeighbours(BlobTextFlowType source_type, Pix* nontext_map, + const TBOX& im_box, const FCOORD& rerotation); + + // Reflects the grid and its colpartitions in the y-axis, assuming that + // all blob boxes have already been done. + void ReflectInYAxis(); + + // Rotates the grid and its colpartitions by the given angle, assuming that + // all blob boxes have already been done. + void Deskew(const FCOORD& deskew); + + // Transforms the grid of partitions to the output blocks, putting each + // partition into a separate block. We don't really care about the order, + // as we just want to get as much text as possible without trying to organize + // it into proper blocks or columns. 
+ void ExtractPartitionsAsBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); + + // Sets the left and right tabs of the partitions in the grid. + void SetTabStops(TabFind* tabgrid); + + // Makes the ColPartSets and puts them in the PartSetVector ready + // for finding column bounds. Returns false if no partitions were found. + // Each ColPartition in the grid is placed in a single ColPartSet based + // on the bottom-left of its bounding box. + bool MakeColPartSets(PartSetVector* part_sets); + + // Makes a single ColPartitionSet consisting of a single ColPartition that + // represents the total horizontal extent of the significant content on the + // page. Used for the single column setting in place of automatic detection. + // Returns nullptr if the page is empty of significant content. + ColPartitionSet* MakeSingleColumnSet(WidthCallback cb); + + // Mark the BLOBNBOXes in each partition as being owned by that partition. + void ClaimBoxes(); + + // Retypes all the blobs referenced by the partitions in the grid. + // Image blobs are sliced on the grid boundaries to give the tab finder + // a better handle on the edges of the images, and the actual blobs are + // returned in the im_blobs list, as they are not owned by the block. + void ReTypeBlobs(BLOBNBOX_LIST* im_blobs); + + // The boxes within the partitions have changed (by deskew) so recompute + // the bounds of all the partitions and reinsert them into the grid. + void RecomputeBounds(int gridsize, const ICOORD& bleft, + const ICOORD& tright, const ICOORD& vertical); + + // Improves the margins of the ColPartitions in the grid by calling + // FindPartitionMargins on each. + void GridFindMargins(ColPartitionSet** best_columns); + + // Improves the margins of the ColPartitions in the list by calling + // FindPartitionMargins on each. + void ListFindMargins(ColPartitionSet** best_columns, + ColPartition_LIST* parts); + + // Deletes all the partitions in the grid after disowning all the blobs. 
+ void DeleteParts(); + + // Deletes all the partitions in the grid that are of type BRT_UNKNOWN and + // all the blobs in them. + void DeleteUnknownParts(TO_BLOCK* block); + + // Deletes all the partitions in the grid that are NOT of flow type + // BTFT_LEADER. + void DeleteNonLeaderParts(); + + // Finds and marks text partitions that represent figure captions. + void FindFigureCaptions(); + + //////// Functions that manipulate ColPartitions in the grid /////// + //////// to find chains of partner partitions of the same type. /////// + // For every ColPartition in the grid, finds its upper and lower neighbours. + void FindPartitionPartners(); + // Finds the best partner in the given direction for the given partition. + // Stores the result with AddPartner. + void FindPartitionPartners(bool upper, ColPartition* part); + // Finds the best partner in the given direction for the given partition. + // Stores the result with AddPartner. + void FindVPartitionPartners(bool to_the_left, ColPartition* part); + // For every ColPartition with multiple partners in the grid, reduces the + // number of partners to 0 or 1. If get_desperate is true, goes to more + // desperate merge methods to merge flowing text before breaking partnerships. + void RefinePartitionPartners(bool get_desperate); + + private: + // Finds and returns a list of candidate ColPartitions to merge with part. + // The candidates must overlap search_box, and when merged must not + // overlap any other partitions that are not overlapped by each individually. + void FindMergeCandidates(const ColPartition* part, const TBOX& search_box, + bool debug, ColPartition_CLIST* candidates); + + // Smoothes the region type/flow type of the given part by looking at local + // neighbours and the given image mask. 
Searches a padded rectangle with the + // padding truncated on one side of the part's box in turn for each side, + // using the result (if any) that has the least distance to all neighbours + // that contribute to the decision. This biases in favor of rectangular + // regions without completely enforcing them. + // If a good decision cannot be reached, the part is left unchanged. + // im_box and rerotation are used to map blob coordinates onto the + // nontext_map, which is used to prevent the spread of text neighbourhoods + // into images. + // Returns true if the partition was changed. + bool SmoothRegionType(Pix* nontext_map, + const TBOX& im_box, + const FCOORD& rerotation, + bool debug, + ColPartition* part); + // Executes the search for SmoothRegionType in a single direction. + // Creates a bounding box that is padded in all directions except direction, + // and searches it for other partitions. Finds the nearest collection of + // partitions that makes a decisive result (if any) and returns the type + // and the distance of the collection. If there are any pixels in the + // nontext_map, then the decision is biased towards image. + BlobRegionType SmoothInOneDirection(BlobNeighbourDir direction, + Pix* nontext_map, + const TBOX& im_box, + const FCOORD& rerotation, + bool debug, + const ColPartition& part, + int* best_distance); + // Counts the partitions in the given search_box by appending the gap + // distance (scaled by dist_scaling) of the part from the base_part to the + // vector of the appropriate type for the partition. Prior to return, the + // vectors in the dists array are sorted in increasing order. + // dists must be an array of GenericVectors of size NPT_COUNT. 
+ void AccumulatePartDistances(const ColPartition& base_part, + const ICOORD& dist_scaling, + const TBOX& search_box, + Pix* nontext_map, + const TBOX& im_box, + const FCOORD& rerotation, + bool debug, + GenericVector<int>* dists); + + // Improves the margins of the ColPartition by searching for + // neighbours that vertically overlap significantly. + void FindPartitionMargins(ColPartitionSet* columns, ColPartition* part); + + // Starting at x, and going in the specified direction, up to x_limit, finds + // the margin for the given y range by searching sideways, + // and ignoring not_this. + int FindMargin(int x, bool right_to_left, int x_limit, + int y_bottom, int y_top, const ColPartition* not_this); +}; + +} // namespace tesseract. + +#endif // TESSERACT_TEXTORD_COLPARTITIONGRID_H_ diff --git a/tesseract/src/textord/colpartitionset.cpp b/tesseract/src/textord/colpartitionset.cpp new file mode 100644 index 00000000..c53235e6 --- /dev/null +++ b/tesseract/src/textord/colpartitionset.cpp @@ -0,0 +1,667 @@ +/////////////////////////////////////////////////////////////////////// +// File: colpartitionset.cpp +// Description: Class to hold a list of ColPartitions of the page that +// correspond roughly to columns. +// Author: Ray Smith +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "colpartitionset.h" +#include "workingpartset.h" +#include "tablefind.h" + +namespace tesseract { + +// Minimum width of a column to be interesting as a multiple of resolution. +const double kMinColumnWidth = 2.0 / 3; + +ELISTIZE(ColPartitionSet) + +ColPartitionSet::ColPartitionSet(ColPartition_LIST* partitions) { + ColPartition_IT it(&parts_); + it.add_list_after(partitions); + ComputeCoverage(); +} + +ColPartitionSet::ColPartitionSet(ColPartition* part) { + ColPartition_IT it(&parts_); + it.add_after_then_move(part); + ComputeCoverage(); +} + +// Returns the number of columns of good width. +int ColPartitionSet::GoodColumnCount() const { + int num_good_cols = 0; + // This is a read-only iteration of the list. + ColPartition_IT it(const_cast<ColPartition_LIST*>(&parts_)); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + if (it.data()->good_width()) ++num_good_cols; + } + return num_good_cols; +} + +// Return an element of the parts_ list from its index. +ColPartition* ColPartitionSet::GetColumnByIndex(int index) { + ColPartition_IT it(&parts_); + it.mark_cycle_pt(); + for (int i = 0; i < index && !it.cycled_list(); ++i, it.forward()); + if (it.cycled_list()) + return nullptr; + return it.data(); +} + +// Return the ColPartition that contains the given coords, if any, else nullptr. +ColPartition* ColPartitionSet::ColumnContaining(int x, int y) { + ColPartition_IT it(&parts_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* part = it.data(); + if (part->ColumnContains(x, y)) + return part; + } + return nullptr; +} + +// Extract all the parts from the list, relinquishing ownership. 
+void ColPartitionSet::RelinquishParts() { + ColPartition_IT it(&parts_); + while (!it.empty()) { + it.extract(); + it.forward(); + } +} + +// Attempt to improve this by adding partitions or expanding partitions. +void ColPartitionSet::ImproveColumnCandidate(WidthCallback cb, + PartSetVector* src_sets) { + int set_size = src_sets->size(); + // Iterate over the provided column sets, as each one may have something + // to improve this. + for (int i = 0; i < set_size; ++i) { + ColPartitionSet* column_set = src_sets->get(i); + if (column_set == nullptr) + continue; + // Iterate over the parts in this and column_set, adding bigger or + // new parts in column_set to this. + ColPartition_IT part_it(&parts_); + ASSERT_HOST(!part_it.empty()); + int prev_right = INT32_MIN; + part_it.mark_cycle_pt(); + ColPartition_IT col_it(&column_set->parts_); + for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) { + ColPartition* col_part = col_it.data(); + if (col_part->blob_type() < BRT_UNKNOWN) + continue; // Ignore image partitions. + int col_left = col_part->left_key(); + int col_right = col_part->right_key(); + // Sync-up part_it (in this) so it matches the col_part in column_set. + ColPartition* part = part_it.data(); + while (!part_it.at_last() && part->right_key() < col_left) { + prev_right = part->right_key(); + part_it.forward(); + part = part_it.data(); + } + int part_left = part->left_key(); + int part_right = part->right_key(); + if (part_right < col_left || col_right < part_left) { + // There is no overlap so this is a new partition. + AddPartition(col_part->ShallowCopy(), &part_it); + continue; + } + // Check the edges of col_part to see if they can improve part. + bool part_width_ok = cb(part->KeyWidth(part_left, part_right)); + if (col_left < part_left && col_left > prev_right) { + // The left edge of the column is better and it doesn't overlap, + // so we can potentially expand it. 
+ int col_box_left = col_part->BoxLeftKey(); + bool tab_width_ok = cb(part->KeyWidth(col_left, part_right)); + bool box_width_ok = cb(part->KeyWidth(col_box_left, part_right)); + if (tab_width_ok || (!part_width_ok)) { + // The tab is leaving the good column metric at least as good as + // it was before, so use the tab. + part->CopyLeftTab(*col_part, false); + part->SetColumnGoodness(cb); + } else if (col_box_left < part_left && + (box_width_ok || !part_width_ok)) { + // The box is leaving the good column metric at least as good as + // it was before, so use the box. + part->CopyLeftTab(*col_part, true); + part->SetColumnGoodness(cb); + } + part_left = part->left_key(); + } + if (col_right > part_right && + (part_it.at_last() || + part_it.data_relative(1)->left_key() > col_right)) { + // The right edge is better, so we can possibly expand it. + int col_box_right = col_part->BoxRightKey(); + bool tab_width_ok = cb(part->KeyWidth(part_left, col_right)); + bool box_width_ok = cb(part->KeyWidth(part_left, col_box_right)); + if (tab_width_ok || (!part_width_ok)) { + // The tab is leaving the good column metric at least as good as + // it was before, so use the tab. + part->CopyRightTab(*col_part, false); + part->SetColumnGoodness(cb); + } else if (col_box_right > part_right && + (box_width_ok || !part_width_ok)) { + // The box is leaving the good column metric at least as good as + // it was before, so use the box. + part->CopyRightTab(*col_part, true); + part->SetColumnGoodness(cb); + } + } + } + } + ComputeCoverage(); +} + +// If this set is good enough to represent a new partitioning into columns, +// add it to the vector of sets, otherwise delete it. 
+void ColPartitionSet::AddToColumnSetsIfUnique(PartSetVector* column_sets, + WidthCallback cb) { + bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(), + bounding_box_.bottom()); + if (debug) { + tprintf("Considering new column candidate:\n"); + Print(); + } + if (!LegalColumnCandidate()) { + if (debug) { + tprintf("Not a legal column candidate:\n"); + Print(); + } + delete this; + return; + } + for (int i = 0; i < column_sets->size(); ++i) { + ColPartitionSet* columns = column_sets->get(i); + // In ordering the column set candidates, good_coverage_ is king, + // followed by good_column_count_ and then bad_coverage_. + bool better = good_coverage_ > columns->good_coverage_; + if (good_coverage_ == columns->good_coverage_) { + better = good_column_count_ > columns->good_column_count_; + if (good_column_count_ == columns->good_column_count_) { + better = bad_coverage_ > columns->bad_coverage_; + } + } + if (better) { + // The new one is better so add it. + if (debug) + tprintf("Good one\n"); + column_sets->insert(this, i); + return; + } + if (columns->CompatibleColumns(false, this, cb)) { + if (debug) + tprintf("Duplicate\n"); + delete this; + return; // It is not unique. + } + } + if (debug) + tprintf("Added to end\n"); + column_sets->push_back(this); +} + +// Return true if the partitions in other are all compatible with the columns +// in this. 
+bool ColPartitionSet::CompatibleColumns(bool debug, ColPartitionSet* other, + WidthCallback cb) { + if (debug) { + tprintf("CompatibleColumns testing compatibility\n"); + Print(); + other->Print(); + } + if (other->parts_.empty()) { + if (debug) + tprintf("CompatibleColumns true due to empty other\n"); + return true; + } + ColPartition_IT it(&other->parts_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* part = it.data(); + if (part->blob_type() < BRT_UNKNOWN) { + if (debug) { + tprintf("CompatibleColumns ignoring image partition\n"); + part->Print(); + } + continue; // Image partitions are irrelevant to column compatibility. + } + int y = part->MidY(); + int left = part->bounding_box().left(); + int right = part->bounding_box().right(); + ColPartition* left_col = ColumnContaining(left, y); + ColPartition* right_col = ColumnContaining(right, y); + if (right_col == nullptr || left_col == nullptr) { + if (debug) { + tprintf("CompatibleColumns false due to partition edge outside\n"); + part->Print(); + } + return false; // A partition edge lies outside of all columns + } + if (right_col != left_col && cb(right - left)) { + if (debug) { + tprintf("CompatibleColumns false due to good width in multiple cols\n"); + part->Print(); + } + return false; // Partition with a good width must be in a single column. + } + + ColPartition_IT it2= it; + while (!it2.at_last()) { + it2.forward(); + ColPartition* next_part = it2.data(); + if (!BLOBNBOX::IsTextType(next_part->blob_type())) + continue; // Non-text partitions are irrelevant. + int next_left = next_part->bounding_box().left(); + if (next_left == right) { + break; // They share the same edge, so one must be a pull-out. + } + // Search to see if right and next_left fall within a single column. + ColPartition* next_left_col = ColumnContaining(next_left, y); + if (right_col == next_left_col) { + // There is a column break in this column. 
+ // This can be due to a figure caption within a column, a pull-out + // block, or a simple broken textline that remains to be merged: + // all allowed, or a change in column layout: not allowed. + // If both partitions are of good width, then it is likely + // a change in column layout, otherwise probably an allowed situation. + if (part->good_width() && next_part->good_width()) { + if (debug) { + int next_right = next_part->bounding_box().right(); + tprintf("CompatibleColumns false due to 2 parts of good width\n"); + tprintf("part1 %d-%d, part2 %d-%d\n", + left, right, next_left, next_right); + right_col->Print(); + } + return false; + } + } + break; + } + } + if (debug) + tprintf("CompatibleColumns true!\n"); + return true; +} + +// Returns the total width of all blobs in the part_set that do not lie +// within an approved column. Used as a cost measure for using this +// column set over another that might be compatible. +int ColPartitionSet::UnmatchedWidth(ColPartitionSet* part_set) { + int total_width = 0; + ColPartition_IT it(&part_set->parts_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* part = it.data(); + if (!BLOBNBOX::IsTextType(part->blob_type())) { + continue; // Non-text partitions are irrelevant to column compatibility. + } + int y = part->MidY(); + BLOBNBOX_C_IT box_it(part->boxes()); + for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) { + const TBOX& box = it.data()->bounding_box(); + // Assume that the whole blob is outside any column iff its x-middle + // is outside. + int x = (box.left() + box.right()) / 2; + ColPartition* col = ColumnContaining(x, y); + if (col == nullptr) + total_width += box.width(); + } + } + return total_width; +} + +// Return true if this ColPartitionSet makes a legal column candidate by +// having legal individual partitions and non-overlapping adjacent pairs. 
+bool ColPartitionSet::LegalColumnCandidate() { + ColPartition_IT it(&parts_); + if (it.empty()) + return false; + bool any_text_parts = false; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* part = it.data(); + if (BLOBNBOX::IsTextType(part->blob_type())) { + if (!part->IsLegal()) + return false; // Individual partition is illegal. + any_text_parts = true; + } + if (!it.at_last()) { + ColPartition* next_part = it.data_relative(1); + if (next_part->left_key() < part->right_key()) { + return false; + } + } + } + return any_text_parts; +} + +// Return a copy of this. If good_only will only copy the Good ColPartitions. +ColPartitionSet* ColPartitionSet::Copy(bool good_only) { + ColPartition_LIST copy_parts; + ColPartition_IT src_it(&parts_); + ColPartition_IT dest_it(©_parts); + for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { + ColPartition* part = src_it.data(); + if (BLOBNBOX::IsTextType(part->blob_type()) && + (!good_only || part->good_width() || part->good_column())) + dest_it.add_after_then_move(part->ShallowCopy()); + } + if (dest_it.empty()) + return nullptr; + return new ColPartitionSet(©_parts); +} + +// Return the bounding boxes of columns at the given y-range +void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top, + ColSegment_LIST *segments) { + ColPartition_IT it(&parts_); + ColSegment_IT col_it(segments); + col_it.move_to_last(); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* part = it.data(); + ICOORD bot_left(part->LeftAtY(y_top), y_bottom); + ICOORD top_right(part->RightAtY(y_bottom), y_top); + auto *col_seg = new ColSegment(); + col_seg->InsertBox(TBOX(bot_left, top_right)); + col_it.add_after_then_move(col_seg); + } +} + +#ifndef GRAPHICS_DISABLED + +// Display the edges of the columns at the given y coords. 
+void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top, + ScrollView* win) { + ColPartition_IT it(&parts_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* part = it.data(); + win->Line(part->LeftAtY(y_top), y_top, part->LeftAtY(y_bottom), y_bottom); + win->Line(part->RightAtY(y_top), y_top, part->RightAtY(y_bottom), y_bottom); + } +} + +#endif // !GRAPHICS_DISABLED + +// Return the ColumnSpanningType that best explains the columns overlapped +// by the given coords(left,right,y), with the given margins. +// Also return the first and last column index touched by the coords and +// the leftmost spanned column. +// Column indices are 2n + 1 for real columns (0 based) and even values +// represent the gaps in between columns, with 0 being left of the leftmost. +// resolution refers to the ppi resolution of the image. +ColumnSpanningType ColPartitionSet::SpanningType(int resolution, + int left, int right, + int height, int y, + int left_margin, + int right_margin, + int* first_col, + int* last_col, + int* first_spanned_col) { + *first_col = -1; + *last_col = -1; + *first_spanned_col = -1; + int margin_columns = 0; + ColPartition_IT it(&parts_); + int col_index = 1; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), col_index += 2) { + ColPartition* part = it.data(); + if (part->ColumnContains(left, y) || + (it.at_first() && part->ColumnContains(left + height, y))) { + // In the default case, first_col is set, but columns_spanned remains + // zero, so first_col will get reset in the first column genuinely + // spanned, but we can tell the difference from a noise partition + // that touches no column. + *first_col = col_index; + if (part->ColumnContains(right, y) || + (it.at_last() && part->ColumnContains(right - height, y))) { + // Both within a single column. + *last_col = col_index; + return CST_FLOWING; + } + if (left_margin <= part->LeftAtY(y)) { + // It completely spans this column. 
+ *first_spanned_col = col_index; + margin_columns = 1; + } + } else if (part->ColumnContains(right, y) || + (it.at_last() && part->ColumnContains(right - height, y))) { + if (*first_col < 0) { + // It started in-between. + *first_col = col_index - 1; + } + if (right_margin >= part->RightAtY(y)) { + // It completely spans this column. + if (margin_columns == 0) + *first_spanned_col = col_index; + ++margin_columns; + } + *last_col = col_index; + break; + } else if (left < part->LeftAtY(y) && right > part->RightAtY(y)) { + // Neither left nor right are contained within, so it spans this + // column. + if (*first_col < 0) { + // It started in between the previous column and the current column. + *first_col = col_index - 1; + } + if (margin_columns == 0) + *first_spanned_col = col_index; + *last_col = col_index; + } else if (right < part->LeftAtY(y)) { + // We have gone past the end. + *last_col = col_index - 1; + if (*first_col < 0) { + // It must lie completely between columns =>noise. + *first_col = col_index - 1; + } + break; + } + } + if (*first_col < 0) + *first_col = col_index - 1; // The last in-between. + if (*last_col < 0) + *last_col = col_index - 1; // The last in-between. + ASSERT_HOST(*first_col >= 0 && *last_col >= 0); + ASSERT_HOST(*first_col <= *last_col); + if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) { + // Neither end was in a column, and it didn't span any, so it lies + // entirely between columns, therefore noise. + return CST_NOISE; + } else if (margin_columns <= 1) { + // An exception for headings that stick outside of single-column text. + if (margin_columns == 1 && parts_.singleton()) { + return CST_HEADING; + } + // It is a pullout, as left and right were not in the same column, but + // it doesn't go to the edge of its start and end. + return CST_PULLOUT; + } + // Its margins went to the edges of first and last columns => heading. + return CST_HEADING; +} + +// The column_set has changed. 
Close down all in-progress WorkingPartSets in +// columns that do not match and start new ones for the new columns in this. +// As ColPartitions are turned into BLOCKs, the used ones are put in +// used_parts, as they still need to be referenced in the grid. +void ColPartitionSet::ChangeWorkColumns(const ICOORD& bleft, + const ICOORD& tright, + int resolution, + ColPartition_LIST* used_parts, + WorkingPartSet_LIST* working_set_list) { + // Move the input list to a temporary location so we can delete its elements + // as we add them to the output working_set. + WorkingPartSet_LIST work_src; + WorkingPartSet_IT src_it(&work_src); + src_it.add_list_after(working_set_list); + src_it.move_to_first(); + WorkingPartSet_IT dest_it(working_set_list); + // Completed blocks and to_blocks are accumulated and given to the first new + // one whenever we keep a column, or at the end. + BLOCK_LIST completed_blocks; + TO_BLOCK_LIST to_blocks; + WorkingPartSet* first_new_set = nullptr; + WorkingPartSet* working_set = nullptr; + ColPartition_IT col_it(&parts_); + for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) { + ColPartition* column = col_it.data(); + // Any existing column to the left of column is completed. + while (!src_it.empty() && + ((working_set = src_it.data())->column() == nullptr || + working_set->column()->right_key() <= column->left_key())) { + src_it.extract(); + working_set->ExtractCompletedBlocks(bleft, tright, resolution, + used_parts, &completed_blocks, + &to_blocks); + delete working_set; + src_it.forward(); + } + // Make a new between-column WorkingSet for before the current column. + working_set = new WorkingPartSet(nullptr); + dest_it.add_after_then_move(working_set); + if (first_new_set == nullptr) + first_new_set = working_set; + // A matching column gets to stay, and first_new_set gets all the + // completed_sets. + working_set = src_it.empty() ? 
nullptr : src_it.data(); + if (working_set != nullptr && + working_set->column()->MatchingColumns(*column)) { + working_set->set_column(column); + dest_it.add_after_then_move(src_it.extract()); + src_it.forward(); + first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks); + first_new_set = nullptr; + } else { + // Just make a new working set for the current column. + working_set = new WorkingPartSet(column); + dest_it.add_after_then_move(working_set); + } + } + // Complete any remaining src working sets. + while (!src_it.empty()) { + working_set = src_it.extract(); + working_set->ExtractCompletedBlocks(bleft, tright, resolution, + used_parts, &completed_blocks, + &to_blocks); + delete working_set; + src_it.forward(); + } + // Make a new between-column WorkingSet for after the last column. + working_set = new WorkingPartSet(nullptr); + dest_it.add_after_then_move(working_set); + if (first_new_set == nullptr) + first_new_set = working_set; + // The first_new_set now gets any accumulated completed_parts/blocks. + first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks); +} + +// Accumulate the widths and gaps into the given variables. +void ColPartitionSet::AccumulateColumnWidthsAndGaps(int* total_width, + int* width_samples, + int* total_gap, + int* gap_samples) { + ColPartition_IT it(&parts_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* part = it.data(); + *total_width += part->ColumnWidth(); + ++*width_samples; + if (!it.at_last()) { + ColPartition* next_part = it.data_relative(1); + int part_left = part->right_key(); + int part_right = next_part->left_key(); + int gap = part->KeyWidth(part_left, part_right); + *total_gap += gap; + ++*gap_samples; + } + } +} + +// Provide debug output for this ColPartitionSet and all the ColPartitions. 
+void ColPartitionSet::Print() { + ColPartition_IT it(&parts_); + tprintf("Partition set of %d parts, %d good, coverage=%d+%d" + " (%d,%d)->(%d,%d)\n", + it.length(), good_column_count_, good_coverage_, bad_coverage_, + bounding_box_.left(), bounding_box_.bottom(), + bounding_box_.right(), bounding_box_.top()); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* part = it.data(); + part->Print(); + } +} + +// PRIVATE CODE. + +// Add the given partition to the list in the appropriate place. +void ColPartitionSet::AddPartition(ColPartition* new_part, + ColPartition_IT* it) { + AddPartitionCoverageAndBox(*new_part); + int new_right = new_part->right_key(); + if (it->data()->left_key() >= new_right) + it->add_before_stay_put(new_part); + else + it->add_after_stay_put(new_part); +} + +// Compute the coverage and good column count. Coverage is the amount of the +// width of the page (in pixels) that is covered by ColPartitions, which are +// used to provide candidate column layouts. +// Coverage is split into good and bad. Good coverage is provided by +// ColPartitions of a frequent width (according to the callback function +// provided by TabFinder::WidthCB, which accesses stored statistics on the +// widths of ColPartitions) and bad coverage is provided by all other +// ColPartitions, even if they have tab vectors at both sides. Thus: +// |-----------------------------------------------------------------| +// | Double width heading | +// |-----------------------------------------------------------------| +// |-------------------------------| |-------------------------------| +// | Common width ColParition | | Common width ColPartition | +// |-------------------------------| |-------------------------------| +// the layout with two common-width columns has better coverage than the +// double width heading, because the coverage is "good," even though less in +// total coverage than the heading, because the heading coverage is "bad." 
+void ColPartitionSet::ComputeCoverage() { + // Count the number of good columns and sum their width. + ColPartition_IT it(&parts_); + good_column_count_ = 0; + good_coverage_ = 0; + bad_coverage_ = 0; + bounding_box_ = TBOX(); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColPartition* part = it.data(); + AddPartitionCoverageAndBox(*part); + } +} + +// Adds the coverage, column count and box for a single partition, +// without adding it to the list. (Helper factored from ComputeCoverage.) +void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition& part) { + bounding_box_ += part.bounding_box(); + int coverage = part.ColumnWidth(); + if (part.good_width()) { + good_coverage_ += coverage; + good_column_count_ += 2; + } else { + if (part.blob_type() < BRT_UNKNOWN) + coverage /= 2; + if (part.good_column()) + ++good_column_count_; + bad_coverage_ += coverage; + } +} + +} // namespace tesseract. diff --git a/tesseract/src/textord/colpartitionset.h b/tesseract/src/textord/colpartitionset.h new file mode 100644 index 00000000..57b61b34 --- /dev/null +++ b/tesseract/src/textord/colpartitionset.h @@ -0,0 +1,171 @@ +/////////////////////////////////////////////////////////////////////// +// File: colpartitionset.h +// Description: Class to hold a list of ColPartitions of the page that +// correspond roughly to columns. +// Author: Ray Smith +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_TEXTORD_COLPARTITIONSET_H_ +#define TESSERACT_TEXTORD_COLPARTITIONSET_H_ + +#include "colpartition.h" // For ColPartition_LIST. +#include "genericvector.h" // For GenericVector. +#include "rect.h" // For TBOX. +#include "tabvector.h" // For BLOBNBOX_CLIST. + +namespace tesseract { + +class WorkingPartSet_LIST; +class ColSegment_LIST; +class ColPartitionSet; +using PartSetVector = GenericVector<ColPartitionSet*>; + +// ColPartitionSet is a class that holds a list of ColPartitions. +// Its main use is in holding a candidate partitioning of the width of the +// image into columns, where each member ColPartition is a single column. +// ColPartitionSets are used in building the column layout of a page. +class ColPartitionSet : public ELIST_LINK { + public: + ColPartitionSet() = default; + explicit ColPartitionSet(ColPartition_LIST* partitions); + explicit ColPartitionSet(ColPartition* partition); + + ~ColPartitionSet() = default; + + // Simple accessors. + const TBOX& bounding_box() const { + return bounding_box_; + } + bool Empty() const { + return parts_.empty(); + } + int ColumnCount() const { + return parts_.length(); + } + + // Returns the number of columns of good width. + int GoodColumnCount() const; + + // Return an element of the parts_ list from its index. + ColPartition* GetColumnByIndex(int index); + + // Return the ColPartition that contains the given coords, if any, else nullptr. + ColPartition* ColumnContaining(int x, int y); + + // Return the bounding boxes of columns at the given y-range + void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments); + + // Extract all the parts from the list, relinquishing ownership. + void RelinquishParts(); + + // Attempt to improve this by adding partitions or expanding partitions. 
+ void ImproveColumnCandidate(WidthCallback cb, PartSetVector* src_sets); + + // If this set is good enough to represent a new partitioning into columns, + // add it to the vector of sets, otherwise delete it. + void AddToColumnSetsIfUnique(PartSetVector* column_sets, WidthCallback cb); + + // Return true if the partitions in other are all compatible with the columns + // in this. + bool CompatibleColumns(bool debug, ColPartitionSet* other, WidthCallback cb); + + // Returns the total width of all blobs in the part_set that do not lie + // within an approved column. Used as a cost measure for using this + // column set over another that might be compatible. + int UnmatchedWidth(ColPartitionSet* part_set); + + // Return true if this ColPartitionSet makes a legal column candidate by + // having legal individual partitions and non-overlapping adjacent pairs. + bool LegalColumnCandidate(); + + // Return a copy of this. If good_only will only copy the Good ColPartitions. + ColPartitionSet* Copy(bool good_only); + + // Display the edges of the columns at the given y coords. + void DisplayColumnEdges(int y_bottom, int y_top, ScrollView* win); + + // Return the ColumnSpanningType that best explains the columns overlapped + // by the given coords(left,right,y), with the given margins. + // Also return the first and last column index touched by the coords and + // the leftmost spanned column. + // Column indices are 2n + 1 for real columns (0 based) and even values + // represent the gaps in between columns, with 0 being left of the leftmost. + // resolution refers to the ppi resolution of the image. It may be 0 if only + // the first_col and last_col are required. + ColumnSpanningType SpanningType(int resolution, + int left, int right, int height, int y, + int left_margin, int right_margin, + int* first_col, int* last_col, + int* first_spanned_col); + + // The column_set has changed. 
Close down all in-progress WorkingPartSets in + // columns that do not match and start new ones for the new columns in this. + // As ColPartitions are turned into BLOCKs, the used ones are put in + // used_parts, as they still need to be referenced in the grid. + void ChangeWorkColumns(const ICOORD& bleft, const ICOORD& tright, + int resolution, ColPartition_LIST* used_parts, + WorkingPartSet_LIST* working_set); + + // Accumulate the widths and gaps into the given variables. + void AccumulateColumnWidthsAndGaps(int* total_width, int* width_samples, + int* total_gap, int* gap_samples); + + // Provide debug output for this ColPartitionSet and all the ColPartitions. + void Print(); + + private: + // Add the given partition to the list in the appropriate place. + void AddPartition(ColPartition* new_part, ColPartition_IT* it); + + // Compute the coverage and good column count. Coverage is the amount of the + // width of the page (in pixels) that is covered by ColPartitions, which are + // used to provide candidate column layouts. + // Coverage is split into good and bad. Good coverage is provided by + // ColPartitions of a frequent width (according to the callback function + // provided by TabFinder::WidthCB, which accesses stored statistics on the + // widths of ColPartitions) and bad coverage is provided by all other + // ColPartitions, even if they have tab vectors at both sides. 
Thus: + // |-----------------------------------------------------------------| + // | Double width heading | + // |-----------------------------------------------------------------| + // |-------------------------------| |-------------------------------| + // | Common width ColParition | | Common width ColPartition | + // |-------------------------------| |-------------------------------| + // the layout with two common-width columns has better coverage than the + // double width heading, because the coverage is "good," even though less in + // total coverage than the heading, because the heading coverage is "bad." + void ComputeCoverage(); + + // Adds the coverage, column count and box for a single partition, + // without adding it to the list. (Helper factored from ComputeCoverage.) + void AddPartitionCoverageAndBox(const ColPartition& part); + + // The partitions in this column candidate. + ColPartition_LIST parts_; + // The number of partitions that have a frequent column width. + int good_column_count_; + // Total width of all the good ColPartitions. + int good_coverage_; + // Total width of all the bad ColPartitions. + int bad_coverage_; + // Bounding box of all partitions in the set. + TBOX bounding_box_; +}; + +ELISTIZEH(ColPartitionSet) + +} // namespace tesseract. + +#endif // TESSERACT_TEXTORD_COLPARTITION_H_ diff --git a/tesseract/src/textord/devanagari_processing.cpp b/tesseract/src/textord/devanagari_processing.cpp new file mode 100644 index 00000000..2ea0d942 --- /dev/null +++ b/tesseract/src/textord/devanagari_processing.cpp @@ -0,0 +1,502 @@ +/********************************************************************** + * File: devanagari_processing.cpp + * Description: Methods to process images containing devanagari symbols, + * prior to classification. + * Author: Shobhit Saxena + * Created: Mon Nov 17 20:26:01 IST 2008 + * + * (C) Copyright 2008, Google Inc. 
+ ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "devanagari_processing.h" + +#include "debugpixa.h" +#include "statistc.h" +#include "tordmain.h" + +#include "allheaders.h" + +namespace tesseract { + +// Flags controlling the debugging information for shiro-rekha splitting +// strategies. +INT_VAR(devanagari_split_debuglevel, 0, + "Debug level for split shiro-rekha process."); + +BOOL_VAR(devanagari_split_debugimage, 0, + "Whether to create a debug image for split shiro-rekha process."); + +ShiroRekhaSplitter::ShiroRekhaSplitter() { + orig_pix_ = nullptr; + segmentation_block_list_ = nullptr; + splitted_image_ = nullptr; + global_xheight_ = kUnspecifiedXheight; + perform_close_ = false; + debug_image_ = nullptr; + pageseg_split_strategy_ = NO_SPLIT; + ocr_split_strategy_ = NO_SPLIT; +} + +ShiroRekhaSplitter::~ShiroRekhaSplitter() { + Clear(); +} + +void ShiroRekhaSplitter::Clear() { + pixDestroy(&orig_pix_); + pixDestroy(&splitted_image_); + pageseg_split_strategy_ = NO_SPLIT; + ocr_split_strategy_ = NO_SPLIT; + pixDestroy(&debug_image_); + segmentation_block_list_ = nullptr; + global_xheight_ = kUnspecifiedXheight; + perform_close_ = false; +} + +// On setting the input image, a clone of it is owned by this class. 
+void ShiroRekhaSplitter::set_orig_pix(Pix* pix) { + if (orig_pix_) { + pixDestroy(&orig_pix_); + } + orig_pix_ = pixClone(pix); +} + +// Top-level method to perform splitting based on current settings. +// Returns true if a split was actually performed. +// split_for_pageseg should be true if the splitting is being done prior to +// page segmentation. This mode uses the flag +// pageseg_devanagari_split_strategy to determine the splitting strategy. +bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa* pixa_debug) { + SplitStrategy split_strategy = split_for_pageseg ? pageseg_split_strategy_ : + ocr_split_strategy_; + if (split_strategy == NO_SPLIT) { + return false; // Nothing to do. + } + ASSERT_HOST(split_strategy == MINIMAL_SPLIT || + split_strategy == MAXIMAL_SPLIT); + ASSERT_HOST(orig_pix_); + if (devanagari_split_debuglevel > 0) { + tprintf("Splitting shiro-rekha ...\n"); + tprintf("Split strategy = %s\n", + split_strategy == MINIMAL_SPLIT ? "Minimal" : "Maximal"); + tprintf("Initial pageseg available = %s\n", + segmentation_block_list_ ? "yes" : "no"); + } + // Create a copy of original image to store the splitting output. + pixDestroy(&splitted_image_); + splitted_image_ = pixCopy(nullptr, orig_pix_); + + // Initialize debug image if required. + if (devanagari_split_debugimage) { + pixDestroy(&debug_image_); + debug_image_ = pixConvertTo32(orig_pix_); + } + + // Determine all connected components in the input image. A close operation + // may be required prior to this, depending on the current settings. + Pix* pix_for_ccs = pixClone(orig_pix_); + if (perform_close_ && global_xheight_ != kUnspecifiedXheight && + !segmentation_block_list_) { + if (devanagari_split_debuglevel > 0) { + tprintf("Performing a global close operation..\n"); + } + // A global measure is available for xheight, but no local information + // exists. 
+ pixDestroy(&pix_for_ccs); + pix_for_ccs = pixCopy(nullptr, orig_pix_); + PerformClose(pix_for_ccs, global_xheight_); + } + Pixa* ccs; + Boxa* tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8); + boxaDestroy(&tmp_boxa); + pixDestroy(&pix_for_ccs); + + // Iterate over all connected components. Get their bounding boxes and clip + // out the image regions corresponding to these boxes from the original image. + // Conditionally run splitting on each of them. + Boxa* regions_to_clear = boxaCreate(0); + int num_ccs = 0; + if (ccs != nullptr) num_ccs = pixaGetCount(ccs); + for (int i = 0; i < num_ccs; ++i) { + Box* box = ccs->boxa->box[i]; + Pix* word_pix = pixClipRectangle(orig_pix_, box, nullptr); + ASSERT_HOST(word_pix); + int xheight = GetXheightForCC(box); + if (xheight == kUnspecifiedXheight && segmentation_block_list_ && + devanagari_split_debugimage) { + pixRenderBoxArb(debug_image_, box, 1, 255, 0, 0); + } + // If some xheight measure is available, attempt to pre-eliminate small + // blobs from the shiro-rekha process. This is primarily to save the CCs + // corresponding to punctuation marks/small dots etc which are part of + // larger graphemes. + if (xheight == kUnspecifiedXheight || + (box->w > xheight / 3 && box->h > xheight / 2)) { + SplitWordShiroRekha(split_strategy, word_pix, xheight, + box->x, box->y, regions_to_clear); + } else if (devanagari_split_debuglevel > 0) { + tprintf("CC dropped from splitting: %d,%d (%d, %d)\n", + box->x, box->y, box->w, box->h); + } + pixDestroy(&word_pix); + } + // Actually clear the boxes now. + for (int i = 0; i < boxaGetCount(regions_to_clear); ++i) { + Box* box = boxaGetBox(regions_to_clear, i, L_CLONE); + pixClearInRect(splitted_image_, box); + boxDestroy(&box); + } + boxaDestroy(®ions_to_clear); + pixaDestroy(&ccs); + if (devanagari_split_debugimage && pixa_debug != nullptr) { + pixa_debug->AddPix(debug_image_, + split_for_pageseg ? 
"pageseg_split" : "ocr_split"); + } + return true; +} + +// Method to perform a close operation on the input image. The xheight +// estimate decides the size of sel used. +void ShiroRekhaSplitter::PerformClose(Pix* pix, int xheight_estimate) { + pixCloseBrick(pix, pix, xheight_estimate / 8, xheight_estimate / 3); +} + +// This method resolves the cc bbox to a particular row and returns the row's +// xheight. +int ShiroRekhaSplitter::GetXheightForCC(Box* cc_bbox) { + if (!segmentation_block_list_) { + return global_xheight_; + } + // Compute the box coordinates in Tesseract's coordinate system. + TBOX bbox(cc_bbox->x, + pixGetHeight(orig_pix_) - cc_bbox->y - cc_bbox->h - 1, + cc_bbox->x + cc_bbox->w, + pixGetHeight(orig_pix_) - cc_bbox->y - 1); + // Iterate over all blocks. + BLOCK_IT block_it(segmentation_block_list_); + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + BLOCK* block = block_it.data(); + // Iterate over all rows in the block. + ROW_IT row_it(block->row_list()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + ROW* row = row_it.data(); + if (!row->bounding_box().major_overlap(bbox)) { + continue; + } + // Row could be skewed, warped, etc. Use the position of the box to + // determine the baseline position of the row for that x-coordinate. + // Create a square TBOX whose baseline's mid-point lies at this point + // and side is row's xheight. Take the overlap of this box with the input + // box and check if it is a 'major overlap'. If so, this box lies in this + // row. In that case, return the xheight for this row. + float box_middle = 0.5 * (bbox.left() + bbox.right()); + int baseline = static_cast<int>(row->base_line(box_middle) + 0.5); + TBOX test_box(box_middle - row->x_height() / 2, + baseline, + box_middle + row->x_height() / 2, + static_cast<int>(baseline + row->x_height())); + // Compute overlap. If it is is a major overlap, this is the right row. 
+ if (bbox.major_overlap(test_box)) { + return row->x_height(); + } + } + } + // No row found for this bbox. + return kUnspecifiedXheight; +} + +// Returns a list of regions (boxes) which should be cleared in the original +// image so as to perform shiro-rekha splitting. Pix is assumed to carry one +// (or less) word only. Xheight measure could be the global estimate, the row +// estimate, or unspecified. If unspecified, over splitting may occur, since a +// conservative estimate of stroke width along with an associated multiplier +// is used in its place. It is advisable to have a specified xheight when +// splitting for classification/training. +// A vertical projection histogram of all the on-pixels in the input pix is +// computed. The maxima of this histogram is regarded as an approximate location +// of the shiro-rekha. By descending on the maxima's peak on both sides, +// stroke width of shiro-rekha is estimated. +// A horizontal projection histogram is computed for a sub-image of the input +// image, which extends from just below the shiro-rekha down to a certain +// leeway. The leeway depends on the input xheight, if provided, else a +// conservative multiplier on approximate stroke width is used (which may lead +// to over-splitting). +void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, + Pix* pix, + int xheight, + int word_left, + int word_top, + Boxa* regions_to_clear) { + if (split_strategy == NO_SPLIT) { + return; + } + int width = pixGetWidth(pix); + int height = pixGetHeight(pix); + // Statistically determine the yextents of the shiro-rekha. + int shirorekha_top, shirorekha_bottom, shirorekha_ylevel; + GetShiroRekhaYExtents(pix, &shirorekha_top, &shirorekha_bottom, + &shirorekha_ylevel); + // Since the shiro rekha is also a stroke, its width is equal to the stroke + // width. + int stroke_width = shirorekha_bottom - shirorekha_top + 1; + + // Some safeguards to protect CCs we do not want to be split. 
+ // These are particularly useful when the word wasn't eliminated earlier + // because xheight information was unavailable. + if (shirorekha_ylevel > height / 2) { + // Shirorekha shouldn't be in the bottom half of the word. + if (devanagari_split_debuglevel > 0) { + tprintf("Skipping splitting CC at (%d, %d): shirorekha in lower half..\n", + word_left, word_top); + } + return; + } + if (stroke_width > height / 3) { + // Even the boldest of fonts shouldn't do this. + if (devanagari_split_debuglevel > 0) { + tprintf("Skipping splitting CC at (%d, %d): stroke width too huge..\n", + word_left, word_top); + } + return; + } + + // Clear the ascender and descender regions of the word. + // Obtain a vertical projection histogram for the resulting image. + Box* box_to_clear = boxCreate(0, shirorekha_top - stroke_width / 3, + width, 5 * stroke_width / 3); + Pix* word_in_xheight = pixCopy(nullptr, pix); + pixClearInRect(word_in_xheight, box_to_clear); + // Also clear any pixels which are below shirorekha_bottom + some leeway. + // The leeway is set to xheight if the information is available, else it is a + // multiplier applied to the stroke width. + int leeway_to_keep = stroke_width * 3; + if (xheight != kUnspecifiedXheight) { + // This is because the xheight-region typically includes the shiro-rekha + // inside it, i.e., the top of the xheight range corresponds to the top of + // shiro-rekha. + leeway_to_keep = xheight - stroke_width; + } + box_to_clear->y = shirorekha_bottom + leeway_to_keep; + box_to_clear->h = height - box_to_clear->y; + pixClearInRect(word_in_xheight, box_to_clear); + boxDestroy(&box_to_clear); + + PixelHistogram vert_hist; + vert_hist.ConstructVerticalCountHist(word_in_xheight); + pixDestroy(&word_in_xheight); + + // If the number of black pixel in any column of the image is less than a + // fraction of the stroke width, treat it as noise / a stray mark. 
Perform + // these changes inside the vert_hist data itself, as that is used later on as + // a bit vector for the final split decision at every column. + for (int i = 0; i < width; ++i) { + if (vert_hist.hist()[i] <= stroke_width / 4) + vert_hist.hist()[i] = 0; + else + vert_hist.hist()[i] = 1; + } + // In order to split the line at any point, we make sure that the width of the + // gap is at least half the stroke width. + int i = 0; + int cur_component_width = 0; + while (i < width) { + if (!vert_hist.hist()[i]) { + int j = 0; + while (i + j < width && !vert_hist.hist()[i+j]) + ++j; + if (j >= stroke_width / 2 && cur_component_width >= stroke_width / 2) { + // Perform a shiro-rekha split. The intervening region lies from i to + // i+j-1. + // A minimal single-pixel split makes the estimation of intra- and + // inter-word spacing easier during page layout analysis, + // whereas a maximal split may be needed for OCR, depending on + // how the engine was trained. + bool minimal_split = (split_strategy == MINIMAL_SPLIT); + int split_width = minimal_split ? 1 : j; + int split_left = minimal_split ? i + (j / 2) - (split_width / 2) : i; + if (!minimal_split || (i != 0 && i + j != width)) { + Box* box_to_clear = + boxCreate(word_left + split_left, + word_top + shirorekha_top - stroke_width / 3, + split_width, + 5 * stroke_width / 3); + if (box_to_clear) { + boxaAddBox(regions_to_clear, box_to_clear, L_CLONE); + // Mark this in the debug image if needed. + if (devanagari_split_debugimage) { + pixRenderBoxArb(debug_image_, box_to_clear, 1, 128, 255, 128); + } + boxDestroy(&box_to_clear); + cur_component_width = 0; + } + } + } + i += j; + } else { + ++i; + ++cur_component_width; + } + } +} + +// Refreshes the words in the segmentation block list by using blobs in the +// input block list. +// The segmentation block list must be set. 
+void ShiroRekhaSplitter::RefreshSegmentationWithNewBlobs( + C_BLOB_LIST* new_blobs) { + // The segmentation block list must have been specified. + ASSERT_HOST(segmentation_block_list_); + if (devanagari_split_debuglevel > 0) { + tprintf("Before refreshing blobs:\n"); + PrintSegmentationStats(segmentation_block_list_); + tprintf("New Blobs found: %d\n", new_blobs->length()); + } + + C_BLOB_LIST not_found_blobs; + RefreshWordBlobsFromNewBlobs(segmentation_block_list_, + new_blobs, + ((devanagari_split_debugimage && debug_image_) ? + ¬_found_blobs : nullptr)); + + if (devanagari_split_debuglevel > 0) { + tprintf("After refreshing blobs:\n"); + PrintSegmentationStats(segmentation_block_list_); + } + if (devanagari_split_debugimage && debug_image_) { + // Plot out the original blobs for which no match was found in the new + // all_blobs list. + C_BLOB_IT not_found_it(¬_found_blobs); + for (not_found_it.mark_cycle_pt(); !not_found_it.cycled_list(); + not_found_it.forward()) { + C_BLOB* not_found = not_found_it.data(); + TBOX not_found_box = not_found->bounding_box(); + Box* box_to_plot = GetBoxForTBOX(not_found_box); + pixRenderBoxArb(debug_image_, box_to_plot, 1, 255, 0, 255); + boxDestroy(&box_to_plot); + } + + // Plot out the blobs unused from all blobs. + C_BLOB_IT all_blobs_it(new_blobs); + for (all_blobs_it.mark_cycle_pt(); !all_blobs_it.cycled_list(); + all_blobs_it.forward()) { + C_BLOB* a_blob = all_blobs_it.data(); + Box* box_to_plot = GetBoxForTBOX(a_blob->bounding_box()); + pixRenderBoxArb(debug_image_, box_to_plot, 3, 0, 127, 0); + boxDestroy(&box_to_plot); + } + } +} + +// Returns a new box object for the corresponding TBOX, based on the original +// image's coordinate system. +Box* ShiroRekhaSplitter::GetBoxForTBOX(const TBOX& tbox) const { + return boxCreate(tbox.left(), pixGetHeight(orig_pix_) - tbox.top() - 1, + tbox.width(), tbox.height()); +} + +// This method returns the computed mode-height of blobs in the pix. 
+// It also prunes very small blobs from calculation. +int ShiroRekhaSplitter::GetModeHeight(Pix* pix) { + Boxa* boxa = pixConnComp(pix, nullptr, 8); + STATS heights(0, pixGetHeight(pix)); + heights.clear(); + for (int i = 0; i < boxaGetCount(boxa); ++i) { + Box* box = boxaGetBox(boxa, i, L_CLONE); + if (box->h >= 3 || box->w >= 3) { + heights.add(box->h, 1); + } + boxDestroy(&box); + } + boxaDestroy(&boxa); + return heights.mode(); +} + +// This method returns y-extents of the shiro-rekha computed from the input +// word image. +void ShiroRekhaSplitter::GetShiroRekhaYExtents(Pix* word_pix, + int* shirorekha_top, + int* shirorekha_bottom, + int* shirorekha_ylevel) { + // Compute a histogram from projecting the word on a vertical line. + PixelHistogram hist_horiz; + hist_horiz.ConstructHorizontalCountHist(word_pix); + // Get the ylevel where the top-line exists. This is basically the global + // maxima in the horizontal histogram. + int topline_onpixel_count = 0; + int topline_ylevel = hist_horiz.GetHistogramMaximum(&topline_onpixel_count); + + // Get the upper and lower extents of the shiro rekha. + int thresh = (topline_onpixel_count * 70) / 100; + int ulimit = topline_ylevel; + int llimit = topline_ylevel; + while (ulimit > 0 && hist_horiz.hist()[ulimit] >= thresh) + --ulimit; + while (llimit < pixGetHeight(word_pix) && hist_horiz.hist()[llimit] >= thresh) + ++llimit; + + if (shirorekha_top) *shirorekha_top = ulimit; + if (shirorekha_bottom) *shirorekha_bottom = llimit; + if (shirorekha_ylevel) *shirorekha_ylevel = topline_ylevel; +} + +// This method returns the global-maxima for the histogram. The frequency of +// the global maxima is returned in count, if specified. 
+int PixelHistogram::GetHistogramMaximum(int* count) const { + int best_value = 0; + for (int i = 0; i < length_; ++i) { + if (hist_[i] > hist_[best_value]) { + best_value = i; + } + } + if (count) { + *count = hist_[best_value]; + } + return best_value; +} + +// Methods to construct histograms from images. +void PixelHistogram::ConstructVerticalCountHist(Pix* pix) { + Clear(); + int width = pixGetWidth(pix); + int height = pixGetHeight(pix); + hist_ = new int[width]; + length_ = width; + int wpl = pixGetWpl(pix); + l_uint32 *data = pixGetData(pix); + for (int i = 0; i < width; ++i) + hist_[i] = 0; + for (int i = 0; i < height; ++i) { + l_uint32 *line = data + i * wpl; + for (int j = 0; j < width; ++j) + if (GET_DATA_BIT(line, j)) + ++(hist_[j]); + } +} + +void PixelHistogram::ConstructHorizontalCountHist(Pix* pix) { + Clear(); + Numa* counts = pixCountPixelsByRow(pix, nullptr); + length_ = numaGetCount(counts); + hist_ = new int[length_]; + for (int i = 0; i < length_; ++i) { + l_int32 val = 0; + numaGetIValue(counts, i, &val); + hist_[i] = val; + } + numaDestroy(&counts); +} + +} // namespace tesseract. diff --git a/tesseract/src/textord/devanagari_processing.h b/tesseract/src/textord/devanagari_processing.h new file mode 100644 index 00000000..cd0bfeb6 --- /dev/null +++ b/tesseract/src/textord/devanagari_processing.h @@ -0,0 +1,210 @@ +// Copyright 2008 Google Inc. All Rights Reserved. +// Author: shobhitsaxena@google.com (Shobhit Saxena) +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ +#define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ + +#include "allheaders.h" +#include "ocrblock.h" +#include "params.h" + +struct Pix; +struct Box; +struct Boxa; + +namespace tesseract { + +extern +INT_VAR_H(devanagari_split_debuglevel, 0, + "Debug level for split shiro-rekha process."); + +extern +BOOL_VAR_H(devanagari_split_debugimage, 0, + "Whether to create a debug image for split shiro-rekha process."); + +class TBOX; +class DebugPixa; + +class PixelHistogram { + public: + PixelHistogram() { + hist_ = nullptr; + length_ = 0; + } + + ~PixelHistogram() { + Clear(); + } + + void Clear() { + delete[] hist_; + length_ = 0; + } + + int* hist() const { return hist_; } + + int length() const { + return length_; + } + + // Methods to construct histograms from images. These clear any existing data. + void ConstructVerticalCountHist(Pix* pix); + void ConstructHorizontalCountHist(Pix* pix); + + // This method returns the global-maxima for the histogram. The frequency of + // the global maxima is returned in count, if specified. + int GetHistogramMaximum(int* count) const; + + private: + int* hist_; + int length_; +}; + +class ShiroRekhaSplitter { + public: + enum SplitStrategy { + NO_SPLIT = 0, // No splitting is performed for the phase. + MINIMAL_SPLIT, // Blobs are split minimally. + MAXIMAL_SPLIT // Blobs are split maximally. + }; + + ShiroRekhaSplitter(); + virtual ~ShiroRekhaSplitter(); + + // Top-level method to perform splitting based on current settings. + // Returns true if a split was actually performed. + // If split_for_pageseg is true, the pageseg_split_strategy_ is used for + // splitting. If false, the ocr_split_strategy_ is used. + bool Split(bool split_for_pageseg, DebugPixa* pixa_debug); + + // Clears the memory held by this object. 
+ void Clear(); + + // Refreshes the words in the segmentation block list by using blobs in the + // input blob list. + // The segmentation block list must be set. + void RefreshSegmentationWithNewBlobs(C_BLOB_LIST* new_blobs); + + // Returns true if the split strategies for pageseg and ocr are different. + bool HasDifferentSplitStrategies() const { + return pageseg_split_strategy_ != ocr_split_strategy_; + } + + // This only keeps a copy of the block list pointer. At split call, the list + // object should still be alive. This block list is used as a golden + // segmentation when performing splitting. + void set_segmentation_block_list(BLOCK_LIST* block_list) { + segmentation_block_list_ = block_list; + } + + static const int kUnspecifiedXheight = -1; + + void set_global_xheight(int xheight) { + global_xheight_ = xheight; + } + + void set_perform_close(bool perform) { + perform_close_ = perform; + } + + // Returns the image obtained from shiro-rekha splitting. The returned object + // is owned by this class. Callers may want to clone the returned pix to keep + // it alive beyond the life of ShiroRekhaSplitter object. + Pix* splitted_image() { + return splitted_image_; + } + + // On setting the input image, a clone of it is owned by this class. + void set_orig_pix(Pix* pix); + + // Returns the input image provided to the object. This object is owned by + // this class. Callers may want to clone the returned pix to work with it. 
+ Pix* orig_pix() { + return orig_pix_; + } + + SplitStrategy ocr_split_strategy() const { + return ocr_split_strategy_; + } + + void set_ocr_split_strategy(SplitStrategy strategy) { + ocr_split_strategy_ = strategy; + } + + SplitStrategy pageseg_split_strategy() const { + return pageseg_split_strategy_; + } + + void set_pageseg_split_strategy(SplitStrategy strategy) { + pageseg_split_strategy_ = strategy; + } + + BLOCK_LIST* segmentation_block_list() { + return segmentation_block_list_; + } + + // This method returns the computed mode-height of blobs in the pix. + // It also prunes very small blobs from calculation. Could be used to provide + // a global xheight estimate for images which have the same point-size text. + static int GetModeHeight(Pix* pix); + + private: + // Method to perform a close operation on the input image. The xheight + // estimate decides the size of sel used. + static void PerformClose(Pix* pix, int xheight_estimate); + + // This method resolves the cc bbox to a particular row and returns the row's + // xheight. This uses block_list_ if available, else just returns the + // global_xheight_ estimate currently set in the object. + int GetXheightForCC(Box* cc_bbox); + + // Returns a list of regions (boxes) which should be cleared in the original + // image so as to perform shiro-rekha splitting. Pix is assumed to carry one + // (or less) word only. Xheight measure could be the global estimate, the row + // estimate, or unspecified. If unspecified, over splitting may occur, since a + // conservative estimate of stroke width along with an associated multiplier + // is used in its place. It is advisable to have a specified xheight when + // splitting for classification/training. + void SplitWordShiroRekha(SplitStrategy split_strategy, + Pix* pix, + int xheight, + int word_left, + int word_top, + Boxa* regions_to_clear); + + // Returns a new box object for the corresponding TBOX, based on the original + // image's coordinate system. 
+ Box* GetBoxForTBOX(const TBOX& tbox) const; + + // This method returns y-extents of the shiro-rekha computed from the input + // word image. + static void GetShiroRekhaYExtents(Pix* word_pix, + int* shirorekha_top, + int* shirorekha_bottom, + int* shirorekha_ylevel); + + Pix* orig_pix_; // Just a clone of the input image passed. + Pix* splitted_image_; // Image produced after the last splitting round. The + // object is owned by this class. + SplitStrategy pageseg_split_strategy_; + SplitStrategy ocr_split_strategy_; + Pix* debug_image_; + // This block list is used as a golden segmentation when performing splitting. + BLOCK_LIST* segmentation_block_list_; + int global_xheight_; + bool perform_close_; // Whether a morphological close operation should be + // performed before CCs are run through splitting. +}; + +} // namespace tesseract. + +#endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ diff --git a/tesseract/src/textord/drawtord.cpp b/tesseract/src/textord/drawtord.cpp new file mode 100644 index 00000000..3e02653a --- /dev/null +++ b/tesseract/src/textord/drawtord.cpp @@ -0,0 +1,423 @@ +/********************************************************************** + * File: drawtord.cpp (Formerly drawto.c) + * Description: Draw things to do with textord. + * Author: Ray Smith + * + * (C) Copyright 1992, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "drawtord.h" + +#include "pithsync.h" +#include "topitch.h" + +namespace tesseract { + +#define TO_WIN_XPOS 0 //default window pos +#define TO_WIN_YPOS 0 +#define TO_WIN_NAME "Textord" + //title of window + +BOOL_VAR (textord_show_fixed_cuts, false, +"Draw fixed pitch cell boundaries"); + +ScrollView* to_win = nullptr; + +#ifndef GRAPHICS_DISABLED + +/********************************************************************** + * create_to_win + * + * Create the to window used to show the fit. + **********************************************************************/ + +ScrollView* create_to_win(ICOORD page_tr) { + if (to_win != nullptr) return to_win; + to_win = new ScrollView(TO_WIN_NAME, TO_WIN_XPOS, TO_WIN_YPOS, + page_tr.x() + 1, page_tr.y() + 1, + page_tr.x(), page_tr.y(), true); + return to_win; +} + + +void close_to_win() { + // to_win is leaked, but this enables the user to view the contents. + if (to_win != nullptr) { + to_win->Update(); + } +} + + +/********************************************************************** + * plot_box_list + * + * Draw a list of blobs. + **********************************************************************/ + +void plot_box_list( //make gradients win + ScrollView* win, //window to draw in + BLOBNBOX_LIST *list, //blob list + ScrollView::Color body_colour //colour to draw + ) { + BLOBNBOX_IT it = list; //iterator + + win->Pen(body_colour); + win->Brush(ScrollView::NONE); + for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { + it.data ()->bounding_box ().plot (win); + } +} + + +/********************************************************************** + * plot_to_row + * + * Draw the blobs of a row in a given colour and draw the line fit. 
+ **********************************************************************/ + +void plot_to_row( //draw a row + TO_ROW *row, //row to draw + ScrollView::Color colour, //colour to draw in + FCOORD rotation //rotation for line + ) { + FCOORD plot_pt; //point to plot + //blobs + BLOBNBOX_IT it = row->blob_list (); + float left, right; //end of row + + if (it.empty ()) { + tprintf ("No blobs in row at %g\n", row->parallel_c ()); + return; + } + left = it.data ()->bounding_box ().left (); + it.move_to_last (); + right = it.data ()->bounding_box ().right (); + plot_blob_list (to_win, row->blob_list (), colour, ScrollView::BROWN); + to_win->Pen(colour); + plot_pt = FCOORD (left, row->line_m () * left + row->line_c ()); + plot_pt.rotate (rotation); + to_win->SetCursor(plot_pt.x (), plot_pt.y ()); + plot_pt = FCOORD (right, row->line_m () * right + row->line_c ()); + plot_pt.rotate (rotation); + to_win->DrawTo(plot_pt.x (), plot_pt.y ()); +} + + +/********************************************************************** + * plot_parallel_row + * + * Draw the blobs of a row in a given colour and draw the line fit. 
+ **********************************************************************/ + +void plot_parallel_row( //draw a row + TO_ROW *row, //row to draw + float gradient, //gradients of lines + int32_t left, //edge of block + ScrollView::Color colour, //colour to draw in + FCOORD rotation //rotation for line + ) { + FCOORD plot_pt; //point to plot + //blobs + BLOBNBOX_IT it = row->blob_list (); + auto fleft = static_cast<float>(left); //floating version + float right; //end of row + + // left=it.data()->bounding_box().left(); + it.move_to_last (); + right = it.data ()->bounding_box ().right (); + plot_blob_list (to_win, row->blob_list (), colour, ScrollView::BROWN); + to_win->Pen(colour); + plot_pt = FCOORD (fleft, gradient * left + row->max_y ()); + plot_pt.rotate (rotation); + to_win->SetCursor(plot_pt.x (), plot_pt.y ()); + plot_pt = FCOORD (fleft, gradient * left + row->min_y ()); + plot_pt.rotate (rotation); + to_win->DrawTo(plot_pt.x (), plot_pt.y ()); + plot_pt = FCOORD (fleft, gradient * left + row->parallel_c ()); + plot_pt.rotate (rotation); + to_win->SetCursor(plot_pt.x (), plot_pt.y ()); + plot_pt = FCOORD (right, gradient * right + row->parallel_c ()); + plot_pt.rotate (rotation); + to_win->DrawTo(plot_pt.x (), plot_pt.y ()); +} + + +/********************************************************************** + * draw_occupation + * + * Draw the row occupation with points above the threshold in white + * and points below the threshold in black. 
+ **********************************************************************/ + +void +draw_occupation ( //draw projection +int32_t xleft, //edge of block +int32_t ybottom, //bottom of block +int32_t min_y, //coordinate limits +int32_t max_y, int32_t occupation[], //projection counts +int32_t thresholds[] //for drop out +) { + int32_t line_index; //pixel coord + ScrollView::Color colour; //of histogram + auto fleft = static_cast<float>(xleft); //float version + + colour = ScrollView::WHITE; + to_win->Pen(colour); + to_win->SetCursor(fleft, static_cast<float>(ybottom)); + for (line_index = min_y; line_index <= max_y; line_index++) { + if (occupation[line_index - min_y] < thresholds[line_index - min_y]) { + if (colour != ScrollView::BLUE) { + colour = ScrollView::BLUE; + to_win->Pen(colour); + } + } + else { + if (colour != ScrollView::WHITE) { + colour = ScrollView::WHITE; + to_win->Pen(colour); + } + } + to_win->DrawTo(fleft + occupation[line_index - min_y] / 10.0, static_cast<float>(line_index)); + } + colour=ScrollView::STEEL_BLUE; + to_win->Pen(colour); + to_win->SetCursor(fleft, static_cast<float>(ybottom)); + for (line_index = min_y; line_index <= max_y; line_index++) { + to_win->DrawTo(fleft + thresholds[line_index - min_y] / 10.0, static_cast<float>(line_index)); + } +} + + +/********************************************************************** + * draw_meanlines + * + * Draw the meanlines of the given block in the given colour. 
+ **********************************************************************/ + +void draw_meanlines( //draw a block + TO_BLOCK *block, //block to draw + float gradient, //gradients of lines + int32_t left, //edge of block + ScrollView::Color colour, //colour to draw in + FCOORD rotation //rotation for line + ) { + FCOORD plot_pt; //point to plot + //rows + TO_ROW_IT row_it = block->get_rows (); + TO_ROW *row; //current row + BLOBNBOX_IT blob_it; //blobs + float right; //end of row + to_win->Pen(colour); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + blob_it.set_to_list (row->blob_list ()); + blob_it.move_to_last (); + right = blob_it.data ()->bounding_box ().right (); + plot_pt = + FCOORD (static_cast<float>(left), + gradient * left + row->parallel_c () + row->xheight); + plot_pt.rotate (rotation); + to_win->SetCursor(plot_pt.x (), plot_pt.y ()); + plot_pt = + FCOORD (right, + gradient * right + row->parallel_c () + row->xheight); + plot_pt.rotate (rotation); + to_win->DrawTo (plot_pt.x (), plot_pt.y ()); + } +} + + +/********************************************************************** + * plot_word_decisions + * + * Plot a row with words in different colours and fuzzy spaces + * highlighted. 
+ **********************************************************************/ + +void plot_word_decisions( //draw words + ScrollView* win, //window tro draw in + int16_t pitch, //of block + TO_ROW *row //row to draw + ) { + ScrollView::Color colour = ScrollView::MAGENTA; //current colour + ScrollView::Color rect_colour; //fuzzy colour + int32_t prev_x; //end of prev blob + int16_t blob_count; //blobs in word + BLOBNBOX *blob; //current blob + TBOX blob_box; //bounding box + //iterator + BLOBNBOX_IT blob_it = row->blob_list (); + BLOBNBOX_IT start_it = blob_it;//word start + + rect_colour = ScrollView::BLACK; + prev_x = -INT16_MAX; + blob_count = 0; + for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { + blob = blob_it.data (); + blob_box = blob->bounding_box (); + if (!blob->joined_to_prev () + && blob_box.left () - prev_x > row->max_nonspace) { + if ((blob_box.left () - prev_x >= row->min_space + || blob_box.left () - prev_x > row->space_threshold) + && blob_count > 0) { + if (pitch > 0 && textord_show_fixed_cuts) + plot_fp_cells (win, colour, &start_it, pitch, blob_count, + &row->projection, row->projection_left, + row->projection_right, + row->xheight * textord_projection_scale); + blob_count = 0; + start_it = blob_it; + } + if (colour == ScrollView::MAGENTA) + colour = ScrollView::RED; + else + colour = static_cast<ScrollView::Color>(colour + 1); + if (blob_box.left () - prev_x < row->min_space) { + if (blob_box.left () - prev_x > row->space_threshold) + rect_colour = ScrollView::GOLDENROD; + else + rect_colour = ScrollView::CORAL; + //fill_color_index(win, rect_colour); + win->Brush(rect_colour); + win->Rectangle (prev_x, blob_box.bottom (), + blob_box.left (), blob_box.top ()); + } + } + if (!blob->joined_to_prev()) + prev_x = blob_box.right(); + if (blob->cblob () != nullptr) + blob->cblob ()->plot (win, colour, colour); + if (!blob->joined_to_prev() && blob->cblob() != nullptr) + blob_count++; + } + if (pitch > 0 && 
textord_show_fixed_cuts && blob_count > 0) + plot_fp_cells (win, colour, &start_it, pitch, blob_count, + &row->projection, row->projection_left, + row->projection_right, + row->xheight * textord_projection_scale); +} + + +/********************************************************************** + * plot_fp_cells + * + * Make a list of fixed pitch cuts and draw them. + **********************************************************************/ + +void plot_fp_cells( //draw words + ScrollView* win, //window tro draw in + ScrollView::Color colour, //colour of lines + BLOBNBOX_IT *blob_it, //blobs + int16_t pitch, //of block + int16_t blob_count, //no of real blobs + STATS *projection, //vertical + int16_t projection_left, //edges //scale factor + int16_t projection_right, + float projection_scale) { + int16_t occupation; //occupied cells + TBOX word_box; //bounding box + FPSEGPT_LIST seg_list; //list of cuts + FPSEGPT_IT seg_it; + FPSEGPT *segpt; //current point + + if (pitsync_linear_version) + check_pitch_sync2 (blob_it, blob_count, pitch, 2, projection, + projection_left, projection_right, + projection_scale, occupation, &seg_list, 0, 0); + else + check_pitch_sync (blob_it, blob_count, pitch, 2, projection, &seg_list); + word_box = blob_it->data ()->bounding_box (); + for (; blob_count > 0; blob_count--) + word_box += box_next (blob_it); + seg_it.set_to_list (&seg_list); + for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { + segpt = seg_it.data (); + if (segpt->faked) { + colour = ScrollView::WHITE; + win->Pen(colour); } + else { + win->Pen(colour); } + win->Line(segpt->position (), word_box.bottom (),segpt->position (), word_box.top ()); + } +} + + +/********************************************************************** + * plot_fp_cells2 + * + * Make a list of fixed pitch cuts and draw them. 
+ **********************************************************************/ + +void plot_fp_cells2( //draw words + ScrollView* win, //window tro draw in + ScrollView::Color colour, //colour of lines + TO_ROW *row, //for location + FPSEGPT_LIST *seg_list //segments to plot + ) { + TBOX word_box; //bounding box + FPSEGPT_IT seg_it = seg_list; + //blobs in row + BLOBNBOX_IT blob_it = row->blob_list (); + FPSEGPT *segpt; //current point + + word_box = blob_it.data ()->bounding_box (); + for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();) + word_box += box_next (&blob_it); + for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { + segpt = seg_it.data (); + if (segpt->faked) { + colour = ScrollView::WHITE; + win->Pen(colour); } + else { + win->Pen(colour); } + win->Line(segpt->position (), word_box.bottom (),segpt->position (), word_box.top ()); + } +} + + +/********************************************************************** + * plot_row_cells + * + * Make a list of fixed pitch cuts and draw them. 
+ **********************************************************************/ + +void plot_row_cells( //draw words + ScrollView* win, //window tro draw in + ScrollView::Color colour, //colour of lines + TO_ROW *row, //for location + float xshift, //amount of shift + ICOORDELT_LIST *cells //cells to draw + ) { + TBOX word_box; //bounding box + ICOORDELT_IT cell_it = cells; + //blobs in row + BLOBNBOX_IT blob_it = row->blob_list (); + ICOORDELT *cell; //current cell + + word_box = blob_it.data ()->bounding_box (); + for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();) + word_box += box_next (&blob_it); + win->Pen(colour); + for (cell_it.mark_cycle_pt (); !cell_it.cycled_list (); cell_it.forward ()) { + cell = cell_it.data (); + win->Line(cell->x () + xshift, word_box.bottom (), cell->x () + xshift, word_box.top ()); + } +} + +#endif // !GRAPHICS_DISABLED + +} // namespace tesseract diff --git a/tesseract/src/textord/drawtord.h b/tesseract/src/textord/drawtord.h new file mode 100644 index 00000000..e88c4896 --- /dev/null +++ b/tesseract/src/textord/drawtord.h @@ -0,0 +1,103 @@ +/********************************************************************** + * File: drawtord.h (Formerly drawto.h) + * Description: Draw things to do with textord. + * Author: Ray Smith + * + * (C) Copyright 1992, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#ifndef DRAWTORD_H +#define DRAWTORD_H + +#include "params.h" +#include "scrollview.h" +#include "pitsync1.h" +#include "blobbox.h" + +namespace tesseract { + +#define NO_SMD "none" + +extern BOOL_VAR_H (textord_show_fixed_cuts, false, +"Draw fixed pitch cell boundaries"); +extern STRING_VAR_H (to_debugfile, DEBUG_WIN_NAME, "Name of debugfile"); +extern STRING_VAR_H (to_smdfile, NO_SMD, "Name of SMD file"); +extern ScrollView* to_win; +extern FILE *to_debug; +// Creates a static display window for textord, and returns a pointer to it. +ScrollView* create_to_win(ICOORD page_tr); +void close_to_win(); // Destroy the textord window. +void create_todebug_win(); //make gradients win +void plot_box_list( //make gradients win + ScrollView* win, //window to draw in + BLOBNBOX_LIST *list, //blob list + ScrollView::Color body_colour //colour to draw + ); +void plot_to_row( //draw a row + TO_ROW *row, //row to draw + ScrollView::Color colour, //colour to draw in + FCOORD rotation //rotation for line + ); +void plot_parallel_row( //draw a row + TO_ROW *row, //row to draw + float gradient, //gradients of lines + int32_t left, //edge of block + ScrollView::Color colour, //colour to draw in + FCOORD rotation //rotation for line + ); +void draw_occupation ( //draw projection +int32_t xleft, //edge of block +int32_t ybottom, //bottom of block +int32_t min_y, //coordinate limits +int32_t max_y, int32_t occupation[], //projection counts +int32_t thresholds[] //for drop out +); +void draw_meanlines( //draw a block + TO_BLOCK *block, //block to draw + float gradient, //gradients of lines + int32_t left, //edge of block + ScrollView::Color colour, //colour to draw in + FCOORD rotation //rotation for line + ); +void plot_word_decisions( //draw words + ScrollView* win, //window tro draw in + int16_t pitch, //of block + TO_ROW *row //row to draw + ); +void plot_fp_cells( //draw words + ScrollView* win, 
//window tro draw in + ScrollView::Color colour, //colour of lines + BLOBNBOX_IT *blob_it, //blobs + int16_t pitch, //of block + int16_t blob_count, //no of real blobs + STATS *projection, //vertical + int16_t projection_left, //edges //scale factor + int16_t projection_right, + float projection_scale); +void plot_fp_cells2( //draw words + ScrollView* win, //window tro draw in + ScrollView::Color colour, //colour of lines + TO_ROW *row, //for location + FPSEGPT_LIST *seg_list //segments to plot + ); +void plot_row_cells( //draw words + ScrollView* win, //window tro draw in + ScrollView::Color colour, //colour of lines + TO_ROW *row, //for location + float xshift, //amount of shift + ICOORDELT_LIST *cells //cells to draw + ); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/textord/edgblob.cpp b/tesseract/src/textord/edgblob.cpp new file mode 100644 index 00000000..4383907f --- /dev/null +++ b/tesseract/src/textord/edgblob.cpp @@ -0,0 +1,462 @@ +/********************************************************************** + * File: edgblob.cpp (Formerly edgeloop.c) + * Description: Functions to clean up an outline before approximation. + * Author: Ray Smith + * + *(C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0(the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + + // Include automatically generated configuration file if running autoconf. 
+#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "edgblob.h" + +#include "scanedg.h" +#include "edgloop.h" + +namespace tesseract { + +// Control parameters used in outline_complexity(), which rejects an outline +// if any one of the 3 conditions is satisfied: +// - number of children exceeds edges_max_children_per_outline +// - number of nested layers exceeds edges_max_children_layers +// - joint complexity exceeds edges_children_count_limit(as in child_count()) +static BOOL_VAR(edges_use_new_outline_complexity, false, + "Use the new outline complexity module"); +static INT_VAR(edges_max_children_per_outline, 10, + "Max number of children inside a character outline"); +static INT_VAR(edges_max_children_layers, 5, + "Max layers of nested children inside a character outline"); +static BOOL_VAR(edges_debug, false, + "turn on debugging for this module"); + +static INT_VAR(edges_children_per_grandchild, 10, + "Importance ratio for chucking outlines"); +static INT_VAR(edges_children_count_limit, 45, + "Max holes allowed in blob"); +static BOOL_VAR(edges_children_fix, false, + "Remove boxy parents of char-like children"); +static INT_VAR(edges_min_nonhole, 12, + "Min pixels for potential char in box"); +static INT_VAR(edges_patharea_ratio, 40, + "Max lensq/area for acceptable child outline"); +static double_VAR(edges_childarea, 0.5, + "Min area fraction of child outline"); +static double_VAR(edges_boxarea, 0.875, + "Min area fraction of grandchild for box"); + +/** + * @name OL_BUCKETS::OL_BUCKETS + * + * Construct an array of buckets for associating outlines into blobs. 
+ */ + +OL_BUCKETS::OL_BUCKETS( +ICOORD bleft, // corners +ICOORD tright): bl(bleft), tr(tright) { + bxdim =(tright.x() - bleft.x()) / BUCKETSIZE + 1; + bydim =(tright.y() - bleft.y()) / BUCKETSIZE + 1; + // make array + buckets.reset(new C_OUTLINE_LIST[bxdim * bydim]); + index = 0; +} + + +/** + * @name OL_BUCKETS::operator( + * + * Return a pointer to a list of C_OUTLINEs corresponding to the + * given pixel coordinates. + */ + +C_OUTLINE_LIST * +OL_BUCKETS::operator()( // array access +int16_t x, // image coords +int16_t y) { + return &buckets[(y-bl.y()) / BUCKETSIZE * bxdim + (x-bl.x()) / BUCKETSIZE]; +} + + +/** + * @name OL_BUCKETS::outline_complexity + * + * This is the new version of count_child. + * + * The goal of this function is to determine if an outline and its + * interiors could be part of a character blob. This is done by + * computing a "complexity" index for the outline, which is the return + * value of this function, and checking it against a threshold. + * The max_count is used for short-circuiting the recursion and forcing + * a rejection that guarantees to fail the threshold test. + * The complexity F for outline X with N children X[i] is + * F(X) = N + sum_i F(X[i]) * edges_children_per_grandchild + * so each layer of nesting increases complexity exponentially. + * An outline can be rejected as a text blob candidate if its complexity + * is too high, has too many children(likely a container), or has too + * many layers of nested inner loops. This has the side-effect of + * flattening out boxed or reversed video text regions. 
+ */ + +int32_t OL_BUCKETS::outline_complexity( + C_OUTLINE *outline, // parent outline + int32_t max_count, // max output + int16_t depth // recurion depth + ) { + int16_t xmin, xmax; // coord limits + int16_t ymin, ymax; + int16_t xindex, yindex; // current bucket + C_OUTLINE *child; // current child + int32_t child_count; // no of children + int32_t grandchild_count; // no of grandchildren + C_OUTLINE_IT child_it; // search iterator + + TBOX olbox = outline->bounding_box(); + xmin =(olbox.left() - bl.x()) / BUCKETSIZE; + xmax =(olbox.right() - bl.x()) / BUCKETSIZE; + ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE; + ymax =(olbox.top() - bl.y()) / BUCKETSIZE; + child_count = 0; + grandchild_count = 0; + if (++depth > edges_max_children_layers) // nested loops are too deep + return max_count + depth; + + for (yindex = ymin; yindex <= ymax; yindex++) { + for (xindex = xmin; xindex <= xmax; xindex++) { + child_it.set_to_list(&buckets[yindex * bxdim + xindex]); + if (child_it.empty()) + continue; + for (child_it.mark_cycle_pt(); !child_it.cycled_list(); + child_it.forward()) { + child = child_it.data(); + if (child == outline || !(*child < *outline)) + continue; + child_count++; + + if (child_count > edges_max_children_per_outline) { // too fragmented + if (edges_debug) + tprintf("Discard outline on child_count=%d > " + "max_children_per_outline=%d\n", + child_count, + static_cast<int32_t>(edges_max_children_per_outline)); + return max_count + child_count; + } + + // Compute the "complexity" of each child recursively + int32_t remaining_count = max_count - child_count - grandchild_count; + if (remaining_count > 0) + grandchild_count += edges_children_per_grandchild * + outline_complexity(child, remaining_count, depth); + if (child_count + grandchild_count > max_count) { // too complex + if (edges_debug) + tprintf("Disgard outline on child_count=%d + grandchild_count=%d " + "> max_count=%d\n", + child_count, grandchild_count, max_count); + return child_count + 
grandchild_count; + } + } + } + } + return child_count + grandchild_count; +} + + +/** + * @name OL_BUCKETS::count_children + * + * Find number of descendants of this outline. + */ +// TODO(rays) Merge with outline_complexity. +int32_t OL_BUCKETS::count_children( // recursive count + C_OUTLINE *outline, // parent outline + int32_t max_count // max output + ) { + bool parent_box; // could it be boxy + int16_t xmin, xmax; // coord limits + int16_t ymin, ymax; + int16_t xindex, yindex; // current bucket + C_OUTLINE *child; // current child + int32_t child_count; // no of children + int32_t grandchild_count; // no of grandchildren + int32_t parent_area; // potential box + float max_parent_area; // potential box + int32_t child_area; // current child + int32_t child_length; // current child + TBOX olbox; + C_OUTLINE_IT child_it; // search iterator + + olbox = outline->bounding_box(); + xmin =(olbox.left() - bl.x()) / BUCKETSIZE; + xmax =(olbox.right() - bl.x()) / BUCKETSIZE; + ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE; + ymax =(olbox.top() - bl.y()) / BUCKETSIZE; + child_count = 0; + grandchild_count = 0; + parent_area = 0; + max_parent_area = 0; + parent_box = true; + for (yindex = ymin; yindex <= ymax; yindex++) { + for (xindex = xmin; xindex <= xmax; xindex++) { + child_it.set_to_list(&buckets[yindex * bxdim + xindex]); + if (child_it.empty()) + continue; + for (child_it.mark_cycle_pt(); !child_it.cycled_list(); + child_it.forward()) { + child = child_it.data(); + if (child != outline && *child < *outline) { + child_count++; + if (child_count <= max_count) { + int max_grand =(max_count - child_count) / + edges_children_per_grandchild; + if (max_grand > 0) + grandchild_count += count_children(child, max_grand) * + edges_children_per_grandchild; + else + grandchild_count += count_children(child, 1); + } + if (child_count + grandchild_count > max_count) { + if (edges_debug) + tprintf("Discarding parent with child count=%d, gc=%d\n", + child_count,grandchild_count); + 
return child_count + grandchild_count; + } + if (parent_area == 0) { + parent_area = outline->outer_area(); + if (parent_area < 0) + parent_area = -parent_area; + max_parent_area = outline->bounding_box().area() * edges_boxarea; + if (parent_area < max_parent_area) + parent_box = false; + } + if (parent_box && + (!edges_children_fix || + child->bounding_box().height() > edges_min_nonhole)) { + child_area = child->outer_area(); + if (child_area < 0) + child_area = -child_area; + if (edges_children_fix) { + if (parent_area - child_area < max_parent_area) { + parent_box = false; + continue; + } + if (grandchild_count > 0) { + if (edges_debug) + tprintf("Discarding parent of area %d, child area=%d, max%g " + "with gc=%d\n", + parent_area, child_area, max_parent_area, + grandchild_count); + return max_count + 1; + } + child_length = child->pathlength(); + if (child_length * child_length > + child_area * edges_patharea_ratio) { + if (edges_debug) + tprintf("Discarding parent of area %d, child area=%d, max%g " + "with child length=%d\n", + parent_area, child_area, max_parent_area, + child_length); + return max_count + 1; + } + } + if (child_area < child->bounding_box().area() * edges_childarea) { + if (edges_debug) + tprintf("Discarding parent of area %d, child area=%d, max%g " + "with child rect=%d\n", + parent_area, child_area, max_parent_area, + child->bounding_box().area()); + return max_count + 1; + } + } + } + } + } + } + return child_count + grandchild_count; +} + + + + +/** + * @name OL_BUCKETS::extract_children + * + * Find number of descendants of this outline. 
+ */ + +void OL_BUCKETS::extract_children( // recursive count + C_OUTLINE *outline, // parent outline + C_OUTLINE_IT *it // destination iterator + ) { + int16_t xmin, xmax; // coord limits + int16_t ymin, ymax; + int16_t xindex, yindex; // current bucket + TBOX olbox; + C_OUTLINE_IT child_it; // search iterator + + olbox = outline->bounding_box(); + xmin =(olbox.left() - bl.x()) / BUCKETSIZE; + xmax =(olbox.right() - bl.x()) / BUCKETSIZE; + ymin =(olbox.bottom() - bl.y()) / BUCKETSIZE; + ymax =(olbox.top() - bl.y()) / BUCKETSIZE; + for (yindex = ymin; yindex <= ymax; yindex++) { + for (xindex = xmin; xindex <= xmax; xindex++) { + child_it.set_to_list(&buckets[yindex * bxdim + xindex]); + for (child_it.mark_cycle_pt(); !child_it.cycled_list(); + child_it.forward()) { + if (*child_it.data() < *outline) { + it->add_after_then_move(child_it.extract()); + } + } + } + } +} + + +/** + * @name extract_edges + * + * Run the edge detector over the block and return a list of blobs. + */ + +void extract_edges(Pix* pix, // thresholded image + BLOCK *block) { // block to scan + C_OUTLINE_LIST outlines; // outlines in block + C_OUTLINE_IT out_it = &outlines; + + block_edges(pix, &(block->pdblk), &out_it); + ICOORD bleft; // block box + ICOORD tright; + block->pdblk.bounding_box(bleft, tright); + // make blobs + outlines_to_blobs(block, bleft, tright, &outlines); +} + + +/** + * @name outlines_to_blobs + * + * Gather together outlines into blobs using the usual bucket sort. + */ + +void outlines_to_blobs( // find blobs + BLOCK *block, // block to scan + ICOORD bleft, + ICOORD tright, + C_OUTLINE_LIST *outlines) { + // make buckets + OL_BUCKETS buckets(bleft, tright); + + fill_buckets(outlines, &buckets); + empty_buckets(block, &buckets); +} + + +/** + * @name fill_buckets + * + * Run the edge detector over the block and return a list of blobs. 
+ */ + +void fill_buckets( // find blobs + C_OUTLINE_LIST *outlines, // outlines in block + OL_BUCKETS *buckets // output buckets + ) { + TBOX ol_box; // outline box + C_OUTLINE_IT out_it = outlines; // iterator + C_OUTLINE_IT bucket_it; // iterator in bucket + C_OUTLINE *outline; // current outline + + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + outline = out_it.extract(); // take off list + // get box + ol_box = outline->bounding_box(); + bucket_it.set_to_list((*buckets) (ol_box.left(), ol_box.bottom())); + bucket_it.add_to_end(outline); + } +} + + +/** + * @name empty_buckets + * + * Run the edge detector over the block and return a list of blobs. + */ + +void empty_buckets( // find blobs + BLOCK *block, // block to scan + OL_BUCKETS *buckets // output buckets + ) { + bool good_blob; // healthy blob + C_OUTLINE_LIST outlines; // outlines in block + // iterator + C_OUTLINE_IT out_it = &outlines; + C_OUTLINE_IT bucket_it = buckets->start_scan(); + C_OUTLINE_IT parent_it; // parent outline + C_BLOB_IT good_blobs = block->blob_list(); + C_BLOB_IT junk_blobs = block->reject_blobs(); + + while (!bucket_it.empty()) { + out_it.set_to_list(&outlines); + do { + parent_it = bucket_it; // find outermost + do { + bucket_it.forward(); + } while (!bucket_it.at_first() && + !(*parent_it.data() < *bucket_it.data())); + } while (!bucket_it.at_first()); + + // move to new list + out_it.add_after_then_move(parent_it.extract()); + good_blob = capture_children(buckets, &junk_blobs, &out_it); + C_BLOB::ConstructBlobsFromOutlines(good_blob, &outlines, &good_blobs, + &junk_blobs); + + bucket_it.set_to_list(buckets->scan_next()); + } +} + + +/** + * @name capture_children + * + * Find all neighbouring outlines that are children of this outline + * and either move them to the output list or declare this outline + * illegal and return false. 
+ */ + +bool capture_children( // find children + OL_BUCKETS* buckets, // bucket sort clanss + C_BLOB_IT* reject_it, // dead grandchildren + C_OUTLINE_IT* blob_it // output outlines +) { + C_OUTLINE *outline; // master outline + int32_t child_count; // no of children + + outline = blob_it->data(); + if (edges_use_new_outline_complexity) + child_count = buckets->outline_complexity(outline, + edges_children_count_limit, + 0); + else + child_count = buckets->count_children(outline, + edges_children_count_limit); + if (child_count > edges_children_count_limit) + return false; + + if (child_count > 0) + buckets->extract_children(outline, blob_it); + return true; +} + +} // namespace tesseract diff --git a/tesseract/src/textord/edgblob.h b/tesseract/src/textord/edgblob.h new file mode 100644 index 00000000..a3b7ac1b --- /dev/null +++ b/tesseract/src/textord/edgblob.h @@ -0,0 +1,100 @@ +/********************************************************************** + * File: edgblob.h (Formerly edgeloop.h) + * Description: Functions to clean up an outline before approximation. + * Author: Ray Smith + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#ifndef EDGBLOB_H +#define EDGBLOB_H + +#include "scrollview.h" +#include "params.h" +#include "ocrblock.h" +#include "coutln.h" +#include "crakedge.h" + +#include <memory> + +namespace tesseract { + +#define BUCKETSIZE 16 + +class OL_BUCKETS +{ + public: + OL_BUCKETS( //constructor + ICOORD bleft, //corners + ICOORD tright); + + ~OL_BUCKETS () = default; + + C_OUTLINE_LIST *operator () (//array access + int16_t x, //image coords + int16_t y); + //first non-empty bucket + C_OUTLINE_LIST *start_scan() { + for (index = 0; buckets[index].empty () && index < bxdim * bydim - 1; + index++); + return &buckets[index]; + } + //next non-empty bucket + C_OUTLINE_LIST *scan_next() { + for (; buckets[index].empty () && index < bxdim * bydim - 1; index++); + return &buckets[index]; + } + int32_t count_children( //recursive sum + C_OUTLINE *outline, //parent outline + int32_t max_count); // max output + int32_t outline_complexity( // new version of count_children + C_OUTLINE *outline, // parent outline + int32_t max_count, // max output + int16_t depth); // level of recursion + void extract_children( //single level get + C_OUTLINE *outline, //parent outline + C_OUTLINE_IT *it); //destination iterator + + private: + std::unique_ptr<C_OUTLINE_LIST[]> buckets; //array of buckets + int16_t bxdim; //size of array + int16_t bydim; + ICOORD bl; //corners + ICOORD tr; + int32_t index; //for extraction scan +}; + +void extract_edges(Pix* pix, // thresholded image + BLOCK* block); // block to scan +void outlines_to_blobs( //find blobs + BLOCK *block, //block to scan + ICOORD bleft, //block box //outlines in block + ICOORD tright, + C_OUTLINE_LIST *outlines); +void fill_buckets( //find blobs + C_OUTLINE_LIST *outlines, //outlines in block + OL_BUCKETS *buckets //output buckets + ); +void empty_buckets( //find blobs + BLOCK *block, //block to scan + OL_BUCKETS *buckets //output buckets + ); +bool 
capture_children( //find children + OL_BUCKETS* buckets, //bucket sort clanss + C_BLOB_IT* reject_it, //dead grandchildren + C_OUTLINE_IT* blob_it //output outlines +); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/textord/edgloop.cpp b/tesseract/src/textord/edgloop.cpp new file mode 100644 index 00000000..33cf3a02 --- /dev/null +++ b/tesseract/src/textord/edgloop.cpp @@ -0,0 +1,162 @@ +/********************************************************************** + * File: edgloop.cpp (Formerly edgeloop.c) + * Description: Functions to clean up an outline before approximation. + * Author: Ray Smith + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + + // Include automatically generated configuration file if running autoconf. +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "scanedg.h" + +#include "edgloop.h" + +namespace tesseract { + +#define MINEDGELENGTH 8 // min decent length + +/********************************************************************** + * complete_edge + * + * Complete the edge by cleaning it up. 
+ **********************************************************************/ + +void complete_edge(CRACKEDGE *start, //start of loop + C_OUTLINE_IT* outline_it) { + ScrollView::Color colour; //colour to draw in + int16_t looplength; //steps in loop + ICOORD botleft; //bounding box + ICOORD topright; + C_OUTLINE *outline; //new outline + + //check length etc. + colour = check_path_legal (start); + + if (colour == ScrollView::RED || colour == ScrollView::BLUE) { + looplength = loop_bounding_box (start, botleft, topright); + outline = new C_OUTLINE (start, botleft, topright, looplength); + //add to list + outline_it->add_after_then_move (outline); + } +} + + +/********************************************************************** + * check_path_legal + * + * Check that the outline is legal for length and for chaincode sum. + * The return value is RED for a normal black-inside outline, + * BLUE for a white-inside outline, MAGENTA if it is too short, + * YELLOW if it is too long, and GREEN if it is illegal. + * These colours are used to draw the raw outline. 
+ **********************************************************************/ + +ScrollView::Color check_path_legal( //certify outline + CRACKEDGE *start //start of loop + ) { + int lastchain; //last chain code + int chaindiff; //chain code diff + int32_t length; //length of loop + int32_t chainsum; //sum of chain diffs + CRACKEDGE *edgept; //current point + constexpr ERRCODE ED_ILLEGAL_SUM("Illegal sum of chain codes"); + + length = 0; + chainsum = 0; //sum of chain codes + edgept = start; + lastchain = edgept->prev->stepdir; //previous chain code + do { + length++; + if (edgept->stepdir != lastchain) { + //chain code difference + chaindiff = edgept->stepdir - lastchain; + if (chaindiff > 2) + chaindiff -= 4; + else if (chaindiff < -2) + chaindiff += 4; + chainsum += chaindiff; //sum differences + lastchain = edgept->stepdir; + } + edgept = edgept->next; + } + while (edgept != start && length < C_OUTLINE::kMaxOutlineLength); + + if ((chainsum != 4 && chainsum != -4) + || edgept != start || length < MINEDGELENGTH) { + if (edgept != start) { + return ScrollView::YELLOW; + } else if (length < MINEDGELENGTH) { + return ScrollView::MAGENTA; + } else { + ED_ILLEGAL_SUM.error ("check_path_legal", TESSLOG, "chainsum=%d", + chainsum); + return ScrollView::GREEN; + } + } + //colour on inside + return chainsum < 0 ? ScrollView::BLUE : ScrollView::RED; +} + +/********************************************************************** + * loop_bounding_box + * + * Find the bounding box of the edge loop. 
+ **********************************************************************/ + +int16_t loop_bounding_box( //get bounding box + CRACKEDGE *&start, //edge loop + ICOORD &botleft, //bounding box + ICOORD &topright) { + int16_t length; //length of loop + int16_t leftmost; //on top row + CRACKEDGE *edgept; //current point + CRACKEDGE *realstart; //topleft start + + edgept = start; + realstart = start; + botleft = topright = ICOORD (edgept->pos.x (), edgept->pos.y ()); + leftmost = edgept->pos.x (); + length = 0; //coutn length + do { + edgept = edgept->next; + if (edgept->pos.x () < botleft.x ()) + //get bounding box + botleft.set_x (edgept->pos.x ()); + else if (edgept->pos.x () > topright.x ()) + topright.set_x (edgept->pos.x ()); + if (edgept->pos.y () < botleft.y ()) + //get bounding box + botleft.set_y (edgept->pos.y ()); + else if (edgept->pos.y () > topright.y ()) { + realstart = edgept; + leftmost = edgept->pos.x (); + topright.set_y (edgept->pos.y ()); + } + else if (edgept->pos.y () == topright.y () + && edgept->pos.x () < leftmost) { + //leftmost on line + leftmost = edgept->pos.x (); + realstart = edgept; + } + length++; //count elements + } + while (edgept != start); + start = realstart; //shift it to topleft + return length; +} + +} // namespace tesseract diff --git a/tesseract/src/textord/edgloop.h b/tesseract/src/textord/edgloop.h new file mode 100644 index 00000000..26cd2f21 --- /dev/null +++ b/tesseract/src/textord/edgloop.h @@ -0,0 +1,44 @@ +/********************************************************************** + * File: edgloop.h (Formerly edgeloop.h) + * Description: Functions to clean up an outline before approximation. + * Author: Ray Smith + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef EDGLOOP_H +#define EDGLOOP_H + +#include "scrollview.h" +#include "params.h" +#include "pdblock.h" +#include "coutln.h" +#include "crakedge.h" + +namespace tesseract { + +#define BUCKETSIZE 16 + +void complete_edge(CRACKEDGE *start, //start of loop + C_OUTLINE_IT* outline_it); +ScrollView::Color check_path_legal( //certify outline + CRACKEDGE *start //start of loop + ); +int16_t loop_bounding_box( //get bounding box + CRACKEDGE *&start, //edge loop + ICOORD &botleft, //bounding box + ICOORD &topright); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/textord/equationdetectbase.cpp b/tesseract/src/textord/equationdetectbase.cpp new file mode 100644 index 00000000..1d40ed8e --- /dev/null +++ b/tesseract/src/textord/equationdetectbase.cpp @@ -0,0 +1,64 @@ +/////////////////////////////////////////////////////////////////////// +// File: equationdetectbase.cpp +// Description: The base class equation detection class. +// Author: Zongyi (Joe) Liu (joeliu@google.com) +// Created: Fri Aug 31 11:13:01 PST 2011 +// +// (C) Copyright 2011, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#include "allheaders.h" +#include "blobbox.h" +#include "equationdetectbase.h" + +namespace tesseract { + +// Destructor. +// It is defined here, so the compiler can create a single vtable +// instead of weak vtables in every compilation unit. +EquationDetectBase::~EquationDetectBase() = default; + +void EquationDetectBase::RenderSpecialText(Pix* pix, + BLOBNBOX* blob) { + ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32 && blob != nullptr); + const TBOX& tbox = blob->bounding_box(); + int height = pixGetHeight(pix); + const int box_width = 5; + + // Coordinate translation: tesseract use left bottom as the original, while + // leptonica uses left top as the original. + Box *box = boxCreate(tbox.left(), height - tbox.top(), + tbox.width(), tbox.height()); + switch (blob->special_text_type()) { + case BSTT_MATH: // Red box. + pixRenderBoxArb(pix, box, box_width, 255, 0, 0); + break; + case BSTT_DIGIT: // cyan box. + pixRenderBoxArb(pix, box, box_width, 0, 255, 255); + break; + case BSTT_ITALIC: // Green box. + pixRenderBoxArb(pix, box, box_width, 0, 255, 0); + break; + case BSTT_UNCLEAR: // blue box. + pixRenderBoxArb(pix, box, box_width, 0, 255, 0); + break; + case BSTT_NONE: + default: + // yellow box. 
+ pixRenderBoxArb(pix, box, box_width, 255, 255, 0); + break; + } + boxDestroy(&box); +} + +} // namespace tesseract diff --git a/tesseract/src/textord/equationdetectbase.h b/tesseract/src/textord/equationdetectbase.h new file mode 100644 index 00000000..7f84bd09 --- /dev/null +++ b/tesseract/src/textord/equationdetectbase.h @@ -0,0 +1,59 @@ +/////////////////////////////////////////////////////////////////////// +// File: equationdetectbase.h +// Description: The base class equation detection class. +// Author: Zongyi (Joe) Liu (joeliu@google.com) +// Created: Fri Aug 31 11:13:01 PST 2011 +// +// (C) Copyright 2011, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_ +#define TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_ + +class BLOBNBOX_LIST; +class TO_BLOCK; +struct Pix; + +namespace tesseract { + +class ColPartitionGrid; +class ColPartitionSet; + +class TESS_API EquationDetectBase { + public: + EquationDetectBase() = default; + virtual ~EquationDetectBase(); + + // Iterate over the blobs inside to_block, and set the blobs that we want to + // process to BSTT_NONE. (By default, they should be BSTT_SKIP). The function + // returns 0 upon success. + virtual int LabelSpecialText(TO_BLOCK* to_block) = 0; + + // Interface to find possible equation partition grid from part_grid. This + // should be called after IdentifySpecialText function. 
+ virtual int FindEquationParts(ColPartitionGrid* part_grid, + ColPartitionSet** best_columns) = 0; + + // Debug function: Render a bounding box on pix based on the value of its + // special_text_type, specifically: + // BSTT_MATH: red box + // BSTT_DIGIT: cyan box + // BSTT_ITALIC: green box + // BSTT_UNCLEAR: blue box + // All others: yellow box + static void RenderSpecialText(Pix* pix, BLOBNBOX* blob); +}; + +} // namespace tesseract + +#endif // TESSERACT_TEXTORD_EQUATIONDETECTBASE_H_ diff --git a/tesseract/src/textord/fpchop.cpp b/tesseract/src/textord/fpchop.cpp new file mode 100644 index 00000000..91444a4d --- /dev/null +++ b/tesseract/src/textord/fpchop.cpp @@ -0,0 +1,890 @@ +/********************************************************************** + * File: fpchop.cpp (Formerly fp_chop.c) + * Description: Code to chop fixed pitch text into character cells. + * Author: Ray Smith + * + * (C) Copyright 1993, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + + // Include automatically generated configuration file if running autoconf. 
+#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "fpchop.h" + +#include "blobbox.h" +#include "statistc.h" +#include "drawtord.h" +#include "tovars.h" +#include "topitch.h" + +namespace tesseract { + +INT_VAR (textord_fp_chop_error, 2, +"Max allowed bending of chop cells"); +double_VAR (textord_fp_chop_snap, 0.5, +"Max distance of chop pt from vertex"); + +ELISTIZE(C_OUTLINE_FRAG) + +static WERD* add_repeated_word( + WERD_IT* rep_it, + int16_t& rep_left, + int16_t& prev_chop_coord, + uint8_t& blanks, + float pitch, + WERD_IT* word_it +); + +static void fixed_chop_cblob( + C_BLOB* blob, + int16_t chop_coord, + float pitch_error, + C_OUTLINE_LIST* left_outlines, + C_OUTLINE_LIST* right_outlines +); + +static void fixed_split_coutline( + C_OUTLINE* srcline, + int16_t chop_coord, + float pitch_error, + C_OUTLINE_IT* left_it, + C_OUTLINE_IT* right_it +); + +static bool fixed_chop_coutline( + C_OUTLINE* srcline, + int16_t chop_coord, + float pitch_error, + C_OUTLINE_FRAG_LIST* left_frags, + C_OUTLINE_FRAG_LIST* right_frags +); + +static void save_chop_cfragment( + int16_t head_index, + ICOORD head_pos, + int16_t tail_index, + ICOORD tail_pos, + C_OUTLINE* srcline, + C_OUTLINE_FRAG_LIST* frags +); + +static void add_frag_to_list( + C_OUTLINE_FRAG* frag, + C_OUTLINE_FRAG_LIST* frags +); + +static void close_chopped_cfragments( + C_OUTLINE_FRAG_LIST* frags, + C_OUTLINE_LIST* children, + float pitch_error, + C_OUTLINE_IT* dest_it +); + +static C_OUTLINE* join_chopped_fragments( + C_OUTLINE_FRAG* bottom, + C_OUTLINE_FRAG* top +); + +static void join_segments( + C_OUTLINE_FRAG* bottom, + C_OUTLINE_FRAG* top +); + +/********************************************************************** + * fixed_pitch_words + * + * Make a ROW from a fixed pitch TO_ROW. 
 **********************************************************************/
// Walks the char cell boundaries in row->char_cells from left to right.
// At each boundary every blob whose left edge is at or before the boundary
// is extracted from the row's blob list and chopped by split_to_blob.
// Outlines that end up on the left become one C_BLOB of the current word.
// A cell that yields no left outlines is treated as empty: it ends the
// current word (if any blobs were collected) and contributes blanks to the
// next one. Words from row->rep_words are moved into the output wherever
// their left edge falls. Returns nullptr if the row has no blobs or has
// fewer than two char cells; otherwise returns a newly allocated ROW.
// Note: rotation is not referenced in the body, and xstarts is written but
// never read here.
ROW *fixed_pitch_words(                 //find lines
                       TO_ROW *row,     //row to do
                       FCOORD rotation  //for drawing
                      ) {
  bool bol;                      //start of line
  uint8_t blanks;                //in front of word
  uint8_t new_blanks;            //blanks in empty cell
  int16_t chop_coord;            //chop boundary
  int16_t prev_chop_coord;       //start of cell
  int16_t rep_left;              //left edge of rep word
  ROW *real_row;                 //output row
  C_OUTLINE_LIST left_coutlines;
  C_OUTLINE_LIST right_coutlines;
  C_BLOB_LIST cblobs;
  C_BLOB_IT cblob_it = &cblobs;
  WERD_LIST words;
  WERD_IT word_it = &words;      //new words
                                 //repeated blobs
  WERD_IT rep_it = &row->rep_words;
  WERD *word;                    //new word
  int32_t xstarts[2];            //row ends
  int32_t prev_x;                //end of prev blob
                                 //iterator
  BLOBNBOX_IT box_it = row->blob_list ();
                                 //boundaries
  ICOORDELT_IT cell_it = &row->char_cells;

#ifndef GRAPHICS_DISABLED
  if (textord_show_page_cuts && to_win != nullptr) {
    plot_row_cells (to_win, ScrollView::RED, row, 0, &row->char_cells);
  }
#endif

  prev_x = -INT16_MAX;
  bol = true;
  blanks = 0;
  // rep_left == INT16_MAX is the "no more repeated words" sentinel.
  if (rep_it.empty ())
    rep_left = INT16_MAX;
  else
    rep_left = rep_it.data ()->bounding_box ().left ();
  if (box_it.empty ())
    return nullptr;              //empty row
  xstarts[0] = box_it.data ()->bounding_box ().left ();
  if (rep_left < xstarts[0]) {
    xstarts[0] = rep_left;
  }
  // At least two cell boundaries are needed to delimit one cell.
  if (cell_it.empty () || row->char_cells.singleton ()) {
    tprintf ("Row without enough char cells!\n");
    tprintf ("Leftmost blob is at (%d,%d)\n",
             box_it.data ()->bounding_box ().left (),
             box_it.data ()->bounding_box ().bottom ());
    return nullptr;
  }
  ASSERT_HOST (!cell_it.empty () && !row->char_cells.singleton ());
  prev_chop_coord = cell_it.data ()->x ();
  word = nullptr;
  // Emit repeated words that start to the left of the first boundary.
  while (rep_left < cell_it.data ()->x ()) {
    word = add_repeated_word (&rep_it, rep_left, prev_chop_coord,
                              blanks, row->fixed_pitch, &word_it);
  }
  cell_it.mark_cycle_pt ();
  // add_repeated_word may have advanced prev_chop_coord to or past the
  // first boundary; if so start from the next one.
  if (prev_chop_coord >= cell_it.data ()->x ())
    cell_it.forward ();
  for (; !cell_it.cycled_list (); cell_it.forward ()) {
    chop_coord = cell_it.data ()->x ();
    // Chop every blob that starts at or before this boundary.
    while (!box_it.empty ()
           && box_it.data ()->bounding_box ().left () <= chop_coord) {
      if (box_it.data ()->bounding_box ().right () > prev_x)
        prev_x = box_it.data ()->bounding_box ().right ();
      split_to_blob (box_it.extract (), chop_coord,
                     textord_fp_chop_error + 0.5f,
                     &left_coutlines,
                     &right_coutlines);
      box_it.forward ();
      // Discard following blobs that carry no cblob.
      while (!box_it.empty() && box_it.data()->cblob() == nullptr) {
        delete box_it.extract();
        box_it.forward();
      }
    }
    // Leftovers from the previous cell still need chopping at this boundary
    // even though no new blob was consumed.
    if (!right_coutlines.empty() && left_coutlines.empty())
      split_to_blob (nullptr, chop_coord,
                     textord_fp_chop_error + 0.5f,
                     &left_coutlines,
                     &right_coutlines);
    if (!left_coutlines.empty()) {
      cblob_it.add_after_then_move(new C_BLOB(&left_coutlines));
    } else {
      // Empty cell: work out how many blanks it represents, measured up to
      // the next repeated word if that starts before this boundary.
      if (rep_left < chop_coord) {
        if (rep_left > prev_chop_coord)
          new_blanks = static_cast<uint8_t>(floor ((rep_left - prev_chop_coord)
                                                   / row->fixed_pitch + 0.5));
        else
          new_blanks = 0;
      }
      else {
        if (chop_coord > prev_chop_coord)
          new_blanks = static_cast<uint8_t>(floor ((chop_coord - prev_chop_coord)
                                                   / row->fixed_pitch + 0.5));
        else
          new_blanks = 0;
      }
      if (!cblob_it.empty()) {
        // Force one blank after a preceding word that is not a repeated word.
        if (blanks < 1 && word != nullptr && !word->flag (W_REP_CHAR))
          blanks = 1;
        word = new WERD (&cblobs, blanks, nullptr);
        cblob_it.set_to_list (&cblobs);
        word->set_flag (W_DONT_CHOP, true);
        word_it.add_after_then_move (word);
        if (bol) {
          word->set_flag (W_BOL, true);
          bol = false;
        }
        blanks = new_blanks;
      }
      else
        blanks += new_blanks;
      while (rep_left < chop_coord) {
        word = add_repeated_word (&rep_it, rep_left, prev_chop_coord,
                                  blanks, row->fixed_pitch, &word_it);
      }
    }
    if (prev_chop_coord < chop_coord)
      prev_chop_coord = chop_coord;
  }
  // Flush blobs collected after the last empty cell.
  if (!cblob_it.empty()) {
    word = new WERD(&cblobs, blanks, nullptr);
    word->set_flag (W_DONT_CHOP, true);
    word_it.add_after_then_move (word);
    if (bol)
      word->set_flag (W_BOL, true);
  }
  ASSERT_HOST (word != nullptr);
  // Any repeated words left over go at the end of the row.
  while (!rep_it.empty ()) {
    add_repeated_word (&rep_it, rep_left, prev_chop_coord,
                       blanks, row->fixed_pitch, &word_it);
  }
                                 //at end of line
  word_it.data ()->set_flag (W_EOL, true);
  if (prev_chop_coord > prev_x)
    prev_x = prev_chop_coord;
  xstarts[1] = prev_x + 1;
  real_row = new ROW (row, static_cast<int16_t>(row->kern_size), static_cast<int16_t>(row->space_size));
  word_it.set_to_list (real_row->word_list ());
                                 //put words in row
  word_it.add_list_after (&words);
  real_row->recalc_bounding_box ();
  return real_row;
}


/**********************************************************************
 * add_repeated_word
 *
 * Move the current word of rep_it to the end of word_it's list.
 * If the word starts right of prev_chop_coord the gap, rounded to a whole
 * number of pitches, is added to blanks before blanks is stored on the word.
 * On return: prev_chop_coord is the right edge of the moved word, blanks is
 * reset to 0, and rep_left is the left edge of the next repeated word or
 * INT16_MAX when there are none left. Returns the moved word.
 **********************************************************************/

static
WERD *add_repeated_word(                         //move repeated word
                        WERD_IT *rep_it,         //repeated words
                        int16_t &rep_left,       //left edge of word
                        int16_t &prev_chop_coord,//previous word end
                        uint8_t &blanks,         //no of blanks
                        float pitch,             //char cell size
                        WERD_IT *word_it         //list of words
                       ) {
  WERD *word;                    //word to move
  int16_t new_blanks;            //extra blanks

  if (rep_left > prev_chop_coord) {
    // NOTE(review): the value is narrowed to uint8_t before being stored in
    // the int16_t new_blanks, so gaps of 256 or more pitches wrap.
    new_blanks = static_cast<uint8_t>(floor ((rep_left - prev_chop_coord) / pitch + 0.5));
    blanks += new_blanks;
  }
  word = rep_it->extract ();
  prev_chop_coord = word->bounding_box ().right ();
  word_it->add_after_then_move (word);
  word->set_blanks (blanks);
  rep_it->forward ();
  if (rep_it->empty ())
    rep_left = INT16_MAX;
  else
    rep_left = rep_it->data ()->bounding_box ().left ();
  blanks = 0;
  return word;
}


/**********************************************************************
 * split_to_blob
 *
 * Split a BLOBNBOX across a vertical chop line and put the pieces
 * into a left outline list and a right outline list.
 **********************************************************************/

// blob may be nullptr, in which case only the outlines already waiting in
// right_coutlines are re-chopped at chop_coord. The BLOBNBOX is always
// deleted here; its C_BLOB (if any) is handed to fixed_chop_cblob, which
// deletes it.
void split_to_blob(                                 //split the blob
                   BLOBNBOX *blob,                  //blob to split
                   int16_t chop_coord,              //place to chop
                   float pitch_error,               //allowed deviation
                   C_OUTLINE_LIST *left_coutlines,  //for cblobs
                   C_OUTLINE_LIST *right_coutlines) {
  C_BLOB *real_cblob;            //cblob to chop

  if (blob != nullptr) {
    real_cblob = blob->cblob();
  } else {
    real_cblob = nullptr;
  }
  // Nothing to do when there is neither a new cblob nor pending outlines.
  if (!right_coutlines->empty() || real_cblob != nullptr)
    fixed_chop_cblob(real_cblob,
                     chop_coord,
                     pitch_error,
                     left_coutlines,
                     right_coutlines);

  delete blob;
}

/**********************************************************************
 * fixed_chop_cblob
 *
 * Chop the given cblob (if any) and the existing right outlines to
 * produce a list of outlines left of the chop point and more to the right.
 * The outlines already in right_outlines are re-split first, then the
 * outlines of blob. blob is deleted after its outlines have been extracted.
 **********************************************************************/

static
void fixed_chop_cblob(                                //split the blob
                      C_BLOB *blob,                   //blob to split
                      int16_t chop_coord,             //place to chop
                      float pitch_error,              //allowed deviation
                      C_OUTLINE_LIST *left_outlines,  //left half of chop
                      C_OUTLINE_LIST *right_outlines  //right half of chop
                     ) {
  C_OUTLINE *old_right;          //already there
  C_OUTLINE_LIST new_outlines;   //new right ones
                                 //output iterator
  C_OUTLINE_IT left_it = left_outlines;
                                 //in/out iterator
  C_OUTLINE_IT right_it = right_outlines;
  C_OUTLINE_IT new_it = &new_outlines;
  C_OUTLINE_IT blob_it;          //outlines in blob

  if (!right_it.empty ()) {
    // Drain the old right-hand outlines, collecting whatever stays on the
    // right in new_outlines so it is not re-visited by this loop.
    while (!right_it.empty ()) {
      old_right = right_it.extract ();
      right_it.forward ();
      fixed_split_coutline(old_right,
                           chop_coord,
                           pitch_error,
                           &left_it,
                           &new_it);
    }
    right_it.add_list_before (&new_outlines);
  }
  if (blob != nullptr) {
    blob_it.set_to_list (blob->out_list ());
    for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
         blob_it.forward ())
      fixed_split_coutline (blob_it.extract (), chop_coord, pitch_error,
                            &left_it, &right_it);
    delete blob;
  }
}


/**********************************************************************
 * fixed_split_coutline
 *
 * Chop the given outline (if necessary) placing the fragments which
 * fall either side of the chop line into the appropriate list.
 * srcline is either inserted whole into one of the two lists or, when it
 * has been chopped into fragments, deleted.
 **********************************************************************/

static
void fixed_split_coutline(                        //chop the outline
                          C_OUTLINE *srcline,     //source outline
                          int16_t chop_coord,     //place to chop
                          float pitch_error,      //allowed deviation
                          C_OUTLINE_IT *left_it,  //left half of chop
                          C_OUTLINE_IT *right_it  //right half of chop
                         ) {
  C_OUTLINE *child;              //child outline
  TBOX srcbox;                   //box of outline
  C_OUTLINE_LIST left_ch;        //left children
  C_OUTLINE_LIST right_ch;       //right children
  C_OUTLINE_FRAG_LIST left_frags;//chopped fragments
  C_OUTLINE_FRAG_LIST right_frags;;
  C_OUTLINE_IT left_ch_it = &left_ch;
                                 //for whole children
  C_OUTLINE_IT right_ch_it = &right_ch;
                                 //for holes
  C_OUTLINE_IT child_it = srcline->child ();

  srcbox = srcline->bounding_box();
  // left + right <= 2 * chop_coord means the box centre is at or left of
  // the chop line.
  if (srcbox.left() + srcbox.right() <= chop_coord * 2
      && srcbox.right() < chop_coord + pitch_error) {
    // Whole outline is in the left side or not far over the chop_coord,
    // so put the whole thing on the left.
    left_it->add_after_then_move(srcline);
  } else if (srcbox.left() + srcbox.right() > chop_coord * 2
             && srcbox.left () > chop_coord - pitch_error) {
    // Whole outline is in the right side or not far over the chop_coord,
    // so put the whole thing on the right.
    right_it->add_before_stay_put(srcline);
  } else {
    // Needs real chopping.
    if (fixed_chop_coutline(srcline, chop_coord, pitch_error,
                            &left_frags, &right_frags)) {
      // The outer outline was chopped; now distribute or chop its children.
      for (child_it.mark_cycle_pt(); !child_it.cycled_list();
           child_it.forward()) {
        child = child_it.extract();
        srcbox = child->bounding_box();
        if (srcbox.right() < chop_coord) {
          // Whole child is on the left.
          left_ch_it.add_after_then_move(child);
        } else if (srcbox.left() > chop_coord) {
          // Whole child is on the right.
          right_ch_it.add_after_then_move (child);
        } else {
          // No pitch_error is allowed when chopping children to prevent
          // impossible outlines from being created.
          if (fixed_chop_coutline(child, chop_coord, 0.0f,
                                  &left_frags, &right_frags)) {
            delete child;
          } else {
            // Child could not be chopped: assign it by its box centre.
            if (srcbox.left() + srcbox.right() <= chop_coord * 2)
              left_ch_it.add_after_then_move(child);
            else
              right_ch_it.add_after_then_move(child);
          }
        }
      }
      close_chopped_cfragments(&left_frags, &left_ch, pitch_error, left_it);
      close_chopped_cfragments(&right_frags, &right_ch, pitch_error, right_it);
      ASSERT_HOST(left_ch.empty() && right_ch.empty());
      // No children left.
      delete srcline;            // Smashed up.
    } else {
      // Chop failed. Just use middle coord.
      if (srcbox.left() + srcbox.right() <= chop_coord * 2)
        left_it->add_after_then_move(srcline);  // Stick whole in left.
      else
        right_it->add_before_stay_put(srcline);
    }
  }
}


/**********************************************************************
 * fixed_chop_coutline
 *
 * Chop the given coutline (if necessary) placing the fragments which
 * fall either side of the chop line into the appropriate list.
 * If the coutline lies too heavily to one side to chop, false is returned.
 **********************************************************************/

// The walk starts at the leftmost point of the outline and follows the
// edge steps circularly. Each stretch between two successive visits to
// x == chop_coord becomes a fragment: stretches entered with a positive
// x step go to right_frags, the others to left_frags. The stretch that
// wraps round the start point is saved last, into left_frags.
// Returns false, without adding fragments, if the leftmost point is not
// left of chop_coord - pitch_error or the outline never reaches chop_coord.
static
bool fixed_chop_coutline(                                  //chop the outline
                         C_OUTLINE* srcline,               //source outline
                         int16_t chop_coord,               //place to chop
                         float pitch_error,                //allowed deviation
                         C_OUTLINE_FRAG_LIST* left_frags,  //left half of chop
                         C_OUTLINE_FRAG_LIST* right_frags  //right half of chop
) {
  bool first_frag;               //fragment
  int16_t left_edge;             //of outline
  int16_t startindex;            //in first fragment
  int32_t length;                //of outline
  int16_t stepindex;             //into outline
  int16_t head_index;            //start of fragment
  ICOORD head_pos;               //start of fragment
  int16_t tail_index;            //end of fragment
  ICOORD tail_pos;               //end of fragment
  ICOORD pos;                    //current point
  int16_t first_index = 0;       //first tail
  ICOORD first_pos;              //first tail

  length = srcline->pathlength ();
  pos = srcline->start_pos ();
  left_edge = pos.x ();
  tail_index = 0;
  tail_pos = pos;
  // Find the step index and position of the leftmost point.
  for (stepindex = 0; stepindex < length; stepindex++) {
    if (pos.x () < left_edge) {
      left_edge = pos.x ();
      tail_index = stepindex;
      tail_pos = pos;
    }
    pos += srcline->step (stepindex);
  }
  if (left_edge >= chop_coord - pitch_error)
    return false;                //not worth it

  startindex = tail_index;
  first_frag = true;
  head_index = tail_index;
  head_pos = tail_pos;
  do {
    // Advance until the chop line is reached or the walk is back at the
    // start.
    do {
      tail_pos += srcline->step (tail_index);
      tail_index++;
      if (tail_index == length)
        tail_index = 0;
    }
    while (tail_pos.x () != chop_coord && tail_index != startindex);
    if (tail_index == startindex) {
      if (first_frag)
        return false;            //doesn't cross line
      else
        break;
    }
    ASSERT_HOST (head_index != tail_index);
    if (!first_frag) {
      save_chop_cfragment(head_index,
                          head_pos,
                          tail_index,
                          tail_pos,
                          srcline,
                          left_frags);
    }
    else {
      // The first left stretch began mid-fragment at the leftmost point;
      // remember its end so it can be completed after the loop.
      first_index = tail_index;
      first_pos = tail_pos;
      first_frag = false;
    }
    // Skip steps that run along the chop line (no change in x).
    while (srcline->step (tail_index).x () == 0) {
      tail_pos += srcline->step (tail_index);
      tail_index++;
      if (tail_index == length)
        tail_index = 0;
    }
    head_index = tail_index;
    head_pos = tail_pos;
    // While the outline heads right of the chop line, collect right-hand
    // fragments until it returns to the line.
    while (srcline->step (tail_index).x () > 0) {
      do {
        tail_pos += srcline->step (tail_index);
        tail_index++;
        if (tail_index == length)
          tail_index = 0;
      }
      while (tail_pos.x () != chop_coord);
      ASSERT_HOST (head_index != tail_index);
      save_chop_cfragment(head_index,
                          head_pos,
                          tail_index,
                          tail_pos,
                          srcline,
                          right_frags);
      while (srcline->step (tail_index).x () == 0) {
        tail_pos += srcline->step (tail_index);
        tail_index++;
        if (tail_index == length)
          tail_index = 0;
      }
      head_index = tail_index;
      head_pos = tail_pos;
    }
  }
  while (tail_index != startindex);
  // Close the wrap-around fragment: from the last crossing, through the
  // leftmost start point, to the first crossing.
  save_chop_cfragment(head_index,
                      head_pos,
                      first_index,
                      first_pos,
                      srcline,
                      left_frags);
  return true;                   //did some chopping
}

/**********************************************************************
 * save_chop_cfragment
 *
 * Store the given fragment in the given fragment list.
 * Two list entries are created per fragment: a head that owns the steps
 * and a tail that only records the y coord of the other end; each points
 * at the other via other_end. Nothing is stored when the step count
 * equals the vertical distance between the two ends.
 **********************************************************************/

static
void save_chop_cfragment(                            //chop the outline
                         int16_t head_index,         //head of fragment
                         ICOORD head_pos,            //head of fragment
                         int16_t tail_index,         //tail of fragment
                         ICOORD tail_pos,            //tail of fragment
                         C_OUTLINE *srcline,         //source of edgesteps
                         C_OUTLINE_FRAG_LIST *frags  //fragment list
                        ) {
  int16_t jump;                  //gap across end
  int16_t stepcount;             //total steps
  C_OUTLINE_FRAG *head;          //head of fragment
  C_OUTLINE_FRAG *tail;          //tail of fragment
  int16_t tail_y;                //ycoord of tail

  ASSERT_HOST (tail_pos.x () == head_pos.x ());
  ASSERT_HOST (tail_index != head_index);
  stepcount = tail_index - head_index;
  if (stepcount < 0)
    stepcount += srcline->pathlength ();  // indices wrap round the outline
  jump = tail_pos.y () - head_pos.y ();
  if (jump < 0)
    jump = -jump;
  if (jump == stepcount)
    return;                      //its a nop
  tail_y = tail_pos.y ();
  head = new C_OUTLINE_FRAG (head_pos, tail_pos, srcline,
                             head_index, tail_index);
  tail = new C_OUTLINE_FRAG (head, tail_y);
  head->other_end = tail;
  add_frag_to_list(head, frags);
  add_frag_to_list(tail, frags);
}


/**********************************************************************
 * C_OUTLINE_FRAG::C_OUTLINE_FRAG
 *
 * Constructors for C_OUTLINE_FRAG.
 **********************************************************************/

// Head constructor: copies the step directions of outline from start_index
// up to (not including) end_index, wrapping past the end of the outline
// when end_index <= start_index. ycoord is the y of start_pt.
C_OUTLINE_FRAG::C_OUTLINE_FRAG(                     //record fragment
                               ICOORD start_pt,     //start coord
                               ICOORD end_pt,       //end coord
                               C_OUTLINE *outline,  //source of steps
                               int16_t start_index,
                               int16_t end_index) {
  start = start_pt;
  end = end_pt;
  ycoord = start_pt.y ();
  stepcount = end_index - start_index;
  if (stepcount < 0)
    stepcount += outline->pathlength ();
  ASSERT_HOST (stepcount > 0);
  steps = new DIR128[stepcount];
  if (end_index > start_index) {
    for (int i = start_index; i < end_index; ++i)
      steps[i - start_index] = outline->step_dir(i);
  }
  else {
    // Wrapped range: copy to the end of the outline, then from its start.
    int len = outline->pathlength();
    int i = start_index;
    for (; i < len; ++i)
      steps[i - start_index] = outline->step_dir(i);
    if (end_index > 0)
      for (; i < end_index + len; ++i)
        steps[i - start_index] = outline->step_dir(i - len);
  }
  other_end = nullptr;
  // NOTE(review): builds a closed outline and immediately discards it;
  // presumably a sanity check of the copied steps - confirm intent.
  delete close();
}


// Tail constructor: records the far end of head. A tail owns no steps
// (steps == nullptr, stepcount == 0) and shares head's start and end.
C_OUTLINE_FRAG::C_OUTLINE_FRAG(                       //record fragment
                               C_OUTLINE_FRAG *head,  //other end
                               int16_t tail_y) {
  ycoord = tail_y;
  other_end = head;
  start = head->start;
  end = head->end;
  steps = nullptr;
  stepcount = 0;
}


/**********************************************************************
 * add_frag_to_list
 *
 * Insert the fragment in the list at the appropriate place to keep
 * them in ascending ycoord order.
 **********************************************************************/

// Among entries with equal ycoord, frag is placed before the existing
// entry only when frag's other end lies below it (smaller ycoord).
static
void add_frag_to_list(                            //ordered add
                      C_OUTLINE_FRAG *frag,       //fragment to add
                      C_OUTLINE_FRAG_LIST *frags  //fragment list
                     ) {
                                 //output list
  C_OUTLINE_FRAG_IT frag_it = frags;

  if (!frags->empty ()) {
    for (frag_it.mark_cycle_pt (); !frag_it.cycled_list ();
         frag_it.forward ()) {
      if (frag_it.data ()->ycoord > frag->ycoord
          || (frag_it.data ()->ycoord == frag->ycoord
              && frag->other_end->ycoord < frag->ycoord)) {
        frag_it.add_before_then_move (frag);
        return;
      }
    }
  }
  // No later entry found (or list empty): append.
  frag_it.add_to_end (frag);
}


/**********************************************************************
 * close_chopped_cfragments
 *
 * Clear the given list of fragments joining them up into outlines.
 * Each outline made soaks up any of the child outlines which it encloses.
 * Outlines no wider than pitch_error are deleted instead of being output.
 * Children not claimed by any outline are appended to the output.
 **********************************************************************/

static
void close_chopped_cfragments(                             //chop the outline
                              C_OUTLINE_FRAG_LIST *frags,  //list to clear
                              C_OUTLINE_LIST *children,    //potential children
                              float pitch_error,           //allowed shrinkage
                              C_OUTLINE_IT *dest_it        //output list
                             ) {
                                 //iterator
  C_OUTLINE_FRAG_IT frag_it = frags;
  C_OUTLINE_FRAG *bottom_frag;   //bottom of cut
  C_OUTLINE_FRAG *top_frag;      //top of cut
  C_OUTLINE *outline;            //new outline
  C_OUTLINE *child;              //current child
  C_OUTLINE_IT child_it = children;
  C_OUTLINE_IT olchild_it;       //children of outline

  while (!frag_it.empty()) {
    frag_it.move_to_first();
    // get bottom one
    bottom_frag = frag_it.extract();
    frag_it.forward();
    top_frag = frag_it.data();   // look at next
    // If both entries are heads or both are tails, prefer the following
    // entry when it sits at the same ycoord.
    if ((bottom_frag->steps == nullptr && top_frag->steps == nullptr)
        || (bottom_frag->steps != nullptr && top_frag->steps != nullptr)) {
      if (frag_it.data_relative(1)->ycoord == top_frag->ycoord)
        frag_it.forward();
    }
    top_frag = frag_it.extract();
    if (top_frag->other_end != bottom_frag) {
      // Ends of two different fragments: they are merged into one longer
      // fragment and no outline is produced yet.
      outline = join_chopped_fragments(bottom_frag, top_frag);
      ASSERT_HOST(outline == nullptr);
    } else {
      // Both ends of the same fragment: close it into an outline.
      outline = join_chopped_fragments(bottom_frag, top_frag);
      if (outline != nullptr) {
        olchild_it.set_to_list(outline->child());
        for (child_it.mark_cycle_pt(); !child_it.cycled_list();
             child_it.forward()) {
          child = child_it.data();
          // operator< on C_OUTLINE is used as the "is enclosed by" test.
          if (*child < *outline)
            olchild_it.add_to_end(child_it.extract());
        }
        if (outline->bounding_box().width() > pitch_error)
          dest_it->add_after_then_move(outline);
        else
          delete outline;        // Make it disappear.
      }
    }
  }
  while (!child_it.empty ()) {
    dest_it->add_after_then_move (child_it.extract ());
    child_it.forward ();
  }
}


/**********************************************************************
 * join_chopped_fragments
 *
 * Join the two fragment ends. If they are the two ends of the same
 * fragment it is closed into a C_OUTLINE, which is returned (this may be
 * nullptr if close() refuses). Otherwise the step arrays of the two
 * fragments are concatenated onto the surviving far ends and nullptr is
 * returned. In both cases bottom and top are deleted.
 **********************************************************************/

static
C_OUTLINE *join_chopped_fragments(                         //join pieces
                                  C_OUTLINE_FRAG *bottom,  //bottom of cut
                                  C_OUTLINE_FRAG *top      //top of cut
                                 ) {
  C_OUTLINE *outline;            //closed loop

  if (bottom->other_end == top) {
    // Whichever of the pair owns the steps does the closing.
    if (bottom->steps == nullptr)
      outline = top->close ();   //turn to outline
    else
      outline = bottom->close ();
    delete top;
    delete bottom;
    return outline;
  }
  if (bottom->steps == nullptr) {
    ASSERT_HOST (top->steps != nullptr);
    join_segments (bottom->other_end, top);
  }
  else {
    ASSERT_HOST (top->steps == nullptr);
    join_segments (top->other_end, bottom);
  }
  // Link the two surviving far ends to each other.
  top->other_end->other_end = bottom->other_end;
  bottom->other_end->other_end = top->other_end;
  delete bottom;
  delete top;
  return nullptr;
}

/**********************************************************************
 * join_segments
 *
 * Join the two edgestep fragments such that the second comes after
 * the first and the gap between them is closed.
+ **********************************************************************/ + +static +void join_segments( //join pieces + C_OUTLINE_FRAG *bottom, //bottom of cut + C_OUTLINE_FRAG *top //top of cut + ) { + DIR128 *steps; //new steps + int32_t stepcount; //no of steps + int16_t fake_count; //fake steps + DIR128 fake_step; //step entry + + ASSERT_HOST (bottom->end.x () == top->start.x ()); + fake_count = top->start.y () - bottom->end.y (); + if (fake_count < 0) { + fake_count = -fake_count; + fake_step = 32; + } + else + fake_step = 96; + + stepcount = bottom->stepcount + fake_count + top->stepcount; + steps = new DIR128[stepcount]; + memmove (steps, bottom->steps, bottom->stepcount); + memset (steps + bottom->stepcount, fake_step.get_dir(), fake_count); + memmove (steps + bottom->stepcount + fake_count, top->steps, + top->stepcount); + delete [] bottom->steps; + bottom->steps = steps; + bottom->stepcount = stepcount; + bottom->end = top->end; + bottom->other_end->end = top->end; +} + + +/********************************************************************** + * C_OUTLINE_FRAG::close + * + * Join the ends of this fragment and turn it into an outline. 
+ **********************************************************************/ + +C_OUTLINE *C_OUTLINE_FRAG::close() { //join pieces + DIR128 *new_steps; //new steps + int32_t new_stepcount; //no of steps + int16_t fake_count; //fake steps + DIR128 fake_step; //step entry + + ASSERT_HOST (start.x () == end.x ()); + fake_count = start.y () - end.y (); + if (fake_count < 0) { + fake_count = -fake_count; + fake_step = 32; + } + else + fake_step = 96; + + new_stepcount = stepcount + fake_count; + if (new_stepcount > C_OUTLINE::kMaxOutlineLength) + return nullptr; // Can't join them + new_steps = new DIR128[new_stepcount]; + memmove(new_steps, steps, stepcount); + memset (new_steps + stepcount, fake_step.get_dir(), fake_count); + auto* result = new C_OUTLINE (start, new_steps, new_stepcount); + delete [] new_steps; + return result; +} + + +/********************************************************************** + * C_OUTLINE_FRAG::operator= + * + * Copy this fragment. + **********************************************************************/ + + //join pieces +C_OUTLINE_FRAG & C_OUTLINE_FRAG::operator= ( +const C_OUTLINE_FRAG & src //fragment to copy +) { + delete [] steps; + + stepcount = src.stepcount; + steps = new DIR128[stepcount]; + memmove (steps, src.steps, stepcount); + start = src.start; + end = src.end; + ycoord = src.ycoord; + return *this; +} + +} // namespace tesseract diff --git a/tesseract/src/textord/fpchop.h b/tesseract/src/textord/fpchop.h new file mode 100644 index 00000000..cc938ba9 --- /dev/null +++ b/tesseract/src/textord/fpchop.h @@ -0,0 +1,84 @@ +/********************************************************************** + * File: fpchop.h (Formerly fp_chop.h) + * Description: Code to chop fixed pitch text into character cells. + * Author: Ray Smith + * + * (C) Copyright 1993, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef FPCHOP_H +#define FPCHOP_H + +#include "params.h" +#include "blobbox.h" + +namespace tesseract { + +class C_OUTLINE_FRAG : public ELIST_LINK +{ + public: + C_OUTLINE_FRAG() { //empty constructor + steps = nullptr; + stepcount = 0; + } + ~C_OUTLINE_FRAG () { + delete [] steps; + } + //start coord + C_OUTLINE_FRAG(ICOORD start_pt, + ICOORD end_pt, //end coord + C_OUTLINE *outline, //source of steps + int16_t start_index, + int16_t end_index); + //other end + C_OUTLINE_FRAG(C_OUTLINE_FRAG *head, int16_t tail_y); + C_OUTLINE *close(); //copy to outline + C_OUTLINE_FRAG & operator= ( //assign + const C_OUTLINE_FRAG & src); + + ICOORD start; //start coord + ICOORD end; //end coord + DIR128 *steps; //step array + int32_t stepcount; //no of steps + C_OUTLINE_FRAG *other_end; //head if a tail + int16_t ycoord; //coord of cut pt + + private: + // Copy constructor (currently unused, therefore private). 
+ C_OUTLINE_FRAG(const C_OUTLINE_FRAG& other); +}; + +ELISTIZEH(C_OUTLINE_FRAG) + +extern +INT_VAR_H (textord_fp_chop_error, 2, +"Max allowed bending of chop cells"); +extern +double_VAR_H (textord_fp_chop_snap, 0.5, +"Max distance of chop pt from vertex"); + +ROW *fixed_pitch_words( //find lines + TO_ROW *row, //row to do + FCOORD rotation //for drawing + ); + +void split_to_blob( //split the blob + BLOBNBOX *blob, //blob to split + int16_t chop_coord, //place to chop + float pitch_error, //allowed deviation + C_OUTLINE_LIST *left_coutlines, //for cblobs + C_OUTLINE_LIST *right_coutlines); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/textord/gap_map.cpp b/tesseract/src/textord/gap_map.cpp new file mode 100644 index 00000000..e31328f8 --- /dev/null +++ b/tesseract/src/textord/gap_map.cpp @@ -0,0 +1,189 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "gap_map.h" + +#include "statistc.h" + +namespace tesseract { + +BOOL_VAR(gapmap_debug, false, "Say which blocks have tables"); +BOOL_VAR(gapmap_use_ends, false, "Use large space at start and end of rows"); +BOOL_VAR(gapmap_no_isolated_quanta, false, +"Ensure gaps not less than 2quanta wide"); +double_VAR(gapmap_big_gaps, 1.75, "xht multiplier"); + +/************************************************************************* + * A block gap map is a quantised histogram of whitespace regions in the + * block. 
It is a vertical projection of wide gaps WITHIN lines + * + * The map is held as an array of counts of rows which have a wide gap + * covering that region of the row. Each bucket in the map represents a width + * of about half an xheight - (The median of the xhts in the rows is used.) + * + * The block is considered RECTANGULAR - delimited by the left and right + * extremes of the rows in the block. However, ONLY wide gaps WITHIN a row are + * counted. + * + *************************************************************************/ + +GAPMAP::GAPMAP( //Constructor + TO_BLOCK *block //block + ) { + TO_ROW *row; //current row + BLOBNBOX_IT blob_it; //iterator + TBOX blob_box; + TBOX prev_blob_box; + int16_t gap_width; + int16_t start_of_row; + int16_t end_of_row; + STATS xht_stats (0, 128); + int16_t min_quantum; + int16_t max_quantum; + int16_t i; + + /* + Find left and right extremes and bucket size + */ + map = nullptr; + min_left = INT16_MAX; + max_right = -INT16_MAX; + total_rows = 0; + any_tabs = false; + + // row iterator + TO_ROW_IT row_it(block->get_rows()); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + if (!row->blob_list ()->empty ()) { + total_rows++; + xht_stats.add (static_cast<int16_t>(floor (row->xheight + 0.5)), 1); + blob_it.set_to_list (row->blob_list ()); + start_of_row = blob_it.data ()->bounding_box ().left (); + end_of_row = blob_it.data_relative (-1)->bounding_box ().right (); + if (min_left > start_of_row) + min_left = start_of_row; + if (max_right < end_of_row) + max_right = end_of_row; + } + } + if ((total_rows < 3) || (min_left >= max_right)) { + bucket_size = 0; + map_max = 0; + total_rows = 0; + min_left = max_right = 0; + return; + } + bucket_size = static_cast<int16_t>(floor (xht_stats.median () + 0.5)) / 2; + map_max = (max_right - min_left) / bucket_size; + map = new int16_t[map_max + 1]; + for (i = 0; i <= map_max; i++) + map[i] = 0; + + for (row_it.mark_cycle_pt (); 
!row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + if (!row->blob_list ()->empty ()) { + blob_it.set_to_list (row->blob_list ()); + blob_it.mark_cycle_pt (); + blob_box = box_next (&blob_it); + prev_blob_box = blob_box; + if (gapmap_use_ends) { + /* Leading space */ + gap_width = blob_box.left () - min_left; + if ((gap_width > gapmap_big_gaps * row->xheight) + && gap_width > 2) { + max_quantum = (blob_box.left () - min_left) / bucket_size; + if (max_quantum > map_max) max_quantum = map_max; + for (i = 0; i <= max_quantum; i++) + map[i]++; + } + } + while (!blob_it.cycled_list ()) { + blob_box = box_next (&blob_it); + gap_width = blob_box.left () - prev_blob_box.right (); + if ((gap_width > gapmap_big_gaps * row->xheight) + && gap_width > 2) { + min_quantum = + (prev_blob_box.right () - min_left) / bucket_size; + max_quantum = (blob_box.left () - min_left) / bucket_size; + if (max_quantum > map_max) max_quantum = map_max; + for (i = min_quantum; i <= max_quantum; i++) + map[i]++; + } + prev_blob_box = blob_box; + } + if (gapmap_use_ends) { + /* Trailing space */ + gap_width = max_right - prev_blob_box.right (); + if ((gap_width > gapmap_big_gaps * row->xheight) + && gap_width > 2) { + min_quantum = + (prev_blob_box.right () - min_left) / bucket_size; + if (min_quantum < 0) min_quantum = 0; + for (i = min_quantum; i <= map_max; i++) + map[i]++; + } + } + } + } + for (i = 0; i <= map_max; i++) { + if (map[i] > total_rows / 2) { + if (gapmap_no_isolated_quanta && + (((i == 0) && + (map[i + 1] <= total_rows / 2)) || + ((i == map_max) && + (map[i - 1] <= total_rows / 2)) || + ((i > 0) && + (i < map_max) && + (map[i - 1] <= total_rows / 2) && + (map[i + 1] <= total_rows / 2)))) { + map[i] = 0; //prevent isolated quantum + } + else + any_tabs = true; + } + } + if (gapmap_debug && any_tabs) + tprintf ("Table found\n"); +} + + +/************************************************************************* + * GAPMAP::table_gap() + * Is there a bucket in the 
specified range where more than half the rows in the + * block have a wide gap? + *************************************************************************/ + +bool GAPMAP::table_gap( //Is gap a table? + int16_t left, //From here + int16_t right //To here +) { + int16_t min_quantum; + int16_t max_quantum; + int16_t i; + bool tab_found = false; + + if (!any_tabs) + return false; + + min_quantum = (left - min_left) / bucket_size; + max_quantum = (right - min_left) / bucket_size; + // Clip to the bounds of the array. In some circumstances (big blob followed + // by small blob) max_quantum can exceed the map_max bounds, but we clip + // here instead, as it provides better long-term safety. + if (min_quantum < 0) min_quantum = 0; + if (max_quantum > map_max) max_quantum = map_max; + for (i = min_quantum; (!tab_found && (i <= max_quantum)); i++) + if (map[i] > total_rows / 2) + tab_found = true; + return tab_found; +} + +} // namespace tesseract diff --git a/tesseract/src/textord/gap_map.h b/tesseract/src/textord/gap_map.h new file mode 100644 index 00000000..7ed9aae6 --- /dev/null +++ b/tesseract/src/textord/gap_map.h @@ -0,0 +1,53 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef GAP_MAP_H +#define GAP_MAP_H + +#include "blobbox.h" + +namespace tesseract { + +class GAPMAP +{ + public: + GAPMAP( //constructor + TO_BLOCK *block); + + ~GAPMAP () { //destructor + delete[] map; + } + + bool table_gap( //Is gap a table? 
+ int16_t left, //From here + int16_t right); //To here + + private: + int16_t total_rows; //in block + int16_t min_left; //Left extreme + int16_t max_right; //Right extreme + int16_t bucket_size; // half an x ht + int16_t *map; //empty counts + int16_t map_max; //map[0..max_map] defined + bool any_tabs; +}; + +/*-----------------------------*/ + +extern BOOL_VAR_H (gapmap_debug, false, "Say which blocks have tables"); +extern BOOL_VAR_H (gapmap_use_ends, false, +"Use large space at start and end of rows"); +extern BOOL_VAR_H (gapmap_no_isolated_quanta, false, +"Ensure gaps not less than 2quanta wide"); +extern double_VAR_H (gapmap_big_gaps, 1.75, "xht multiplier"); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/textord/imagefind.cpp b/tesseract/src/textord/imagefind.cpp new file mode 100644 index 00000000..dc5f19b9 --- /dev/null +++ b/tesseract/src/textord/imagefind.cpp @@ -0,0 +1,1366 @@ +/////////////////////////////////////////////////////////////////////// +// File: imagefind.cpp +// Description: Function to find image and drawing regions in an image +// and create a corresponding list of empty blobs. +// Author: Ray Smith +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "imagefind.h" + +#include "colpartitiongrid.h" +#include "linlsq.h" +#include "statistc.h" +#include "params.h" + +#include "allheaders.h" + +#include <algorithm> + +namespace tesseract { + +static INT_VAR(textord_tabfind_show_images, false, "Show image blobs"); + +// Fraction of width or height of on pixels that can be discarded from a +// roughly rectangular image. +const double kMinRectangularFraction = 0.125; +// Fraction of width or height to consider image completely used. +const double kMaxRectangularFraction = 0.75; +// Fraction of width or height to allow transition from kMinRectangularFraction +// to kMaxRectangularFraction, equivalent to a dy/dx skew. +const double kMaxRectangularGradient = 0.1; // About 6 degrees. +// Minimum image size to be worth looking for images on. +const int kMinImageFindSize = 100; +// Scale factor for the rms color fit error. +const double kRMSFitScaling = 8.0; +// Min color difference to call it two colors. +const int kMinColorDifference = 16; +// Pixel padding for noise blobs and partitions when rendering on the image +// mask to encourage them to join together. Make it too big and images +// will fatten out too much and have to be clipped to text. +const int kNoisePadding = 4; + +// Finds image regions within the BINARY source pix (page image) and returns +// the image regions as a mask image. +// The returned pix may be nullptr, meaning no images found. +// If not nullptr, it must be PixDestroyed by the caller. +// If textord_tabfind_show_images, debug images are appended to pixa_debug. +Pix* ImageFind::FindImages(Pix* pix, DebugPixa* pixa_debug) { + // Not worth looking at small images. + if (pixGetWidth(pix) < kMinImageFindSize || + pixGetHeight(pix) < kMinImageFindSize) + return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); + + // Reduce by factor 2. 
+ Pix *pixr = pixReduceRankBinaryCascade(pix, 1, 0, 0, 0); + if (textord_tabfind_show_images && pixa_debug != nullptr) + pixa_debug->AddPix(pixr, "CascadeReduced"); + + // Get the halftone mask directly from Leptonica. + // + // Leptonica will print an error message and return nullptr if we call + // pixGenHalftoneMask(pixr, nullptr, ...) with too small image, so we + // want to bypass that. + if (pixGetWidth(pixr) < kMinImageFindSize || + pixGetHeight(pixr) < kMinImageFindSize) { + pixDestroy(&pixr); + return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); + } + // Get the halftone mask. + l_int32 ht_found = 0; + Pixa* pixadb = (textord_tabfind_show_images && pixa_debug != nullptr) + ? pixaCreate(0) + : nullptr; + Pix* pixht2 = pixGenerateHalftoneMask(pixr, nullptr, &ht_found, pixadb); + if (pixadb) { + Pix* pixdb = pixaDisplayTiledInColumns(pixadb, 3, 1.0, 20, 2); + if (textord_tabfind_show_images && pixa_debug != nullptr) + pixa_debug->AddPix(pixdb, "HalftoneMask"); + pixDestroy(&pixdb); + pixaDestroy(&pixadb); + } + pixDestroy(&pixr); + if (!ht_found && pixht2 != nullptr) + pixDestroy(&pixht2); + if (pixht2 == nullptr) + return pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); + + // Expand back up again. + Pix *pixht = pixExpandReplicate(pixht2, 2); + if (textord_tabfind_show_images && pixa_debug != nullptr) + pixa_debug->AddPix(pixht, "HalftoneReplicated"); + pixDestroy(&pixht2); + + // Fill to capture pixels near the mask edges that were missed + Pix *pixt = pixSeedfillBinary(nullptr, pixht, pix, 8); + pixOr(pixht, pixht, pixt); + pixDestroy(&pixt); + + // Eliminate lines and bars that may be joined to images. 
+ Pix* pixfinemask = pixReduceRankBinaryCascade(pixht, 1, 1, 3, 3); + pixDilateBrick(pixfinemask, pixfinemask, 5, 5); + if (textord_tabfind_show_images && pixa_debug != nullptr) + pixa_debug->AddPix(pixfinemask, "FineMask"); + Pix* pixreduced = pixReduceRankBinaryCascade(pixht, 1, 1, 1, 1); + Pix* pixreduced2 = pixReduceRankBinaryCascade(pixreduced, 3, 3, 3, 0); + pixDestroy(&pixreduced); + pixDilateBrick(pixreduced2, pixreduced2, 5, 5); + Pix* pixcoarsemask = pixExpandReplicate(pixreduced2, 8); + pixDestroy(&pixreduced2); + if (textord_tabfind_show_images && pixa_debug != nullptr) + pixa_debug->AddPix(pixcoarsemask, "CoarseMask"); + // Combine the coarse and fine image masks. + pixAnd(pixcoarsemask, pixcoarsemask, pixfinemask); + pixDestroy(&pixfinemask); + // Dilate a bit to make sure we get everything. + pixDilateBrick(pixcoarsemask, pixcoarsemask, 3, 3); + Pix* pixmask = pixExpandReplicate(pixcoarsemask, 16); + pixDestroy(&pixcoarsemask); + if (textord_tabfind_show_images && pixa_debug != nullptr) + pixa_debug->AddPix(pixmask, "MaskDilated"); + // And the image mask with the line and bar remover. + pixAnd(pixht, pixht, pixmask); + pixDestroy(&pixmask); + if (textord_tabfind_show_images && pixa_debug != nullptr) + pixa_debug->AddPix(pixht, "FinalMask"); + // Make the result image the same size as the input. + Pix* result = pixCreate(pixGetWidth(pix), pixGetHeight(pix), 1); + pixOr(result, result, pixht); + pixDestroy(&pixht); + return result; +} + +// Generates a Boxa, Pixa pair from the input binary (image mask) pix, +// analogous to pixConnComp, except that connected components which are nearly +// rectangular are replaced with solid rectangles. +// The returned boxa, pixa may be nullptr, meaning no images found. +// If not nullptr, they must be destroyed by the caller. +// Resolution of pix should match the source image (Tesseract::pix_binary_) +// so the output coordinate systems match. 
+void ImageFind::ConnCompAndRectangularize(Pix* pix, DebugPixa* pixa_debug, + Boxa** boxa, Pixa** pixa) { + *boxa = nullptr; + *pixa = nullptr; + + if (textord_tabfind_show_images && pixa_debug != nullptr) + pixa_debug->AddPix(pix, "Conncompimage"); + // Find the individual image regions in the mask image. + *boxa = pixConnComp(pix, pixa, 8); + // Rectangularize the individual images. If a sharp edge in vertical and/or + // horizontal occupancy can be found, it indicates a probably rectangular + // image with unwanted bits merged on, so clip to the approximate rectangle. + int npixes = 0; + if (*boxa != nullptr && *pixa != nullptr) npixes = pixaGetCount(*pixa); + for (int i = 0; i < npixes; ++i) { + int x_start, x_end, y_start, y_end; + Pix* img_pix = pixaGetPix(*pixa, i, L_CLONE); + if (textord_tabfind_show_images && pixa_debug != nullptr) + pixa_debug->AddPix(img_pix, "A component"); + if (pixNearlyRectangular(img_pix, kMinRectangularFraction, + kMaxRectangularFraction, + kMaxRectangularGradient, + &x_start, &y_start, &x_end, &y_end)) { + Pix* simple_pix = pixCreate(x_end - x_start, y_end - y_start, 1); + pixSetAll(simple_pix); + pixDestroy(&img_pix); + // pixaReplacePix takes ownership of the simple_pix. + pixaReplacePix(*pixa, i, simple_pix, nullptr); + img_pix = pixaGetPix(*pixa, i, L_CLONE); + // Fix the box to match the new pix. + l_int32 x, y, width, height; + boxaGetBoxGeometry(*boxa, i, &x, &y, &width, &height); + Box* simple_box = boxCreate(x + x_start, y + y_start, + x_end - x_start, y_end - y_start); + boxaReplaceBox(*boxa, i, simple_box); + } + pixDestroy(&img_pix); + } +} + +// Scans horizontally on x=[x_start,x_end), starting with y=*y_start, +// stepping y+=y_step, until y=y_end. *ystart is input/output. 
+// If the number of black pixels in a row, pix_count fits this pattern: +// 0 or more rows with pix_count < min_count then +// <= mid_width rows with min_count <= pix_count <= max_count then +// a row with pix_count > max_count then +// true is returned, and *y_start = the first y with pix_count >= min_count. +static bool HScanForEdge(uint32_t* data, int wpl, int x_start, int x_end, + int min_count, int mid_width, int max_count, + int y_end, int y_step, int* y_start) { + int mid_rows = 0; + for (int y = *y_start; y != y_end; y += y_step) { + // Need pixCountPixelsInRow(pix, y, &pix_count, nullptr) to count in a subset. + int pix_count = 0; + uint32_t* line = data + wpl * y; + for (int x = x_start; x < x_end; ++x) { + if (GET_DATA_BIT(line, x)) + ++pix_count; + } + if (mid_rows == 0 && pix_count < min_count) + continue; // In the min phase. + if (mid_rows == 0) + *y_start = y; // Save the y_start where we came out of the min phase. + if (pix_count > max_count) + return true; // Found the pattern. + ++mid_rows; + if (mid_rows > mid_width) + break; // Middle too big. + } + return false; // Never found max_count. +} + +// Scans vertically on y=[y_start,y_end), starting with x=*x_start, +// stepping x+=x_step, until x=x_end. *x_start is input/output. +// If the number of black pixels in a column, pix_count fits this pattern: +// 0 or more cols with pix_count < min_count then +// <= mid_width cols with min_count <= pix_count <= max_count then +// a column with pix_count > max_count then +// true is returned, and *x_start = the first x with pix_count >= min_count. 
+static bool VScanForEdge(uint32_t* data, int wpl, int y_start, int y_end, + int min_count, int mid_width, int max_count, + int x_end, int x_step, int* x_start) { + int mid_cols = 0; + for (int x = *x_start; x != x_end; x += x_step) { + int pix_count = 0; + uint32_t* line = data + y_start * wpl; + for (int y = y_start; y < y_end; ++y, line += wpl) { + if (GET_DATA_BIT(line, x)) + ++pix_count; + } + if (mid_cols == 0 && pix_count < min_count) + continue; // In the min phase. + if (mid_cols == 0) + *x_start = x; // Save the place where we came out of the min phase. + if (pix_count > max_count) + return true; // found the pattern. + ++mid_cols; + if (mid_cols > mid_width) + break; // Middle too big. + } + return false; // Never found max_count. +} + +// Returns true if there is a rectangle in the source pix, such that all +// pixel rows and column slices outside of it have less than +// min_fraction of the pixels black, and within max_skew_gradient fraction +// of the pixels on the inside, there are at least max_fraction of the +// pixels black. In other words, the inside of the rectangle looks roughly +// rectangular, and the outside of it looks like extra bits. +// On return, the rectangle is defined by x_start, y_start, x_end and y_end. +// Note: the algorithm is iterative, allowing it to slice off pixels from +// one edge, allowing it to then slice off more pixels from another edge. +bool ImageFind::pixNearlyRectangular(Pix* pix, + double min_fraction, double max_fraction, + double max_skew_gradient, + int* x_start, int* y_start, + int* x_end, int* y_end) { + ASSERT_HOST(pix != nullptr); + *x_start = 0; + *x_end = pixGetWidth(pix); + *y_start = 0; + *y_end = pixGetHeight(pix); + + uint32_t* data = pixGetData(pix); + int wpl = pixGetWpl(pix); + bool any_cut = false; + bool left_done = false; + bool right_done = false; + bool top_done = false; + bool bottom_done = false; + do { + any_cut = false; + // Find the top/bottom edges. 
+ int width = *x_end - *x_start; + int min_count = static_cast<int>(width * min_fraction); + int max_count = static_cast<int>(width * max_fraction); + int edge_width = static_cast<int>(width * max_skew_gradient); + if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width, + max_count, *y_end, 1, y_start) && !top_done) { + top_done = true; + any_cut = true; + } + --(*y_end); + if (HScanForEdge(data, wpl, *x_start, *x_end, min_count, edge_width, + max_count, *y_start, -1, y_end) && !bottom_done) { + bottom_done = true; + any_cut = true; + } + ++(*y_end); + + // Find the left/right edges. + int height = *y_end - *y_start; + min_count = static_cast<int>(height * min_fraction); + max_count = static_cast<int>(height * max_fraction); + edge_width = static_cast<int>(height * max_skew_gradient); + if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width, + max_count, *x_end, 1, x_start) && !left_done) { + left_done = true; + any_cut = true; + } + --(*x_end); + if (VScanForEdge(data, wpl, *y_start, *y_end, min_count, edge_width, + max_count, *x_start, -1, x_end) && !right_done) { + right_done = true; + any_cut = true; + } + ++(*x_end); + } while (any_cut); + + // All edges must satisfy the condition of sharp gradient in pixel density + // in order for the full rectangle to be present. + return left_done && right_done && top_done && bottom_done; +} + +// Given an input pix, and a bounding rectangle, the sides of the rectangle +// are shrunk inwards until they bound any black pixels found within the +// original rectangle. Returns false if the rectangle contains no black +// pixels at all. 
+bool ImageFind::BoundsWithinRect(Pix* pix, int* x_start, int* y_start, + int* x_end, int* y_end) { + Box* input_box = boxCreate(*x_start, *y_start, *x_end - *x_start, + *y_end - *y_start); + Box* output_box = nullptr; + pixClipBoxToForeground(pix, input_box, nullptr, &output_box); + bool result = output_box != nullptr; + if (result) { + l_int32 x, y, width, height; + boxGetGeometry(output_box, &x, &y, &width, &height); + *x_start = x; + *y_start = y; + *x_end = x + width; + *y_end = y + height; + boxDestroy(&output_box); + } + boxDestroy(&input_box); + return result; +} + +// Given a point in 3-D (RGB) space, returns the squared Euclidean distance +// of the point from the given line, defined by a pair of points in the 3-D +// (RGB) space, line1 and line2. +double ImageFind::ColorDistanceFromLine(const uint8_t* line1, + const uint8_t* line2, + const uint8_t* point) { + int line_vector[kRGBRMSColors]; + int point_vector[kRGBRMSColors]; + for (int i = 0; i < kRGBRMSColors; ++i) { + line_vector[i] = static_cast<int>(line2[i]) - static_cast<int>(line1[i]); + point_vector[i] = static_cast<int>(point[i]) - static_cast<int>(line1[i]); + } + line_vector[L_ALPHA_CHANNEL] = 0; + // Now the cross product in 3d. + int cross[kRGBRMSColors]; + cross[COLOR_RED] = line_vector[COLOR_GREEN] * point_vector[COLOR_BLUE] + - line_vector[COLOR_BLUE] * point_vector[COLOR_GREEN]; + cross[COLOR_GREEN] = line_vector[COLOR_BLUE] * point_vector[COLOR_RED] + - line_vector[COLOR_RED] * point_vector[COLOR_BLUE]; + cross[COLOR_BLUE] = line_vector[COLOR_RED] * point_vector[COLOR_GREEN] + - line_vector[COLOR_GREEN] * point_vector[COLOR_RED]; + cross[L_ALPHA_CHANNEL] = 0; + // Now the sums of the squares. 
+ double cross_sq = 0.0; + double line_sq = 0.0; + for (int j = 0; j < kRGBRMSColors; ++j) { + cross_sq += static_cast<double>(cross[j]) * cross[j]; + line_sq += static_cast<double>(line_vector[j]) * line_vector[j]; + } + if (line_sq == 0.0) { + return 0.0; + } + return cross_sq / line_sq; // This is the squared distance. +} + + +// Returns the leptonica combined code for the given RGB triplet. +uint32_t ImageFind::ComposeRGB(uint32_t r, uint32_t g, uint32_t b) { + l_uint32 result; + composeRGBPixel(r, g, b, &result); + return result; +} + +// Returns the input value clipped to a uint8_t. +uint8_t ImageFind::ClipToByte(double pixel) { + if (pixel < 0.0) + return 0; + else if (pixel >= 255.0) + return 255; + return static_cast<uint8_t>(pixel); +} + +// Computes the light and dark extremes of color in the given rectangle of +// the given pix, which is factor smaller than the coordinate system in rect. +// The light and dark points are taken to be the upper and lower 8th-ile of +// the most deviant of R, G and B. The value of the other 2 channels are +// computed by linear fit against the most deviant. +// The colors of the two points are returned in color1 and color2, with the +// alpha channel set to a scaled mean rms of the fits. +// If color_map1 is not null then it and color_map2 get rect pasted in them +// with the two calculated colors, and rms map gets a pasted rect of the rms. +// color_map1, color_map2 and rms_map are assumed to be the same scale as pix. +void ImageFind::ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor, + Pix* color_map1, Pix* color_map2, + Pix* rms_map, + uint8_t* color1, uint8_t* color2) { + ASSERT_HOST(pix != nullptr && pixGetDepth(pix) == 32); + // Pad the rectangle outwards by 2 (scaled) pixels if possible to get more + // background. 
+ int width = pixGetWidth(pix); + int height = pixGetHeight(pix); + int left_pad = std::max(rect.left() - 2 * factor, 0) / factor; + int top_pad = (rect.top() + 2 * factor + (factor - 1)) / factor; + top_pad = std::min(height, top_pad); + int right_pad = (rect.right() + 2 * factor + (factor - 1)) / factor; + right_pad = std::min(width, right_pad); + int bottom_pad = std::max(rect.bottom() - 2 * factor, 0) / factor; + int width_pad = right_pad - left_pad; + int height_pad = top_pad - bottom_pad; + if (width_pad < 1 || height_pad < 1 || width_pad + height_pad < 4) + return; + // Now crop the pix to the rectangle. + Box* scaled_box = boxCreate(left_pad, height - top_pad, + width_pad, height_pad); + Pix* scaled = pixClipRectangle(pix, scaled_box, nullptr); + + // Compute stats over the whole image. + STATS red_stats(0, 256); + STATS green_stats(0, 256); + STATS blue_stats(0, 256); + uint32_t* data = pixGetData(scaled); + ASSERT_HOST(pixGetWpl(scaled) == width_pad); + for (int y = 0; y < height_pad; ++y) { + for (int x = 0; x < width_pad; ++x, ++data) { + int r = GET_DATA_BYTE(data, COLOR_RED); + int g = GET_DATA_BYTE(data, COLOR_GREEN); + int b = GET_DATA_BYTE(data, COLOR_BLUE); + red_stats.add(r, 1); + green_stats.add(g, 1); + blue_stats.add(b, 1); + } + } + // Find the RGB component with the greatest 8th-ile-range. + // 8th-iles are used instead of quartiles to get closer to the true + // foreground color, which is going to be faint at best because of the + // pre-scaling of the input image. 
+ int best_l8 = static_cast<int>(red_stats.ile(0.125f)); + int best_u8 = static_cast<int>(ceil(red_stats.ile(0.875f))); + int best_i8r = best_u8 - best_l8; + int x_color = COLOR_RED; + int y1_color = COLOR_GREEN; + int y2_color = COLOR_BLUE; + int l8 = static_cast<int>(green_stats.ile(0.125f)); + int u8 = static_cast<int>(ceil(green_stats.ile(0.875f))); + if (u8 - l8 > best_i8r) { + best_i8r = u8 - l8; + best_l8 = l8; + best_u8 = u8; + x_color = COLOR_GREEN; + y1_color = COLOR_RED; + } + l8 = static_cast<int>(blue_stats.ile(0.125f)); + u8 = static_cast<int>(ceil(blue_stats.ile(0.875f))); + if (u8 - l8 > best_i8r) { + best_i8r = u8 - l8; + best_l8 = l8; + best_u8 = u8; + x_color = COLOR_BLUE; + y1_color = COLOR_GREEN; + y2_color = COLOR_RED; + } + if (best_i8r >= kMinColorDifference) { + LLSQ line1; + LLSQ line2; + uint32_t* data = pixGetData(scaled); + for (int im_y = 0; im_y < height_pad; ++im_y) { + for (int im_x = 0; im_x < width_pad; ++im_x, ++data) { + int x = GET_DATA_BYTE(data, x_color); + int y1 = GET_DATA_BYTE(data, y1_color); + int y2 = GET_DATA_BYTE(data, y2_color); + line1.add(x, y1); + line2.add(x, y2); + } + } + double m1 = line1.m(); + double c1 = line1.c(m1); + double m2 = line2.m(); + double c2 = line2.c(m2); + double rms = line1.rms(m1, c1) + line2.rms(m2, c2); + rms *= kRMSFitScaling; + // Save the results. + color1[x_color] = ClipToByte(best_l8); + color1[y1_color] = ClipToByte(m1 * best_l8 + c1 + 0.5); + color1[y2_color] = ClipToByte(m2 * best_l8 + c2 + 0.5); + color1[L_ALPHA_CHANNEL] = ClipToByte(rms); + color2[x_color] = ClipToByte(best_u8); + color2[y1_color] = ClipToByte(m1 * best_u8 + c1 + 0.5); + color2[y2_color] = ClipToByte(m2 * best_u8 + c2 + 0.5); + color2[L_ALPHA_CHANNEL] = ClipToByte(rms); + } else { + // There is only one color. 
+ color1[COLOR_RED] = ClipToByte(red_stats.median()); + color1[COLOR_GREEN] = ClipToByte(green_stats.median()); + color1[COLOR_BLUE] = ClipToByte(blue_stats.median()); + color1[L_ALPHA_CHANNEL] = 0; + memcpy(color2, color1, 4); + } + if (color_map1 != nullptr) { + pixSetInRectArbitrary(color_map1, scaled_box, + ComposeRGB(color1[COLOR_RED], + color1[COLOR_GREEN], + color1[COLOR_BLUE])); + pixSetInRectArbitrary(color_map2, scaled_box, + ComposeRGB(color2[COLOR_RED], + color2[COLOR_GREEN], + color2[COLOR_BLUE])); + pixSetInRectArbitrary(rms_map, scaled_box, color1[L_ALPHA_CHANNEL]); + } + pixDestroy(&scaled); + boxDestroy(&scaled_box); +} + +// ================ CUTTING POLYGONAL IMAGES FROM A RECTANGLE ================ +// The following functions are responsible for cutting a polygonal image from +// a rectangle: CountPixelsInRotatedBox, AttemptToShrinkBox, CutChunkFromParts +// with DivideImageIntoParts as the master. +// Problem statement: +// We start with a single connected component from the image mask: we get +// a Pix of the component, and its location on the page (im_box). +// The objective of cutting a polygonal image from its rectangle is to avoid +// interfering text, but not text that completely overlaps the image. +// ------------------------------ ------------------------------ +// | Single input partition | | 1 Cut up output partitions | +// | | ------------------------------ +// Av|oid | Avoid | | +// | | |________________________| +// Int|erfering | Interfering | | +// | | _____|__________________| +// T|ext | Text | | +// | Text-on-image | | Text-on-image | +// ------------------------------ -------------------------- +// DivideImageIntoParts does this by building a ColPartition_LIST (not in the +// grid) with each ColPartition representing one of the rectangles needed, +// starting with a single rectangle for the whole image component, and cutting +// bits out of it with CutChunkFromParts as needed to avoid text. 
The output
// ColPartitions are supposed to be ordered from top to bottom.

// The problem is complicated by the fact that we have rotated the coordinate
// system to make text lines horizontal, so if we need to look at the component
// image, we have to rotate the coordinates. Throughout the functions in this
// section im_box is the rectangle representing the image component in the
// rotated page coordinates (where we are building our output ColPartitions),
// rotation is the rotation that we used to get there, and rerotation is the
// rotation required to get back to original page image coordinates.
// To get to coordinates in the component image, pix, we rotate the im_box,
// the point we want to locate, and subtract the rotated point from the top-left
// of the rotated im_box.
// im_box is therefore essential to calculating coordinates within the pix.

// Returns true if there are no black pixels in between the boxes.
// The im_box must represent the bounding box of the pix in tesseract
// coordinates, which may be negative, due to rotations to make the textlines
// horizontal. The boxes are rotated by rotation, which should undo such
// rotations, before mapping them onto the pix.
// Boxes with no positive gap in the dominant direction count as blank.
bool ImageFind::BlankImageInBetween(const TBOX& box1, const TBOX& box2,
                                    const TBOX& im_box, const FCOORD& rotation,
                                    Pix* pix) {
  // Start from the union of the two boxes, then squeeze it to the gap.
  TBOX between(box1);
  between += box2;

  const int gap_x = box1.x_gap(box2);
  const int gap_y = box1.y_gap(box2);
  if (gap_x >= gap_y) {
    // Side by side: the strip between the facing vertical edges.
    if (gap_x <= 0)
      return true;
    between.set_left(std::min(box1.right(), box2.right()));
    between.set_right(std::max(box1.left(), box2.left()));
  } else {
    // Stacked: the strip between the facing horizontal edges.
    if (gap_y <= 0)
      return true;
    between.set_top(std::max(box1.bottom(), box2.bottom()));
    between.set_bottom(std::min(box1.top(), box2.top()));
  }
  const int black_pixels =
      CountPixelsInRotatedBox(between, im_box, rotation, pix);
  return black_pixels == 0;
}

// Returns the number of pixels in box in the pix.
// rotation, pix and im_box are defined in the large comment above.
+int ImageFind::CountPixelsInRotatedBox(TBOX box, const TBOX& im_box, + const FCOORD& rotation, Pix* pix) { + // Intersect it with the image box. + box &= im_box; // This is in-place box intersection. + if (box.null_box()) + return 0; + box.rotate(rotation); + TBOX rotated_im_box(im_box); + rotated_im_box.rotate(rotation); + Pix* rect_pix = pixCreate(box.width(), box.height(), 1); + pixRasterop(rect_pix, 0, 0, box.width(), box.height(), + PIX_SRC, pix, box.left() - rotated_im_box.left(), + rotated_im_box.top() - box.top()); + l_int32 result; + pixCountPixels(rect_pix, &result, nullptr); + pixDestroy(&rect_pix); + return result; +} + +// The box given by slice contains some black pixels, but not necessarily +// over the whole box. Shrink the x bounds of slice, but not the y bounds +// until there is at least one black pixel in the outermost columns. +// rotation, rerotation, pix and im_box are defined in the large comment above. +static void AttemptToShrinkBox(const FCOORD& rotation, const FCOORD& rerotation, + const TBOX& im_box, Pix* pix, TBOX* slice) { + TBOX rotated_box(*slice); + rotated_box.rotate(rerotation); + TBOX rotated_im_box(im_box); + rotated_im_box.rotate(rerotation); + int left = rotated_box.left() - rotated_im_box.left(); + int right = rotated_box.right() - rotated_im_box.left(); + int top = rotated_im_box.top() - rotated_box.top(); + int bottom = rotated_im_box.top() - rotated_box.bottom(); + ImageFind::BoundsWithinRect(pix, &left, &top, &right, &bottom); + top = rotated_im_box.top() - top; + bottom = rotated_im_box.top() - bottom; + left += rotated_im_box.left(); + right += rotated_im_box.left(); + rotated_box.set_to_given_coords(left, bottom, right, top); + rotated_box.rotate(rotation); + slice->set_left(rotated_box.left()); + slice->set_right(rotated_box.right()); +} + +// The meat of cutting a polygonal image around text. 
+// This function covers the general case of cutting a box out of a box +// as shown: +// Input Output +// ------------------------------ ------------------------------ +// | Single input partition | | 1 Cut up output partitions | +// | | ------------------------------ +// | ---------- | --------- ---------- +// | | box | | | 2 | box | 3 | +// | | | | | | is cut | | +// | ---------- | --------- out ---------- +// | | ------------------------------ +// | | | 4 | +// ------------------------------ ------------------------------ +// In the context that this function is used, at most 3 of the above output +// boxes will be created, as the overlapping box is never contained by the +// input. +// The above cutting operation is executed for each element of part_list that +// is overlapped by the input box. Each modified ColPartition is replaced +// in place in the list by the output of the cutting operation in the order +// shown above, so iff no holes are ever created, the output will be in +// top-to-bottom order, but in extreme cases, hole creation is possible. +// In such cases, the output order may cause strange block polygons. +// rotation, rerotation, pix and im_box are defined in the large comment above. +static void CutChunkFromParts(const TBOX& box, const TBOX& im_box, + const FCOORD& rotation, const FCOORD& rerotation, + Pix* pix, ColPartition_LIST* part_list) { + ASSERT_HOST(!part_list->empty()); + ColPartition_IT part_it(part_list); + do { + ColPartition* part = part_it.data(); + TBOX part_box = part->bounding_box(); + if (part_box.overlap(box)) { + // This part must be cut and replaced with the remains. There are + // up to 4 pieces to be made. Start with the first one and use + // add_before_stay_put. For each piece if it has no black pixels + // left, just don't make the box. + // Above box. 
+ if (box.top() < part_box.top()) { + TBOX slice(part_box); + slice.set_bottom(box.top()); + if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, + pix) > 0) { + AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice); + part_it.add_before_stay_put( + ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, + BTFT_NONTEXT)); + } + } + // Left of box. + if (box.left() > part_box.left()) { + TBOX slice(part_box); + slice.set_right(box.left()); + if (box.top() < part_box.top()) + slice.set_top(box.top()); + if (box.bottom() > part_box.bottom()) + slice.set_bottom(box.bottom()); + if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, + pix) > 0) { + AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice); + part_it.add_before_stay_put( + ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, + BTFT_NONTEXT)); + } + } + // Right of box. + if (box.right() < part_box.right()) { + TBOX slice(part_box); + slice.set_left(box.right()); + if (box.top() < part_box.top()) + slice.set_top(box.top()); + if (box.bottom() > part_box.bottom()) + slice.set_bottom(box.bottom()); + if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation, + pix) > 0) { + AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice); + part_it.add_before_stay_put( + ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE, + BTFT_NONTEXT)); + } + } + // Below box. 
      if (box.bottom() > part_box.bottom()) {
        TBOX slice(part_box);
        slice.set_top(box.bottom());
        if (ImageFind::CountPixelsInRotatedBox(slice, im_box, rerotation,
                                               pix) > 0) {
          AttemptToShrinkBox(rotation, rerotation, im_box, pix, &slice);
          part_it.add_before_stay_put(
              ColPartition::FakePartition(slice, PT_UNKNOWN, BRT_POLYIMAGE,
                                          BTFT_NONTEXT));
        }
      }
      part->DeleteBoxes();
      delete part_it.extract();
    }
    part_it.forward();
  } while (!part_it.at_first());
}

// Starts with the bounding box of the image component and cuts it up
// so that it doesn't intersect text where possible.
// Strong fully contained horizontal text is marked as text on image,
// and does not cause a division of the image.
// For more detail see the large comment above on cutting polygonal images
// from a rectangle.
// rotation, rerotation, pix and im_box are defined in the large comment above.
// rectsearch is used to enumerate the partitions that touch im_box.
// part_list receives the resulting image partitions. It may be left empty,
// which happens when a partition with flow >= BTFT_CHAIN covers the whole
// of im_box.
static void DivideImageIntoParts(const TBOX& im_box, const FCOORD& rotation,
                                 const FCOORD& rerotation, Pix* pix,
                                 ColPartitionGridSearch* rectsearch,
                                 ColPartition_LIST* part_list) {
  // Add the full im_box partition to the list to begin with.
  ColPartition* pix_part = ColPartition::FakePartition(im_box, PT_UNKNOWN,
                                                       BRT_RECTIMAGE,
                                                       BTFT_NONTEXT);
  ColPartition_IT part_it(part_list);
  part_it.add_after_then_move(pix_part);

  rectsearch->StartRectSearch(im_box);
  ColPartition* part;
  while ((part = rectsearch->NextRectSearch()) != nullptr) {
    TBOX part_box = part->bounding_box();
    if (part_box.contains(im_box) && part->flow() >= BTFT_CHAIN) {
      // This image is completely covered by an existing text partition.
      // Throw away every image part created so far.
      for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
        // NOTE: this declaration shadows the outer pix_part.
        ColPartition* pix_part = part_it.extract();
        pix_part->DeleteBoxes();
        delete pix_part;
      }
    } else if (part->flow() == BTFT_STRONG_CHAIN) {
      // Strong text intersects the image box.
      TBOX overlap_box = part_box.intersection(im_box);
      // Count the black image pixels under the overlapping region.
      int black_area = ImageFind::CountPixelsInRotatedBox(overlap_box, im_box,
                                                          rerotation, pix);
      // Cut the text out of the image if less than half of the text box area
      // is black image, or if the text sticks out of the image box.
      if (black_area * 2 < part_box.area() || !im_box.contains(part_box)) {
        // Eat a piece out of the image.
        // Pad it so that pieces eaten out look decent: the box grows by half
        // of the text size (width for vertical text, else height) at both
        // the top and the bottom.
        int padding = part->blob_type() == BRT_VERT_TEXT
                    ? part_box.width() : part_box.height();
        part_box.set_top(part_box.top() + padding / 2);
        part_box.set_bottom(part_box.bottom() - padding / 2);
        CutChunkFromParts(part_box, im_box, rotation, rerotation,
                          pix, part_list);
      } else {
        // Strong overlap with the black area, so call it text on image.
        part->set_flow(BTFT_TEXT_ON_IMAGE);
      }
    }
    // Nothing is left of the image, so there is no point searching further.
    if (part_list->empty()) {
      break;
    }
  }
}

// Search for the rightmost text that overlaps vertically and is to the left
// of the given box, but within the given left limit.
// Only partitions with flow BTFT_CHAIN or BTFT_STRONG_CHAIN count as text.
// Returns left_limit unchanged if no such text is found, otherwise the
// largest right edge of qualifying text that lies strictly between
// left_limit and box.left().
static int ExpandImageLeft(const TBOX& box, int left_limit,
                           ColPartitionGrid* part_grid) {
  ColPartitionGridSearch search(part_grid);
  ColPartition* part;
  // Search right to left for any text that overlaps.
  search.StartSideSearch(box.left(), box.bottom(), box.top());
  while ((part = search.NextSideSearch(true)) != nullptr) {
    if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
      const TBOX& part_box(part->bounding_box());
      // A negative y_gap is treated as vertical overlap with the box.
      if (part_box.y_gap(box) < 0) {
        if (part_box.right() > left_limit && part_box.right() < box.left())
          left_limit = part_box.right();
        // Stop at the first overlapping text, whether or not it moved the
        // limit.
        break;
      }
    }
  }
  // part is non-null only if the loop above stopped on overlapping text.
  if (part != nullptr) {
    // Search for the nearest text up to the one we already found.
    TBOX search_box(left_limit, box.bottom(), box.left(), box.top());
    search.StartRectSearch(search_box);
    while ((part = search.NextRectSearch()) != nullptr) {
      if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
        const TBOX& part_box(part->bounding_box());
        if (part_box.y_gap(box) < 0) {
          if (part_box.right() > left_limit && part_box.right() < box.left()) {
            left_limit = part_box.right();
          }
        }
      }
    }
  }
  return left_limit;
}

// Search for the leftmost text that overlaps vertically and is to the right
// of the given box, but within the given right limit.
// Only partitions with flow BTFT_CHAIN or BTFT_STRONG_CHAIN count as text.
// Returns right_limit unchanged if no such text is found, otherwise the
// smallest left edge of qualifying text that lies strictly between
// box.right() and right_limit.
static int ExpandImageRight(const TBOX& box, int right_limit,
                            ColPartitionGrid* part_grid) {
  ColPartitionGridSearch search(part_grid);
  ColPartition* part;
  // Search left to right for any text that overlaps.
  search.StartSideSearch(box.right(), box.bottom(), box.top());
  while ((part = search.NextSideSearch(false)) != nullptr) {
    if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
      const TBOX& part_box(part->bounding_box());
      // A negative y_gap is treated as vertical overlap with the box.
      if (part_box.y_gap(box) < 0) {
        if (part_box.left() < right_limit && part_box.left() > box.right())
          right_limit = part_box.left();
        // Stop at the first overlapping text, whether or not it moved the
        // limit.
        break;
      }
    }
  }
  // part is non-null only if the loop above stopped on overlapping text.
  if (part != nullptr) {
    // Search for the nearest text up to the one we already found.
    TBOX search_box(box.left(), box.bottom(), right_limit, box.top());
    search.StartRectSearch(search_box);
    while ((part = search.NextRectSearch()) != nullptr) {
      if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) {
        const TBOX& part_box(part->bounding_box());
        if (part_box.y_gap(box) < 0) {
          if (part_box.left() < right_limit && part_box.left() > box.right())
            right_limit = part_box.left();
        }
      }
    }
  }
  return right_limit;
}

// Search for the topmost text that overlaps horizontally and is below
// the given box, but within the given bottom limit.
+static int ExpandImageBottom(const TBOX& box, int bottom_limit, + ColPartitionGrid* part_grid) { + ColPartitionGridSearch search(part_grid); + ColPartition* part; + // Search right to left for any text that overlaps. + search.StartVerticalSearch(box.left(), box.right(), box.bottom()); + while ((part = search.NextVerticalSearch(true)) != nullptr) { + if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { + const TBOX& part_box(part->bounding_box()); + if (part_box.x_gap(box) < 0) { + if (part_box.top() > bottom_limit && part_box.top() < box.bottom()) + bottom_limit = part_box.top(); + break; + } + } + } + if (part != nullptr) { + // Search for the nearest text up to the one we already found. + TBOX search_box(box.left(), bottom_limit, box.right(), box.bottom()); + search.StartRectSearch(search_box); + while ((part = search.NextRectSearch()) != nullptr) { + if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { + const TBOX& part_box(part->bounding_box()); + if (part_box.x_gap(box) < 0) { + if (part_box.top() > bottom_limit && part_box.top() < box.bottom()) + bottom_limit = part_box.top(); + } + } + } + } + return bottom_limit; +} + +// Search for the bottommost text that overlaps horizontally and is above +// the given box, but within the given top limit. +static int ExpandImageTop(const TBOX& box, int top_limit, + ColPartitionGrid* part_grid) { + ColPartitionGridSearch search(part_grid); + ColPartition* part; + // Search right to left for any text that overlaps. 
+ search.StartVerticalSearch(box.left(), box.right(), box.top()); + while ((part = search.NextVerticalSearch(false)) != nullptr) { + if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { + const TBOX& part_box(part->bounding_box()); + if (part_box.x_gap(box) < 0) { + if (part_box.bottom() < top_limit && part_box.bottom() > box.top()) + top_limit = part_box.bottom(); + break; + } + } + } + if (part != nullptr) { + // Search for the nearest text up to the one we already found. + TBOX search_box(box.left(), box.top(), box.right(), top_limit); + search.StartRectSearch(search_box); + while ((part = search.NextRectSearch()) != nullptr) { + if (part->flow() == BTFT_STRONG_CHAIN || part->flow() == BTFT_CHAIN) { + const TBOX& part_box(part->bounding_box()); + if (part_box.x_gap(box) < 0) { + if (part_box.bottom() < top_limit && part_box.bottom() > box.top()) + top_limit = part_box.bottom(); + } + } + } + } + return top_limit; +} + +// Expands the image box in the given direction until it hits text, +// limiting the expansion to the given limit box, returning the result +// in the expanded box, and +// returning the increase in area resulting from the expansion. +static int ExpandImageDir(BlobNeighbourDir dir, const TBOX& im_box, + const TBOX& limit_box, + ColPartitionGrid* part_grid, TBOX* expanded_box) { + *expanded_box = im_box; + switch (dir) { + case BND_LEFT: + expanded_box->set_left(ExpandImageLeft(im_box, limit_box.left(), + part_grid)); + break; + case BND_RIGHT: + expanded_box->set_right(ExpandImageRight(im_box, limit_box.right(), + part_grid)); + break; + case BND_ABOVE: + expanded_box->set_top(ExpandImageTop(im_box, limit_box.top(), part_grid)); + break; + case BND_BELOW: + expanded_box->set_bottom(ExpandImageBottom(im_box, limit_box.bottom(), + part_grid)); + break; + default: + return 0; + } + return expanded_box->area() - im_box.area(); +} + +// Expands the image partition into any non-text until it touches text. 
+// The expansion proceeds in the order of increasing increase in area +// as a heuristic to find the best rectangle by expanding in the most +// constrained direction first. +static void MaximalImageBoundingBox(ColPartitionGrid* part_grid, TBOX* im_box) { + bool dunnit[BND_COUNT]; + memset(dunnit, 0, sizeof(dunnit)); + TBOX limit_box(part_grid->bleft().x(), part_grid->bleft().y(), + part_grid->tright().x(), part_grid->tright().y()); + TBOX text_box(*im_box); + for (int iteration = 0; iteration < BND_COUNT; ++iteration) { + // Find the direction with least area increase. + int best_delta = -1; + BlobNeighbourDir best_dir = BND_LEFT; + TBOX expanded_boxes[BND_COUNT]; + for (int dir = 0; dir < BND_COUNT; ++dir) { + auto bnd = static_cast<BlobNeighbourDir>(dir); + if (!dunnit[bnd]) { + TBOX expanded_box; + int area_delta = ExpandImageDir(bnd, text_box, limit_box, part_grid, + &expanded_boxes[bnd]); + if (best_delta < 0 || area_delta < best_delta) { + best_delta = area_delta; + best_dir = bnd; + } + } + } + // Run the best and remember the direction. + dunnit[best_dir] = true; + text_box = expanded_boxes[best_dir]; + } + *im_box = text_box; +} + +// Helper deletes the given partition but first marks up all the blobs as +// noise, so they get deleted later, and disowns them. +// If the initial type of the partition is image, then it actually deletes +// the blobs, as the partition owns them in that case. +static void DeletePartition(ColPartition* part) { + BlobRegionType type = part->blob_type(); + if (type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) { + // The partition owns the boxes of these types, so just delete them. + part->DeleteBoxes(); // From a previous iteration. + } else { + // Once marked, the blobs will be swept up by TidyBlobs. + part->set_flow(BTFT_NONTEXT); + part->set_blob_type(BRT_NOISE); + part->SetBlobTypes(); + part->DisownBoxes(); // Created before FindImagePartitions. 
+ } + delete part; +} + +// The meat of joining fragmented images and consuming ColPartitions of +// uncertain type. +// *part_ptr is an input/output BRT_RECTIMAGE ColPartition that is to be +// expanded to consume overlapping and nearby ColPartitions of uncertain type +// and other BRT_RECTIMAGE partitions, but NOT to be expanded beyond +// max_image_box. *part_ptr is NOT in the part_grid. +// rectsearch is already constructed on the part_grid, and is used for +// searching for overlapping and nearby ColPartitions. +// ExpandImageIntoParts is called iteratively until it returns false. Each +// time it absorbs the nearest non-contained candidate, and everything that +// is fully contained within part_ptr's bounding box. +// TODO(rays) what if it just eats everything inside max_image_box in one go? +static bool ExpandImageIntoParts(const TBOX& max_image_box, + ColPartitionGridSearch* rectsearch, + ColPartitionGrid* part_grid, + ColPartition** part_ptr) { + ColPartition* image_part = *part_ptr; + TBOX im_part_box = image_part->bounding_box(); + if (textord_tabfind_show_images > 1) { + tprintf("Searching for merge with image part:"); + im_part_box.print(); + tprintf("Text box="); + max_image_box.print(); + } + rectsearch->StartRectSearch(max_image_box); + ColPartition* part; + ColPartition* best_part = nullptr; + int best_dist = 0; + while ((part = rectsearch->NextRectSearch()) != nullptr) { + if (textord_tabfind_show_images > 1) { + tprintf("Considering merge with part:"); + part->Print(); + if (im_part_box.contains(part->bounding_box())) + tprintf("Fully contained\n"); + else if (!max_image_box.contains(part->bounding_box())) + tprintf("Not within text box\n"); + else if (part->flow() == BTFT_STRONG_CHAIN) + tprintf("Too strong text\n"); + else + tprintf("Real candidate\n"); + } + if (part->flow() == BTFT_STRONG_CHAIN || + part->flow() == BTFT_TEXT_ON_IMAGE || + part->blob_type() == BRT_POLYIMAGE) + continue; + TBOX box = part->bounding_box(); + if 
(max_image_box.contains(box) && part->blob_type() != BRT_NOISE) { + if (im_part_box.contains(box)) { + // Eat it completely. + rectsearch->RemoveBBox(); + DeletePartition(part); + continue; + } + int x_dist = std::max(0, box.x_gap(im_part_box)); + int y_dist = std::max(0, box.y_gap(im_part_box)); + int dist = x_dist * x_dist + y_dist * y_dist; + if (dist > box.area() || dist > im_part_box.area()) + continue; // Not close enough. + if (best_part == nullptr || dist < best_dist) { + // We keep the nearest qualifier, which is not necessarily the nearest. + best_part = part; + best_dist = dist; + } + } + } + if (best_part != nullptr) { + // It needs expanding. We can do it without touching text. + TBOX box = best_part->bounding_box(); + if (textord_tabfind_show_images > 1) { + tprintf("Merging image part:"); + im_part_box.print(); + tprintf("with part:"); + box.print(); + } + im_part_box += box; + *part_ptr = ColPartition::FakePartition(im_part_box, PT_UNKNOWN, + BRT_RECTIMAGE, + BTFT_NONTEXT); + DeletePartition(image_part); + part_grid->RemoveBBox(best_part); + DeletePartition(best_part); + rectsearch->RepositionIterator(); + return true; + } + return false; +} + +// Helper function to compute the overlap area between the box and the +// given list of partitions. +static int IntersectArea(const TBOX& box, ColPartition_LIST* part_list) { + int intersect_area = 0; + ColPartition_IT part_it(part_list); + // Iterate the parts and subtract intersecting area. + for (part_it.mark_cycle_pt(); !part_it.cycled_list(); + part_it.forward()) { + ColPartition* image_part = part_it.data(); + TBOX intersect = box.intersection(image_part->bounding_box()); + intersect_area += intersect.area(); + } + return intersect_area; +} + +// part_list is a set of ColPartitions representing a polygonal image, and +// im_box is the union of the bounding boxes of all the parts in part_list. +// Tests whether part is to be consumed by the polygonal image. 
+// Returns true if part is weak text and more than half of its area is +// intersected by parts from the part_list, and it is contained within im_box. +static bool TestWeakIntersectedPart(const TBOX& im_box, + ColPartition_LIST* part_list, + ColPartition* part) { + if (part->flow() < BTFT_STRONG_CHAIN) { + // A weak partition intersects the box. + const TBOX& part_box = part->bounding_box(); + if (im_box.contains(part_box)) { + int area = part_box.area(); + int intersect_area = IntersectArea(part_box, part_list); + if (area < 2 * intersect_area) { + return true; + } + } + } + return false; +} + +// A rectangular or polygonal image has been completed, in part_list, bounding +// box in im_box. We want to eliminate weak text or other uncertain partitions +// (basically anything that is not BRT_STRONG_CHAIN or better) from both the +// part_grid and the big_parts list that are contained within im_box and +// overlapped enough by the possibly polygonal image. +static void EliminateWeakParts(const TBOX& im_box, + ColPartitionGrid* part_grid, + ColPartition_LIST* big_parts, + ColPartition_LIST* part_list) { + ColPartitionGridSearch rectsearch(part_grid); + ColPartition* part; + rectsearch.StartRectSearch(im_box); + while ((part = rectsearch.NextRectSearch()) != nullptr) { + if (TestWeakIntersectedPart(im_box, part_list, part)) { + BlobRegionType type = part->blob_type(); + if (type == BRT_POLYIMAGE || type == BRT_RECTIMAGE) { + rectsearch.RemoveBBox(); + DeletePartition(part); + } else { + // The part is mostly covered, so mark it. Non-image partitions are + // kept hanging around to mark the image for pass2 + part->set_flow(BTFT_NONTEXT); + part->set_blob_type(BRT_NOISE); + part->SetBlobTypes(); + } + } + } + ColPartition_IT big_it(big_parts); + for (big_it.mark_cycle_pt(); !big_it.cycled_list(); big_it.forward()) { + part = big_it.data(); + if (TestWeakIntersectedPart(im_box, part_list, part)) { + // Once marked, the blobs will be swept up by TidyBlobs. 
+ DeletePartition(big_it.extract()); + } + } +} + +// Helper scans for good text partitions overlapping the given box. +// If there are no good text partitions overlapping an expanded box, then +// the box is expanded, otherwise, the original box is returned. +// If good text overlaps the box, true is returned. +static bool ScanForOverlappingText(ColPartitionGrid* part_grid, TBOX* box) { + ColPartitionGridSearch rectsearch(part_grid); + TBOX padded_box(*box); + padded_box.pad(kNoisePadding, kNoisePadding); + rectsearch.StartRectSearch(padded_box); + ColPartition* part; + bool any_text_in_padded_rect = false; + while ((part = rectsearch.NextRectSearch()) != nullptr) { + if (part->flow() == BTFT_CHAIN || + part->flow() == BTFT_STRONG_CHAIN) { + // Text intersects the box. + any_text_in_padded_rect = true; + const TBOX& part_box = part->bounding_box(); + if (box->overlap(part_box)) { + return true; + } + } + } + if (!any_text_in_padded_rect) + *box = padded_box; + return false; +} + +// Renders the boxes of image parts from the supplied list onto the image_pix, +// except where they interfere with existing strong text in the part_grid, +// and then deletes them. +// Box coordinates are rotated by rerotate to match the image. +static void MarkAndDeleteImageParts(const FCOORD& rerotate, + ColPartitionGrid* part_grid, + ColPartition_LIST* image_parts, + Pix* image_pix) { + if (image_pix == nullptr) + return; + int imageheight = pixGetHeight(image_pix); + ColPartition_IT part_it(image_parts); + for (; !part_it.empty(); part_it.forward()) { + ColPartition* part = part_it.extract(); + TBOX part_box = part->bounding_box(); + BlobRegionType type = part->blob_type(); + if (!ScanForOverlappingText(part_grid, &part_box) || + type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) { + // Mark the box on the image. + // All coords need to be rotated to match the image. 
+ part_box.rotate(rerotate); + int left = part_box.left(); + int top = part_box.top(); + pixRasterop(image_pix, left, imageheight - top, + part_box.width(), part_box.height(), PIX_SET, nullptr, 0, 0); + } + DeletePartition(part); + } +} + +// Locates all the image partitions in the part_grid, that were found by a +// previous call to FindImagePartitions, marks them in the image_mask, +// removes them from the grid, and deletes them. This makes it possible to +// call FindImagePartitions again to produce less broken-up and less +// overlapping image partitions. +// rerotation specifies how to rotate the partition coords to match +// the image_mask, since this function is used after orientation correction. +void ImageFind::TransferImagePartsToImageMask(const FCOORD& rerotation, + ColPartitionGrid* part_grid, + Pix* image_mask) { + // Extract the noise parts from the grid and put them on a temporary list. + ColPartition_LIST parts_list; + ColPartition_IT part_it(&parts_list); + ColPartitionGridSearch gsearch(part_grid); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + BlobRegionType type = part->blob_type(); + if (type == BRT_NOISE || type == BRT_RECTIMAGE || type == BRT_POLYIMAGE) { + part_it.add_after_then_move(part); + gsearch.RemoveBBox(); + } + } + // Render listed noise partitions to the image mask. + MarkAndDeleteImageParts(rerotation, part_grid, &parts_list, image_mask); +} + +// Removes and deletes all image partitions that are too small to be worth +// keeping. We have to do this as a separate phase after creating the image +// partitions as the small images are needed to join the larger ones together. 
+static void DeleteSmallImages(ColPartitionGrid* part_grid) { + if (part_grid != nullptr) return; + ColPartitionGridSearch gsearch(part_grid); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + // Only delete rectangular images, since if it became a poly image, it + // is more evidence that it is somehow important. + if (part->blob_type() == BRT_RECTIMAGE) { + const TBOX& part_box = part->bounding_box(); + if (part_box.width() < kMinImageFindSize || + part_box.height() < kMinImageFindSize) { + // It is too small to keep. Just make it disappear. + gsearch.RemoveBBox(); + DeletePartition(part); + } + } + } +} + +// Runs a CC analysis on the image_pix mask image, and creates +// image partitions from them, cutting out strong text, and merging with +// nearby image regions such that they don't interfere with text. +// Rotation and rerotation specify how to rotate image coords to match +// the blob and partition coords and back again. +// The input/output part_grid owns all the created partitions, and +// the partitions own all the fake blobs that belong in the partitions. +// Since the other blobs in the other partitions will be owned by the block, +// ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this +// situation and collect the image blobs. +void ImageFind::FindImagePartitions(Pix* image_pix, const FCOORD& rotation, + const FCOORD& rerotation, TO_BLOCK* block, + TabFind* tab_grid, DebugPixa* pixa_debug, + ColPartitionGrid* part_grid, + ColPartition_LIST* big_parts) { + int imageheight = pixGetHeight(image_pix); + Boxa* boxa; + Pixa* pixa; + ConnCompAndRectangularize(image_pix, pixa_debug, &boxa, &pixa); + // Iterate the connected components in the image regions mask. 
+ int nboxes = 0; + if (boxa != nullptr && pixa != nullptr) nboxes = boxaGetCount(boxa); + for (int i = 0; i < nboxes; ++i) { + l_int32 x, y, width, height; + boxaGetBoxGeometry(boxa, i, &x, &y, &width, &height); + Pix* pix = pixaGetPix(pixa, i, L_CLONE); + TBOX im_box(x, imageheight -y - height, x + width, imageheight - y); + im_box.rotate(rotation); // Now matches all partitions and blobs. + ColPartitionGridSearch rectsearch(part_grid); + rectsearch.SetUniqueMode(true); + ColPartition_LIST part_list; + DivideImageIntoParts(im_box, rotation, rerotation, pix, + &rectsearch, &part_list); + if (textord_tabfind_show_images && pixa_debug != nullptr) { + pixa_debug->AddPix(pix, "ImageComponent"); + tprintf("Component has %d parts\n", part_list.length()); + } + pixDestroy(&pix); + if (!part_list.empty()) { + ColPartition_IT part_it(&part_list); + if (part_list.singleton()) { + // We didn't have to chop it into a polygon to fit around text, so + // try expanding it to merge fragmented image parts, as long as it + // doesn't touch strong text. + ColPartition* part = part_it.extract(); + TBOX text_box(im_box); + MaximalImageBoundingBox(part_grid, &text_box); + while (ExpandImageIntoParts(text_box, &rectsearch, part_grid, &part)); + part_it.set_to_list(&part_list); + part_it.add_after_then_move(part); + im_box = part->bounding_box(); + } + EliminateWeakParts(im_box, part_grid, big_parts, &part_list); + // Iterate the part_list and put the parts into the grid. 
+ for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) { + ColPartition* image_part = part_it.extract(); + im_box = image_part->bounding_box(); + part_grid->InsertBBox(true, true, image_part); + if (!part_it.at_last()) { + ColPartition* neighbour = part_it.data_relative(1); + image_part->AddPartner(false, neighbour); + neighbour->AddPartner(true, image_part); + } + } + } + } + boxaDestroy(&boxa); + pixaDestroy(&pixa); + DeleteSmallImages(part_grid); +#ifndef GRAPHICS_DISABLED + if (textord_tabfind_show_images) { + ScrollView* images_win_ = part_grid->MakeWindow(1000, 400, "With Images"); + part_grid->DisplayBoxes(images_win_); + } +#endif +} + +} // namespace tesseract. diff --git a/tesseract/src/textord/imagefind.h b/tesseract/src/textord/imagefind.h new file mode 100644 index 00000000..57be6990 --- /dev/null +++ b/tesseract/src/textord/imagefind.h @@ -0,0 +1,159 @@ +/////////////////////////////////////////////////////////////////////// +// File: imagefind.h +// Description: Class to find image and drawing regions in an image +// and create a corresponding list of empty blobs. +// Author: Ray Smith +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_TEXTORD_IMAGEFIND_H_
#define TESSERACT_TEXTORD_IMAGEFIND_H_

#include "debugpixa.h"

#include <cstdint>

struct Boxa;
struct Pix;
struct Pixa;

namespace tesseract {

class ColPartitionGrid;
class ColPartition_LIST;
class TabFind;
class TBOX;
class FCOORD;
class TO_BLOCK;
class BLOBNBOX_LIST;

// The ImageFind class is a simple static function wrapper class that
// exposes the FindImages function and some useful helper functions.
// All members are static; the class holds no state.
class ImageFind {
 public:
  // Finds image regions within the BINARY source pix (page image) and returns
  // the image regions as a mask image.
  // The returned pix may be nullptr, meaning no images found.
  // If not nullptr, it must be PixDestroyed by the caller.
  // If textord_tabfind_show_images, debug images are appended to pixa_debug.
  static Pix* FindImages(Pix* pix, DebugPixa* pixa_debug);

  // Generates a Boxa, Pixa pair from the input binary (image mask) pix,
  // analogous to pixConnComp, except that connected components which are nearly
  // rectangular are replaced with solid rectangles.
  // The returned boxa, pixa may be nullptr, meaning no images found.
  // If not nullptr, they must be destroyed by the caller.
  // Resolution of pix should match the source image (Tesseract::pix_binary_)
  // so the output coordinate systems match.
  static void ConnCompAndRectangularize(Pix* pix, DebugPixa* pixa_debug,
                                        Boxa** boxa, Pixa** pixa);

  // Returns true if there is a rectangle in the source pix, such that all
  // pixel rows and column slices outside of it have less than
  // min_fraction of the pixels black, and within max_skew_gradient fraction
  // of the pixels on the inside, there are at least max_fraction of the
  // pixels black. In other words, the inside of the rectangle looks roughly
  // rectangular, and the outside of it looks like extra bits.
  // On return, the rectangle is defined by x_start, y_start, x_end and y_end.
  // Note: the algorithm is iterative, allowing it to slice off pixels from
  // one edge, allowing it to then slice off more pixels from another edge.
  static bool pixNearlyRectangular(Pix* pix,
                                   double min_fraction, double max_fraction,
                                   double max_skew_gradient,
                                   int* x_start, int* y_start,
                                   int* x_end, int* y_end);

  // Given an input pix, and a bounding rectangle, the sides of the rectangle
  // are shrunk inwards until they bound any black pixels found within the
  // original rectangle. Returns false if the rectangle contains no black
  // pixels at all.
  static bool BoundsWithinRect(Pix* pix, int* x_start, int* y_start,
                               int* x_end, int* y_end);

  // Given a point in 3-D (RGB) space, returns the squared Euclidean distance
  // of the point from the given line, defined by a pair of points in the 3-D
  // (RGB) space, line1 and line2.
  static double ColorDistanceFromLine(const uint8_t* line1, const uint8_t* line2,
                                      const uint8_t* point);

  // Returns the leptonica combined code for the given RGB triplet.
  static uint32_t ComposeRGB(uint32_t r, uint32_t g, uint32_t b);

  // Returns the input value clipped to a uint8_t.
  static uint8_t ClipToByte(double pixel);

  // Computes the light and dark extremes of color in the given rectangle of
  // the given pix, which is factor smaller than the coordinate system in rect.
  // The light and dark points are taken to be the upper and lower 8th-ile of
  // the most deviant of R, G and B. The value of the other 2 channels are
  // computed by linear fit against the most deviant.
  // The colors of the two points are returned in color1 and color2, with the
  // alpha channel set to a scaled mean rms of the fits.
  // If color_map1 is not null then it and color_map2 get rect pasted in them
  // with the two calculated colors, and rms map gets a pasted rect of the rms.
  // color_map1, color_map2 and rms_map are assumed to be the same scale as pix.
  static void ComputeRectangleColors(const TBOX& rect, Pix* pix, int factor,
                                     Pix* color_map1, Pix* color_map2,
                                     Pix* rms_map,
                                     uint8_t* color1, uint8_t* color2);

  // Returns true if there are no black pixels in between the boxes.
  // The im_box must represent the bounding box of the pix in tesseract
  // coordinates, which may be negative, due to rotations to make the textlines
  // horizontal. The boxes are rotated by rotation, which should undo such
  // rotations, before mapping them onto the pix.
  static bool BlankImageInBetween(const TBOX& box1, const TBOX& box2,
                                  const TBOX& im_box, const FCOORD& rotation,
                                  Pix* pix);

  // Returns the number of pixels in box in the pix.
  // The im_box must represent the bounding box of the pix in tesseract
  // coordinates, which may be negative, due to rotations to make the textlines
  // horizontal. The boxes are rotated by rotation, which should undo such
  // rotations, before mapping them onto the pix.
  static int CountPixelsInRotatedBox(TBOX box, const TBOX& im_box,
                                     const FCOORD& rotation, Pix* pix);


  // Locates all the image partitions in the part_grid, that were found by a
  // previous call to FindImagePartitions, marks them in the image_mask,
  // removes them from the grid, and deletes them. This makes it possible to
  // call FindImagePartitions again to produce less broken-up and less
  // overlapping image partitions.
  // rerotation specifies how to rotate the partition coords to match
  // the image_mask, since this function is used after orientation correction.
  static void TransferImagePartsToImageMask(const FCOORD& rerotation,
                                            ColPartitionGrid* part_grid,
                                            Pix* image_mask);

  // Runs a CC analysis on the image_pix mask image, and creates
  // image partitions from them, cutting out strong text, and merging with
  // nearby image regions such that they don't interfere with text.
  // Rotation and rerotation specify how to rotate image coords to match
  // the blob and partition coords and back again.
  // The input/output part_grid owns all the created partitions, and
  // the partitions own all the fake blobs that belong in the partitions.
  // Since the other blobs in the other partitions will be owned by the block,
  // ColPartitionGrid::ReTypeBlobs must be called afterwards to fix this
  // situation and collect the image blobs.
  static void FindImagePartitions(Pix* image_pix, const FCOORD& rotation,
                                  const FCOORD& rerotation, TO_BLOCK* block,
                                  TabFind* tab_grid, DebugPixa* pixa_debug,
                                  ColPartitionGrid* part_grid,
                                  ColPartition_LIST* big_parts);
};

}  // namespace tesseract.

#endif  // TESSERACT_TEXTORD_IMAGEFIND_H_
diff --git a/tesseract/src/textord/linefind.cpp b/tesseract/src/textord/linefind.cpp
new file mode 100644
index 00000000..d3763f31
--- /dev/null
+++ b/tesseract/src/textord/linefind.cpp
@@ -0,0 +1,769 @@
///////////////////////////////////////////////////////////////////////
// File:        linefind.cpp
// Description: Class to find vertical lines in an image and create
//              a corresponding list of empty blobs.
// Author:      Ray Smith
// Created:     Thu Mar 20 09:49:01 PDT 2008
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif

#include "linefind.h"
#include "alignedblob.h"
#include "tabvector.h"
#include "blobbox.h"
#include "edgblob.h"
#if defined(USE_OPENCL)
#include "openclwrapper.h"  // for OpenclDevice
#endif

#include "allheaders.h"

#include <algorithm>

namespace tesseract {

/// Denominator of resolution makes max pixel width to allow thin lines.
const int kThinLineFraction = 20;
/// Denominator of resolution makes min pixels to demand line lengths to be.
const int kMinLineLengthFraction = 4;
/// Spacing of cracks across the page to break up tall vertical lines.
const int kCrackSpacing = 100;
/// Grid size used by line finder. Not very critical.
const int kLineFindGridSize = 50;
/// Min width of a line in pixels to be considered thick.
const int kMinThickLineWidth = 12;
/// Max size of line residue. (The pixels that fail the long thin opening, and
/// therefore don't make it to the candidate line mask, but are nevertheless
/// part of the line.)
const int kMaxLineResidue = 6;
/// Min length in inches of a line segment that exceeds kMinThickLineWidth in
/// thickness. (Such lines shouldn't break by simple image degradation.)
const double kThickLengthMultiple = 0.75;
/// Max fraction of line box area that can be occupied by non-line pixels.
const double kMaxNonLineDensity = 0.25;
/// Max height of a music stave in inches.
const double kMaxStaveHeight = 1.0;
/// Minimum fraction of pixels in a music rectangle connected to the staves.
const double kMinMusicPixelFraction = 0.75;

// Erases the unused blobs from the line_pix image, taking into account
// whether this was a horizontal or vertical line set.
+static void RemoveUnusedLineSegments(bool horizontal_lines, + BLOBNBOX_LIST* line_bblobs, + Pix* line_pix) { + int height = pixGetHeight(line_pix); + BLOBNBOX_IT bbox_it(line_bblobs); + for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { + BLOBNBOX* blob = bbox_it.data(); + if (blob->left_tab_type() != TT_VLINE) { + const TBOX& box = blob->bounding_box(); + Box* pixbox = nullptr; + if (horizontal_lines) { + // Horizontal lines are in tess format and also have x and y flipped + // (to use FindVerticalAlignment) so we have to flip x and y and then + // convert to Leptonica by height - flipped x (ie the right edge). + // See GetLineBoxes for more explanation. + pixbox = boxCreate(box.bottom(), height - box.right(), + box.height(), box.width()); + } else { + // For vertical lines, just flip upside-down to convert to Leptonica. + // The y position of the box in Leptonica terms is the distance from + // the top of the image to the top of the box. + pixbox = boxCreate(box.left(), height - box.top(), + box.width(), box.height()); + } + pixClearInRect(line_pix, pixbox); + boxDestroy(&pixbox); + } + } +} + +// Helper subtracts the line_pix image from the src_pix, and removes residue +// as well by removing components that touch the line, but are not in the +// non_line_pix mask. It is assumed that the non_line_pix mask has already +// been prepared to required accuracy. +static void SubtractLinesAndResidue(Pix* line_pix, Pix* non_line_pix, + int resolution, Pix* src_pix) { + // First remove the lines themselves. + pixSubtract(src_pix, src_pix, line_pix); + // Subtract the non-lines from the image to get the residue. + Pix* residue_pix = pixSubtract(nullptr, src_pix, non_line_pix); + // Dilate the lines so they touch the residue. + Pix* fat_line_pix = pixDilateBrick(nullptr, line_pix, 3, 3); + // Seed fill the fat lines to get all the residue. 
+ pixSeedfillBinary(fat_line_pix, fat_line_pix, residue_pix, 8); + // Subtract the residue from the original image. + pixSubtract(src_pix, src_pix, fat_line_pix); + pixDestroy(&fat_line_pix); + pixDestroy(&residue_pix); +} + +// Returns the maximum strokewidth in the given binary image by doubling +// the maximum of the distance function. +static int MaxStrokeWidth(Pix* pix) { + Pix* dist_pix = pixDistanceFunction(pix, 4, 8, L_BOUNDARY_BG); + int width = pixGetWidth(dist_pix); + int height = pixGetHeight(dist_pix); + int wpl = pixGetWpl(dist_pix); + l_uint32* data = pixGetData(dist_pix); + // Find the maximum value in the distance image. + int max_dist = 0; + for (int y = 0; y < height; ++y) { + for (int x = 0; x < width; ++x) { + int pixel = GET_DATA_BYTE(data, x); + if (pixel > max_dist) + max_dist = pixel; + } + data += wpl; + } + pixDestroy(&dist_pix); + return max_dist * 2; +} + +// Returns the number of components in the intersection_pix touched by line_box. +static int NumTouchingIntersections(Box* line_box, Pix* intersection_pix) { + if (intersection_pix == nullptr) return 0; + Pix* rect_pix = pixClipRectangle(intersection_pix, line_box, nullptr); + Boxa* boxa = pixConnComp(rect_pix, nullptr, 8); + pixDestroy(&rect_pix); + if (boxa == nullptr) return false; + int result = boxaGetCount(boxa); + boxaDestroy(&boxa); + return result; +} + +// Returns the number of black pixels found in the box made by adding the line +// width to both sides of the line bounding box. (Increasing the smallest +// dimension of the bounding box.) +static int CountPixelsAdjacentToLine(int line_width, Box* line_box, + Pix* nonline_pix) { + l_int32 x, y, box_width, box_height; + boxGetGeometry(line_box, &x, &y, &box_width, &box_height); + if (box_width > box_height) { + // horizontal line. + int bottom = std::min(pixGetHeight(nonline_pix), y + box_height + line_width); + y = std::max(0, y - line_width); + box_height = bottom - y; + } else { + // Vertical line. 
+ int right = std::min(pixGetWidth(nonline_pix), x + box_width + line_width); + x = std::max(0, x - line_width); + box_width = right - x; + } + Box* box = boxCreate(x, y, box_width, box_height); + Pix* rect_pix = pixClipRectangle(nonline_pix, box, nullptr); + boxDestroy(&box); + l_int32 result; + pixCountPixels(rect_pix, &result, nullptr); + pixDestroy(&rect_pix); + return result; +} + +// Helper erases false-positive line segments from the input/output line_pix. +// 1. Since thick lines shouldn't really break up, we can eliminate some false +// positives by marking segments that are at least kMinThickLineWidth +// thickness, yet have a length less than min_thick_length. +// 2. Lines that don't have at least 2 intersections with other lines and have +// a lot of neighbouring non-lines are probably not lines (perhaps arabic +// or Hindi words, or underlines.) +// Bad line components are erased from line_pix. +// Returns the number of remaining connected components. +static int FilterFalsePositives(int resolution, Pix* nonline_pix, + Pix* intersection_pix, Pix* line_pix) { + int min_thick_length = static_cast<int>(resolution * kThickLengthMultiple); + Pixa* pixa = nullptr; + Boxa* boxa = pixConnComp(line_pix, &pixa, 8); + // Iterate over the boxes to remove false positives. + int nboxes = boxaGetCount(boxa); + int remaining_boxes = nboxes; + for (int i = 0; i < nboxes; ++i) { + Box* box = boxaGetBox(boxa, i, L_CLONE); + l_int32 x, y, box_width, box_height; + boxGetGeometry(box, &x, &y, &box_width, &box_height); + Pix* comp_pix = pixaGetPix(pixa, i, L_CLONE); + int max_width = MaxStrokeWidth(comp_pix); + pixDestroy(&comp_pix); + bool bad_line = false; + // If the length is too short to stand-alone as a line, and the box width + // is thick enough, and the stroke width is thick enough it is bad. 
+ if (box_width >= kMinThickLineWidth && box_height >= kMinThickLineWidth && + box_width < min_thick_length && box_height < min_thick_length && + max_width > kMinThickLineWidth) { + // Too thick for the length. + bad_line = true; + } + if (!bad_line && + (intersection_pix == nullptr || + NumTouchingIntersections(box, intersection_pix) < 2)) { + // Test non-line density near the line. + int nonline_count = CountPixelsAdjacentToLine(max_width, box, + nonline_pix); + if (nonline_count > box_height * box_width * kMaxNonLineDensity) + bad_line = true; + } + if (bad_line) { + // Not a good line. + pixClearInRect(line_pix, box); + --remaining_boxes; + } + boxDestroy(&box); + } + pixaDestroy(&pixa); + boxaDestroy(&boxa); + return remaining_boxes; +} + +// Finds vertical and horizontal line objects in the given pix. +// Uses the given resolution to determine size thresholds instead of any +// that may be present in the pix. +// The output vertical_x and vertical_y contain a sum of the output vectors, +// thereby giving the mean vertical direction. +// If pix_music_mask != nullptr, and music is detected, a mask of the staves +// and anything that is connected (bars, notes etc.) will be returned in +// pix_music_mask, the mask subtracted from pix, and the lines will not +// appear in v_lines or h_lines. +// The output vectors are owned by the list and Frozen (cannot refit) by +// having no boxes, as there is no need to refit or merge separator lines. +// The detected lines are removed from the pix. 
+void LineFinder::FindAndRemoveLines(int resolution, bool debug, Pix* pix, + int* vertical_x, int* vertical_y, + Pix** pix_music_mask, + TabVector_LIST* v_lines, + TabVector_LIST* h_lines) { + if (pix == nullptr || vertical_x == nullptr || vertical_y == nullptr) { + tprintf("Error in parameters for LineFinder::FindAndRemoveLines\n"); + return; + } + Pix* pix_vline = nullptr; + Pix* pix_non_vline = nullptr; + Pix* pix_hline = nullptr; + Pix* pix_non_hline = nullptr; + Pix* pix_intersections = nullptr; + Pixa* pixa_display = debug ? pixaCreate(0) : nullptr; + GetLineMasks(resolution, pix, &pix_vline, &pix_non_vline, &pix_hline, + &pix_non_hline, &pix_intersections, pix_music_mask, + pixa_display); + // Find lines, convert to TabVector_LIST and remove those that are used. + FindAndRemoveVLines(resolution, pix_intersections, vertical_x, vertical_y, + &pix_vline, pix_non_vline, pix, v_lines); + if (pix_hline != nullptr) { + // Recompute intersections and re-filter false positive h-lines. + if (pix_vline != nullptr) + pixAnd(pix_intersections, pix_vline, pix_hline); + else + pixDestroy(&pix_intersections); + if (!FilterFalsePositives(resolution, pix_non_hline, pix_intersections, + pix_hline)) { + pixDestroy(&pix_hline); + } + } + FindAndRemoveHLines(resolution, pix_intersections, *vertical_x, *vertical_y, + &pix_hline, pix_non_hline, pix, h_lines); + if (pixa_display != nullptr && pix_vline != nullptr) + pixaAddPix(pixa_display, pix_vline, L_CLONE); + if (pixa_display != nullptr && pix_hline != nullptr) + pixaAddPix(pixa_display, pix_hline, L_CLONE); + if (pix_vline != nullptr && pix_hline != nullptr) { + // Remove joins (intersections) where lines cross, and the residue. + // Recalculate the intersections, since some lines have been deleted. + pixAnd(pix_intersections, pix_vline, pix_hline); + // Fatten up the intersections and seed-fill to get the intersection + // residue. 
+ Pix* pix_join_residue = pixDilateBrick(nullptr, pix_intersections, 5, 5); + pixSeedfillBinary(pix_join_residue, pix_join_residue, pix, 8); + // Now remove the intersection residue. + pixSubtract(pix, pix, pix_join_residue); + pixDestroy(&pix_join_residue); + } + // Remove any detected music. + if (pix_music_mask != nullptr && *pix_music_mask != nullptr) { + if (pixa_display != nullptr) + pixaAddPix(pixa_display, *pix_music_mask, L_CLONE); + pixSubtract(pix, pix, *pix_music_mask); + } + if (pixa_display != nullptr) + pixaAddPix(pixa_display, pix, L_CLONE); + + pixDestroy(&pix_vline); + pixDestroy(&pix_non_vline); + pixDestroy(&pix_hline); + pixDestroy(&pix_non_hline); + pixDestroy(&pix_intersections); + if (pixa_display != nullptr) { + pixaConvertToPdf(pixa_display, resolution, 1.0f, 0, 0, "LineFinding", + "vhlinefinding.pdf"); + pixaDestroy(&pixa_display); + } +} + +// Converts the Boxa array to a list of C_BLOB, getting rid of severely +// overlapping outlines and those that are children of a bigger one. +// The output is a list of C_BLOBs that are owned by the list. +// The C_OUTLINEs in the C_BLOBs contain no outline data - just empty +// bounding boxes. The Boxa is consumed and destroyed. +void LineFinder::ConvertBoxaToBlobs(int image_width, int image_height, + Boxa** boxes, C_BLOB_LIST* blobs) { + C_OUTLINE_LIST outlines; + C_OUTLINE_IT ol_it = &outlines; + // Iterate the boxes to convert to outlines. + int nboxes = boxaGetCount(*boxes); + for (int i = 0; i < nboxes; ++i) { + l_int32 x, y, width, height; + boxaGetBoxGeometry(*boxes, i, &x, &y, &width, &height); + // Make a C_OUTLINE from the leptonica box. This is a bit of a hack, + // as there is no outline, just a bounding box, but with some very + // small changes to coutln.cpp, it works nicely. 
+ ICOORD top_left(x, y); + ICOORD bot_right(x + width, y + height); + CRACKEDGE startpt; + startpt.pos = top_left; + auto* outline = new C_OUTLINE(&startpt, top_left, bot_right, 0); + ol_it.add_after_then_move(outline); + } + // Use outlines_to_blobs to convert the outlines to blobs and find + // overlapping and contained objects. The output list of blobs in the block + // has all the bad ones filtered out and deleted. + BLOCK block; + ICOORD page_tl(0, 0); + ICOORD page_br(image_width, image_height); + outlines_to_blobs(&block, page_tl, page_br, &outlines); + // Transfer the created blobs to the output list. + C_BLOB_IT blob_it(blobs); + blob_it.add_list_after(block.blob_list()); + // The boxes aren't needed any more. + boxaDestroy(boxes); +} + +// Finds vertical line objects in pix_vline and removes the from src_pix. +// Uses the given resolution to determine size thresholds instead of any +// that may be present in the pix. +// The output vertical_x and vertical_y contain a sum of the output vectors, +// thereby giving the mean vertical direction. +// The output vectors are owned by the list and Frozen (cannot refit) by +// having no boxes, as there is no need to refit or merge separator lines. +// If no good lines are found, pix_vline is destroyed. +// None of the input pointers may be nullptr, and if *pix_vline is nullptr then +// the function does nothing. 
+void LineFinder::FindAndRemoveVLines(int resolution, + Pix* pix_intersections, + int* vertical_x, int* vertical_y, + Pix** pix_vline, Pix* pix_non_vline, + Pix* src_pix, TabVector_LIST* vectors) { + if (pix_vline == nullptr || *pix_vline == nullptr) return; + C_BLOB_LIST line_cblobs; + BLOBNBOX_LIST line_bblobs; + GetLineBoxes(false, *pix_vline, pix_intersections, + &line_cblobs, &line_bblobs); + int width = pixGetWidth(src_pix); + int height = pixGetHeight(src_pix); + ICOORD bleft(0, 0); + ICOORD tright(width, height); + FindLineVectors(bleft, tright, &line_bblobs, vertical_x, vertical_y, vectors); + if (!vectors->empty()) { + RemoveUnusedLineSegments(false, &line_bblobs, *pix_vline); + SubtractLinesAndResidue(*pix_vline, pix_non_vline, resolution, src_pix); + ICOORD vertical; + vertical.set_with_shrink(*vertical_x, *vertical_y); + TabVector::MergeSimilarTabVectors(vertical, vectors, nullptr); + } else { + pixDestroy(pix_vline); + } +} + +// Finds horizontal line objects in pix_hline and removes them from src_pix. +// Uses the given resolution to determine size thresholds instead of any +// that may be present in the pix. +// The output vertical_x and vertical_y contain a sum of the output vectors, +// thereby giving the mean vertical direction. +// The output vectors are owned by the list and Frozen (cannot refit) by +// having no boxes, as there is no need to refit or merge separator lines. +// If no good lines are found, pix_hline is destroyed. +// None of the input pointers may be nullptr, and if *pix_hline is nullptr then +// the function does nothing. 
+void LineFinder::FindAndRemoveHLines(int resolution, + Pix* pix_intersections, + int vertical_x, int vertical_y, + Pix** pix_hline, Pix* pix_non_hline, + Pix* src_pix, TabVector_LIST* vectors) { + if (pix_hline == nullptr || *pix_hline == nullptr) return; + C_BLOB_LIST line_cblobs; + BLOBNBOX_LIST line_bblobs; + GetLineBoxes(true, *pix_hline, pix_intersections, &line_cblobs, &line_bblobs); + int width = pixGetWidth(src_pix); + int height = pixGetHeight(src_pix); + ICOORD bleft(0, 0); + ICOORD tright(height, width); + FindLineVectors(bleft, tright, &line_bblobs, &vertical_x, &vertical_y, + vectors); + if (!vectors->empty()) { + RemoveUnusedLineSegments(true, &line_bblobs, *pix_hline); + SubtractLinesAndResidue(*pix_hline, pix_non_hline, resolution, src_pix); + ICOORD vertical; + vertical.set_with_shrink(vertical_x, vertical_y); + TabVector::MergeSimilarTabVectors(vertical, vectors, nullptr); + // Iterate the vectors to flip them. x and y were flipped for horizontal + // lines, so FindLineVectors can work just with the vertical case. + // See GetLineBoxes for more on the flip. + TabVector_IT h_it(vectors); + for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) { + h_it.data()->XYFlip(); + } + } else { + pixDestroy(pix_hline); + } +} + +// Finds vertical lines in the given list of BLOBNBOXes. bleft and tright +// are the bounds of the image on which the input line_bblobs were found. +// The input line_bblobs list is const really. +// The output vertical_x and vertical_y are the total of all the vectors. +// The output list of TabVector makes no reference to the input BLOBNBOXes. +void LineFinder::FindLineVectors(const ICOORD& bleft, const ICOORD& tright, + BLOBNBOX_LIST* line_bblobs, + int* vertical_x, int* vertical_y, + TabVector_LIST* vectors) { + BLOBNBOX_IT bbox_it(line_bblobs); + int b_count = 0; + // Put all the blobs into the grid to find the lines, and move the blobs + // to the output lists. 
+ AlignedBlob blob_grid(kLineFindGridSize, bleft, tright); + for (bbox_it.mark_cycle_pt(); !bbox_it.cycled_list(); bbox_it.forward()) { + BLOBNBOX* bblob = bbox_it.data(); + bblob->set_left_tab_type(TT_MAYBE_ALIGNED); + bblob->set_left_rule(bleft.x()); + bblob->set_right_rule(tright.x()); + bblob->set_left_crossing_rule(bleft.x()); + bblob->set_right_crossing_rule(tright.x()); + blob_grid.InsertBBox(false, true, bblob); + ++b_count; + } + if (b_count == 0) + return; + + // Search the entire grid, looking for vertical line vectors. + BlobGridSearch lsearch(&blob_grid); + BLOBNBOX* bbox; + TabVector_IT vector_it(vectors); + *vertical_x = 0; + *vertical_y = 1; + lsearch.StartFullSearch(); + while ((bbox = lsearch.NextFullSearch()) != nullptr) { + if (bbox->left_tab_type() == TT_MAYBE_ALIGNED) { + const TBOX& box = bbox->bounding_box(); + if (AlignedBlob::WithinTestRegion(2, box.left(), box.bottom())) + tprintf("Finding line vector starting at bbox (%d,%d)\n", + box.left(), box.bottom()); + AlignedBlobParams align_params(*vertical_x, *vertical_y, box.width()); + TabVector* vector = blob_grid.FindVerticalAlignment(align_params, bbox, + vertical_x, + vertical_y); + if (vector != nullptr) { + vector->Freeze(); + vector_it.add_to_end(vector); + } + } + } +} + +// Returns a Pix music mask if music is detected. +// Any vertical line that has at least 5 intersections in sufficient density +// is taken to be a bar. Bars are used as a seed and the entire touching +// component is added to the output music mask and subtracted from the lines. +// Returns nullptr and does minimal work if no music is found. +static Pix* FilterMusic(int resolution, Pix* pix_closed, + Pix* pix_vline, Pix* pix_hline, + l_int32* v_empty, l_int32* h_empty) { + int max_stave_height = static_cast<int>(resolution * kMaxStaveHeight); + Pix* intersection_pix = pixAnd(nullptr, pix_vline, pix_hline); + Boxa* boxa = pixConnComp(pix_vline, nullptr, 8); + // Iterate over the boxes to find music bars. 
+ int nboxes = boxaGetCount(boxa); + Pix* music_mask = nullptr; + for (int i = 0; i < nboxes; ++i) { + Box* box = boxaGetBox(boxa, i, L_CLONE); + l_int32 x, y, box_width, box_height; + boxGetGeometry(box, &x, &y, &box_width, &box_height); + int joins = NumTouchingIntersections(box, intersection_pix); + // Test for the join density being at least 5 per max_stave_height, + // ie (joins-1)/box_height >= (5-1)/max_stave_height. + if (joins >= 5 && (joins - 1) * max_stave_height >= 4 * box_height) { + // This is a music bar. Add to the mask. + if (music_mask == nullptr) + music_mask = pixCreate(pixGetWidth(pix_vline), pixGetHeight(pix_vline), + 1); + pixSetInRect(music_mask, box); + } + boxDestroy(&box); + } + boxaDestroy(&boxa); + pixDestroy(&intersection_pix); + if (music_mask != nullptr) { + // The mask currently contains just the bars. Use the mask as a seed + // and the pix_closed as the mask for a seedfill to get all the + // intersecting staves. + pixSeedfillBinary(music_mask, music_mask, pix_closed, 8); + // Filter out false positives. CCs in the music_mask should be the vast + // majority of the pixels in their bounding boxes, as we expect just a + // tiny amount of text, a few phrase marks, and crescendo etc left. + Boxa* boxa = pixConnComp(music_mask, nullptr, 8); + // Iterate over the boxes to find music components. + int nboxes = boxaGetCount(boxa); + for (int i = 0; i < nboxes; ++i) { + Box* box = boxaGetBox(boxa, i, L_CLONE); + Pix* rect_pix = pixClipRectangle(music_mask, box, nullptr); + l_int32 music_pixels; + pixCountPixels(rect_pix, &music_pixels, nullptr); + pixDestroy(&rect_pix); + rect_pix = pixClipRectangle(pix_closed, box, nullptr); + l_int32 all_pixels; + pixCountPixels(rect_pix, &all_pixels, nullptr); + pixDestroy(&rect_pix); + if (music_pixels < kMinMusicPixelFraction * all_pixels) { + // False positive. Delete from the music mask. 
+ pixClearInRect(music_mask, box); + } + boxDestroy(&box); + } + l_int32 no_remaining_music; + boxaDestroy(&boxa); + pixZero(music_mask, &no_remaining_music); + if (no_remaining_music) { + pixDestroy(&music_mask); + } else { + pixSubtract(pix_vline, pix_vline, music_mask); + pixSubtract(pix_hline, pix_hline, music_mask); + // We may have deleted all the lines + pixZero(pix_vline, v_empty); + pixZero(pix_hline, h_empty); + } + } + return music_mask; +} + +// Most of the heavy lifting of line finding. Given src_pix and its separate +// resolution, returns image masks: +// pix_vline candidate vertical lines. +// pix_non_vline pixels that didn't look like vertical lines. +// pix_hline candidate horizontal lines. +// pix_non_hline pixels that didn't look like horizontal lines. +// pix_intersections pixels where vertical and horizontal lines meet. +// pix_music_mask candidate music staves. +// This function promises to initialize all the output (2nd level) pointers, +// but any of the returns that are empty will be nullptr on output. +// None of the input (1st level) pointers may be nullptr except pix_music_mask, +// which will disable music detection, and pixa_display. 
+void LineFinder::GetLineMasks(int resolution, Pix* src_pix, + Pix** pix_vline, Pix** pix_non_vline, + Pix** pix_hline, Pix** pix_non_hline, + Pix** pix_intersections, Pix** pix_music_mask, + Pixa* pixa_display) { + Pix* pix_closed = nullptr; + Pix* pix_hollow = nullptr; + + int max_line_width = resolution / kThinLineFraction; + int min_line_length = resolution / kMinLineLengthFraction; + if (pixa_display != nullptr) { + tprintf("Image resolution = %d, max line width = %d, min length=%d\n", + resolution, max_line_width, min_line_length); + } + int closing_brick = max_line_width / 3; + +// only use opencl if compiled w/ OpenCL and selected device is opencl +#ifdef USE_OPENCL + if (OpenclDevice::selectedDeviceIsOpenCL()) { + // OpenCL pixGetLines Operation + int clStatus = OpenclDevice::initMorphCLAllocations(pixGetWpl(src_pix), + pixGetHeight(src_pix), + src_pix); + bool getpixclosed = pix_music_mask != nullptr; + OpenclDevice::pixGetLinesCL(nullptr, src_pix, pix_vline, pix_hline, + &pix_closed, getpixclosed, closing_brick, + closing_brick, max_line_width, max_line_width, + min_line_length, min_line_length); + } else { +#endif + // Close up small holes, making it less likely that false alarms are found + // in thickened text (as it will become more solid) and also smoothing over + // some line breaks and nicks in the edges of the lines. + pix_closed = pixCloseBrick(nullptr, src_pix, closing_brick, closing_brick); + if (pixa_display != nullptr) + pixaAddPix(pixa_display, pix_closed, L_CLONE); + // Open up with a big box to detect solid areas, which can then be subtracted. + // This is very generous and will leave in even quite wide lines. 
+ Pix* pix_solid = pixOpenBrick(nullptr, pix_closed, max_line_width, + max_line_width); + if (pixa_display != nullptr) + pixaAddPix(pixa_display, pix_solid, L_CLONE); + pix_hollow = pixSubtract(nullptr, pix_closed, pix_solid); + + pixDestroy(&pix_solid); + + // Now open up in both directions independently to find lines of at least + // 1 inch/kMinLineLengthFraction in length. + if (pixa_display != nullptr) + pixaAddPix(pixa_display, pix_hollow, L_CLONE); + *pix_vline = pixOpenBrick(nullptr, pix_hollow, 1, min_line_length); + *pix_hline = pixOpenBrick(nullptr, pix_hollow, min_line_length, 1); + + pixDestroy(&pix_hollow); +#ifdef USE_OPENCL + } +#endif + + // Lines are sufficiently rare, that it is worth checking for a zero image. + l_int32 v_empty = 0; + l_int32 h_empty = 0; + pixZero(*pix_vline, &v_empty); + pixZero(*pix_hline, &h_empty); + if (pix_music_mask != nullptr) { + if (!v_empty && !h_empty) { + *pix_music_mask = FilterMusic(resolution, pix_closed, + *pix_vline, *pix_hline, + &v_empty, &h_empty); + } else { + *pix_music_mask = nullptr; + } + } + pixDestroy(&pix_closed); + Pix* pix_nonlines = nullptr; + *pix_intersections = nullptr; + Pix* extra_non_hlines = nullptr; + if (!v_empty) { + // Subtract both line candidates from the source to get definite non-lines. + pix_nonlines = pixSubtract(nullptr, src_pix, *pix_vline); + if (!h_empty) { + pixSubtract(pix_nonlines, pix_nonlines, *pix_hline); + // Intersections are a useful indicator for likelihood of being a line. + *pix_intersections = pixAnd(nullptr, *pix_vline, *pix_hline); + // Candidate vlines are not hlines (apart from the intersections) + // and vice versa. + extra_non_hlines = pixSubtract(nullptr, *pix_vline, *pix_intersections); + } + *pix_non_vline = pixErodeBrick(nullptr, pix_nonlines, kMaxLineResidue, 1); + pixSeedfillBinary(*pix_non_vline, *pix_non_vline, pix_nonlines, 8); + if (!h_empty) { + // Candidate hlines are not vlines. 
+ pixOr(*pix_non_vline, *pix_non_vline, *pix_hline); + pixSubtract(*pix_non_vline, *pix_non_vline, *pix_intersections); + } + if (!FilterFalsePositives(resolution, *pix_non_vline, *pix_intersections, + *pix_vline)) + pixDestroy(pix_vline); // No candidates left. + } else { + // No vertical lines. + pixDestroy(pix_vline); + *pix_non_vline = nullptr; + if (!h_empty) { + pix_nonlines = pixSubtract(nullptr, src_pix, *pix_hline); + } + } + if (h_empty) { + pixDestroy(pix_hline); + *pix_non_hline = nullptr; + if (v_empty) { + return; + } + } else { + *pix_non_hline = pixErodeBrick(nullptr, pix_nonlines, 1, kMaxLineResidue); + pixSeedfillBinary(*pix_non_hline, *pix_non_hline, pix_nonlines, 8); + if (extra_non_hlines != nullptr) { + pixOr(*pix_non_hline, *pix_non_hline, extra_non_hlines); + pixDestroy(&extra_non_hlines); + } + if (!FilterFalsePositives(resolution, *pix_non_hline, *pix_intersections, + *pix_hline)) + pixDestroy(pix_hline); // No candidates left. + } + if (pixa_display != nullptr) { + if (*pix_vline != nullptr) pixaAddPix(pixa_display, *pix_vline, L_CLONE); + if (*pix_hline != nullptr) pixaAddPix(pixa_display, *pix_hline, L_CLONE); + if (pix_nonlines != nullptr) pixaAddPix(pixa_display, pix_nonlines, L_CLONE); + if (*pix_non_vline != nullptr) + pixaAddPix(pixa_display, *pix_non_vline, L_CLONE); + if (*pix_non_hline != nullptr) + pixaAddPix(pixa_display, *pix_non_hline, L_CLONE); + if (*pix_intersections != nullptr) + pixaAddPix(pixa_display, *pix_intersections, L_CLONE); + if (pix_music_mask != nullptr && *pix_music_mask != nullptr) + pixaAddPix(pixa_display, *pix_music_mask, L_CLONE); + } + pixDestroy(&pix_nonlines); +} + +// Returns a list of boxes corresponding to the candidate line segments. Sets +// the line_crossings member of the boxes so we can later determine the number +// of intersections touched by a full line. 
+void LineFinder::GetLineBoxes(bool horizontal_lines, + Pix* pix_lines, Pix* pix_intersections, + C_BLOB_LIST* line_cblobs, + BLOBNBOX_LIST* line_bblobs) { + // Put a single pixel crack in every line at an arbitrary spacing, + // so they break up and the bounding boxes can be used to get the + // direction accurately enough without needing outlines. + int wpl = pixGetWpl(pix_lines); + int width = pixGetWidth(pix_lines); + int height = pixGetHeight(pix_lines); + l_uint32* data = pixGetData(pix_lines); + if (horizontal_lines) { + for (int y = 0; y < height; ++y, data += wpl) { + for (int x = kCrackSpacing; x < width; x += kCrackSpacing) { + CLEAR_DATA_BIT(data, x); + } + } + } else { + for (int y = kCrackSpacing; y < height; y += kCrackSpacing) { + memset(data + wpl * y, 0, wpl * sizeof(*data)); + } + } + // Get the individual connected components + Boxa* boxa = pixConnComp(pix_lines, nullptr, 8); + ConvertBoxaToBlobs(width, height, &boxa, line_cblobs); + // Make the BLOBNBOXes from the C_BLOBs. + C_BLOB_IT blob_it(line_cblobs); + BLOBNBOX_IT bbox_it(line_bblobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + C_BLOB* cblob = blob_it.data(); + auto* bblob = new BLOBNBOX(cblob); + bbox_it.add_to_end(bblob); + // Determine whether the line segment touches two intersections. + const TBOX& bbox = bblob->bounding_box(); + Box* box = boxCreate(bbox.left(), bbox.bottom(), + bbox.width(), bbox.height()); + bblob->set_line_crossings(NumTouchingIntersections(box, pix_intersections)); + boxDestroy(&box); + // Transform the bounding box prior to finding lines. To save writing + // two line finders, flip x and y for horizontal lines and re-use the + // tab-stop detection code. For vertical lines we still have to flip the + // y-coordinates to switch from leptonica coords to tesseract coords. 
+ if (horizontal_lines) { + // Note that we have Leptonica coords stored in a Tesseract box, so that + // bbox.bottom(), being the MIN y coord, is actually the top, so to get + // back to Leptonica coords in RemoveUnusedLineSegments, we have to + // use height - box.right() as the top, which looks very odd. + TBOX new_box(height - bbox.top(), bbox.left(), + height - bbox.bottom(), bbox.right()); + bblob->set_bounding_box(new_box); + } else { + TBOX new_box(bbox.left(), height - bbox.top(), + bbox.right(), height - bbox.bottom()); + bblob->set_bounding_box(new_box); + } + } +} + +} // namespace tesseract. diff --git a/tesseract/src/textord/linefind.h b/tesseract/src/textord/linefind.h new file mode 100644 index 00000000..93b58e1f --- /dev/null +++ b/tesseract/src/textord/linefind.h @@ -0,0 +1,149 @@ +/////////////////////////////////////////////////////////////////////// +// File: linefind.h +// Description: Class to find vertical lines in an image and create +// a corresponding list of empty blobs. +// Author: Ray Smith +// Created: Thu Mar 20 09:49:01 PDT 2008 +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_TEXTORD_LINEFIND_H_
#define TESSERACT_TEXTORD_LINEFIND_H_

struct Boxa;
struct Pix;
struct Pixa;

namespace tesseract {

class TabVector_LIST;
class C_BLOB_LIST;
class BLOBNBOX_LIST;
class ICOORD;

/**
 * The LineFinder class is a simple static function wrapper class that mainly
 * exposes the FindAndRemoveLines function.
 */
class LineFinder {
 public:
  /**
   * Finds vertical and horizontal line objects in the given pix and removes
   * them.
   *
   * Uses the given resolution to determine size thresholds instead of any
   * that may be present in the pix.
   *
   * The output vertical_x and vertical_y contain a sum of the output vectors,
   * thereby giving the mean vertical direction.
   *
   * If pix_music_mask != nullptr, and music is detected, a mask of the staves
   * and anything that is connected (bars, notes etc.) will be returned in
   * pix_music_mask, the mask subtracted from pix, and the lines will not
   * appear in v_lines or h_lines.
   *
   * The output vectors are owned by the list and Frozen (cannot refit) by
   * having no boxes, as there is no need to refit or merge separator lines.
   *
   * The detected lines are removed from the pix.
   */
  static void FindAndRemoveLines(int resolution, bool debug, Pix* pix,
                                 int* vertical_x, int* vertical_y,
                                 Pix** pix_music_mask,
                                 TabVector_LIST* v_lines,
                                 TabVector_LIST* h_lines);

  /**
   * Converts the Boxa array to a list of C_BLOB, getting rid of severely
   * overlapping outlines and those that are children of a bigger one.
   *
   * The output is a list of C_BLOBs that are owned by the list.
   *
   * The C_OUTLINEs in the C_BLOBs contain no outline data - just empty
   * bounding boxes. The Boxa is consumed and destroyed.
   */
  static void ConvertBoxaToBlobs(int image_width, int image_height,
                                 Boxa** boxes, C_BLOB_LIST* blobs);

 private:
  // Finds vertical line objects in pix_vline and removes them from src_pix.
  // Uses the given resolution to determine size thresholds instead of any
  // that may be present in the pix.
  // The output vertical_x and vertical_y contain a sum of the output vectors,
  // thereby giving the mean vertical direction.
  // The output vectors are owned by the list and Frozen (cannot refit) by
  // having no boxes, as there is no need to refit or merge separator lines.
  // If no good lines are found, pix_vline is destroyed.
  static void FindAndRemoveVLines(int resolution,
                                  Pix* pix_intersections,
                                  int* vertical_x, int* vertical_y,
                                  Pix** pix_vline, Pix* pix_non_vline,
                                  Pix* src_pix, TabVector_LIST* vectors);


  // Finds horizontal line objects in pix_hline and removes them from src_pix.
  // Uses the given resolution to determine size thresholds instead of any
  // that may be present in the pix.
  // Unlike FindAndRemoveVLines, vertical_x and vertical_y are passed by value
  // here, so they are inputs rather than outputs (presumably the vertical
  // direction found by FindAndRemoveVLines - confirm against linefind.cpp).
  // The output vectors are owned by the list and Frozen (cannot refit) by
  // having no boxes, as there is no need to refit or merge separator lines.
  // If no good lines are found, pix_hline is destroyed.
  static void FindAndRemoveHLines(int resolution,
                                  Pix* pix_intersections,
                                  int vertical_x, int vertical_y,
                                  Pix** pix_hline, Pix* pix_non_hline,
                                  Pix* src_pix, TabVector_LIST* vectors);

  // Finds vertical lines in the given list of BLOBNBOXes. bleft and tright
  // are the bounds of the image on which the input line_bblobs were found.
  // The input line_bblobs list is const really.
  // The output vertical_x and vertical_y are the total of all the vectors.
  // The output list of TabVector makes no reference to the input BLOBNBOXes.
  static void FindLineVectors(const ICOORD& bleft, const ICOORD& tright,
                              BLOBNBOX_LIST* line_bblobs,
                              int* vertical_x, int* vertical_y,
                              TabVector_LIST* vectors);

  // Most of the heavy lifting of line finding. Given src_pix and its separate
  // resolution, returns image masks:
  //   pix_vline           candidate vertical lines.
  //   pix_non_vline       pixels that didn't look like vertical lines.
  //   pix_hline           candidate horizontal lines.
  //   pix_non_hline       pixels that didn't look like horizontal lines.
  //   pix_intersections   pixels where vertical and horizontal lines meet.
  //   pix_music_mask      candidate music staves.
  // This function promises to initialize all the output (2nd level) pointers,
  // but any of the returns that are empty will be nullptr on output.
  // None of the input (1st level) pointers may be nullptr except pix_music_mask,
  // which will disable music detection, and pixa_display, which is for debug.
  static void GetLineMasks(int resolution, Pix* src_pix,
                           Pix** pix_vline, Pix** pix_non_vline,
                           Pix** pix_hline, Pix** pix_non_hline,
                           Pix** pix_intersections, Pix** pix_music_mask,
                           Pixa* pixa_display);

  // Returns a list of boxes corresponding to the candidate line segments. Sets
  // the line_crossings member of the boxes so we can later determine the number
  // of intersections touched by a full line.
  static void GetLineBoxes(bool horizontal_lines,
                           Pix* pix_lines, Pix* pix_intersections,
                           C_BLOB_LIST* line_cblobs,
                           BLOBNBOX_LIST* line_bblobs);
};

}  // namespace tesseract.

#endif  // TESSERACT_TEXTORD_LINEFIND_H_
diff --git a/tesseract/src/textord/makerow.cpp b/tesseract/src/textord/makerow.cpp
new file mode 100644
index 00000000..0df8243a
--- /dev/null
+++ b/tesseract/src/textord/makerow.cpp
@@ -0,0 +1,2673 @@
/**********************************************************************
 * File:        makerow.cpp  (Formerly makerows.c)
 * Description: Code to arrange blobs into rows of text.
+ * Author: Ray Smith + * + * (C) Copyright 1992, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + + // Include automatically generated configuration file if running autoconf. +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "makerow.h" + +#include "blobbox.h" +#include "ccstruct.h" +#include "detlinefit.h" +#include "statistc.h" +#include "drawtord.h" +#include "blkocc.h" +#include "sortflts.h" +#include "oldbasel.h" +#include "textord.h" +#include "tordmain.h" +#include "underlin.h" +#include "tprintf.h" +#include "tovars.h" + +#include <algorithm> +#include <vector> // for std::vector + +namespace tesseract { + +BOOL_VAR(textord_heavy_nr, false, "Vigorously remove noise"); +BOOL_VAR(textord_show_initial_rows, false, "Display row accumulation"); +BOOL_VAR(textord_show_parallel_rows, false, "Display page correlated rows"); +BOOL_VAR(textord_show_expanded_rows, false, "Display rows after expanding"); +BOOL_VAR(textord_show_final_rows, false, "Display rows after final fitting"); +BOOL_VAR(textord_show_final_blobs, false, "Display blob bounds after pre-ass"); +BOOL_VAR(textord_test_landscape, false, "Tests refer to land/port"); +BOOL_VAR(textord_parallel_baselines, true, "Force parallel baselines"); +BOOL_VAR(textord_straight_baselines, false, "Force straight baselines"); +BOOL_VAR(textord_old_baselines, true, "Use old baseline algorithm"); 
+BOOL_VAR(textord_old_xheight, false, "Use old xheight algorithm"); +BOOL_VAR(textord_fix_xheight_bug, true, "Use spline baseline"); +BOOL_VAR(textord_fix_makerow_bug, true, "Prevent multiple baselines"); +BOOL_VAR(textord_debug_xheights, false, "Test xheight algorithms"); +static BOOL_VAR(textord_biased_skewcalc, true, "Bias skew estimates with line length"); +static BOOL_VAR(textord_interpolating_skew, true, "Interpolate across gaps"); +static INT_VAR(textord_skewsmooth_offset, 4, "For smooth factor"); +static INT_VAR(textord_skewsmooth_offset2, 1, "For smooth factor"); +INT_VAR(textord_test_x, -INT32_MAX, "coord of test pt"); +INT_VAR(textord_test_y, -INT32_MAX, "coord of test pt"); +INT_VAR(textord_min_blobs_in_row, 4, "Min blobs before gradient counted"); +INT_VAR(textord_spline_minblobs, 8, "Min blobs in each spline segment"); +INT_VAR(textord_spline_medianwin, 6, "Size of window for spline segmentation"); +static INT_VAR(textord_max_blob_overlaps, 4, + "Max number of blobs a big blob can overlap"); +INT_VAR(textord_min_xheight, 10, "Min credible pixel xheight"); +double_VAR(textord_spline_shift_fraction, 0.02, + "Fraction of line spacing for quad"); +double_VAR(textord_spline_outlier_fraction, 0.1, + "Fraction of line spacing for outlier"); +double_VAR(textord_skew_ile, 0.5, "Ile of gradients for page skew"); +double_VAR(textord_skew_lag, 0.02, "Lag for skew on row accumulation"); +double_VAR(textord_linespace_iqrlimit, 0.2, "Max iqr/median for linespace"); +double_VAR(textord_width_limit, 8, "Max width of blobs to make rows"); +double_VAR(textord_chop_width, 1.5, "Max width before chopping"); +static double_VAR(textord_expansion_factor, 1.0, + "Factor to expand rows by in expand_rows"); +static double_VAR(textord_overlap_x, 0.375, "Fraction of linespace for good overlap"); +double_VAR(textord_minxh, 0.25, "fraction of linesize for min xheight"); +double_VAR(textord_min_linesize, 1.25, "* blob height for initial linesize"); 
+double_VAR(textord_excess_blobsize, 1.3, + "New row made if blob makes row this big"); +double_VAR(textord_occupancy_threshold, 0.4, "Fraction of neighbourhood"); +double_VAR(textord_underline_width, 2.0, "Multiple of line_size for underline"); +double_VAR(textord_min_blob_height_fraction, 0.75, + "Min blob height/top to include blob top into xheight stats"); +double_VAR(textord_xheight_mode_fraction, 0.4, + "Min pile height to make xheight"); +double_VAR(textord_ascheight_mode_fraction, 0.08, + "Min pile height to make ascheight"); +static double_VAR(textord_descheight_mode_fraction, 0.08, + "Min pile height to make descheight"); +double_VAR(textord_ascx_ratio_min, 1.25, "Min cap/xheight"); +double_VAR(textord_ascx_ratio_max, 1.8, "Max cap/xheight"); +double_VAR(textord_descx_ratio_min, 0.25, "Min desc/xheight"); +double_VAR(textord_descx_ratio_max, 0.6, "Max desc/xheight"); +double_VAR(textord_xheight_error_margin, 0.1, "Accepted variation"); +INT_VAR(textord_lms_line_trials, 12, "Number of linew fits to do"); +BOOL_VAR(textord_new_initial_xheight, true, "Use test xheight mechanism"); +BOOL_VAR(textord_debug_blob, false, "Print test blob information"); + +#define MAX_HEIGHT_MODES 12 + +const int kMinLeaderCount = 5; + +// Factored-out helper to build a single row from a list of blobs. +// Returns the mean blob size. +static float MakeRowFromBlobs(float line_size, + BLOBNBOX_IT* blob_it, TO_ROW_IT* row_it) { + blob_it->sort(blob_x_order); + blob_it->move_to_first(); + TO_ROW* row = nullptr; + float total_size = 0.0f; + int blob_count = 0; + // Add all the blobs to a single TO_ROW. 
+ for (; !blob_it->empty(); blob_it->forward()) { + BLOBNBOX* blob = blob_it->extract(); + int top = blob->bounding_box().top(); + int bottom = blob->bounding_box().bottom(); + if (row == nullptr) { + row = new TO_ROW(blob, top, bottom, line_size); + row_it->add_before_then_move(row); + } else { + row->add_blob(blob, top, bottom, line_size); + } + total_size += top - bottom; + ++blob_count; + } + return blob_count > 0 ? total_size / blob_count : total_size; +} + +// Helper to make a row using the children of a single blob. +// Returns the mean size of the blobs created. +static float MakeRowFromSubBlobs(TO_BLOCK* block, C_BLOB* blob, + TO_ROW_IT* row_it) { + // The blobs made from the children will go in the small_blobs list. + BLOBNBOX_IT bb_it(&block->small_blobs); + C_OUTLINE_IT ol_it(blob->out_list()); + // Get the children. + ol_it.set_to_list(ol_it.data()->child()); + if (ol_it.empty()) + return 0.0f; + for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) { + // Deep copy the child outline and use that to make a blob. + blob = new C_BLOB(C_OUTLINE::deep_copy(ol_it.data())); + // Correct direction as needed. + blob->CheckInverseFlagAndDirection(); + auto* bbox = new BLOBNBOX(blob); + bb_it.add_after_then_move(bbox); + } + // Now we can make a row from the blobs. + return MakeRowFromBlobs(block->line_size, &bb_it, row_it); +} + +/** + * @name make_single_row + * + * Arrange the blobs into a single row... well actually, if there is + * only a single blob, it makes 2 rows, in case the top-level blob + * is a container of the real blobs to recognize. + */ +float make_single_row(ICOORD page_tr, bool allow_sub_blobs, + TO_BLOCK* block, TO_BLOCK_LIST* blocks) { + BLOBNBOX_IT blob_it = &block->blobs; + TO_ROW_IT row_it = block->get_rows(); + + // Include all the small blobs and large blobs. 
+ blob_it.add_list_after(&block->small_blobs); + blob_it.add_list_after(&block->noise_blobs); + blob_it.add_list_after(&block->large_blobs); + if (block->blobs.singleton() && allow_sub_blobs) { + blob_it.move_to_first(); + float size = MakeRowFromSubBlobs(block, blob_it.data()->cblob(), &row_it); + if (size > block->line_size) + block->line_size = size; + } else if (block->blobs.empty()) { + // Make a fake blob. + C_BLOB* blob = C_BLOB::FakeBlob(block->block->pdblk.bounding_box()); + // The blobnbox owns the blob. + auto* bblob = new BLOBNBOX(blob); + blob_it.add_after_then_move(bblob); + } + MakeRowFromBlobs(block->line_size, &blob_it, &row_it); + // Fit an LMS line to the rows. + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) + fit_lms_line(row_it.data()); + float gradient; + float fit_error; + // Compute the skew based on the fitted line. + compute_page_skew(blocks, gradient, fit_error); + return gradient; +} + +/** + * @name make_rows + * + * Arrange the blobs into rows. + */ +float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) { + float port_m; // global skew + float port_err; // global noise + TO_BLOCK_IT block_it; // iterator + + block_it.set_to_list(port_blocks); + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) + make_initial_textrows(page_tr, block_it.data(), FCOORD(1.0f, 0.0f), + !textord_test_landscape); + // compute globally + compute_page_skew(port_blocks, port_m, port_err); + block_it.set_to_list(port_blocks); + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + cleanup_rows_making(page_tr, block_it.data(), port_m, FCOORD(1.0f, 0.0f), + block_it.data()->block->pdblk.bounding_box().left(), + !textord_test_landscape); + } + return port_m; // global skew +} + +/** + * @name make_initial_textrows + * + * Arrange the good blobs into rows of text. 
+ */ +void make_initial_textrows( //find lines + ICOORD page_tr, + TO_BLOCK* block, //block to do + FCOORD rotation, //for drawing + bool testing_on //correct orientation +) { + TO_ROW_IT row_it = block->get_rows (); + +#ifndef GRAPHICS_DISABLED + ScrollView::Color colour; //of row + + if (textord_show_initial_rows && testing_on) { + if (to_win == nullptr) + create_to_win(page_tr); + } +#endif + //guess skew + assign_blobs_to_rows (block, nullptr, 0, true, true, textord_show_initial_rows && testing_on); + row_it.move_to_first (); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) + fit_lms_line (row_it.data ()); +#ifndef GRAPHICS_DISABLED + if (textord_show_initial_rows && testing_on) { + colour = ScrollView::RED; + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + plot_to_row (row_it.data (), colour, rotation); + colour = static_cast<ScrollView::Color>(colour + 1); + if (colour > ScrollView::MAGENTA) + colour = ScrollView::RED; + } + } +#endif +} + + +/** + * @name fit_lms_line + * + * Fit an LMS line to a row. + */ +void fit_lms_line(TO_ROW *row) { + float m, c; // fitted line + tesseract::DetLineFit lms; + BLOBNBOX_IT blob_it = row->blob_list(); + + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + const TBOX& box = blob_it.data()->bounding_box(); + lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom())); + } + double error = lms.Fit(&m, &c); + row->set_line(m, c, error); +} + + +/** + * @name compute_page_skew + * + * Compute the skew over a full page by averaging the gradients over + * all the lines. Get the error of the same row. 
+ */ +void compute_page_skew( //get average gradient + TO_BLOCK_LIST *blocks, //list of blocks + float &page_m, //average gradient + float &page_err //average error + ) { + int32_t row_count; //total rows + int32_t blob_count; //total_blobs + int32_t row_err; //integer error + int32_t row_index; //of total + TO_ROW *row; //current row + TO_BLOCK_IT block_it = blocks; //iterator + + row_count = 0; + blob_count = 0; + for (block_it.mark_cycle_pt (); !block_it.cycled_list (); + block_it.forward ()) { + POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block(); + if (pb != nullptr && !pb->IsText()) + continue; // Pretend non-text blocks don't exist. + row_count += block_it.data ()->get_rows ()->length (); + //count up rows + TO_ROW_IT row_it(block_it.data()->get_rows()); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) + blob_count += row_it.data ()->blob_list ()->length (); + } + if (row_count == 0) { + page_m = 0.0f; + page_err = 0.0f; + return; + } + // of rows + std::vector<float> gradients(blob_count); + // of rows + std::vector<float> errors(blob_count); + + row_index = 0; + for (block_it.mark_cycle_pt (); !block_it.cycled_list (); + block_it.forward ()) { + POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block(); + if (pb != nullptr && !pb->IsText()) + continue; // Pretend non-text blocks don't exist. 
+ TO_ROW_IT row_it(block_it.data ()->get_rows()); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + blob_count = row->blob_list ()->length (); + row_err = static_cast<int32_t>(ceil (row->line_error ())); + if (row_err <= 0) + row_err = 1; + if (textord_biased_skewcalc) { + blob_count /= row_err; + for (blob_count /= row_err; blob_count > 0; blob_count--) { + gradients[row_index] = row->line_m (); + errors[row_index] = row->line_error (); + row_index++; + } + } + else if (blob_count >= textord_min_blobs_in_row) { + //get gradient + gradients[row_index] = row->line_m (); + errors[row_index] = row->line_error (); + row_index++; + } + } + } + if (row_index == 0) { + //desperate + for (block_it.mark_cycle_pt (); !block_it.cycled_list (); + block_it.forward ()) { + POLY_BLOCK* pb = block_it.data()->block->pdblk.poly_block(); + if (pb != nullptr && !pb->IsText()) + continue; // Pretend non-text blocks don't exist. + TO_ROW_IT row_it(block_it.data()->get_rows()); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); + row_it.forward ()) { + row = row_it.data (); + gradients[row_index] = row->line_m (); + errors[row_index] = row->line_error (); + row_index++; + } + } + } + row_count = row_index; + row_index = choose_nth_item (static_cast<int32_t>(row_count * textord_skew_ile), + &gradients[0], row_count); + page_m = gradients[row_index]; + row_index = choose_nth_item (static_cast<int32_t>(row_count * textord_skew_ile), + &errors[0], row_count); + page_err = errors[row_index]; +} + +const double kNoiseSize = 0.5; // Fraction of xheight. +const int kMinSize = 8; // Min pixels to be xheight. + +/** + * Return true if the dot looks like it is part of the i. + * Doesn't work for any other diacritical. + */ +static bool dot_of_i(BLOBNBOX* dot, BLOBNBOX* i, TO_ROW* row) { + const TBOX& ibox = i->bounding_box(); + const TBOX& dotbox = dot->bounding_box(); + + // Must overlap horizontally by enough and be high enough. 
+ int overlap = std::min(dotbox.right(), ibox.right()) - + std::max(dotbox.left(), ibox.left()); + if (ibox.height() <= 2 * dotbox.height() || + (overlap * 2 < ibox.width() && overlap < dotbox.width())) + return false; + + // If the i is tall and thin then it is good. + if (ibox.height() > ibox.width() * 2) + return true; // The i or ! must be tall and thin. + + // It might still be tall and thin, but it might be joined to something. + // So search the outline for a piece of large height close to the edges + // of the dot. + const double kHeightFraction = 0.6; + double target_height = std::min(dotbox.bottom(), ibox.top()); + target_height -= row->line_m()*dotbox.left() + row->line_c(); + target_height *= kHeightFraction; + int left_min = dotbox.left() - dotbox.width(); + int middle = (dotbox.left() + dotbox.right())/2; + int right_max = dotbox.right() + dotbox.width(); + int left_miny = 0; + int left_maxy = 0; + int right_miny = 0; + int right_maxy = 0; + bool found_left = false; + bool found_right = false; + bool in_left = false; + bool in_right = false; + C_BLOB* blob = i->cblob(); + C_OUTLINE_IT o_it = blob->out_list(); + for (o_it.mark_cycle_pt(); !o_it.cycled_list(); o_it.forward()) { + C_OUTLINE* outline = o_it.data(); + int length = outline->pathlength(); + ICOORD pos = outline->start_pos(); + for (int step = 0; step < length; pos += outline->step(step++)) { + int x = pos.x(); + int y = pos.y(); + if (x >= left_min && x < middle && !found_left) { + // We are in the left part so find min and max y. + if (in_left) { + if (y > left_maxy) left_maxy = y; + if (y < left_miny) left_miny = y; + } else { + left_maxy = left_miny = y; + in_left = true; + } + } else if (in_left) { + // We just left the left so look for size. + if (left_maxy - left_miny > target_height) { + if (found_right) + return true; + found_left = true; + } + in_left = false; + } + if (x <= right_max && x > middle && !found_right) { + // We are in the right part so find min and max y. 
+ if (in_right) { + if (y > right_maxy) right_maxy = y; + if (y < right_miny) right_miny = y; + } else { + right_maxy = right_miny = y; + in_right = true; + } + } else if (in_right) { + // We just left the right so look for size. + if (right_maxy - right_miny > target_height) { + if (found_left) + return true; + found_right = true; + } + in_right = false; + } + } + } + return false; +} + +void vigorous_noise_removal(TO_BLOCK* block) { + TO_ROW_IT row_it = block->get_rows (); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + TO_ROW* row = row_it.data(); + BLOBNBOX_IT b_it = row->blob_list(); + // Estimate the xheight on the row. + int max_height = 0; + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + BLOBNBOX* blob = b_it.data(); + if (blob->bounding_box().height() > max_height) + max_height = blob->bounding_box().height(); + } + STATS hstats(0, max_height + 1); + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + BLOBNBOX* blob = b_it.data(); + int height = blob->bounding_box().height(); + if (height >= kMinSize) + hstats.add(blob->bounding_box().height(), 1); + } + float xheight = hstats.median(); + // Delete small objects. + BLOBNBOX* prev = nullptr; + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + BLOBNBOX* blob = b_it.data(); + const TBOX& box = blob->bounding_box(); + if (box.height() < kNoiseSize * xheight) { + // Small so delete unless it looks like an i dot. + if (prev != nullptr) { + if (dot_of_i(blob, prev, row)) + continue; // Looks OK. + } + if (!b_it.at_last()) { + BLOBNBOX* next = b_it.data_relative(1); + if (dot_of_i(blob, next, row)) + continue; // Looks OK. + } + // It might be noise so get rid of it. + delete blob->cblob(); + delete b_it.extract(); + } else { + prev = blob; + } + } + } +} + +/** + * cleanup_rows_making + * + * Remove overlapping rows and fit all the blobs to what's left. 
+ */ +void cleanup_rows_making( //find lines + ICOORD page_tr, //top right + TO_BLOCK* block, //block to do + float gradient, //gradient to fit + FCOORD rotation, //for drawing + int32_t block_edge, //edge of block + bool testing_on //correct orientation +) { + //iterators + BLOBNBOX_IT blob_it = &block->blobs; + TO_ROW_IT row_it = block->get_rows (); + +#ifndef GRAPHICS_DISABLED + if (textord_show_parallel_rows && testing_on) { + if (to_win == nullptr) + create_to_win(page_tr); + } +#endif + //get row coords + fit_parallel_rows(block, + gradient, + rotation, + block_edge, + textord_show_parallel_rows && testing_on); + delete_non_dropout_rows(block, + gradient, + rotation, + block_edge, + textord_show_parallel_rows && testing_on); + expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on); + blob_it.set_to_list (&block->blobs); + row_it.set_to_list (block->get_rows ()); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) + blob_it.add_list_after (row_it.data ()->blob_list ()); + //give blobs back + assign_blobs_to_rows (block, &gradient, 1, false, false, false); + //now new rows must be genuine + blob_it.set_to_list (&block->blobs); + blob_it.add_list_after (&block->large_blobs); + assign_blobs_to_rows (block, &gradient, 2, true, true, false); + //safe to use big ones now + blob_it.set_to_list (&block->blobs); + //throw all blobs in + blob_it.add_list_after (&block->noise_blobs); + blob_it.add_list_after (&block->small_blobs); + assign_blobs_to_rows (block, &gradient, 3, false, false, false); +} + +/** + * delete_non_dropout_rows + * + * Compute the linespacing and offset. 
 */
void delete_non_dropout_rows(                   //find lines
                             TO_BLOCK* block,   //block to do
                             float gradient,    //global skew
                             FCOORD rotation,   //deskew vector
                             int32_t block_edge,  //left edge
                             bool testing_on    //correct orientation
) {
  // Overview: projects the block's blobs onto the y axis, derives an
  // occupation threshold and a dropout distance per scan line, then asks
  // find_best_dropout_row about every row. Rows it rejects are deleted after
  // handing their blobs to block->blobs. The final loop hands the blobs of
  // the remaining rows to block->blobs as well.
  TBOX block_box;                //deskewed block
  int32_t max_y;                   //in block
  int32_t min_y;
  int32_t line_index;              //of scan line
  int32_t line_count;              //no of scan lines
  int32_t distance;                //to drop-out
  int32_t xleft;                   //of block
  int32_t ybottom;                 //of block
  TO_ROW *row;                   //current row
  TO_ROW_IT row_it = block->get_rows ();
  BLOBNBOX_IT blob_it = &block->blobs;

  if (row_it.length () == 0)
    return;                      //empty block
  block_box = deskew_block_coords (block, gradient);
  xleft = block->block->pdblk.bounding_box ().left ();
  ybottom = block->block->pdblk.bounding_box ().bottom ();
  // Start one scan line outside the deskewed box, then widen the range so
  // every row intercept lies strictly inside [min_y, max_y].
  min_y = block_box.bottom () - 1;
  max_y = block_box.top () + 1;
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    line_index = static_cast<int32_t>(floor (row_it.data ()->intercept ()));
    if (line_index <= min_y)
      min_y = line_index - 1;
    if (line_index >= max_y)
      max_y = line_index + 1;
  }
  line_count = max_y - min_y + 1;
  if (line_count <= 0)
    return;                      //empty block
  // change in occupation; reused below as the thresholds array and then as
  // the dropout-distance array.
  std::vector<int32_t> deltas(line_count);
  // of pixel coords
  std::vector<int32_t> occupation(line_count);

  compute_line_occupation(block, gradient, min_y, max_y, &occupation[0], &deltas[0]);
  compute_occupation_threshold (static_cast<int32_t>(ceil (block->line_spacing *
    (tesseract::CCStruct::kDescenderFraction +
    tesseract::CCStruct::kAscenderFraction))),
    static_cast<int32_t>(ceil (block->line_spacing *
    (tesseract::CCStruct::kXHeightFraction +
    tesseract::CCStruct::kAscenderFraction))),
    max_y - min_y + 1, &occupation[0], &deltas[0]);
#ifndef GRAPHICS_DISABLED
  if (testing_on) {
    draw_occupation(xleft, ybottom, min_y, max_y, &occupation[0], &deltas[0]);
  }
#endif
  compute_dropout_distances(&occupation[0], &deltas[0], line_count);
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    line_index = static_cast<int32_t>(floor (row->intercept ()));
    // deltas now holds the signed dropout distance for each scan line.
    distance = deltas[line_index - min_y];
    if (find_best_dropout_row (row, distance, block->line_spacing / 2,
                               line_index, &row_it, testing_on)) {
#ifndef GRAPHICS_DISABLED
      if (testing_on)
        plot_parallel_row(row, gradient, block_edge,
                          ScrollView::WHITE, rotation);
#endif
      // Hand the blobs back to the block before the row is destroyed.
      blob_it.add_list_after (row_it.data ()->blob_list ());
      delete row_it.extract ();  //too far away
    }
  }
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    blob_it.add_list_after (row_it.data ()->blob_list ());
  }
}


/**
 * @name find_best_dropout_row
 *
 * Delete this row if it has a neighbour with better dropout characteristics.
 * true is returned if the row should be deleted.
 */
bool find_best_dropout_row(                    //find neighbours
                           TO_ROW* row,        //row to test
                           int32_t distance,   //dropout dist
                           float dist_limit,   //threshold distance
                           int32_t line_index, //index of row
                           TO_ROW_IT* row_it,  //current position
                           bool testing_on     //correct orientation
) {
  int32_t next_index;              // of neighbouring row
  int32_t row_offset;              //from current row
  int32_t abs_dist;                //absolute distance
  int8_t row_inc;                  //increment to row_index
  TO_ROW *next_row;              //neighbouring row

  if (testing_on)
    tprintf ("Row at %g(%g), dropout dist=%d,",
      row->intercept (), row->parallel_c (), distance);
  // The sign of distance picks the direction in which neighbours are
  // examined: forward through the list when negative, backward otherwise.
  if (distance < 0) {
    row_inc = 1;
    abs_dist = -distance;
  }
  else {
    row_inc = -1;
    abs_dist = distance;
  }
  if (abs_dist > dist_limit) {
    if (testing_on) {
      tprintf (" too far - deleting\n");
    }
    return true;
  }
  if ((distance < 0 && !row_it->at_last ())
  || (distance >= 0 && !row_it->at_first ())) {
    row_offset = row_inc;
    do {
      next_row = row_it->data_relative (row_offset);
      next_index = static_cast<int32_t>(floor (next_row->intercept ()));
      // A neighbour strictly between this row and the mirror position
      // line_index + 2 * distance beats this row outright.
      if ((distance < 0
        && next_index < line_index
        && next_index > line_index + distance + distance)
        || (distance >= 0
        && next_index > line_index
        && next_index < line_index + distance + distance)) {
        if (testing_on) {
          tprintf (" nearer neighbour (%d) at %g\n",
            line_index + distance - next_index,
            next_row->intercept ());
        }
        return true;             //other is nearer
      }
      // A neighbour at the same position, or exactly at the mirror position,
      // wins only if it is at least as believable.
      else if (next_index == line_index
      || next_index == line_index + distance + distance) {
        if (row->believability () <= next_row->believability ()) {
          if (testing_on) {
            tprintf (" equal but more believable at %g (%g/%g)\n",
              next_row->intercept (),
              row->believability (),
              next_row->believability ());
          }
          return true;           //other is more believable
        }
      }
      row_offset += row_inc;
    }
    // NOTE(review): when row_inc is -1, row_offset is negative, so the
    // length test below is always true; termination then relies on the
    // index comparison alone - confirm this is intended.
    while ((next_index == line_index
      || next_index == line_index + distance + distance)
      && row_offset < row_it->length ());
    if (testing_on)
      tprintf (" keeping\n");
  }
  return false;
}


/**
 * @name deskew_block_coords
 *
 * Compute the bounding box of all the blobs in the block
 * if they were deskewed without actually doing it.
+ */ +TBOX deskew_block_coords( //block box + TO_BLOCK *block, //block to do + float gradient //global skew + ) { + TBOX result; //block bounds + TBOX blob_box; //of block + FCOORD rotation; //deskew vector + float length; //of gradient vector + TO_ROW_IT row_it = block->get_rows (); + TO_ROW *row; //current row + BLOBNBOX *blob; //current blob + BLOBNBOX_IT blob_it; //iterator + + length = sqrt (gradient * gradient + 1); + rotation = FCOORD (1 / length, -gradient / length); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + blob_it.set_to_list (row->blob_list ()); + for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); + blob_it.forward ()) { + blob = blob_it.data (); + blob_box = blob->bounding_box (); + blob_box.rotate (rotation);//de-skew it + result += blob_box; + } + } + return result; +} + + +/** + * @name compute_line_occupation + * + * Compute the pixel projection back on the y axis given the global + * skew. Also compute the 1st derivative. 
+ */ +void compute_line_occupation( //project blobs + TO_BLOCK *block, //block to do + float gradient, //global skew + int32_t min_y, //min coord in block + int32_t max_y, //in block + int32_t *occupation, //output projection + int32_t *deltas //derivative + ) { + int32_t line_count; //maxy-miny+1 + int32_t line_index; //of scan line + int index; //array index for daft compilers + TO_ROW *row; //current row + TO_ROW_IT row_it = block->get_rows (); + BLOBNBOX *blob; //current blob + BLOBNBOX_IT blob_it; //iterator + float length; //of skew vector + TBOX blob_box; //bounding box + FCOORD rotation; //inverse of skew + + line_count = max_y - min_y + 1; + length = sqrt (gradient * gradient + 1); + rotation = FCOORD (1 / length, -gradient / length); + for (line_index = 0; line_index < line_count; line_index++) + deltas[line_index] = 0; + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + blob_it.set_to_list (row->blob_list ()); + for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); + blob_it.forward ()) { + blob = blob_it.data (); + blob_box = blob->bounding_box (); + blob_box.rotate (rotation);//de-skew it + int32_t width = blob_box.right() - blob_box.left(); + index = blob_box.bottom() - min_y; + ASSERT_HOST(index >= 0 && index < line_count); + // count transitions + deltas[index] += width; + index = blob_box.top() - min_y; + ASSERT_HOST(index >= 0 && index < line_count); + deltas[index] -= width; + } + } + occupation[0] = deltas[0]; + for (line_index = 1; line_index < line_count; line_index++) + occupation[line_index] = occupation[line_index - 1] + deltas[line_index]; +} + + +/** + * compute_occupation_threshold + * + * Compute thresholds for textline or not for the occupation array. 
 */
void compute_occupation_threshold(                    //project blobs
                                  int32_t low_window,   //below result point
                                  int32_t high_window,  //above result point
                                  int32_t line_count,   //array sizes
                                  int32_t *occupation,  //input projection
                                  int32_t *thresholds   //output thresholds
                                 ) {
  // Every threshold written is (sum - min_occ) / divisor + min_occ, where
  // sum and min_occ are the sum and minimum of occupation over the current
  // window of low_window + high_window entries (or over the whole array when
  // the window does not fit), using integer arithmetic.
  int32_t line_index;              //of thresholds line
  int32_t low_index;               //in occupation
  int32_t high_index;              //in occupation
  int32_t sum;                     //current average
  int32_t divisor;                 //to get thresholds
  int32_t min_index;               //of min occ
  int32_t min_occ;                 //min in locality
  int32_t test_index;              //for finding min

  divisor =
    static_cast<int32_t>(ceil ((low_window + high_window) / textord_occupancy_threshold));
  if (low_window + high_window < line_count) {
    // Prime the window: sum the first low_window + high_window entries.
    for (sum = 0, high_index = 0; high_index < low_window; high_index++)
      sum += occupation[high_index];
    for (low_index = 0; low_index < high_window; low_index++, high_index++)
      sum += occupation[high_index];
    min_occ = occupation[0];
    min_index = 0;
    for (test_index = 1; test_index < high_index; test_index++) {
      if (occupation[test_index] <= min_occ) {
        min_occ = occupation[test_index];
        min_index = test_index;  //find min in region
      }
    }
    // The first low_window lines all share the threshold of the first window.
    for (line_index = 0; line_index < low_window; line_index++)
      thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
    // Slide the window one entry at a time, writing one threshold per step.
    for (low_index = 0; high_index < line_count; low_index++, high_index++) {
      sum -= occupation[low_index];
      sum += occupation[high_index];
      if (occupation[high_index] <= min_occ) {
        //find min in region
        min_occ = occupation[high_index];
        min_index = high_index;
      }
      //lost min from region
      if (min_index <= low_index) {
        // The old minimum slid out of the window, so rescan for a new one.
        min_occ = occupation[low_index + 1];
        min_index = low_index + 1;
        for (test_index = low_index + 2; test_index <= high_index;
             test_index++) {
          if (occupation[test_index] <= min_occ) {
            min_occ = occupation[test_index];
            //find min in region
            min_index = test_index;
          }
        }
      }
      thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
    }
  }
  else {
    // Window covers the whole array: use the global sum and minimum.
    min_occ = occupation[0];
    min_index = 0;
    for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
      if (occupation[low_index] < min_occ) {
        min_occ = occupation[low_index];
        min_index = low_index;
      }
      sum += occupation[low_index];
    }
    line_index = 0;
  }
  // Fill whatever remains with the last computed value.
  for (; line_index < line_count; line_index++)
    thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
  //same out to end
}


/**
 * @name compute_dropout_distances
 *
 * Compute the distance from each coordinate to the nearest dropout.
 */
void compute_dropout_distances(                    //project blobs
                               int32_t *occupation,  //input projection
                               int32_t *thresholds,  //output thresholds
                               int32_t line_count    //array sizes
                              ) {
  // thresholds is both input and output: each entry is read as a threshold
  // and then overwritten in place with a signed distance. Entries counted
  // forward from the previous transition are negative; entries back-filled
  // from the next transition are positive.
  int32_t line_index;              //of thresholds line
  int32_t distance;                //from prev dropout
  int32_t next_dist;               //to next dropout
  int32_t back_index;              //for back filling
  int32_t prev_threshold;          //before overwrite

  // Starting at -line_count marks the leading lines as very far from any
  // previous transition.
  distance = -line_count;
  line_index = 0;
  do {
    do {
      distance--;
      // Keep the threshold being overwritten; the loop test needs it for
      // the previous line.
      prev_threshold = thresholds[line_index];
                                 //distance from prev
      thresholds[line_index] = distance;
      line_index++;
    }
    // Stop at a line that reaches its threshold when the line before it
    // was below its own threshold.
    while (line_index < line_count
      && (occupation[line_index] < thresholds[line_index]
      || occupation[line_index - 1] >= prev_threshold));
    if (line_index < line_count) {
      // Walk backwards, replacing the negative distances by positive ones
      // for as long as the positive one is smaller in magnitude.
      back_index = line_index - 1;
      next_dist = 1;
      while (next_dist < -distance && back_index >= 0) {
        thresholds[back_index] = next_dist;
        back_index--;
        next_dist++;
        distance++;
      }
      distance = 1;
    }
  }
  while (line_index < line_count);
}


/**
 * @name expand_rows
 *
 * Expand each row to the least of its allowed size and touching its
 * neighbours. If the expansion would entirely swallow a neighbouring row
 * then do so.
*/
// Overview of the steps performed below:
//  1. adjust_row_limits() re-centres every row's limits on its intercept.
//  2. Row statistics are computed (before or after blob assignment,
//     depending on textord_new_initial_xheight), blobs are assigned to rows
//     and the rows are re-fitted parallel to the given gradient.
//  3. Starting from the last row in the list and moving backwards, each row
//     is allowed to grow down to y_bottom and up to y_top (derived from the
//     block line_size and textord_expansion_factor). A neighbour lying
//     wholly inside the allowed range is merged into the row and deleted;
//     a neighbour that only partly intrudes truncates the limit instead.
// Returns early, leaving the block as it is at that point, if it has no rows.
void expand_rows(                    //find lines
    ICOORD page_tr,                  //top right
    TO_BLOCK* block,                 //block to do
    float gradient,                  //gradient to fit
    FCOORD rotation,                 //for drawing
    int32_t block_edge,              //edge of block
    bool testing_on                  //correct orientation
) {
  bool swallowed_row;            //eaten a neighbour
  float y_max, y_min;            //new row limits
  float y_bottom, y_top;         //allowed limits
  TO_ROW *test_row;              //next row
  TO_ROW *row;                   //current row
                                 //iterators
  BLOBNBOX_IT blob_it = &block->blobs;
  TO_ROW_IT row_it = block->get_rows ();

#ifndef GRAPHICS_DISABLED
  if (textord_show_expanded_rows && testing_on) {
    if (to_win == nullptr)
      create_to_win(page_tr);
  }
#endif

  adjust_row_limits(block);      //shift min,max.
  if (textord_new_initial_xheight) {
    if (block->get_rows ()->length () == 0)
      return;
    compute_row_stats(block, textord_show_expanded_rows && testing_on);
  }
  assign_blobs_to_rows (block, &gradient, 4, true, false, false);
  //get real membership
  if (block->get_rows ()->length () == 0)
    return;
  fit_parallel_rows(block,
                    gradient,
                    rotation,
                    block_edge,
                    textord_show_expanded_rows && testing_on);
  if (!textord_new_initial_xheight)
    compute_row_stats(block, textord_show_expanded_rows && testing_on);
  // Visit the rows from the last list element backwards; the loop ends when
  // the iterator has wrapped round to the last element again.
  row_it.move_to_last ();
  do {
    row = row_it.data ();
    y_max = row->max_y ();       //get current limits
    y_min = row->min_y ();
    // Furthest the row may extend below / above its intercept.
    y_bottom = row->intercept () - block->line_size * textord_expansion_factor *
      tesseract::CCStruct::kDescenderFraction;
    y_top = row->intercept () + block->line_size * textord_expansion_factor *
        (tesseract::CCStruct::kXHeightFraction +
         tesseract::CCStruct::kAscenderFraction);
    if (y_min > y_bottom) {      //expansion allowed
      if (textord_show_expanded_rows && testing_on)
        tprintf("Expanding bottom of row at %f from %f to %f\n",
                row->intercept(), y_min, y_bottom);
      //expandable
      swallowed_row = true;
      // Keep going while neighbours are being absorbed: each swallow
      // exposes a new next row that must be tested too.
      while (swallowed_row && !row_it.at_last ()) {
        swallowed_row = false;
        //get next one
        test_row = row_it.data_relative (1);
        //overlaps space
        if (test_row->max_y () > y_bottom) {
          if (test_row->min_y () > y_bottom) {
            // Neighbour lies wholly inside the allowed range.
            if (textord_show_expanded_rows && testing_on)
              tprintf("Eating row below at %f\n", test_row->intercept());
            row_it.forward ();   // step onto the row being removed
#ifndef GRAPHICS_DISABLED
            if (textord_show_expanded_rows && testing_on)
              plot_parallel_row(test_row,
                                gradient,
                                block_edge,
                                ScrollView::WHITE,
                                rotation);
#endif
            // Move the neighbour's blobs into the current row before the
            // neighbour is deleted.
            blob_it.set_to_list (row->blob_list ());
            blob_it.add_list_after (test_row->blob_list ());
            //swallow complete row
            delete row_it.extract ();
            row_it.backward ();  // return to the current row
            swallowed_row = true;
          }
          else if (test_row->max_y () < y_min) {
            //shorter limit
            y_bottom = test_row->max_y ();
            if (textord_show_expanded_rows && testing_on)
              tprintf("Truncating limit to %f due to touching row at %f\n",
                      y_bottom, test_row->intercept());
          }
          else {
            y_bottom = y_min;    //can't expand it
            if (textord_show_expanded_rows && testing_on)
              tprintf("Not expanding limit beyond %f due to touching row at %f\n",
                      y_bottom, test_row->intercept());
          }
        }
      }
      y_min = y_bottom;          //expand it
    }
    if (y_max < y_top) {         //expansion allowed
      if (textord_show_expanded_rows && testing_on)
        tprintf("Expanding top of row at %f from %f to %f\n",
                row->intercept(), y_max, y_top);
      swallowed_row = true;
      // Mirror image of the loop above, looking at the previous list element.
      while (swallowed_row && !row_it.at_first ()) {
        swallowed_row = false;
        //get one above
        test_row = row_it.data_relative (-1);
        if (test_row->min_y () < y_top) {
          if (test_row->max_y () < y_top) {
            // Neighbour lies wholly inside the allowed range.
            if (textord_show_expanded_rows && testing_on)
              tprintf("Eating row above at %f\n", test_row->intercept());
            row_it.backward ();  // step onto the row being removed
            blob_it.set_to_list (row->blob_list ());
#ifndef GRAPHICS_DISABLED
            if (textord_show_expanded_rows && testing_on)
              plot_parallel_row(test_row,
                                gradient,
                                block_edge,
                                ScrollView::WHITE,
                                rotation);
#endif
            blob_it.add_list_after (test_row->blob_list ());
            //swallow complete row
            delete row_it.extract ();
            row_it.forward ();   // return to the current row
            swallowed_row = true;
          }
          else if (test_row->min_y () < y_max) {
            //shorter limit
            y_top = test_row->min_y ();
            if (textord_show_expanded_rows && testing_on)
              tprintf("Truncating limit to %f due to touching row at %f\n",
                      y_top, test_row->intercept());
          }
          else {
            y_top = y_max;       //can't expand it
            if (textord_show_expanded_rows && testing_on)
              tprintf("Not expanding limit beyond %f due to touching row at %f\n",
                      y_top, test_row->intercept());
          }
        }
      }
      y_max = y_top;
    }
    //new limits
    row->set_limits (y_min, y_max);
    row_it.backward ();
  }
  while (!row_it.at_last ());
}


/**
 * adjust_row_limits
 *
 * Change the limits of rows to suit the default fractions.
 *
 * For every row the current height (max_y - min_y) is kept, but it is
 * redistributed around the row intercept: the part above the intercept gets
 * the (xheight + ascender) share and the part below gets the descender
 * share of the CCStruct fractions. The row's merged flag is cleared.
 * Debug output is printed when textord_show_expanded_rows is set.
 */
void adjust_row_limits(          //tidy limits
    TO_BLOCK *block              //block to do
) {
  TO_ROW *row;                   //current row
  float size;                    //size of row
  float ymax;                    //top of row
  float ymin;                    //bottom of row
  TO_ROW_IT row_it = block->get_rows ();

  if (textord_show_expanded_rows)
    tprintf("Adjusting row limits for block(%d,%d)\n",
            block->block->pdblk.bounding_box().left(),
            block->block->pdblk.bounding_box().top());
  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
    row = row_it.data ();
    size = row->max_y () - row->min_y ();
    if (textord_show_expanded_rows)
      tprintf("Row at %f has min %f, max %f, size %f\n",
              row->intercept(), row->min_y(), row->max_y(), size);
    // Normalise so that the three fractions together span the row height.
    size /= tesseract::CCStruct::kXHeightFraction +
      tesseract::CCStruct::kAscenderFraction +
      tesseract::CCStruct::kDescenderFraction;
    ymax = size * (tesseract::CCStruct::kXHeightFraction +
                   tesseract::CCStruct::kAscenderFraction);
    ymin = -size * tesseract::CCStruct::kDescenderFraction;
    row->set_limits (row->intercept () + ymin, row->intercept () + ymax);
    row->merged = false;
  }
}


/**
 * @name compute_row_stats
 *
 * Compute the linespacing and offset.
+ */ +void compute_row_stats( //find lines + TO_BLOCK* block, //block to do + bool testing_on //correct orientation +) { + int32_t row_index; //of median + TO_ROW *row; //current row + TO_ROW *prev_row; //previous row + float iqr; //inter quartile range + TO_ROW_IT row_it = block->get_rows (); + //number of rows + int16_t rowcount = row_it.length (); + // for choose nth + std::vector<TO_ROW*> rows(rowcount); + rowcount = 0; + prev_row = nullptr; + row_it.move_to_last (); //start at bottom + do { + row = row_it.data (); + if (prev_row != nullptr) { + rows[rowcount++] = prev_row; + prev_row->spacing = row->intercept () - prev_row->intercept (); + if (testing_on) + tprintf ("Row at %g yields spacing of %g\n", + row->intercept (), prev_row->spacing); + } + prev_row = row; + row_it.backward (); + } + while (!row_it.at_last ()); + block->key_row = prev_row; + block->baseline_offset = + fmod (prev_row->parallel_c (), block->line_spacing); + if (testing_on) + tprintf ("Blob based spacing=(%g,%g), offset=%g", + block->line_size, block->line_spacing, block->baseline_offset); + if (rowcount > 0) { + row_index = choose_nth_item(rowcount * 3 / 4, &rows[0], rowcount, + sizeof (TO_ROW *), row_spacing_order); + iqr = rows[row_index]->spacing; + row_index = choose_nth_item(rowcount / 4, &rows[0], rowcount, + sizeof (TO_ROW *), row_spacing_order); + iqr -= rows[row_index]->spacing; + row_index = choose_nth_item(rowcount / 2, &rows[0], rowcount, + sizeof (TO_ROW *), row_spacing_order); + block->key_row = rows[row_index]; + if (testing_on) + tprintf (" row based=%g(%g)", rows[row_index]->spacing, iqr); + if (rowcount > 2 + && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) { + if (!textord_new_initial_xheight) { + if (rows[row_index]->spacing < block->line_spacing + && rows[row_index]->spacing > block->line_size) + //within range + block->line_size = rows[row_index]->spacing; + //spacing=size + else if (rows[row_index]->spacing > block->line_spacing) + block->line_size = 
block->line_spacing; + //too big so use max + } + else { + if (rows[row_index]->spacing < block->line_spacing) + block->line_size = rows[row_index]->spacing; + else + block->line_size = block->line_spacing; + //too big so use max + } + if (block->line_size < textord_min_xheight) + block->line_size = (float) textord_min_xheight; + block->line_spacing = rows[row_index]->spacing; + block->max_blob_size = + block->line_spacing * textord_excess_blobsize; + } + block->baseline_offset = fmod (rows[row_index]->intercept (), + block->line_spacing); + } + if (testing_on) + tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n", + block->line_size, block->line_spacing, block->baseline_offset); +} + + +/** + * @name compute_block_xheight + * + * Compute the xheight of the individual rows, then correlate them + * and interpret ascenderless lines, correcting xheights. + * + * First we compute our best guess of the x-height of each row independently + * with compute_row_xheight(), which looks for a pair of commonly occurring + * heights that could be x-height and ascender height. This function also + * attempts to find descenders of lowercase letters (i.e. not the small + * descenders that could appear in upper case letters as Q,J). 
+ * + * After this computation each row falls into one of the following categories: + * ROW_ASCENDERS_FOUND: we found xheight and ascender modes, so this must be + * a regular row; we'll use its xheight to compute + * xheight and ascrise estimates for the block + * ROW_DESCENDERS_FOUND: no ascenders, so we do not have a high confidence in + * the xheight of this row (don't use it for estimating + * block xheight), but this row can't contain all caps + * ROW_UNKNOWN: a row with no ascenders/descenders, could be all lowercase + * (or mostly lowercase for fonts with very few ascenders), + * all upper case or small caps + * ROW_INVALID: no meaningful xheight could be found for this row + * + * We then run correct_row_xheight() and use the computed xheight and ascrise + * averages to correct xheight values of the rows in ROW_DESCENDERS_FOUND, + * ROW_UNKNOWN and ROW_INVALID categories. + * + */ +void Textord::compute_block_xheight(TO_BLOCK *block, float gradient) { + TO_ROW *row; // current row + float asc_frac_xheight = CCStruct::kAscenderFraction / + CCStruct::kXHeightFraction; + float desc_frac_xheight = CCStruct::kDescenderFraction / + CCStruct::kXHeightFraction; + int32_t min_height, max_height; // limits on xheight + TO_ROW_IT row_it = block->get_rows(); + if (row_it.empty()) return; // no rows + + // Compute the best guess of xheight of each row individually. + // Use xheight and ascrise values of the rows where ascenders were found. 
+ get_min_max_xheight(block->line_size, &min_height, &max_height); + STATS row_asc_xheights(min_height, max_height + 1); + STATS row_asc_ascrise(static_cast<int>(min_height * asc_frac_xheight), + static_cast<int>(max_height * asc_frac_xheight) + 1); + int min_desc_height = static_cast<int>(min_height * desc_frac_xheight); + int max_desc_height = static_cast<int>(max_height * desc_frac_xheight); + STATS row_asc_descdrop(min_desc_height, max_desc_height + 1); + STATS row_desc_xheights(min_height, max_height + 1); + STATS row_desc_descdrop(min_desc_height, max_desc_height + 1); + STATS row_cap_xheights(min_height, max_height + 1); + STATS row_cap_floating_xheights(min_height, max_height + 1); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + // Compute the xheight of this row if it has not been computed before. + if (row->xheight <= 0.0) { + compute_row_xheight(row, block->block->classify_rotation(), + gradient, block->line_size); + } + ROW_CATEGORY row_category = get_row_category(row); + if (row_category == ROW_ASCENDERS_FOUND) { + row_asc_xheights.add(static_cast<int32_t>(row->xheight), + row->xheight_evidence); + row_asc_ascrise.add(static_cast<int32_t>(row->ascrise), + row->xheight_evidence); + row_asc_descdrop.add(static_cast<int32_t>(-row->descdrop), + row->xheight_evidence); + } else if (row_category == ROW_DESCENDERS_FOUND) { + row_desc_xheights.add(static_cast<int32_t>(row->xheight), + row->xheight_evidence); + row_desc_descdrop.add(static_cast<int32_t>(-row->descdrop), + row->xheight_evidence); + } else if (row_category == ROW_UNKNOWN) { + fill_heights(row, gradient, min_height, max_height, + &row_cap_xheights, &row_cap_floating_xheights); + } + } + + float xheight = 0.0; + float ascrise = 0.0; + float descdrop = 0.0; + // Compute our best guess of xheight of this block. + if (row_asc_xheights.get_total() > 0) { + // Determine xheight from rows where ascenders were found. 
+ xheight = row_asc_xheights.median(); + ascrise = row_asc_ascrise.median(); + descdrop = -row_asc_descdrop.median(); + } else if (row_desc_xheights.get_total() > 0) { + // Determine xheight from rows where descenders were found. + xheight = row_desc_xheights.median(); + descdrop = -row_desc_descdrop.median(); + } else if (row_cap_xheights.get_total() > 0) { + // All the rows in the block were (a/de)scenderless. + // Try to search for two modes in row_cap_heights that could + // be the xheight and the capheight (e.g. some of the rows + // were lowercase, but did not have enough (a/de)scenders. + // If such two modes can not be found, this block is most + // likely all caps (or all small caps, in which case the code + // still works as intended). + compute_xheight_from_modes(&row_cap_xheights, &row_cap_floating_xheights, + textord_single_height_mode && + block->block->classify_rotation().y() == 0.0, + min_height, max_height, &(xheight), &(ascrise)); + if (ascrise == 0) { // assume only caps in the whole block + xheight = row_cap_xheights.median() * CCStruct::kXHeightCapRatio; + } + } else { // default block sizes + xheight = block->line_size * CCStruct::kXHeightFraction; + } + // Correct xheight, ascrise and descdrop if necessary. + bool corrected_xheight = false; + if (xheight < textord_min_xheight) { + xheight = static_cast<float>(textord_min_xheight); + corrected_xheight = true; + } + if (corrected_xheight || ascrise <= 0.0) { + ascrise = xheight * asc_frac_xheight; + } + if (corrected_xheight || descdrop >= 0.0) { + descdrop = -(xheight * desc_frac_xheight); + } + block->xheight = xheight; + + if (textord_debug_xheights) { + tprintf("Block average xheight=%.4f, ascrise=%.4f, descdrop=%.4f\n", + xheight, ascrise, descdrop); + } + // Correct xheight, ascrise, descdrop of rows based on block averages. 
+ for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + correct_row_xheight(row_it.data(), xheight, ascrise, descdrop); + } +} + +/** + * @name compute_row_xheight + * + * Estimate the xheight of this row. + * Compute the ascender rise and descender drop at the same time. + * Set xheigh_evidence to the number of blobs with the chosen xheight + * that appear in this row. + */ +void Textord::compute_row_xheight(TO_ROW *row, // row to do + const FCOORD& rotation, + float gradient, // global skew + int block_line_size) { + // Find blobs representing repeated characters in rows and mark them. + // This information is used for computing row xheight and at a later + // stage when words are formed by make_words. + if (!row->rep_chars_marked()) { + mark_repeated_chars(row); + } + + int min_height, max_height; + get_min_max_xheight(block_line_size, &min_height, &max_height); + STATS heights(min_height, max_height + 1); + STATS floating_heights(min_height, max_height + 1); + fill_heights(row, gradient, min_height, max_height, + &heights, &floating_heights); + row->ascrise = 0.0f; + row->xheight = 0.0f; + row->xheight_evidence = + compute_xheight_from_modes(&heights, &floating_heights, + textord_single_height_mode && + rotation.y() == 0.0, + min_height, max_height, + &(row->xheight), &(row->ascrise)); + row->descdrop = 0.0f; + if (row->xheight > 0.0) { + row->descdrop = static_cast<float>( + compute_row_descdrop(row, gradient, row->xheight_evidence, &heights)); + } +} + +/** + * @name fill_heights + * + * Fill the given heights with heights of the blobs that are legal + * candidates for estimating xheight. 
+ */ +void fill_heights(TO_ROW *row, float gradient, int min_height, + int max_height, STATS *heights, STATS *floating_heights) { + float xcentre; // centre of blob + float top; // top y coord of blob + float height; // height of blob + BLOBNBOX *blob; // current blob + int repeated_set; + BLOBNBOX_IT blob_it = row->blob_list(); + if (blob_it.empty()) return; // no blobs in this row + bool has_rep_chars = + row->rep_chars_marked() && row->num_repeated_sets() > 0; + do { + blob = blob_it.data(); + if (!blob->joined_to_prev()) { + xcentre = (blob->bounding_box().left() + + blob->bounding_box().right()) / 2.0f; + top = blob->bounding_box().top(); + height = blob->bounding_box().height(); + if (textord_fix_xheight_bug) + top -= row->baseline.y(xcentre); + else + top -= gradient * xcentre + row->parallel_c(); + if (top >= min_height && top <= max_height) { + heights->add(static_cast<int32_t>(floor(top + 0.5)), 1); + if (height / top < textord_min_blob_height_fraction) { + floating_heights->add(static_cast<int32_t>(floor(top + 0.5)), 1); + } + } + } + // Skip repeated chars, since they are likely to skew the height stats. + if (has_rep_chars && blob->repeated_set() != 0) { + repeated_set = blob->repeated_set(); + blob_it.forward(); + while (!blob_it.at_first() && + blob_it.data()->repeated_set() == repeated_set) { + blob_it.forward(); + if (textord_debug_xheights) + tprintf("Skipping repeated char when computing xheight\n"); + } + } else { + blob_it.forward(); + } + } while (!blob_it.at_first()); +} + +/** + * @name compute_xheight_from_modes + * + * Given a STATS object heights, looks for two most frequently occurring + * heights that look like xheight and xheight + ascrise. If found, sets + * the values of *xheight and *ascrise accordingly, otherwise sets xheight + * to any most frequently occurring height and sets *ascrise to 0. + * Returns the number of times xheight occurred in heights. 
+ * For each mode that is considered for being an xheight the count of + * floating blobs (stored in floating_heights) is subtracted from the + * total count of the blobs of this height. This is done because blobs + * that sit far above the baseline could represent valid ascenders, but + * it is highly unlikely that such a character's height will be an xheight + * (e.g. -, ', =, ^, `, ", ', etc) + * If cap_only, then force finding of only the top mode. + */ +int compute_xheight_from_modes( + STATS *heights, STATS *floating_heights, bool cap_only, int min_height, + int max_height, float *xheight, float *ascrise) { + int blob_index = heights->mode(); // find mode + int blob_count = heights->pile_count(blob_index); // get count of mode + if (textord_debug_xheights) { + tprintf("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n", + min_height, max_height, blob_index, blob_count, + heights->get_total()); + heights->print(); + floating_heights->print(); + } + if (blob_count == 0) return 0; + int modes[MAX_HEIGHT_MODES]; // biggest piles + bool in_best_pile = false; + int prev_size = -INT32_MAX; + int best_count = 0; + int mode_count = compute_height_modes(heights, min_height, max_height, + modes, MAX_HEIGHT_MODES); + if (cap_only && mode_count > 1) + mode_count = 1; + int x; + if (textord_debug_xheights) { + tprintf("found %d modes: ", mode_count); + for (x = 0; x < mode_count; x++) tprintf("%d ", modes[x]); + tprintf("\n"); + } + + for (x = 0; x < mode_count - 1; x++) { + if (modes[x] != prev_size + 1) + in_best_pile = false; // had empty height + int modes_x_count = heights->pile_count(modes[x]) - + floating_heights->pile_count(modes[x]); + if ((modes_x_count >= blob_count * textord_xheight_mode_fraction) && + (in_best_pile || modes_x_count > best_count)) { + for (int asc = x + 1; asc < mode_count; asc++) { + float ratio = + static_cast<float>(modes[asc]) / static_cast<float>(modes[x]); + if (textord_ascx_ratio_min < ratio && + ratio < textord_ascx_ratio_max 
&& + (heights->pile_count(modes[asc]) >= + blob_count * textord_ascheight_mode_fraction)) { + if (modes_x_count > best_count) { + in_best_pile = true; + best_count = modes_x_count; + } + if (textord_debug_xheights) { + tprintf("X=%d, asc=%d, count=%d, ratio=%g\n", + modes[x], modes[asc]-modes[x], modes_x_count, ratio); + } + prev_size = modes[x]; + *xheight = static_cast<float>(modes[x]); + *ascrise = static_cast<float>(modes[asc] - modes[x]); + } + } + } + } + if (*xheight == 0) { // single mode + // Remove counts of the "floating" blobs (the one whose height is too + // small in relation to it's top end of the bounding box) from heights + // before computing the single-mode xheight. + // Restore the counts in heights after the mode is found, since + // floating blobs might be useful for determining potential ascenders + // in compute_row_descdrop(). + if (floating_heights->get_total() > 0) { + for (x = min_height; x < max_height; ++x) { + heights->add(x, -(floating_heights->pile_count(x))); + } + blob_index = heights->mode(); // find the modified mode + for (x = min_height; x < max_height; ++x) { + heights->add(x, floating_heights->pile_count(x)); + } + } + *xheight = static_cast<float>(blob_index); + *ascrise = 0.0f; + best_count = heights->pile_count(blob_index); + if (textord_debug_xheights) + tprintf("Single mode xheight set to %g\n", *xheight); + } else if (textord_debug_xheights) { + tprintf("Multi-mode xheight set to %g, asc=%g\n", *xheight, *ascrise); + } + return best_count; +} + +/** + * @name compute_row_descdrop + * + * Estimates the descdrop of this row. This function looks for + * "significant" descenders of lowercase letters (those that could + * not just be the small descenders of upper case letters like Q,J). + * The function also takes into account how many potential ascenders + * this row might contain. 
* If the number of potential ascenders along
 * with descenders is close to the expected fraction of the total
 * number of blobs in the row, the function returns the descender
 * height, returns 0 otherwise.
 *
 * The value returned is the negated most common descender depth
 * (i.e. <= 0). row->xheight must already have been set: it is used as
 * a divisor and to scale the ratio limits.
 */
int32_t compute_row_descdrop(TO_ROW *row, float gradient,
                             int xheight_blob_count, STATS *asc_heights) {
  // Count how many potential ascenders are in this row.
  // The bucket range searched is narrowed to heights whose ratio to the
  // xheight lies between textord_ascx_ratio_min and textord_ascx_ratio_max.
  int i_min = asc_heights->min_bucket();
  if ((i_min / row->xheight) < textord_ascx_ratio_min) {
    i_min = static_cast<int>(
        floor(row->xheight * textord_ascx_ratio_min + 0.5));
  }
  int i_max = asc_heights->max_bucket();
  if ((i_max / row->xheight) > textord_ascx_ratio_max) {
    i_max = static_cast<int>(floor(row->xheight * textord_ascx_ratio_max));
  }
  int num_potential_asc = 0;
  for (int i = i_min; i <= i_max; ++i) {
    num_potential_asc += asc_heights->pile_count(i);
  }
  // Range of descender depths that count as "significant".
  auto min_height =
      static_cast<int32_t>(floor(row->xheight * textord_descx_ratio_min + 0.5));
  auto max_height =
      static_cast<int32_t>(floor(row->xheight * textord_descx_ratio_max));
  float xcentre;                 // centre of blob
  float height;                  // depth of blob bottom below the fitted line
  BLOBNBOX_IT blob_it = row->blob_list();
  BLOBNBOX *blob;                // current blob
  STATS heights (min_height, max_height + 1);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    blob = blob_it.data();
    if (!blob->joined_to_prev()) {
      xcentre = (blob->bounding_box().left() +
                 blob->bounding_box().right()) / 2.0f;
      // Distance from the parallel-fitted line down to the blob bottom.
      height = (gradient * xcentre + row->parallel_c() -
                blob->bounding_box().bottom());
      if (height >= min_height && height <= max_height)
        heights.add(static_cast<int>(floor(height + 0.5)), 1);
    }
  }
  int blob_index = heights.mode();  // find mode
  int blob_count = heights.pile_count(blob_index);  // get count of mode
  float total_fraction =
    (textord_descheight_mode_fraction + textord_ascheight_mode_fraction);
  // Too little combined evidence of ascenders and descenders: report none.
  if (static_cast<float>(blob_count + num_potential_asc) <
      xheight_blob_count * total_fraction) {
    blob_count = 0;
  }
  int descdrop = blob_count > 0 ? -blob_index : 0;
  if (textord_debug_xheights) {
    tprintf("Descdrop: %d (potential ascenders %d, descenders %d)\n",
            descdrop, num_potential_asc, blob_count);
    heights.print();
  }
  return descdrop;
}


/**
 * @name compute_height_modes
 *
 * Find the top maxmodes values in the input array and put their
 * indices in the output in the order in which they occurred.
 *
 * Buckets min_height..max_height (both inclusive) of heights are scanned
 * in increasing order. Non-empty buckets are appended to modes until it is
 * full; after that a new bucket whose count is at least that of the
 * smallest stored entry evicts that entry (later entries shift down one
 * place) and is appended at the end.
 *
 * Returns the number of entries written to modes (at most maxmodes).
 */
int32_t compute_height_modes(STATS *heights,      // stats to search
                             int32_t min_height,  // bottom of range
                             int32_t max_height,  // top of range
                             int32_t *modes,      // output array
                             int32_t maxmodes) {  // size of modes
  int32_t pile_count;            // no in source pile
  int32_t src_count;             // no of source entries
  int32_t src_index;             // current entry
  int32_t least_count;           // height of smallest
  int32_t least_index;           // index of least
  int32_t dest_count;            // index in modes

  src_count = max_height + 1 - min_height;
  dest_count = 0;
  least_count = INT32_MAX;
  least_index = -1;
  for (src_index = 0; src_index < src_count; src_index++) {
    pile_count = heights->pile_count(min_height + src_index);
    if (pile_count > 0) {
      if (dest_count < maxmodes) {
        // Still room: append, tracking the smallest entry so far.
        if (pile_count < least_count) {
          // find smallest in array
          least_count = pile_count;
          least_index = dest_count;
        }
        modes[dest_count++] = min_height + src_index;
      } else if (pile_count >= least_count) {
        // Array is full: drop the smallest entry by closing the gap ...
        while (least_index < maxmodes - 1) {
          modes[least_index] = modes[least_index + 1];
          // shuffle up
          least_index++;
        }
        // new one on end
        modes[maxmodes - 1] = min_height + src_index;
        if (pile_count == least_count) {
          // new smallest
          least_index = maxmodes - 1;
        } else {
          // ... and rescan for the new smallest. dest_count doubles as the
          // scan index here and is left equal to maxmodes afterwards.
          least_count = heights->pile_count(modes[0]);
          least_index = 0;
          for (dest_count = 1; dest_count < maxmodes; dest_count++) {
            pile_count = heights->pile_count(modes[dest_count]);
            if (pile_count < least_count) {
              // find smallest
              least_count = pile_count;
              least_index = dest_count;
            }
          }
        }
      }
    }
  }
  return dest_count;
}


/**
 * @name correct_row_xheight
 *
 * Adjust the xheight etc of this row if not within reasonable limits
 * of the average for the block.
 *
 * xheight, ascrise and descdrop are the block-level estimates. The row's
 * xheight, ascrise, descdrop and all_caps members may be modified.
 */
void correct_row_xheight(TO_ROW *row, float xheight,
                         float ascrise, float descdrop) {
  ROW_CATEGORY row_category = get_row_category(row);
  if (textord_debug_xheights) {
    tprintf("correcting row xheight: row->xheight %.4f"
            ", row->acrise %.4f row->descdrop %.4f\n",
            row->xheight, row->ascrise, row->descdrop);
  }
  // Does the row height match the block xheight, or the block cap height
  // (xheight + ascrise), to within textord_xheight_error_margin?
  bool normal_xheight =
    within_error_margin(row->xheight, xheight, textord_xheight_error_margin);
  bool cap_xheight =
    within_error_margin(row->xheight, xheight + ascrise,
                        textord_xheight_error_margin);
  // Use the average xheight/ascrise for the following cases:
  // -- the xheight of the row could not be determined at all
  // -- the row has descenders (e.g. "many groups", "ISBN 12345 p.3")
  //    and its xheight is close to either cap height or average xheight
  // -- the row does not have ascenders or descenders, but its xheight
  //    is close to the average block xheight (e.g. row with "www.mmm.com")
  if (row_category == ROW_ASCENDERS_FOUND) {
    // Trusted row: only fill in a missing descdrop, scaled to the row.
    if (row->descdrop >= 0.0) {
      row->descdrop = row->xheight * (descdrop / xheight);
    }
  } else if (row_category == ROW_INVALID ||
             (row_category == ROW_DESCENDERS_FOUND &&
              (normal_xheight || cap_xheight)) ||
             (row_category == ROW_UNKNOWN && normal_xheight)) {
    if (textord_debug_xheights) tprintf("using average xheight\n");
    row->xheight = xheight;
    row->ascrise = ascrise;
    row->descdrop = descdrop;
  } else if (row_category == ROW_DESCENDERS_FOUND) {
    // Assume this is a row with mostly lowercase letters and its xheight
    // is computed correctly (unfortunately there is no way to distinguish
    // this from the case when descenders are found, but the most common
    // height is capheight).
    if (textord_debug_xheights) tprintf("lowercase, corrected ascrise\n");
    row->ascrise = row->xheight * (ascrise / xheight);
  } else if (row_category == ROW_UNKNOWN) {
    // Otherwise assume this row is an all-caps or small-caps row
    // and adjust xheight and ascrise of the row.

    row->all_caps = true;
    if (cap_xheight) {  // regular all caps
      if (textord_debug_xheights) tprintf("all caps\n");
      row->xheight = xheight;
      row->ascrise = ascrise;
      row->descdrop = descdrop;
    } else {  // small caps or caps with an odd xheight
      if (textord_debug_xheights) {
        if (row->xheight < xheight + ascrise && row->xheight > xheight) {
          tprintf("small caps\n");
        } else {
          tprintf("all caps with irregular xheight\n");
        }
      }
      // Split the measured height into xheight and ascrise using the
      // block's proportions, then scale the descdrop to match.
      row->ascrise = row->xheight * (ascrise / (xheight + ascrise));
      row->xheight -= row->ascrise;
      row->descdrop = row->xheight * (descdrop / xheight);
    }
  }
  if (textord_debug_xheights) {
    tprintf("corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop"
            " = %.4f\n", row->xheight, row->ascrise, row->descdrop);
  }
}

// Returns the number of blobs in the list that are at least min_height
// tall and for which box.major_overlap() with the blob's bounding box
// is true.
static int CountOverlaps(const TBOX& box, int min_height,
                         BLOBNBOX_LIST* blobs) {
  int overlaps = 0;
  BLOBNBOX_IT blob_it(blobs);
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
    BLOBNBOX* blob = blob_it.data();
    const TBOX &blob_box = blob->bounding_box();
    if (blob_box.height() >= min_height && box.major_overlap(blob_box)) {
      ++overlaps;
    }
  }
  return overlaps;
}

/**
 * @name separate_underlines
 *
 * Test wide objects for being potential underlines. If they are then
 * put them in a separate list in the block.
+ */ +void separate_underlines(TO_BLOCK* block, // block to do + float gradient, // skew angle + FCOORD rotation, // inverse landscape + bool testing_on) { // correct orientation + BLOBNBOX *blob; // current blob + C_BLOB *rotated_blob; // rotated blob + TO_ROW *row; // current row + float length; // of g_vec + TBOX blob_box; + FCOORD blob_rotation; // inverse of rotation + FCOORD g_vec; // skew rotation + BLOBNBOX_IT blob_it; // iterator + // iterator + BLOBNBOX_IT under_it = &block->underlines; + BLOBNBOX_IT large_it = &block->large_blobs; + TO_ROW_IT row_it = block->get_rows(); + int min_blob_height = static_cast<int>(textord_min_blob_height_fraction * + block->line_size + 0.5); + + // length of vector + length = sqrt(1 + gradient * gradient); + g_vec = FCOORD(1 / length, -gradient / length); + blob_rotation = FCOORD(rotation.x(), -rotation.y()); + blob_rotation.rotate(g_vec); // undoing everything + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + // get blobs + blob_it.set_to_list(row->blob_list()); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); + blob_it.forward()) { + blob = blob_it.data(); + blob_box = blob->bounding_box(); + if (blob_box.width() > block->line_size * textord_underline_width) { + ASSERT_HOST(blob->cblob() != nullptr); + rotated_blob = crotate_cblob (blob->cblob(), + blob_rotation); + if (test_underline( + testing_on && textord_show_final_rows, + rotated_blob, static_cast<int16_t>(row->intercept()), + static_cast<int16_t>( + block->line_size * + (tesseract::CCStruct::kXHeightFraction + + tesseract::CCStruct::kAscenderFraction / 2.0f)))) { + under_it.add_after_then_move(blob_it.extract()); + if (testing_on && textord_show_final_rows) { + tprintf("Underlined blob at:"); + rotated_blob->bounding_box().print(); + tprintf("Was:"); + blob_box.print(); + } + } else if (CountOverlaps(blob->bounding_box(), min_blob_height, + row->blob_list()) > + textord_max_blob_overlaps) { + 
large_it.add_after_then_move(blob_it.extract()); + if (testing_on && textord_show_final_rows) { + tprintf("Large blob overlaps %d blobs at:", + CountOverlaps(blob_box, min_blob_height, + row->blob_list())); + blob_box.print(); + } + } + delete rotated_blob; + } + } + } +} + + +/** + * @name pre_associate_blobs + * + * Associate overlapping blobs and fake chop wide blobs. + */ +void pre_associate_blobs( //make rough chars + ICOORD page_tr, //top right + TO_BLOCK* block, //block to do + FCOORD rotation, //inverse landscape + bool testing_on //correct orientation +) { +#ifndef GRAPHICS_DISABLED + ScrollView::Color colour; //of boxes +#endif + BLOBNBOX *blob; //current blob + BLOBNBOX *nextblob; //next in list + TBOX blob_box; + FCOORD blob_rotation; //inverse of rotation + BLOBNBOX_IT blob_it; //iterator + BLOBNBOX_IT start_it; //iterator + TO_ROW_IT row_it = block->get_rows (); + +#ifndef GRAPHICS_DISABLED + colour = ScrollView::RED; +#endif + + blob_rotation = FCOORD (rotation.x (), -rotation.y ()); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + //get blobs + blob_it.set_to_list (row_it.data ()->blob_list ()); + for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); + blob_it.forward ()) { + blob = blob_it.data (); + blob_box = blob->bounding_box (); + start_it = blob_it; //save start point + // if (testing_on && textord_show_final_blobs) + // { + // tprintf("Blob at (%d,%d)->(%d,%d), addr=%x, count=%d\n", + // blob_box.left(),blob_box.bottom(), + // blob_box.right(),blob_box.top(), + // (void*)blob,blob_it.length()); + // } + bool overlap; + do { + overlap = false; + if (!blob_it.at_last ()) { + nextblob = blob_it.data_relative(1); + overlap = blob_box.major_x_overlap(nextblob->bounding_box()); + if (overlap) { + blob->merge(nextblob); // merge new blob + blob_box = blob->bounding_box(); // get bigger box + blob_it.forward(); + } + } + } + while (overlap); + blob->chop (&start_it, &blob_it, + blob_rotation, + block->line_size * 
tesseract::CCStruct::kXHeightFraction * + textord_chop_width); + //attempt chop + } +#ifndef GRAPHICS_DISABLED + if (testing_on && textord_show_final_blobs) { + if (to_win == nullptr) + create_to_win(page_tr); + to_win->Pen(colour); + for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); + blob_it.forward ()) { + blob = blob_it.data (); + blob_box = blob->bounding_box (); + blob_box.rotate (rotation); + if (!blob->joined_to_prev ()) { + to_win->Rectangle (blob_box.left (), blob_box.bottom (), + blob_box.right (), blob_box.top ()); + } + } + colour = static_cast<ScrollView::Color>(colour + 1); + if (colour > ScrollView::MAGENTA) + colour = ScrollView::RED; + } +#endif + } +} + + +/** + * @name fit_parallel_rows + * + * Re-fit the rows in the block to the given gradient. + */ +void fit_parallel_rows( //find lines + TO_BLOCK* block, //block to do + float gradient, //gradient to fit + FCOORD rotation, //for drawing + int32_t block_edge, //edge of block + bool testing_on //correct orientation +) { +#ifndef GRAPHICS_DISABLED + ScrollView::Color colour; //of row +#endif + TO_ROW_IT row_it = block->get_rows (); + + row_it.move_to_first (); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + if (row_it.data ()->blob_list ()->empty ()) + delete row_it.extract (); //nothing in it + else + fit_parallel_lms (gradient, row_it.data ()); + } +#ifndef GRAPHICS_DISABLED + if (testing_on) { + colour = ScrollView::RED; + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + plot_parallel_row (row_it.data (), gradient, + block_edge, colour, rotation); + colour = static_cast<ScrollView::Color>(colour + 1); + if (colour > ScrollView::MAGENTA) + colour = ScrollView::RED; + } + } +#endif + row_it.sort (row_y_order); //may have gone out of order +} + + +/** + * @name fit_parallel_lms + * + * Fit an LMS line to a row. + * Make the fit parallel to the given gradient and set the + * row accordingly. 
+ */ +void fit_parallel_lms(float gradient, TO_ROW *row) { + float c; // fitted line + int blobcount; // no of blobs + tesseract::DetLineFit lms; + BLOBNBOX_IT blob_it = row->blob_list(); + + blobcount = 0; + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + if (!blob_it.data()->joined_to_prev()) { + const TBOX& box = blob_it.data()->bounding_box(); + lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom())); + blobcount++; + } + } + double error = lms.ConstrainedFit(gradient, &c); + row->set_parallel_line(gradient, c, error); + if (textord_straight_baselines && blobcount > textord_lms_line_trials) { + error = lms.Fit(&gradient, &c); + } + //set the other too + row->set_line(gradient, c, error); +} + + +/** + * @name make_spline_rows + * + * Re-fit the rows in the block to the given gradient. + */ +void Textord::make_spline_rows(TO_BLOCK* block, // block to do + float gradient, // gradient to fit + bool testing_on) { +#ifndef GRAPHICS_DISABLED + ScrollView::Color colour; //of row +#endif + TO_ROW_IT row_it = block->get_rows (); + + row_it.move_to_first (); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + if (row_it.data ()->blob_list ()->empty ()) + delete row_it.extract (); //nothing in it + else + make_baseline_spline (row_it.data (), block); + } + if (textord_old_baselines) { +#ifndef GRAPHICS_DISABLED + if (testing_on) { + colour = ScrollView::RED; + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); + row_it.forward ()) { + row_it.data ()->baseline.plot (to_win, colour); + colour = static_cast<ScrollView::Color>(colour + 1); + if (colour > ScrollView::MAGENTA) + colour = ScrollView::RED; + } + } +#endif + make_old_baselines(block, testing_on, gradient); + } +#ifndef GRAPHICS_DISABLED + if (testing_on) { + colour = ScrollView::RED; + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row_it.data ()->baseline.plot (to_win, colour); + colour = 
static_cast<ScrollView::Color>(colour + 1); + if (colour > ScrollView::MAGENTA) + colour = ScrollView::RED; + } + } +#endif +} + +/** + * @name make_baseline_spline + * + * Fit an LMS line to a row. + * Make the fit parallel to the given gradient and set the + * row accordingly. + */ +void make_baseline_spline(TO_ROW *row, //row to fit + TO_BLOCK *block) { + double *coeffs; // quadratic coeffs + int32_t segments; // no of segments + + // spline boundaries + auto *xstarts = new int32_t[row->blob_list()->length() + 1]; + if (segment_baseline(row, block, segments, xstarts) + && !textord_straight_baselines && !textord_parallel_baselines) { + coeffs = linear_spline_baseline(row, block, segments, xstarts); + } else { + xstarts[1] = xstarts[segments]; + segments = 1; + coeffs = new double[3]; + coeffs[0] = 0; + coeffs[1] = row->line_m (); + coeffs[2] = row->line_c (); + } + row->baseline = QSPLINE (segments, xstarts, coeffs); + delete[] coeffs; + delete[] xstarts; +} + + +/** + * @name segment_baseline + * + * Divide the baseline up into segments which require a different + * quadratic fitted to them. + * Return true if enough blobs were far enough away to need a quadratic. 
+ */ +bool +segment_baseline( //split baseline + TO_ROW* row, //row to fit + TO_BLOCK* block, //block it came from + int32_t& segments, //no fo segments + int32_t* xstarts //coords of segments +) { + bool needs_curve; //needs curved line + int blobcount; //no of blobs + int blobindex; //current blob + int last_state; //above, on , below + int state; //of current blob + float yshift; //from baseline + TBOX box; //blob box + TBOX new_box; //new_it box + float middle; //xcentre of blob + //blobs + BLOBNBOX_IT blob_it = row->blob_list (); + BLOBNBOX_IT new_it = blob_it; //front end + SORTED_FLOATS yshifts; //shifts from baseline + + needs_curve = false; + box = box_next_pre_chopped (&blob_it); + xstarts[0] = box.left (); + segments = 1; + blobcount = row->blob_list ()->length (); + if (textord_oldbl_debug) + tprintf ("Segmenting baseline of %d blobs at (%d,%d)\n", + blobcount, box.left (), box.bottom ()); + if (blobcount <= textord_spline_medianwin + || blobcount < textord_spline_minblobs) { + blob_it.move_to_last (); + box = blob_it.data ()->bounding_box (); + xstarts[1] = box.right (); + return false; + } + last_state = 0; + new_it.mark_cycle_pt (); + for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) { + new_box = box_next_pre_chopped (&new_it); + middle = (new_box.left () + new_box.right ()) / 2.0; + yshift = new_box.bottom () - row->line_m () * middle - row->line_c (); + //record shift + yshifts.add (yshift, blobindex); + if (new_it.cycled_list ()) { + xstarts[1] = new_box.right (); + return false; + } + } + for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++) + box = box_next_pre_chopped (&blob_it); + do { + new_box = box_next_pre_chopped (&new_it); + //get middle one + yshift = yshifts[textord_spline_medianwin / 2]; + if (yshift > textord_spline_shift_fraction * block->line_size) + state = 1; + else if (-yshift > textord_spline_shift_fraction * block->line_size) + state = -1; + else + state = 0; + if (state != 0) + 
needs_curve = true; + // tprintf("State=%d, prev=%d, shift=%g\n", + // state,last_state,yshift); + if (state != last_state && blobcount > textord_spline_minblobs) { + xstarts[segments++] = box.left (); + blobcount = 0; + } + last_state = state; + yshifts.remove (blobindex - textord_spline_medianwin); + box = box_next_pre_chopped (&blob_it); + middle = (new_box.left () + new_box.right ()) / 2.0; + yshift = new_box.bottom () - row->line_m () * middle - row->line_c (); + yshifts.add (yshift, blobindex); + blobindex++; + blobcount++; + } + while (!new_it.cycled_list ()); + if (blobcount > textord_spline_minblobs || segments == 1) { + xstarts[segments] = new_box.right (); + } + else { + xstarts[--segments] = new_box.right (); + } + if (textord_oldbl_debug) + tprintf ("Made %d segments on row at (%d,%d)\n", + segments, box.right (), box.bottom ()); + return needs_curve; +} + + +/** + * @name linear_spline_baseline + * + * Divide the baseline up into segments which require a different + * quadratic fitted to them. + * @return true if enough blobs were far enough away to need a quadratic. 
 */
double *
linear_spline_baseline (         //split baseline
TO_ROW * row,                    //row to fit
TO_BLOCK * block,                //block it came from
int32_t & segments,              //no of segments
int32_t xstarts[]                //coords of segments
) {
  // Returns an array allocated with new[] holding segments * 3 doubles.
  // For every segment the three entries are 0, b and c, where b and c
  // come from DetLineFit::Fit on that segment's blobs.
  // segments and xstarts are overwritten here; block is not used.
  int blobcount;                 //no of blobs
  int blobindex;                 //current blob
  int index1, index2;            //blob numbers
  int blobs_per_segment;         //blobs in each
  TBOX box;                      //blob box
  TBOX new_box;                  //new_it box
                                 //blobs
  BLOBNBOX_IT blob_it = row->blob_list ();
  BLOBNBOX_IT new_it = blob_it;  //front end
  float b, c;                    //fitted curve
  tesseract::DetLineFit lms;
  int32_t segment;               //current segment

  box = box_next_pre_chopped (&blob_it);
  xstarts[0] = box.left ();
  // Count the boxes by stepping blob_it until it is back at the start.
  blobcount = 1;
  while (!blob_it.at_first ()) {
    blobcount++;
    box = box_next_pre_chopped (&blob_it);
  }
  // At least one segment; otherwise one per textord_spline_medianwin blobs.
  segments = blobcount / textord_spline_medianwin;
  if (segments < 1)
    segments = 1;
  blobs_per_segment = blobcount / segments;
  // quadratic coeffs
  auto *coeffs = new double[segments * 3];
  if (textord_oldbl_debug)
    tprintf
      ("Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n",
      blobcount, box.left (), box.bottom (), segments, blobs_per_segment);
  segment = 1;
  // new_it starts half a segment ahead of blob_it.
  for (index2 = 0; index2 < blobs_per_segment / 2; index2++)
    box_next_pre_chopped(&new_it);
  index1 = 0;
  blobindex = index2;
  // Each pass of the loop fits up to two segments: the first from blobs
  // read through blob_it, the second from blobs read through new_it.
  // The last segment also takes every blob that is left over.
  do {
    blobindex += blobs_per_segment;
    lms.Clear();
    while (index1 < blobindex || (segment == segments && index1 < blobcount)) {
      box = box_next_pre_chopped (&blob_it);
      int middle = (box.left() + box.right()) / 2;
      lms.Add(ICOORD(middle, box.bottom()));
      index1++;
      // Record the boundary half a segment before the end of this run,
      // or at the last-but-one blob.
      if (index1 == blobindex - blobs_per_segment / 2
          || index1 == blobcount - 1) {
        xstarts[segment] = box.left ();
      }
    }
    lms.Fit(&b, &c);
    coeffs[segment * 3 - 3] = 0;
    coeffs[segment * 3 - 2] = b;
    coeffs[segment * 3 - 1] = c;
    segment++;
    if (segment > segments)
      break;

    blobindex += blobs_per_segment;
    lms.Clear();
    while (index2 < blobindex || (segment == segments && index2 < blobcount)) {
      new_box = box_next_pre_chopped (&new_it);
      int middle = (new_box.left() + new_box.right()) / 2;
      lms.Add(ICOORD (middle, new_box.bottom()));
      index2++;
      if (index2 == blobindex - blobs_per_segment / 2
          || index2 == blobcount - 1) {
        xstarts[segment] = new_box.left ();
      }
    }
    lms.Fit(&b, &c);
    coeffs[segment * 3 - 3] = 0;
    coeffs[segment * 3 - 2] = b;
    coeffs[segment * 3 - 1] = c;
    segment++;
  }
  while (segment <= segments);
  return coeffs;
}


/**
 * @name assign_blobs_to_rows
 *
 * Make enough rows to allocate all the given blobs to one.
 * If a block skew is given, use that, else attempt to track it.
 *
 * Blobs are taken from block->blobs in x order. Each blob ends up
 * added to an existing row (ASSIGN), used to start a new row (NEW_ROW,
 * only when make_new_rows is set and the blob is shorter than
 * block->max_blob_size), or left in block->blobs (REJECT).
 * Rows that are empty at the end are deleted.
 */
void assign_blobs_to_rows(       //find lines
    TO_BLOCK* block,             //block to do
    float* gradient,             //block skew, may be nullptr
    int pass,                    //identification, used in debug output only
    bool reject_misses,          //chuck big ones out
    bool make_new_rows,          //add rows for unmatched
    bool drawing_skew            //draw smoothed skew
) {
  OVERLAP_STATE overlap_result;  //what to do with it
  float ycoord;                  //current y
  float top, bottom;             //of blob
  float g_length = 1.0f;         //from gradient
  int16_t row_count;             //no of rows
  int16_t left_x;                //left edge
  int16_t last_x;                //previous edge
  float block_skew;              //y delta
  float smooth_factor;           //for new coords
  float near_dist;               //dist to nearest row
  ICOORD testpt;                 //testing only
  BLOBNBOX *blob;                //current blob
  TO_ROW *row;                   //current row
  TO_ROW *dest_row = nullptr;    //row to put blob in
                                 //iterators
  BLOBNBOX_IT blob_it = &block->blobs;
  TO_ROW_IT row_it = block->get_rows ();

  // Vertical middle of the block; only used for drawing the skew.
  ycoord =
    (block->block->pdblk.bounding_box ().bottom () +
    block->block->pdblk.bounding_box ().top ()) / 2.0f;
  if (gradient != nullptr)
    g_length = sqrt (1 + *gradient * *gradient);
#ifndef GRAPHICS_DISABLED
  if (drawing_skew)
    to_win->SetCursor(block->block->pdblk.bounding_box ().left (), ycoord);
#endif
  testpt = ICOORD (textord_test_x, textord_test_y);
  blob_it.sort (blob_x_order);
  smooth_factor = 1.0;
  block_skew = 0.0f;
  row_count = row_it.length ();  //might have rows
  if (!blob_it.empty ()) {
    left_x = blob_it.data ()->bounding_box ().left ();
  }
  else {
    left_x = block->block->pdblk.bounding_box ().left ();
  }
  last_x = left_x;
  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
    blob = blob_it.data ();
    if (gradient != nullptr) {
      // Skew is computed directly from the supplied gradient.
      block_skew = (1 - 1 / g_length) * blob->bounding_box ().bottom ()
        + *gradient / g_length * blob->bounding_box ().left ();
    }
    else if (blob->bounding_box ().left () - last_x > block->line_size / 2
      && last_x - left_x > block->line_size * 2
      && textord_interpolating_skew) {
      // After a horizontal gap, scale the tracked skew in proportion to
      // the distance travelled from the left edge.
      //                      tprintf("Interpolating skew from %g",block_skew);
      block_skew *= static_cast<float>(blob->bounding_box ().left () - left_x)
        / (last_x - left_x);
      //                      tprintf("to %g\n",block_skew);
    }
    last_x = blob->bounding_box ().left ();
    // Blob limits with the current skew estimate removed.
    top = blob->bounding_box ().top () - block_skew;
    bottom = blob->bounding_box ().bottom () - block_skew;
#ifndef GRAPHICS_DISABLED
    if (drawing_skew)
      to_win->DrawTo(blob->bounding_box ().left (), ycoord + block_skew);
#endif
    if (!row_it.empty ()) {
      // Stop at the first row whose min_y is not above the blob top, or
      // at the last row if there is none.
      for (row_it.move_to_first ();
        !row_it.at_last () && row_it.data ()->min_y () > top;
        row_it.forward ());
      row = row_it.data ();
      if (row->min_y () <= top && row->max_y () >= bottom) {
        //any overlap
        dest_row = row;
        // most_overlapping_row may replace dest_row and may merge rows.
        overlap_result = most_overlapping_row (&row_it, dest_row,
          top, bottom,
          block->line_size,
          blob->bounding_box ().
          contains (testpt));
        if (overlap_result == NEW_ROW && !reject_misses)
          overlap_result = ASSIGN;
      }
      else {
        overlap_result = NEW_ROW;
        if (!make_new_rows) {
          // No new rows allowed: accept a nearby row if the gap between
          // it and the blob is small enough.
          near_dist = row_it.data_relative (-1)->min_y () - top;
          //below bottom
          if (bottom < row->min_y ()) {
            if (row->min_y () - bottom <=
              (block->line_spacing -
              block->line_size) * tesseract::CCStruct::kDescenderFraction) {
              //done it
              overlap_result = ASSIGN;
              dest_row = row;
            }
          }
          else if (near_dist > 0
            && near_dist < bottom - row->max_y ()) {
            // The row reached via data_relative(-1) is nearer; step back
            // to it.
            row_it.backward ();
            dest_row = row_it.data ();
            if (dest_row->min_y () - bottom <=
              (block->line_spacing -
              block->line_size) * tesseract::CCStruct::kDescenderFraction) {
              //done it
              overlap_result = ASSIGN;
            }
          }
          else {
            if (top - row->max_y () <=
              (block->line_spacing -
              block->line_size) * (textord_overlap_x +
              tesseract::CCStruct::kAscenderFraction)) {
              //done it
              overlap_result = ASSIGN;
              dest_row = row;
            }
          }
        }
      }
      if (overlap_result == ASSIGN)
        dest_row->add_blob (blob_it.extract (), top, bottom,
          block->line_size);
      if (overlap_result == NEW_ROW) {
        if (make_new_rows && top - bottom < block->max_blob_size) {
          dest_row =
            new TO_ROW (blob_it.extract (), top, bottom,
            block->line_size);
          row_count++;
          if (bottom > row_it.data ()->min_y ())
            row_it.add_before_then_move (dest_row);
          //insert in right place
          else
            row_it.add_after_then_move (dest_row);
          smooth_factor =
            1.0 / (row_count * textord_skew_lag +
            textord_skewsmooth_offset);
        }
        else
          overlap_result = REJECT;
      }
    }
    else if (make_new_rows && top - bottom < block->max_blob_size) {
      // No rows yet: this blob starts the first one.
      overlap_result = NEW_ROW;
      dest_row =
        new TO_ROW(blob_it.extract(), top, bottom, block->line_size);
      row_count++;
      row_it.add_after_then_move(dest_row);
      smooth_factor = 1.0 / (row_count * textord_skew_lag +
        textord_skewsmooth_offset2);
    }
    else
      overlap_result = REJECT;
    if (blob->bounding_box ().contains(testpt) && textord_debug_blob) {
      if (overlap_result != REJECT) {
        tprintf("Test blob assigned to row at (%g,%g) on pass %d\n",
          dest_row->min_y(), dest_row->max_y(), pass);
      }
      else {
        tprintf("Test blob assigned to no row on pass %d\n", pass);
      }
    }
    if (overlap_result != REJECT) {
      // The row limits may have changed; move the current row backward
      // or forward until the rows are in descending min_y order again.
      while (!row_it.at_first() &&
        row_it.data()->min_y() > row_it.data_relative(-1)->min_y()) {
        row = row_it.extract();
        row_it.backward();
        row_it.add_before_then_move(row);
      }
      while (!row_it.at_last() &&
        row_it.data ()->min_y() < row_it.data_relative (1)->min_y()) {
        row = row_it.extract();
        row_it.forward();
        // Keep rows in order.
        row_it.add_after_then_move(row);
      }
      // Update the smoothed skew, unless the row has other blobs and the
      // blob one step before the last in the destination row majorly
      // overlaps this blob in x.
      BLOBNBOX_IT added_blob_it(dest_row->blob_list());
      added_blob_it.move_to_last();
      TBOX prev_box = added_blob_it.data_relative(-1)->bounding_box();
      if (dest_row->blob_list()->singleton() ||
        !prev_box.major_x_overlap(blob->bounding_box())) {
        block_skew = (1 - smooth_factor) * block_skew
          + smooth_factor * (blob->bounding_box().bottom() -
          dest_row->initial_min_y());
      }
    }
  }
  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
    if (row_it.data()->blob_list()->empty())
      delete row_it.extract();  // Discard empty rows.
  }
}


/**
 * @name most_overlapping_row
 *
 * Return the row which most overlaps the blob.
 */
OVERLAP_STATE most_overlapping_row(  //find best row
    TO_ROW_IT* row_it,               //iterator, left pointing at best_row
    TO_ROW*& best_row,               //output row
    float top,                       //top of blob
    float bottom,                    //bottom of blob
    float rowsize,                   //max row size
    bool testing_blob                //test stuff
) {
  // Starts at the row under row_it and looks at the following rows for
  // as long as they overlap the blob's [bottom, top] range.
  // Returns ASSIGN, REJECT (the blob overlaps two rows by at least
  // rowsize - 1 each) or NEW_ROW (the best overlap is too small).
  // Side effect: two overlapping rows whose combined height is at most
  // rowsize are merged, and the earlier of the two is deleted.
  OVERLAP_STATE result;          //result of tests
  float overlap;                 //of blob & row
  float bestover;                //nearest row
  float merge_top, merge_bottom; //size of merged row
  ICOORD testpt;                 //testing only
  TO_ROW *row;                   //current row
  TO_ROW *test_row;              //for multiple overlaps
  BLOBNBOX_IT blob_it;           //for merging rows

  result = ASSIGN;
  row = row_it->data ();
  // Overlap = blob height minus the parts sticking out of the row.
  bestover = top - bottom;
  if (top > row->max_y ())
    bestover -= top - row->max_y ();
  if (bottom < row->min_y ())
    //compute overlap
    bestover -= row->min_y () - bottom;
  if (testing_blob && textord_debug_blob) {
    tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f\n",
            bottom, top, row->min_y(), row->max_y(), rowsize, bestover);
  }
  test_row = row;
  do {
    if (!row_it->at_last ()) {
      row_it->forward ();
      test_row = row_it->data ();
      if (test_row->min_y () <= top && test_row->max_y () >= bottom) {
        // Limits the two rows would have if they were merged.
        merge_top =
          test_row->max_y () >
          row->max_y ()? test_row->max_y () : row->max_y ();
        merge_bottom =
          test_row->min_y () <
          row->min_y ()? test_row->min_y () : row->min_y ();
        if (merge_top - merge_bottom <= rowsize) {
          if (testing_blob && textord_debug_blob) {
            tprintf ("Merging rows at (%g,%g), (%g,%g)\n",
              row->min_y (), row->max_y (),
              test_row->min_y (), test_row->max_y ());
          }
          // Move all of row's blobs into test_row, then delete the row
          // just before test_row in the list.
          test_row->set_limits (merge_bottom, merge_top);
          blob_it.set_to_list (test_row->blob_list ());
          blob_it.add_list_after (row->blob_list ());
          blob_it.sort (blob_x_order);
          row_it->backward ();
          delete row_it->extract ();
          row_it->forward ();
          bestover = -1.0f;      //force replacement
        }
        overlap = top - bottom;
        if (top > test_row->max_y ())
          overlap -= top - test_row->max_y ();
        if (bottom < test_row->min_y ())
          overlap -= test_row->min_y () - bottom;
        if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
          result = REJECT;
        }
        if (overlap > bestover) {
          bestover = overlap;    //find biggest overlap
          row = test_row;
        }
        if (testing_blob && textord_debug_blob) {
          tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f->%f\n",
                  bottom, top, test_row->min_y(), test_row->max_y(),
                  rowsize, overlap, bestover);
        }
      }
    }
  }
  while (!row_it->at_last ()
    && test_row->min_y () <= top && test_row->max_y () >= bottom);
  while (row_it->data () != row)
    row_it->backward ();         //make it point to row
                                 //doesn't overlap much
  // Too much of the blob lies outside the best row: ask for a new row.
  if (top - bottom - bestover > rowsize * textord_overlap_x &&
      (!textord_fix_makerow_bug || bestover < rowsize * textord_overlap_x)
      && result == ASSIGN)
    result = NEW_ROW;            //doesn't overlap enough
  best_row = row;
  return result;
}


/**
 * @name blob_x_order
 *
 * Sort function to sort blobs in x from page left.
+ */ +int blob_x_order( //sort function + const void *item1, //items to compare + const void *item2) { + //converted ptr + const BLOBNBOX *blob1 = *reinterpret_cast<const BLOBNBOX* const*>(item1); + //converted ptr + const BLOBNBOX *blob2 = *reinterpret_cast<const BLOBNBOX* const*>(item2); + + if (blob1->bounding_box ().left () < blob2->bounding_box ().left ()) + return -1; + else if (blob1->bounding_box ().left () > blob2->bounding_box ().left ()) + return 1; + else + return 0; +} + + +/** + * @name row_y_order + * + * Sort function to sort rows in y from page top. + */ +int row_y_order( //sort function + const void *item1, //items to compare + const void *item2) { + //converted ptr + const TO_ROW *row1 = *reinterpret_cast<const TO_ROW* const*>(item1); + //converted ptr + const TO_ROW *row2 = *reinterpret_cast<const TO_ROW* const*>(item2); + + if (row1->parallel_c () > row2->parallel_c ()) + return -1; + else if (row1->parallel_c () < row2->parallel_c ()) + return 1; + else + return 0; +} + + +/** + * @name row_spacing_order + * + * Qsort style function to compare 2 TO_ROWS based on their spacing value. + */ +int row_spacing_order( //sort function + const void *item1, //items to compare + const void *item2) { + //converted ptr + const TO_ROW *row1 = *reinterpret_cast<const TO_ROW* const*>(item1); + //converted ptr + const TO_ROW *row2 = *reinterpret_cast<const TO_ROW* const*>(item2); + + if (row1->spacing < row2->spacing) + return -1; + else if (row1->spacing > row2->spacing) + return 1; + else + return 0; +} + +/** + * @name mark_repeated_chars + * + * Mark blobs marked with BTFT_LEADER in repeated sets using the + * repeated_set member of BLOBNBOX. + */ +void mark_repeated_chars(TO_ROW *row) { + BLOBNBOX_IT box_it(row->blob_list()); // Iterator. 
+ int num_repeated_sets = 0; + if (!box_it.empty()) { + do { + BLOBNBOX* bblob = box_it.data(); + int repeat_length = 1; + if (bblob->flow() == BTFT_LEADER && + !bblob->joined_to_prev() && bblob->cblob() != nullptr) { + BLOBNBOX_IT test_it(box_it); + for (test_it.forward(); !test_it.at_first();) { + bblob = test_it.data(); + if (bblob->flow() != BTFT_LEADER) + break; + test_it.forward(); + bblob = test_it.data(); + if (bblob->joined_to_prev() || bblob->cblob() == nullptr) { + repeat_length = 0; + break; + } + ++repeat_length; + } + } + if (repeat_length >= kMinLeaderCount) { + num_repeated_sets++; + for (; repeat_length > 0; box_it.forward(), --repeat_length) { + bblob = box_it.data(); + bblob->set_repeated_set(num_repeated_sets); + } + } else { + bblob->set_repeated_set(0); + box_it.forward(); + } + } while (!box_it.at_first()); // until all done + } + row->set_num_repeated_sets(num_repeated_sets); +} + +} // namespace tesseract diff --git a/tesseract/src/textord/makerow.h b/tesseract/src/textord/makerow.h new file mode 100644 index 00000000..c9e1e5e6 --- /dev/null +++ b/tesseract/src/textord/makerow.h @@ -0,0 +1,291 @@ +/********************************************************************** + * File: makerow.h (Formerly makerows.h) + * Description: Code to arrange blobs into rows of text. + * Author: Ray Smith + * + * (C) Copyright 1992, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
 *
 **********************************************************************/

#ifndef MAKEROW_H
#define MAKEROW_H

#include "params.h"
#include "ocrblock.h"
#include "blobs.h"
#include "blobbox.h"
#include "statistc.h"

namespace tesseract {

// Result of trying to place a blob into a row; returned by
// most_overlapping_row and used by assign_blobs_to_rows.
enum OVERLAP_STATE
{
  ASSIGN,                        //assign it to row
  REJECT,                        //reject it - dual overlap
  NEW_ROW                        //blob needs a row of its own
};

// Classification of a row as returned by get_row_category.
enum ROW_CATEGORY {
  ROW_ASCENDERS_FOUND,
  ROW_DESCENDERS_FOUND,
  ROW_UNKNOWN,
  ROW_INVALID,
};

// Parameters declared here for use by other files.
extern BOOL_VAR_H(textord_heavy_nr, false, "Vigorously remove noise");
extern BOOL_VAR_H (textord_show_initial_rows, false,
"Display row accumulation");
extern BOOL_VAR_H (textord_show_parallel_rows, false,
"Display page correlated rows");
extern BOOL_VAR_H (textord_show_expanded_rows, false,
"Display rows after expanding");
extern BOOL_VAR_H (textord_show_final_rows, false,
"Display rows after final fitting");
extern BOOL_VAR_H (textord_show_final_blobs, false,
"Display blob bounds after pre-ass");
extern BOOL_VAR_H (textord_test_landscape, false, "Tests refer to land/port");
extern BOOL_VAR_H (textord_parallel_baselines, true,
"Force parallel baselines");
extern BOOL_VAR_H (textord_straight_baselines, false,
"Force straight baselines");
extern BOOL_VAR_H (textord_quadratic_baselines, false,
"Use quadratic splines");
extern BOOL_VAR_H (textord_old_baselines, true, "Use old baseline algorithm");
extern BOOL_VAR_H (textord_old_xheight, true, "Use old xheight algorithm");
extern BOOL_VAR_H (textord_fix_xheight_bug, true, "Use spline baseline");
extern BOOL_VAR_H (textord_fix_makerow_bug, true,
"Prevent multiple baselines");
extern BOOL_VAR_H (textord_cblob_blockocc, true,
"Use new projection for underlines");
extern BOOL_VAR_H (textord_debug_xheights, false, "Test xheight algorithms");
extern INT_VAR_H (textord_test_x, -INT32_MAX, "coord of test pt");
extern INT_VAR_H (textord_test_y, -INT32_MAX, "coord of test pt");
extern INT_VAR_H (textord_min_blobs_in_row, 4,
"Min blobs before gradient counted");
extern INT_VAR_H (textord_spline_minblobs, 8,
"Min blobs in each spline segment");
extern INT_VAR_H (textord_spline_medianwin, 6,
"Size of window for spline segmentation");
extern INT_VAR_H (textord_min_xheight, 10, "Min credible pixel xheight");
extern double_VAR_H (textord_spline_shift_fraction, 0.02,
"Fraction of line spacing for quad");
extern double_VAR_H (textord_spline_outlier_fraction, 0.1,
"Fraction of line spacing for outlier");
extern double_VAR_H (textord_skew_ile, 0.5, "Ile of gradients for page skew");
extern double_VAR_H (textord_skew_lag, 0.75,
"Lag for skew on row accumulation");
extern double_VAR_H (textord_linespace_iqrlimit, 0.2,
"Max iqr/median for linespace");
extern double_VAR_H (textord_width_limit, 8,
"Max width of blobs to make rows");
extern double_VAR_H (textord_chop_width, 1.5, "Max width before chopping");
extern double_VAR_H (textord_minxh, 0.25,
"fraction of linesize for min xheight");
extern double_VAR_H (textord_min_linesize, 1.25,
"* blob height for initial linesize");
extern double_VAR_H (textord_excess_blobsize, 1.3,
"New row made if blob makes row this big");
extern double_VAR_H (textord_occupancy_threshold, 0.4,
"Fraction of neighbourhood");
extern double_VAR_H (textord_underline_width, 2.0,
"Multiple of line_size for underline");
extern double_VAR_H(textord_min_blob_height_fraction, 0.75,
"Min blob height/top to include blob top into xheight stats");
extern double_VAR_H (textord_xheight_mode_fraction, 0.4,
"Min pile height to make xheight");
extern double_VAR_H (textord_ascheight_mode_fraction, 0.15,
"Min pile height to make ascheight");
extern double_VAR_H (textord_ascx_ratio_min, 1.2, "Min cap/xheight");
extern double_VAR_H (textord_ascx_ratio_max, 1.7, "Max cap/xheight");
extern double_VAR_H (textord_descx_ratio_min, 0.15, "Min desc/xheight");
extern double_VAR_H (textord_descx_ratio_max, 0.6, "Max desc/xheight");
extern double_VAR_H (textord_xheight_error_margin, 0.1, "Accepted variation");
extern INT_VAR_H (textord_lms_line_trials, 12, "Number of linew fits to do");
extern BOOL_VAR_H (textord_new_initial_xheight, true,
"Use test xheight mechanism");
extern BOOL_VAR_H(textord_debug_blob, false, "Print test blob information");

// Computes the accepted x-height range for a block line size.
// *min_height is floor(block_linesize * textord_minxh), raised to
// textord_min_xheight if it would be smaller.
// *max_height is ceil(block_linesize * 3.0).
inline void get_min_max_xheight(int block_linesize,
                                int *min_height, int *max_height) {
  *min_height = static_cast<int32_t>(floor(block_linesize * textord_minxh));
  if (*min_height < textord_min_xheight) *min_height = textord_min_xheight;
  *max_height = static_cast<int32_t>(ceil(block_linesize * 3.0));
}

// Classifies a row from its stored metrics: ROW_INVALID when xheight is
// not positive, otherwise ROW_ASCENDERS_FOUND when ascrise is positive,
// otherwise ROW_DESCENDERS_FOUND when descdrop is non-zero, otherwise
// ROW_UNKNOWN.
inline ROW_CATEGORY get_row_category(const TO_ROW *row) {
  if (row->xheight <= 0) return ROW_INVALID;
  return (row->ascrise > 0) ? ROW_ASCENDERS_FOUND :
    (row->descdrop != 0) ? ROW_DESCENDERS_FOUND : ROW_UNKNOWN;
}

// Returns true when test lies between num * (1 - margin) and
// num * (1 + margin), both ends included.
inline bool within_error_margin(float test, float num, float margin) {
  return (test >= num * (1 - margin) && test <= num * (1 + margin));
}

void fill_heights(TO_ROW *row, float gradient, int min_height,
                  int max_height, STATS *heights, STATS *floating_heights);

float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK* block,
                      TO_BLOCK_LIST* blocks);
float make_rows(ICOORD page_tr,              // top right
                TO_BLOCK_LIST *port_blocks);
void make_initial_textrows(ICOORD page_tr,
                           TO_BLOCK* block,  // block to do
                           FCOORD rotation,  // for drawing
                           bool testing_on);  // correct orientation
void fit_lms_line(TO_ROW *row);
void compute_page_skew(TO_BLOCK_LIST *blocks,  // list of blocks
                       float &page_m,          // average gradient
                       float &page_err);       // average error
void vigorous_noise_removal(TO_BLOCK* block);
void cleanup_rows_making(ICOORD page_tr,     // top right
                         TO_BLOCK* block,    // block to do
                         float gradient,     // gradient to fit
                         FCOORD rotation,    // for drawing
                         int32_t block_edge,  // edge of block
                         bool testing_on);  // correct orientation
void delete_non_dropout_rows(    //find lines
    TO_BLOCK* block,             //block to do
    float gradient,              //global skew
    FCOORD rotation,             //deskew vector
    int32_t block_edge,          //left edge
    bool testing_on              //correct orientation
);
bool find_best_dropout_row(      //find neighbours
    TO_ROW* row,                 //row to test
    int32_t distance,            //dropout dist
    float dist_limit,            //threshold distance
    int32_t line_index,          //index of row
    TO_ROW_IT* row_it,           //current position
    bool testing_on              //correct orientation
);
TBOX deskew_block_coords(        //block box
                         TO_BLOCK *block,  //block to do
                         float gradient    //global skew
                        );
void compute_line_occupation(    //project blobs
                             TO_BLOCK *block,    //block to do
                             float gradient,     //global skew
                             int32_t min_y,      //min coord in block
                             int32_t max_y,      //in block
                             int32_t *occupation,  //output projection
                             int32_t *deltas     //derivative
                            );
void compute_occupation_threshold(  //project blobs
                                  int32_t low_window,   //below result point
                                  int32_t high_window,  //above result point
                                  int32_t line_count,   //array sizes
                                  int32_t *occupation,  //input projection
                                  int32_t *thresholds   //output thresholds
                                 );
void compute_dropout_distances(  //project blobs
                               int32_t *occupation,  //input projection
                               int32_t *thresholds,  //output thresholds
                               int32_t line_count    //array sizes
                              );
void expand_rows(                //find lines
    ICOORD page_tr,              //top right
    TO_BLOCK* block,             //block to do
    float gradient,              //gradient to fit
    FCOORD rotation,             //for drawing
    int32_t block_edge,          //edge of block
    bool testing_on              //correct orientation
);
void adjust_row_limits(          //tidy limits
                       TO_BLOCK *block  //block to do
                      );
void compute_row_stats(          //find lines
    TO_BLOCK* block,             //block to do
    bool testing_on              //correct orientation
);
float median_block_xheight(      //find lines
                           TO_BLOCK *block,  //block to do
                           float gradient    //global skew
                          );

int compute_xheight_from_modes(
    STATS *heights, STATS *floating_heights, bool cap_only, int min_height,
    int max_height, float *xheight, float *ascrise);

int32_t compute_row_descdrop(TO_ROW *row,     // row to do
                           float gradient,  // global skew
                           int xheight_blob_count,
                           STATS *heights);
int32_t compute_height_modes(STATS *heights,    // stats to search
                           int32_t min_height,  // bottom of range
                           int32_t max_height,  // top of range
                           int32_t *modes,      // output array
                           int32_t maxmodes);   // size of modes
void correct_row_xheight(TO_ROW *row,    // row to fix
                         float xheight,  // average values
                         float ascrise,
                         float descdrop);
void separate_underlines(TO_BLOCK* block,  // block to do
                         float gradient,   // skew angle
                         FCOORD rotation,  // inverse landscape
                         bool testing_on);  // correct orientation
void pre_associate_blobs(ICOORD page_tr,   // top right
                         TO_BLOCK* block,  // block to do
                         FCOORD rotation,  // inverse landscape
                         bool testing_on);  // correct orientation
void fit_parallel_rows(TO_BLOCK* block,   // block to do
                       float gradient,    // gradient to fit
                       FCOORD rotation,   // for drawing
                       int32_t block_edge,  // edge of block
                       bool testing_on);  // correct orientation
void fit_parallel_lms(float gradient,  // forced gradient
                      TO_ROW *row);    // row to fit
void make_baseline_spline(TO_ROW *row,       // row to fit
                          TO_BLOCK *block);  // block it came from
bool segment_baseline(           //split baseline
    TO_ROW* row,                 //row to fit
    TO_BLOCK* block,             //block it came from
    int32_t& segments,           //no of segments
    int32_t* xstarts             //coords of segments
);
double *linear_spline_baseline ( //split baseline
TO_ROW * row,                    //row to fit
TO_BLOCK * block,                //block it came from
int32_t & segments,              //no of segments
int32_t xstarts[]                //coords of segments
);
void assign_blobs_to_rows(       //find lines
    TO_BLOCK* block,             //block to do
    float* gradient,             //block skew
    int pass,                    //identification
    bool reject_misses,          //chuck big ones out
    bool make_new_rows,          //add rows for unmatched
    bool drawing_skew            //draw smoothed skew
);
                                 //find best row
OVERLAP_STATE most_overlapping_row(TO_ROW_IT* row_it,  //iterator
                                   TO_ROW*& best_row,  //output row
                                   float top,          //top of blob
                                   float bottom,       //bottom of blob
                                   float rowsize,      //max row size
                                   bool testing_blob   //test stuff
                                  );
int blob_x_order(                //sort function
                 const void *item1,  //items to compare
                 const void *item2);
int row_y_order(                 //sort function
                const void *item1,  //items to compare
                const void *item2);
int row_spacing_order(           //sort function
                      const void *item1,  //items to compare
                      const void *item2);

void mark_repeated_chars(TO_ROW *row);

}  // namespace tesseract

#endif
diff --git a/tesseract/src/textord/oldbasel.cpp b/tesseract/src/textord/oldbasel.cpp
new file mode 100644
index 00000000..f8dadc33
--- /dev/null
+++ b/tesseract/src/textord/oldbasel.cpp
@@ -0,0 +1,1698 @@
/**********************************************************************
 * File:        oldbasel.cpp  (Formerly oldbl.c)
 * Description: A re-implementation of the old baseline algorithm.
 * Author:      Ray Smith
 *
 * (C) Copyright 1993, Hewlett-Packard Ltd.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

 // Include automatically generated configuration file if running autoconf.
+#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "oldbasel.h" + +#include "ccstruct.h" +#include "statistc.h" +#include "quadlsq.h" +#include "detlinefit.h" +#include "makerow.h" +#include "drawtord.h" +#include "textord.h" +#include "tprintf.h" + +#include <vector> // for std::vector + +#include <algorithm> + +namespace tesseract { + +static BOOL_VAR (textord_really_old_xheight, false, +"Use original wiseowl xheight"); +BOOL_VAR (textord_oldbl_debug, false, "Debug old baseline generation"); +static BOOL_VAR (textord_debug_baselines, false, "Debug baseline generation"); +static BOOL_VAR (textord_oldbl_paradef, true, "Use para default mechanism"); +static BOOL_VAR (textord_oldbl_split_splines, true, "Split stepped splines"); +static BOOL_VAR (textord_oldbl_merge_parts, true, "Merge suspect partitions"); +static BOOL_VAR (oldbl_corrfix, true, "Improve correlation of heights"); +static BOOL_VAR (oldbl_xhfix, false, +"Fix bug in modes threshold for xheights"); +static BOOL_VAR(textord_ocropus_mode, false, "Make baselines for ocropus"); +static double_VAR (oldbl_xhfract, 0.4, "Fraction of est allowed in calc"); +static INT_VAR (oldbl_holed_losscount, 10, +"Max lost before fallback line used"); +static double_VAR (oldbl_dot_error_size, 1.26, "Max aspect ratio of a dot"); +static double_VAR (textord_oldbl_jumplimit, 0.15, +"X fraction for new partition"); + +#define TURNLIMIT 1 /*min size for turning point */ +#define X_HEIGHT_FRACTION 0.7 /*x-height/caps height */ +#define DESCENDER_FRACTION 0.5 /*descender/x-height */ +#define MIN_ASC_FRACTION 0.20 /*min size of ascenders */ +#define MIN_DESC_FRACTION 0.25 /*min size of descenders */ +#define MINASCRISE 2.0 /*min ascender/desc step */ +#define MAXHEIGHTVARIANCE 0.15 /*accepted variation in x-height */ +#define MAXHEIGHT 300 /*max blob height */ +#define MAXOVERLAP 0.1 /*max 10% missed overlap */ +#define MAXBADRUN 2 /*max non best for failed */ +#define HEIGHTBUCKETS 200 /* Num of buckets */ +#define 
MODENUM 10 +#define MAXPARTS 6 +#define SPLINESIZE 23 + +#define ABS(x) ((x)<0 ? (-(x)) : (x)) + +/********************************************************************** + * make_old_baselines + * + * Top level function to make baselines the old way. + **********************************************************************/ + +void Textord::make_old_baselines(TO_BLOCK* block, // block to do + bool testing_on, // correct orientation + float gradient) { + QSPLINE *prev_baseline; // baseline of previous row + TO_ROW *row; // current row + TO_ROW_IT row_it = block->get_rows(); + BLOBNBOX_IT blob_it; + + prev_baseline = nullptr; // nothing yet + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + find_textlines(block, row, 2, nullptr); + if (row->xheight <= 0 && prev_baseline != nullptr) + find_textlines(block, row, 2, prev_baseline); + if (row->xheight > 0) { // was a good one + prev_baseline = &row->baseline; + } else { + prev_baseline = nullptr; + blob_it.set_to_list(row->blob_list()); + if (textord_debug_baselines) + tprintf("Row baseline generation failed on row at (%d,%d)\n", + blob_it.data()->bounding_box().left(), + blob_it.data()->bounding_box().bottom()); + } + } + correlate_lines(block, gradient); + block->block->set_xheight(block->xheight); +} + + +/********************************************************************** + * correlate_lines + * + * Correlate the x-heights and ascender heights of a block to fill-in + * the ascender height and descender height for rows without one. + * Also fix baselines of rows without a decent fit. 
+ **********************************************************************/ + +void Textord::correlate_lines(TO_BLOCK *block, float gradient) { + int rowcount; /*no of rows to do */ + int rowindex; /*no of row */ + // iterator + TO_ROW_IT row_it = block->get_rows (); + + rowcount = row_it.length (); + if (rowcount == 0) { + //default value + block->xheight = block->line_size; + return; /*none to do */ + } + // array of ptrs + std::vector <TO_ROW *> rows(rowcount); + rowindex = 0; + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) + //make array + rows[rowindex++] = row_it.data (); + + /*try to fix bad lines */ + correlate_neighbours(block, &rows[0], rowcount); + + if (textord_really_old_xheight || textord_old_xheight) { + block->xheight = static_cast<float>(correlate_with_stats(&rows[0], rowcount, block)); + if (block->xheight <= 0) + block->xheight = block->line_size * tesseract::CCStruct::kXHeightFraction; + if (block->xheight < textord_min_xheight) + block->xheight = (float) textord_min_xheight; + } else { + compute_block_xheight(block, gradient); + } +} + + +/********************************************************************** + * correlate_neighbours + * + * Try to fix rows that had a bad spline fit by using neighbours. + **********************************************************************/ + +void Textord::correlate_neighbours(TO_BLOCK *block, // block rows are in. + TO_ROW **rows, // rows of block. + int rowcount) { // no of rows to do. 
+ TO_ROW *row; /*current row */ + int rowindex; /*no of row */ + int otherrow; /*second row */ + int upperrow; /*row above to use */ + int lowerrow; /*row below to use */ + float biggest; + + for (rowindex = 0; rowindex < rowcount; rowindex++) { + row = rows[rowindex]; /*current row */ + if (row->xheight < 0) { + /*quadratic failed */ + for (otherrow = rowindex - 2; + otherrow >= 0 + && (rows[otherrow]->xheight < 0.0 + || !row->baseline.overlap (&rows[otherrow]->baseline, + MAXOVERLAP)); otherrow--); + upperrow = otherrow; /*decent row above */ + for (otherrow = rowindex + 1; + otherrow < rowcount + && (rows[otherrow]->xheight < 0.0 + || !row->baseline.overlap (&rows[otherrow]->baseline, + MAXOVERLAP)); otherrow++); + lowerrow = otherrow; /*decent row below */ + if (upperrow >= 0) + find_textlines(block, row, 2, &rows[upperrow]->baseline); + if (row->xheight < 0 && lowerrow < rowcount) + find_textlines(block, row, 2, &rows[lowerrow]->baseline); + if (row->xheight < 0) { + if (upperrow >= 0) + find_textlines(block, row, 1, &rows[upperrow]->baseline); + else if (lowerrow < rowcount) + find_textlines(block, row, 1, &rows[lowerrow]->baseline); + } + } + } + + for (biggest = 0.0f, rowindex = 0; rowindex < rowcount; rowindex++) { + row = rows[rowindex]; /*current row */ + if (row->xheight < 0) /*linear failed */ + /*make do */ + row->xheight = -row->xheight; + biggest = std::max(biggest, row->xheight); + } +} + + +/********************************************************************** + * correlate_with_stats + * + * correlate the x-heights and ascender heights of a block to fill-in + * the ascender height and descender height for rows without one. + **********************************************************************/ + +int Textord::correlate_with_stats(TO_ROW **rows, // rows of block. + int rowcount, // no of rows to do. 
+ TO_BLOCK* block) { + TO_ROW *row; /*current row */ + int rowindex; /*no of row */ + float lineheight; /*mean x-height */ + float ascheight; /*average ascenders */ + float minascheight; /*min allowed ascheight */ + int xcount; /*no of samples for xheight */ + float fullheight; /*mean top height */ + int fullcount; /*no of samples */ + float descheight; /*mean descender drop */ + float mindescheight; /*min allowed descheight */ + int desccount; /*no of samples */ + + /*no samples */ + xcount = fullcount = desccount = 0; + lineheight = ascheight = fullheight = descheight = 0.0; + for (rowindex = 0; rowindex < rowcount; rowindex++) { + row = rows[rowindex]; /*current row */ + if (row->ascrise > 0.0) { /*got ascenders? */ + lineheight += row->xheight;/*average x-heights */ + ascheight += row->ascrise; /*average ascenders */ + xcount++; + } + else { + fullheight += row->xheight;/*assume full height */ + fullcount++; + } + if (row->descdrop < 0.0) { /*got descenders? */ + /*average descenders */ + descheight += row->descdrop; + desccount++; + } + } + + if (xcount > 0 && (!oldbl_corrfix || xcount >= fullcount)) { + lineheight /= xcount; /*average x-height */ + /*average caps height */ + fullheight = lineheight + ascheight / xcount; + /*must be decent size */ + if (fullheight < lineheight * (1 + MIN_ASC_FRACTION)) + fullheight = lineheight * (1 + MIN_ASC_FRACTION); + } + else { + fullheight /= fullcount; /*average max height */ + /*guess x-height */ + lineheight = fullheight * X_HEIGHT_FRACTION; + } + if (desccount > 0 && (!oldbl_corrfix || desccount >= rowcount / 2)) + descheight /= desccount; /*average descenders */ + else + /*guess descenders */ + descheight = -lineheight * DESCENDER_FRACTION; + + if (lineheight > 0.0f) + block->block->set_cell_over_xheight((fullheight - descheight) / lineheight); + + minascheight = lineheight * MIN_ASC_FRACTION; + mindescheight = -lineheight * MIN_DESC_FRACTION; + for (rowindex = 0; rowindex < rowcount; rowindex++) { + row = 
rows[rowindex]; /*do each row */ + row->all_caps = false; + if (row->ascrise / row->xheight < MIN_ASC_FRACTION) { + /*no ascenders */ + if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE) + && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) { + row->ascrise = fullheight - lineheight; + /*set to average */ + row->xheight = lineheight; + + } + else if (row->xheight >= fullheight * (1 - MAXHEIGHTVARIANCE) + && row->xheight <= fullheight * (1 + MAXHEIGHTVARIANCE)) { + row->ascrise = row->xheight - lineheight; + /*set to average */ + row->xheight = lineheight; + row->all_caps = true; + } + else { + row->ascrise = (fullheight - lineheight) * row->xheight + / fullheight; + /*scale it */ + row->xheight -= row->ascrise; + row->all_caps = true; + } + if (row->ascrise < minascheight) + row->ascrise = + row->xheight * ((1.0 - X_HEIGHT_FRACTION) / X_HEIGHT_FRACTION); + } + if (row->descdrop > mindescheight) { + if (row->xheight >= lineheight * (1 - MAXHEIGHTVARIANCE) + && row->xheight <= lineheight * (1 + MAXHEIGHTVARIANCE)) + /*set to average */ + row->descdrop = descheight; + else + row->descdrop = -row->xheight * DESCENDER_FRACTION; + } + } + return static_cast<int>(lineheight); //block xheight +} + + +/********************************************************************** + * find_textlines + * + * Compute the baseline for the given row. 
+ **********************************************************************/ + +void Textord::find_textlines(TO_BLOCK *block, // block row is in + TO_ROW *row, // row to do + int degree, // required approximation + QSPLINE *spline) { // starting spline + int partcount; /*no of partitions of */ + bool holed_line = false; //lost too many blobs + int bestpart; /*biggest partition */ + int partsizes[MAXPARTS]; /*no in each partition */ + int lineheight; /*guessed x-height */ + float jumplimit; /*allowed delta change */ + int blobcount; /*no of blobs on line */ + int pointcount; /*no of coords */ + int xstarts[SPLINESIZE + 1]; //segment boundaries + int segments; //no of segments + + //no of blobs in row + blobcount = row->blob_list ()->length (); + // partition no of each blob + std::vector<char> partids(blobcount); + // useful sample points + std::vector<int> xcoords(blobcount); + // useful sample points + std::vector<int> ycoords(blobcount); + // edges of blob rectangles + std::vector<TBOX> blobcoords(blobcount); + // diffs from 1st approx + std::vector<float> ydiffs(blobcount); + + lineheight = get_blob_coords(row, static_cast<int>(block->line_size), &blobcoords[0], + holed_line, blobcount); + /*limit for line change */ + jumplimit = lineheight * textord_oldbl_jumplimit; + if (jumplimit < MINASCRISE) + jumplimit = MINASCRISE; + + if (textord_oldbl_debug) { + tprintf + ("\nInput height=%g, Estimate x-height=%d pixels, jumplimit=%.2f\n", + block->line_size, lineheight, jumplimit); + } + if (holed_line) + make_holed_baseline(&blobcoords[0], blobcount, spline, &row->baseline, + row->line_m ()); + else + make_first_baseline(&blobcoords[0], blobcount, + &xcoords[0], &ycoords[0], spline, &row->baseline, jumplimit); +#ifndef GRAPHICS_DISABLED + if (textord_show_final_rows) + row->baseline.plot (to_win, ScrollView::GOLDENROD); +#endif + if (blobcount > 1) { + bestpart = partition_line(&blobcoords[0], blobcount, + &partcount, &partids[0], partsizes, + &row->baseline, jumplimit, 
&ydiffs[0]); + pointcount = partition_coords(&blobcoords[0], blobcount, + &partids[0], bestpart, &xcoords[0], &ycoords[0]); + segments = segment_spline(&blobcoords[0], blobcount, + &xcoords[0], &ycoords[0], degree, pointcount, xstarts); + if (!holed_line) { + do { + row->baseline = QSPLINE(xstarts, segments, + &xcoords[0], &ycoords[0], pointcount, degree); + } + while (textord_oldbl_split_splines + && split_stepped_spline (&row->baseline, jumplimit / 2, + &xcoords[0], xstarts, segments)); + } + find_lesser_parts(row, &blobcoords[0], blobcount, + &partids[0], partsizes, partcount, bestpart); + + } + else { + row->xheight = -1.0f; /*failed */ + row->descdrop = 0.0f; + row->ascrise = 0.0f; + } + row->baseline.extrapolate (row->line_m (), + block->block->pdblk.bounding_box ().left (), + block->block->pdblk.bounding_box ().right ()); + + if (textord_really_old_xheight) { + old_first_xheight (row, &blobcoords[0], lineheight, + blobcount, &row->baseline, jumplimit); + } else if (textord_old_xheight) { + make_first_xheight (row, &blobcoords[0], lineheight, static_cast<int>(block->line_size), + blobcount, &row->baseline, jumplimit); + } else { + compute_row_xheight(row, block->block->classify_rotation(), + row->line_m(), block->line_size); + } +} + +/********************************************************************** + * get_blob_coords + * + * Fill the blobcoords array with the coordinates of the blobs + * in the row. The return value is the first guess at the line height. 
+ **********************************************************************/ + +int get_blob_coords( //get boxes + TO_ROW* row, //row to use + int32_t lineheight, //block level + TBOX* blobcoords, //output boxes + bool& holed_line, //lost a lot of blobs + int& outcount //no of real blobs +) { + //blobs + BLOBNBOX_IT blob_it = row->blob_list (); + int blobindex; /*no along text line */ + int losscount; //lost blobs + int maxlosscount; //greatest lost blobs + /*height stat collection */ + STATS heightstat (0, MAXHEIGHT); + + if (blob_it.empty ()) + return 0; //none + maxlosscount = 0; + losscount = 0; + blob_it.mark_cycle_pt (); + blobindex = 0; + do { + blobcoords[blobindex] = box_next_pre_chopped (&blob_it); + if (blobcoords[blobindex].height () > lineheight * 0.25) + heightstat.add (blobcoords[blobindex].height (), 1); + if (blobindex == 0 + || blobcoords[blobindex].height () > lineheight * 0.25 + || blob_it.cycled_list ()) { + blobindex++; /*no of merged blobs */ + losscount = 0; + } + else { + if (blobcoords[blobindex].height () + < blobcoords[blobindex].width () * oldbl_dot_error_size + && blobcoords[blobindex].width () + < blobcoords[blobindex].height () * oldbl_dot_error_size) { + //counts as dot + blobindex++; + losscount = 0; + } + else { + losscount++; //lost it + if (losscount > maxlosscount) + //remember max + maxlosscount = losscount; + } + } + } + while (!blob_it.cycled_list ()); + + holed_line = maxlosscount > oldbl_holed_losscount; + outcount = blobindex; /*total blobs */ + + if (heightstat.get_total () > 1) + /*guess x-height */ + return static_cast<int>(heightstat.ile (0.25)); + else + return blobcoords[0].height (); +} + + +/********************************************************************** + * make_first_baseline + * + * Make the first estimate at a baseline, either by shifting + * a supplied previous spline, or by doing a piecewise linear + * approximation using all the blobs. 
 **********************************************************************/

// Behaviour, as implemented below:
//  * If a spline is supplied, has at least 3 segments and its interior
//    knots span the row to within MAXOVERLAP of the row width at each end,
//    the output is a copy of that spline moved vertically so that it
//    passes through the bottom of the first blob (evaluated at that
//    blob's right edge).
//  * Otherwise, when textord_oldbl_paradef is set, the function returns
//    without writing *baseline.
//  * Otherwise a single straight segment is fitted through the
//    bottom-centre points of all blobs (these are also written to
//    xcoords/ycoords). With 3 or more blobs, turning points of the
//    "smooth" residuals are then collected and, if the residuals span
//    more than 1.2 * jumplimit, a piecewise-linear spline split at the
//    significant turning points replaces the single segment.
void
make_first_baseline (            //initial approximation
TBOX blobcoords[],               /*blob bounding boxes */
int blobcount,                   /*no of blobcoords */
int xcoords[],                   /*coords for spline */
int ycoords[],                   /*approximator */
QSPLINE * spline,                /*initial spline */
QSPLINE * baseline,              /*output spline */
float jumplimit                  /*guess half descenders */
) {
  int leftedge;                  /*left edge of line */
  int rightedge;                 /*right edge of line */
  int blobindex;                 /*current blob */
  int segment;                   /*current segment */
  float prevy, thisy, nexty;     /*3 y coords */
  float y1, y2, y3;              /*3 smooth blobs */
  float maxmax, minmin;          /*absolute limits */
  int x2 = 0;                    /*right edge of old y3 */
  int ycount;                    /*no of ycoords in use */
  float yturns[SPLINESIZE];      /*y coords of turn pts */
  int xturns[SPLINESIZE];        /*xcoords of turn pts */
  int xstarts[SPLINESIZE + 1];
  int segments;                  //no of segments
  ICOORD shift;                  //shift of spline

  prevy = 0;
                                 /*left edge of row */
  leftedge = blobcoords[0].left ();
                                 /*right edge of line */
  rightedge = blobcoords[blobcount - 1].right ();
  if (spline == nullptr          /*no given spline */
    || spline->segments < 3      /*or trivial */
                                 /*or too non-overlap */
    || spline->xcoords[1] > leftedge + MAXOVERLAP * (rightedge - leftedge)
    || spline->xcoords[spline->segments - 1] < rightedge
    - MAXOVERLAP * (rightedge - leftedge)) {
    if (textord_oldbl_paradef)
      return;                    //use default: *baseline is left as it was
    xstarts[0] = blobcoords[0].left () - 1;
    for (blobindex = 0; blobindex < blobcount; blobindex++) {
                                 /*sample point: bottom centre of the box */
      xcoords[blobindex] = (blobcoords[blobindex].left ()
        + blobcoords[blobindex].right ()) / 2;
      ycoords[blobindex] = blobcoords[blobindex].bottom ();
    }
    xstarts[1] = blobcoords[blobcount - 1].right () + 1;
    segments = 1;                /*no of segments */

                                 /*linear */
    *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);

    if (blobcount >= 3) {
      y1 = y2 = y3 = 0.0f;
      ycount = 0;
      segment = 0;               /*no of segments */
      maxmax = minmin = 0.0f;
                                 /*residuals of the first two points */
      thisy = ycoords[0] - baseline->y (xcoords[0]);
      nexty = ycoords[1] - baseline->y (xcoords[1]);
      for (blobindex = 2; blobindex < blobcount; blobindex++) {
        prevy = thisy;           /*shift ycoords */
        thisy = nexty;
        nexty = ycoords[blobindex] - baseline->y (xcoords[blobindex]);
                                 /*middle of smooth y: within jumplimit of */
                                 /*both of its neighbours */
        if (ABS (thisy - prevy) < jumplimit && ABS (thisy - nexty) < jumplimit) {
          y1 = y2;               /*shift window */
          y2 = y3;
          y3 = thisy;            /*middle point */
          ycount++;
                                 /*local max */
          if (ycount >= 3 && ((y1 < y2 && y2 >= y3)
                                 /*local min */
            || (y1 > y2 && y2 <= y3))) {
                                 /*record it while there is room */
            if (segment < SPLINESIZE - 2) {
                                 /*turning pt */
              xturns[segment] = x2;
              yturns[segment] = y2;
              segment++;         /*no of spline segs */
            }
          }
          if (ycount == 1) {
            maxmax = minmin = y3;/*initialise limits */
          }
          else {
            if (y3 > maxmax)
              maxmax = y3;       /*biggest max */
            if (y3 < minmin)
              minmin = y3;       /*smallest min */
          }
                                 /*possible turning pt */
          x2 = blobcoords[blobindex - 1].right ();
        }
      }

      jumplimit *= 1.2f;
                                 /*must be wavy */
      if (maxmax - minmin > jumplimit) {
        ycount = segment;        /*no of turning points recorded */
                                 /*segment now counts the xstarts in use */
        for (blobindex = 0, segment = 1; blobindex < ycount;
        blobindex++) {
          if (yturns[blobindex] > minmin + jumplimit
          || yturns[blobindex] < maxmax - jumplimit) {
                                 /*significant peak */
            if (segment == 1
              || yturns[blobindex] > prevy + jumplimit
            || yturns[blobindex] < prevy - jumplimit) {
                                 /*different to previous */
              xstarts[segment] = xturns[blobindex];
              segment++;
              prevy = yturns[blobindex];
            }
                                 /*bigger max */
            else if ((prevy > minmin + jumplimit && yturns[blobindex] > prevy)
                                 /*smaller min */
            || (prevy < maxmax - jumplimit && yturns[blobindex] < prevy)) {
              xstarts[segment - 1] = xturns[blobindex];
                                 /*improved previous */
              prevy = yturns[blobindex];
            }
          }
        }
        xstarts[segment] = blobcoords[blobcount - 1].right () + 1;
        segments = segment;      /*no of segments */
                                 /*linear */
        *baseline = QSPLINE (xstarts, segments, xcoords, ycoords, blobcount, 1);
      }
    }
  }
  else {
    *baseline = *spline;         /*copy it */
                                 /*move it onto the bottom of the first blob */
    shift = ICOORD (0, static_cast<int16_t>(blobcoords[0].bottom ()
      - spline->y (blobcoords[0].right ())));
    baseline->move (shift);
  }
}


/**********************************************************************
 * make_holed_baseline
 *
 * Make the first estimate at a baseline for a row flagged as "holed".
 * A straight line of the given gradient is fitted through the
 * bottom-centre points of the blobs (DetLineFit::ConstrainedFit) and
 * stored as a single-segment spline. If a usable previous spline is
 * supplied (at least 3 segments, overlapping the row to within
 * MAXOVERLAP of its width at each end), the output is instead a copy of
 * that spline, shifted vertically so that it meets the fitted line at
 * the horizontal centre of the row.
 **********************************************************************/

void
make_holed_baseline (            //initial approximation
TBOX blobcoords[],               /*blob bounding boxes */
int blobcount,                   /*no of blobcoords */
QSPLINE * spline,                /*initial spline */
QSPLINE * baseline,              /*output spline */
float gradient                   //of line
) {
  int leftedge;                  /*left edge of line */
  int rightedge;                 /*right edge of line */
  int blobindex;                 /*current blob */
  float x;                       //centre of row
  ICOORD shift;                  //shift of spline

  tesseract::DetLineFit lms;     // straight baseline
  int32_t xstarts[2];            //straight line
  double coeffs[3];
  float c;                       //line parameter

                                 /*left edge of row */
  leftedge = blobcoords[0].left ();
                                 /*right edge of line */
  rightedge = blobcoords[blobcount - 1].right();
  for (blobindex = 0; blobindex < blobcount; blobindex++) {
                                 /*sample point: bottom centre of the box */
    lms.Add(ICOORD((blobcoords[blobindex].left() +
                    blobcoords[blobindex].right()) / 2,
                   blobcoords[blobindex].bottom()));
  }
                                 /*fit the offset c for the fixed gradient */
  lms.ConstrainedFit(gradient, &c);
  xstarts[0] = leftedge;
  xstarts[1] = rightedge;
                                 /*coefficients (0, gradient, c): no */
                                 /*quadratic term */
  coeffs[0] = 0;
  coeffs[1] = gradient;
  coeffs[2] = c;
  *baseline = QSPLINE (1, xstarts, coeffs);
  if (spline != nullptr          /*a spline was given */
    && spline->segments >= 3     /*and it is not trivial */
                                 /*and it overlaps the row well enough */
    && spline->xcoords[1] <= leftedge + MAXOVERLAP * (rightedge - leftedge)
    && spline->xcoords[spline->segments - 1] >= rightedge
    - MAXOVERLAP * (rightedge - leftedge)) {
    *baseline = *spline;         /*copy it */
    x = (leftedge + rightedge) / 2.0;
                                 /*make it meet the fitted line at x */
    shift = ICOORD (0, static_cast<int16_t>(gradient * x + c - spline->y (x)));
    baseline->move (shift);
  }
}


/**********************************************************************
 * partition_line
 *
 * Partition a row of blobs into different groups of continuous
 * y position. jumplimit specifies the max allowable limit on a jump
 * before a new partition is started.
 * The return value is the biggest partition
 *
 * The blobs are walked in two passes that both start at the index
 * returned by get_ydiffs(): first rightwards to the end of the row, then
 * leftwards to its start. partids, partsizes and *numparts are outputs.
 * When textord_oldbl_merge_parts is set, merge_oldbl_parts() is run on
 * the result before returning.
 **********************************************************************/

int
partition_line (                 //partition blobs
TBOX blobcoords[],               //bounding boxes
int blobcount,                   /*no of blobs on row */
int *numparts,                   /*number of partitions */
char partids[],                  /*partition no of each blob */
int partsizes[],                 /*no in each partition */
QSPLINE * spline,                /*curve to fit to */
float jumplimit,                 /*allowed delta change */
float ydiffs[]                   /*diff from spline */
) {
  int blobindex;                 /*no along text line */
  int bestpart;                  /*best new partition */
  int biggestpart;               /*part with most members */
  float diff;                    /*difference from line */
  int startx;                    /*index of start blob */
  float partdiffs[MAXPARTS];     /*step between parts */

  for (bestpart = 0; bestpart < MAXPARTS; bestpart++)
    partsizes[bestpart] = 0;     /*zero them all */

  startx = get_ydiffs (blobcoords, blobcount, spline, ydiffs);
  *numparts = 1;                 /*1 partition */
  bestpart = -1;                 /*first point */
  float drift = 0.0f;
  float last_delta = 0.0f;
                                 /*pass 1: from the start blob rightwards */
  for (blobindex = startx; blobindex < blobcount; blobindex++) {
                                 /*do each blob in row */
    diff = ydiffs[blobindex];    /*diff from line */
    if (textord_oldbl_debug) {
      tprintf ("%d(%d,%d), ", blobindex,
        blobcoords[blobindex].left (),
        blobcoords[blobindex].bottom ());
    }
    bestpart = choose_partition(diff, partdiffs, bestpart, jumplimit,
                                &drift, &last_delta, numparts);
                                 /*record partition */
    partids[blobindex] = bestpart;
    partsizes[bestpart]++;       /*another in it */
  }

  bestpart = -1;                 /*first point */
  drift = 0.0f;
  last_delta = 0.0f;
                                 /*the start blob is visited again below and */
                                 /*lands in partition 0 a second time */
                                 /*(choose_partition with lastpart < 0), so */
                                 /*take one off in advance */
  partsizes[0]--;                /*doing 1st pt again */
                                 /*pass 2: from the start blob leftwards */
  for (blobindex = startx; blobindex >= 0; blobindex--) {
    diff = ydiffs[blobindex];    /*diff from line */
    if (textord_oldbl_debug) {
      tprintf ("%d(%d,%d), ", blobindex,
        blobcoords[blobindex].left (),
        blobcoords[blobindex].bottom ());
    }
    bestpart = choose_partition(diff, partdiffs, bestpart, jumplimit,
                                &drift, &last_delta, numparts);
                                 /*record partition */
    partids[blobindex] = bestpart;
    partsizes[bestpart]++;       /*another in it */
  }

                                 /*largest partition; ties go to the higher */
                                 /*index because of the >= test */
  for (biggestpart = 0, bestpart = 1; bestpart < *numparts; bestpart++)
    if (partsizes[bestpart] >= partsizes[biggestpart])
      biggestpart = bestpart;    /*new biggest */
  if (textord_oldbl_merge_parts)
    merge_oldbl_parts(blobcoords,
                      blobcount,
                      partids,
                      partsizes,
                      biggestpart,
                      jumplimit);
  return biggestpart;            /*biggest partition */
}


/**********************************************************************
 * merge_oldbl_parts
 *
 * For any adjacent group of blobs in a different part, put them in the
 * main part if they fit closely to neighbours in the main part.
+ **********************************************************************/ + +void +merge_oldbl_parts ( //partition blobs +TBOX blobcoords[], //bounding boxes +int blobcount, /*no of blobs on row */ +char partids[], /*partition no of each blob */ +int partsizes[], /*no in each partition */ +int biggestpart, //major partition +float jumplimit /*allowed delta change */ +) { + bool found_one; //found a bestpart blob + bool close_one; //found was close enough + int blobindex; /*no along text line */ + int prevpart; //previous iteration + int runlength; //no in this part + float diff; /*difference from line */ + int startx; /*index of start blob */ + int test_blob; //another index + FCOORD coord; //blob coordinate + float m, c; //fitted line + QLSQ stats; //line stuff + + prevpart = biggestpart; + runlength = 0; + startx = 0; + for (blobindex = 0; blobindex < blobcount; blobindex++) { + if (partids[blobindex] != prevpart) { + // tprintf("Partition change at (%d,%d) from %d to %d after run of %d\n", + // blobcoords[blobindex].left(),blobcoords[blobindex].bottom(), + // prevpart,partids[blobindex],runlength); + if (prevpart != biggestpart && runlength > MAXBADRUN) { + stats.clear (); + for (test_blob = startx; test_blob < blobindex; test_blob++) { + coord = FCOORD ((blobcoords[test_blob].left () + + blobcoords[test_blob].right ()) / 2.0, + blobcoords[test_blob].bottom ()); + stats.add (coord.x (), coord.y ()); + } + stats.fit (1); + m = stats.get_b (); + c = stats.get_c (); + if (textord_oldbl_debug) + tprintf ("Fitted line y=%g x + %g\n", m, c); + found_one = false; + close_one = false; + for (test_blob = 1; !found_one + && (startx - test_blob >= 0 + || blobindex + test_blob <= blobcount); test_blob++) { + if (startx - test_blob >= 0 + && partids[startx - test_blob] == biggestpart) { + found_one = true; + coord = FCOORD ((blobcoords[startx - test_blob].left () + + blobcoords[startx - + test_blob].right ()) / + 2.0, + blobcoords[startx - + test_blob].bottom ()); + diff = m 
* coord.x () + c - coord.y (); + if (textord_oldbl_debug) + tprintf + ("Diff of common blob to suspect part=%g at (%g,%g)\n", + diff, coord.x (), coord.y ()); + if (diff < jumplimit && -diff < jumplimit) + close_one = true; + } + if (blobindex + test_blob <= blobcount + && partids[blobindex + test_blob - 1] == biggestpart) { + found_one = true; + coord = + FCOORD ((blobcoords[blobindex + test_blob - 1]. + left () + blobcoords[blobindex + test_blob - + 1].right ()) / 2.0, + blobcoords[blobindex + test_blob - + 1].bottom ()); + diff = m * coord.x () + c - coord.y (); + if (textord_oldbl_debug) + tprintf + ("Diff of common blob to suspect part=%g at (%g,%g)\n", + diff, coord.x (), coord.y ()); + if (diff < jumplimit && -diff < jumplimit) + close_one = true; + } + } + if (close_one) { + if (textord_oldbl_debug) + tprintf + ("Merged %d blobs back into part %d from %d starting at (%d,%d)\n", + runlength, biggestpart, prevpart, + blobcoords[startx].left (), + blobcoords[startx].bottom ()); + //switch sides + partsizes[prevpart] -= runlength; + for (test_blob = startx; test_blob < blobindex; test_blob++) + partids[test_blob] = biggestpart; + } + } + prevpart = partids[blobindex]; + runlength = 1; + startx = blobindex; + } + else + runlength++; + } +} + + +/********************************************************************** + * get_ydiffs + * + * Get the differences between the blobs and the spline, + * putting them in ydiffs. 
The return value is the index + * of the blob in the middle of the "best behaved" region + **********************************************************************/ + +int +get_ydiffs ( //evaluate differences +TBOX blobcoords[], //bounding boxes +int blobcount, /*no of blobs */ +QSPLINE * spline, /*approximating spline */ +float ydiffs[] /*output */ +) { + int blobindex; /*current blob */ + int xcentre; /*xcoord */ + int lastx; /*last xcentre */ + float diffsum; /*sum of diffs */ + float diff; /*current difference */ + float drift; /*sum of spline steps */ + float bestsum; /*smallest diffsum */ + int bestindex; /*index of bestsum */ + + diffsum = 0.0f; + bestindex = 0; + bestsum = static_cast<float>(INT32_MAX); + drift = 0.0f; + lastx = blobcoords[0].left (); + /*do each blob in row */ + for (blobindex = 0; blobindex < blobcount; blobindex++) { + /*centre of blob */ + xcentre = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1; + //step functions in spline + drift += spline->step (lastx, xcentre); + lastx = xcentre; + diff = blobcoords[blobindex].bottom (); + diff -= spline->y (xcentre); + diff += drift; + ydiffs[blobindex] = diff; /*store difference */ + if (blobindex > 2) + /*remove old one */ + diffsum -= ABS (ydiffs[blobindex - 3]); + diffsum += ABS (diff); /*add new one */ + if (blobindex >= 2 && diffsum < bestsum) { + bestsum = diffsum; /*find min sum */ + bestindex = blobindex - 1; /*middle of set */ + } + } + return bestindex; +} + + +/********************************************************************** + * choose_partition + * + * Choose a partition for the point and return the index. 
+ **********************************************************************/ + +int +choose_partition ( //select partition +float diff, /*diff from spline */ +float partdiffs[], /*diff on all parts */ +int lastpart, /*last assigned partition */ +float jumplimit, /*new part threshold */ +float* drift, +float* lastdelta, +int *partcount /*no of partitions */ +) { + int partition; /*partition no */ + int bestpart; /*best new partition */ + float bestdelta; /*best gap from a part */ + float delta; /*diff from part */ + + if (lastpart < 0) { + partdiffs[0] = diff; + lastpart = 0; /*first point */ + *drift = 0.0f; + *lastdelta = 0.0f; + } + /*adjusted diff from part */ + delta = diff - partdiffs[lastpart] - *drift; + if (textord_oldbl_debug) { + tprintf ("Diff=%.2f, Delta=%.3f, Drift=%.3f, ", diff, delta, *drift); + } + if (ABS (delta) > jumplimit / 2) { + /*delta on part 0 */ + bestdelta = diff - partdiffs[0] - *drift; + bestpart = 0; /*0 best so far */ + for (partition = 1; partition < *partcount; partition++) { + delta = diff - partdiffs[partition] - *drift; + if (ABS (delta) < ABS (bestdelta)) { + bestdelta = delta; + bestpart = partition; /*part with nearest jump */ + } + } + delta = bestdelta; + /*too far away */ + if (ABS (bestdelta) > jumplimit + && *partcount < MAXPARTS) { /*and spare part left */ + bestpart = (*partcount)++; /*best was new one */ + /*start new one */ + partdiffs[bestpart] = diff - *drift; + delta = 0.0f; + } + } + else { + bestpart = lastpart; /*best was last one */ + } + + if (bestpart == lastpart + && (ABS (delta - *lastdelta) < jumplimit / 2 + || ABS (delta) < jumplimit / 2)) + /*smooth the drift */ + *drift = (3 * *drift + delta) / 3; + *lastdelta = delta; + + if (textord_oldbl_debug) { + tprintf ("P=%d\n", bestpart); + } + + return bestpart; +} + +/********************************************************************** + * partition_coords + * + * Get the x,y coordinates of all points in the bestpart and put them + * in xcoords,ycoords. 
Return the number of points found. + **********************************************************************/ + +int +partition_coords ( //find relevant coords +TBOX blobcoords[], //bounding boxes +int blobcount, /*no of blobs in row */ +char partids[], /*partition no of each blob */ +int bestpart, /*best new partition */ +int xcoords[], /*points to work on */ +int ycoords[] /*points to work on */ +) { + int blobindex; /*no along text line */ + int pointcount; /*no of points */ + + pointcount = 0; + for (blobindex = 0; blobindex < blobcount; blobindex++) { + if (partids[blobindex] == bestpart) { + /*centre of blob */ + xcoords[pointcount] = (blobcoords[blobindex].left () + blobcoords[blobindex].right ()) >> 1; + ycoords[pointcount++] = blobcoords[blobindex].bottom (); + } + } + return pointcount; /*no of points found */ +} + + +/********************************************************************** + * segment_spline + * + * Segment the row at midpoints between maxima and minima of the x,y pairs. + * The xstarts of the segments are returned and the number found. 
+ **********************************************************************/ + +int +segment_spline ( //make xstarts +TBOX blobcoords[], //boundign boxes +int blobcount, /*no of blobs in row */ +int xcoords[], /*points to work on */ +int ycoords[], /*points to work on */ +int degree, int pointcount, /*no of points */ +int xstarts[] //result +) { + int ptindex; /*no along text line */ + int segment; /*partition no */ + int lastmin, lastmax; /*possible turn points */ + int turnpoints[SPLINESIZE]; /*good turning points */ + int turncount; /*no of turning points */ + int max_x; //max specified coord + + xstarts[0] = xcoords[0] - 1; //leftmost defined pt + max_x = xcoords[pointcount - 1] + 1; + if (degree < 2) + pointcount = 0; + turncount = 0; /*no turning points yet */ + if (pointcount > 3) { + ptindex = 1; + lastmax = lastmin = 0; /*start with first one */ + while (ptindex < pointcount - 1 && turncount < SPLINESIZE - 1) { + /*minimum */ + if (ycoords[ptindex - 1] > ycoords[ptindex] && ycoords[ptindex] <= ycoords[ptindex + 1]) { + if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT) { + if (turncount == 0 || turnpoints[turncount - 1] != lastmax) + /*new max point */ + turnpoints[turncount++] = lastmax; + lastmin = ptindex; /*latest minimum */ + } + else if (ycoords[ptindex] < ycoords[lastmin]) { + lastmin = ptindex; /*lower minimum */ + } + } + + /*maximum */ + if (ycoords[ptindex - 1] < ycoords[ptindex] && ycoords[ptindex] >= ycoords[ptindex + 1]) { + if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT) { + if (turncount == 0 || turnpoints[turncount - 1] != lastmin) + /*new min point */ + turnpoints[turncount++] = lastmin; + lastmax = ptindex; /*latest maximum */ + } + else if (ycoords[ptindex] > ycoords[lastmax]) { + lastmax = ptindex; /*higher maximum */ + } + } + ptindex++; + } + /*possible global min */ + if (ycoords[ptindex] < ycoords[lastmax] - TURNLIMIT + && (turncount == 0 || turnpoints[turncount - 1] != lastmax)) { + if (turncount < SPLINESIZE - 1) + /*2 more 
turns */ + turnpoints[turncount++] = lastmax; + if (turncount < SPLINESIZE - 1) + turnpoints[turncount++] = ptindex; + } + else if (ycoords[ptindex] > ycoords[lastmin] + TURNLIMIT + /*possible global max */ + && (turncount == 0 || turnpoints[turncount - 1] != lastmin)) { + if (turncount < SPLINESIZE - 1) + /*2 more turns */ + turnpoints[turncount++] = lastmin; + if (turncount < SPLINESIZE - 1) + turnpoints[turncount++] = ptindex; + } + else if (turncount > 0 && turnpoints[turncount - 1] == lastmin + && turncount < SPLINESIZE - 1) { + if (ycoords[ptindex] > ycoords[lastmax]) + turnpoints[turncount++] = ptindex; + else + turnpoints[turncount++] = lastmax; + } + else if (turncount > 0 && turnpoints[turncount - 1] == lastmax + && turncount < SPLINESIZE - 1) { + if (ycoords[ptindex] < ycoords[lastmin]) + turnpoints[turncount++] = ptindex; + else + turnpoints[turncount++] = lastmin; + } + } + + if (textord_oldbl_debug && turncount > 0) + tprintf ("First turn is %d at (%d,%d)\n", + turnpoints[0], xcoords[turnpoints[0]], ycoords[turnpoints[0]]); + for (segment = 1; segment < turncount; segment++) { + /*centre y coord */ + lastmax = (ycoords[turnpoints[segment - 1]] + ycoords[turnpoints[segment]]) / 2; + + /* fix alg so that it works with both rising and falling sections */ + if (ycoords[turnpoints[segment - 1]] < ycoords[turnpoints[segment]]) + /*find rising y centre */ + for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] <= lastmax; ptindex++); + else + /*find falling y centre */ + for (ptindex = turnpoints[segment - 1] + 1; ptindex < turnpoints[segment] && ycoords[ptindex + 1] >= lastmax; ptindex++); + + /*centre x */ + xstarts[segment] = (xcoords[ptindex - 1] + xcoords[ptindex] + + xcoords[turnpoints[segment - 1]] + + xcoords[turnpoints[segment]] + 2) / 4; + /*halfway between turns */ + if (textord_oldbl_debug) + tprintf ("Turn %d is %d at (%d,%d), mid pt is %d@%d, final @%d\n", + segment, turnpoints[segment], + 
xcoords[turnpoints[segment]], ycoords[turnpoints[segment]], + ptindex - 1, xcoords[ptindex - 1], xstarts[segment]); + } + + xstarts[segment] = max_x; + return segment; /*no of splines */ +} + + +/********************************************************************** + * split_stepped_spline + * + * Re-segment the spline in cases where there is a big step function. + * Return true if any were done. + **********************************************************************/ + +bool +split_stepped_spline( //make xstarts + QSPLINE* baseline, //current shot + float jumplimit, //max step function + int* xcoords, /*points to work on */ + int* xstarts, //result + int& segments //no of segments +) { + bool doneany; //return value + int segment; /*partition no */ + int startindex, centreindex, endindex; + float leftcoord, rightcoord; + int leftindex, rightindex; + float step; //spline step + + doneany = false; + startindex = 0; + for (segment = 1; segment < segments - 1; segment++) { + step = baseline->step ((xstarts[segment - 1] + xstarts[segment]) / 2.0, + (xstarts[segment] + xstarts[segment + 1]) / 2.0); + if (step < 0) + step = -step; + if (step > jumplimit) { + while (xcoords[startindex] < xstarts[segment - 1]) + startindex++; + centreindex = startindex; + while (xcoords[centreindex] < xstarts[segment]) + centreindex++; + endindex = centreindex; + while (xcoords[endindex] < xstarts[segment + 1]) + endindex++; + if (segments >= SPLINESIZE) { + if (textord_debug_baselines) + tprintf ("Too many segments to resegment spline!!\n"); + } + else if (endindex - startindex >= textord_spline_medianwin * 3) { + while (centreindex - startindex < + textord_spline_medianwin * 3 / 2) + centreindex++; + while (endindex - centreindex < + textord_spline_medianwin * 3 / 2) + centreindex--; + leftindex = (startindex + startindex + centreindex) / 3; + rightindex = (centreindex + endindex + endindex) / 3; + leftcoord = + (xcoords[startindex] * 2 + xcoords[centreindex]) / 3.0; + rightcoord = + 
(xcoords[centreindex] + xcoords[endindex] * 2) / 3.0; + while (xcoords[leftindex] > leftcoord + && leftindex - startindex > textord_spline_medianwin) + leftindex--; + while (xcoords[leftindex] < leftcoord + && centreindex - leftindex > + textord_spline_medianwin / 2) + leftindex++; + if (xcoords[leftindex] - leftcoord > + leftcoord - xcoords[leftindex - 1]) + leftindex--; + while (xcoords[rightindex] > rightcoord + && rightindex - centreindex > + textord_spline_medianwin / 2) + rightindex--; + while (xcoords[rightindex] < rightcoord + && endindex - rightindex > textord_spline_medianwin) + rightindex++; + if (xcoords[rightindex] - rightcoord > + rightcoord - xcoords[rightindex - 1]) + rightindex--; + if (textord_debug_baselines) + tprintf ("Splitting spline at %d with step %g at (%d,%d)\n", + xstarts[segment], + baseline-> + step ((xstarts[segment - 1] + + xstarts[segment]) / 2.0, + (xstarts[segment] + + xstarts[segment + 1]) / 2.0), + (xcoords[leftindex - 1] + xcoords[leftindex]) / 2, + (xcoords[rightindex - 1] + xcoords[rightindex]) / 2); + insert_spline_point (xstarts, segment, + (xcoords[leftindex - 1] + + xcoords[leftindex]) / 2, + (xcoords[rightindex - 1] + + xcoords[rightindex]) / 2, segments); + doneany = true; + } + else if (textord_debug_baselines) { + tprintf + ("Resegmenting spline failed - insufficient pts (%d,%d,%d,%d)\n", + startindex, centreindex, endindex, + (int32_t) textord_spline_medianwin); + } + } + // else tprintf("Spline step at %d is %g\n", + // xstarts[segment], + // baseline->step((xstarts[segment-1]+xstarts[segment])/2.0, + // (xstarts[segment]+xstarts[segment+1])/2.0)); + } + return doneany; +} + + +/********************************************************************** + * insert_spline_point + * + * Insert a new spline point and shuffle up the others. 
+ **********************************************************************/ + +void +insert_spline_point ( //get descenders +int xstarts[], //starts to shuffle +int segment, //insertion pt +int coord1, //coords to add +int coord2, int &segments //total segments +) { + int index; //for shuffling + + for (index = segments; index > segment; index--) + xstarts[index + 1] = xstarts[index]; + segments++; + xstarts[segment] = coord1; + xstarts[segment + 1] = coord2; +} + + +/********************************************************************** + * find_lesser_parts + * + * Average the step from the spline for the other partitions + * and find the commonest partition which has a descender. + **********************************************************************/ + +void +find_lesser_parts ( //get descenders +TO_ROW * row, //row to process +TBOX blobcoords[], //bounding boxes +int blobcount, /*no of blobs */ +char partids[], /*partition of each blob */ +int partsizes[], /*size of each part */ +int partcount, /*no of partitions */ +int bestpart /*biggest partition */ +) { + int blobindex; /*index of blob */ + int partition; /*current partition */ + int xcentre; /*centre of blob */ + int poscount; /*count of best up step */ + int negcount; /*count of best down step */ + float partsteps[MAXPARTS]; /*average step to part */ + float bestneg; /*best down step */ + int runlength; /*length of bad run */ + int biggestrun; /*biggest bad run */ + + biggestrun = 0; + for (partition = 0; partition < partcount; partition++) + partsteps[partition] = 0.0; /*zero accumulators */ + for (runlength = 0, blobindex = 0; blobindex < blobcount; blobindex++) { + xcentre = (blobcoords[blobindex].left () + + blobcoords[blobindex].right ()) >> 1; + /*in other parts */ + int part_id = + static_cast<int>(static_cast<unsigned char>(partids[blobindex])); + if (part_id != bestpart) { + runlength++; /*run of non bests */ + if (runlength > biggestrun) + biggestrun = runlength; + partsteps[part_id] += 
blobcoords[blobindex].bottom() + - row->baseline.y(xcentre); + } + else + runlength = 0; + } + if (biggestrun > MAXBADRUN) + row->xheight = -1.0f; /*failed */ + else + row->xheight = 1.0f; /*success */ + poscount = negcount = 0; + bestneg = 0.0; /*no step yet */ + for (partition = 0; partition < partcount; partition++) { + if (partition != bestpart) { + // by jetsoft divide by zero possible + if (partsizes[partition] == 0) + partsteps[partition] = 0; + else + partsteps[partition] /= partsizes[partition]; + // + + if (partsteps[partition] >= MINASCRISE + && partsizes[partition] > poscount) { + poscount = partsizes[partition]; + } + if (partsteps[partition] <= -MINASCRISE + && partsizes[partition] > negcount) { + /*ascender rise */ + bestneg = partsteps[partition]; + /*2nd most popular */ + negcount = partsizes[partition]; + } + } + } + /*average x-height */ + partsteps[bestpart] /= blobcount; + row->descdrop = bestneg; +} + + +/********************************************************************** + * old_first_xheight + * + * Makes an x-height spline by copying the baseline and shifting it. + * It estimates the x-height across the line to use as the shift. + * It also finds the ascender height if it can. 
+ **********************************************************************/ + +void +old_first_xheight ( //the wiseowl way +TO_ROW * row, /*current row */ +TBOX blobcoords[], /*blob bounding boxes */ +int initialheight, //initial guess +int blobcount, /*blobs in blobcoords */ +QSPLINE * baseline, /*established */ +float jumplimit /*min ascender height */ +) { + int blobindex; /*current blob */ + /*height statistics */ + STATS heightstat (0, MAXHEIGHT); + int height; /*height of blob */ + int xcentre; /*centre of blob */ + int lineheight; /*approx xheight */ + float ascenders; /*ascender sum */ + int asccount; /*no of ascenders */ + float xsum; /*xheight sum */ + int xcount; /*xheight count */ + float diff; /*height difference */ + + if (blobcount > 1) { + for (blobindex = 0; blobindex < blobcount; blobindex++) { + xcentre = (blobcoords[blobindex].left () + + blobcoords[blobindex].right ()) / 2; + /*height of blob */ + height = static_cast<int>(blobcoords[blobindex].top () - baseline->y (xcentre) + 0.5); + if (height > initialheight * oldbl_xhfract + && height > textord_min_xheight) + heightstat.add (height, 1); + } + if (heightstat.get_total () > 3) { + lineheight = static_cast<int>(heightstat.ile (0.25)); + if (lineheight <= 0) + lineheight = static_cast<int>(heightstat.ile (0.5)); + } + else + lineheight = initialheight; + } + else { + lineheight = static_cast<int>(blobcoords[0].top () + - baseline->y ((blobcoords[0].left () + + blobcoords[0].right ()) / 2) + + 0.5); + } + + xsum = 0.0f; + xcount = 0; + for (ascenders = 0.0f, asccount = 0, blobindex = 0; blobindex < blobcount; + blobindex++) { + xcentre = (blobcoords[blobindex].left () + + blobcoords[blobindex].right ()) / 2; + diff = blobcoords[blobindex].top () - baseline->y (xcentre); + /*is it ascender */ + if (diff > lineheight + jumplimit) { + ascenders += diff; + asccount++; /*count ascenders */ + } + else if (diff > lineheight - jumplimit) { + xsum += diff; /*mean xheight */ + xcount++; + } + } + if (xcount 
> 0) + xsum /= xcount; /*average xheight */ + else + xsum = static_cast<float>(lineheight); /*guess it */ + row->xheight *= xsum; + if (asccount > 0) + row->ascrise = ascenders / asccount - xsum; + else + row->ascrise = 0.0f; /*had none */ + if (row->xheight == 0) + row->xheight = -1.0f; +} + + +/********************************************************************** + * make_first_xheight + * + * Makes an x-height spline by copying the baseline and shifting it. + * It estimates the x-height across the line to use as the shift. + * It also finds the ascender height if it can. + **********************************************************************/ + +void +make_first_xheight ( //find xheight +TO_ROW * row, /*current row */ +TBOX blobcoords[], /*blob bounding boxes */ +int lineheight, //initial guess +int init_lineheight, //block level guess +int blobcount, /*blobs in blobcoords */ +QSPLINE * baseline, /*established */ +float jumplimit /*min ascender height */ +) { + STATS heightstat (0, HEIGHTBUCKETS); + int lefts[HEIGHTBUCKETS]; + int rights[HEIGHTBUCKETS]; + int modelist[MODENUM]; + int blobindex; + int mode_count; //blobs to count in thr + int sign_bit; + int mode_threshold; + const int kBaselineTouch = 2; // This really should change with resolution. + const int kGoodStrength = 8; // Strength of baseline-touching heights. + const float kMinHeight = 0.25; // Min fraction of lineheight to use. + + sign_bit = row->xheight > 0 ? 1 : -1; + + memset(lefts, 0, HEIGHTBUCKETS * sizeof(lefts[0])); + memset(rights, 0, HEIGHTBUCKETS * sizeof(rights[0])); + mode_count = 0; + for (blobindex = 0; blobindex < blobcount; blobindex++) { + int xcenter = (blobcoords[blobindex].left () + + blobcoords[blobindex].right ()) / 2; + float base = baseline->y(xcenter); + float bottomdiff = fabs(base - blobcoords[blobindex].bottom()); + int strength = textord_ocropus_mode && + bottomdiff <= kBaselineTouch ? 
kGoodStrength : 1; + int height = static_cast<int>(blobcoords[blobindex].top () - base + 0.5); + if (blobcoords[blobindex].height () > init_lineheight * kMinHeight) { + if (height > lineheight * oldbl_xhfract + && height > textord_min_xheight) { + heightstat.add (height, strength); + if (height < HEIGHTBUCKETS) { + if (xcenter > rights[height]) + rights[height] = xcenter; + if (xcenter > 0 && (lefts[height] == 0 || xcenter < lefts[height])) + lefts[height] = xcenter; + } + } + mode_count += strength; + } + } + + mode_threshold = static_cast<int>(blobcount * 0.1); + if (oldbl_dot_error_size > 1 || oldbl_xhfix) + mode_threshold = static_cast<int>(mode_count * 0.1); + + if (textord_oldbl_debug) { + tprintf ("blobcount=%d, mode_count=%d, mode_t=%d\n", + blobcount, mode_count, mode_threshold); + } + find_top_modes(&heightstat, HEIGHTBUCKETS, modelist, MODENUM); + if (textord_oldbl_debug) { + for (blobindex = 0; blobindex < MODENUM; blobindex++) + tprintf ("mode[%d]=%d ", blobindex, modelist[blobindex]); + tprintf ("\n"); + } + pick_x_height(row, modelist, lefts, rights, &heightstat, mode_threshold); + + if (textord_oldbl_debug) + tprintf ("Output xheight=%g\n", row->xheight); + if (row->xheight < 0 && textord_oldbl_debug) + tprintf ("warning: Row Line height < 0; %4.2f\n", row->xheight); + + if (sign_bit < 0) + row->xheight = -row->xheight; +} + +/********************************************************************** + * find_top_modes + * + * Fill the input array with the indices of the top ten modes of the + * input distribution. 
+ **********************************************************************/ + +const int kMinModeFactorOcropus = 32; +const int kMinModeFactor = 12; + +void +find_top_modes ( //get modes +STATS * stats, //stats to hack +int statnum, //no of piles +int modelist[], int modenum //no of modes to get +) { + int mode_count; + int last_i = 0; + int last_max = INT32_MAX; + int i; + int mode; + int total_max = 0; + int mode_factor = textord_ocropus_mode ? + kMinModeFactorOcropus : kMinModeFactor; + + for (mode_count = 0; mode_count < modenum; mode_count++) { + mode = 0; + for (i = 0; i < statnum; i++) { + if (stats->pile_count (i) > stats->pile_count (mode)) { + if ((stats->pile_count (i) < last_max) || + ((stats->pile_count (i) == last_max) && (i > last_i))) { + mode = i; + } + } + } + last_i = mode; + last_max = stats->pile_count (last_i); + total_max += last_max; + if (last_max <= total_max / mode_factor) + mode = 0; + modelist[mode_count] = mode; + } +} + + +/********************************************************************** + * pick_x_height + * + * Choose based on the height modes the best x height value. 
+ **********************************************************************/ + +void pick_x_height(TO_ROW * row, //row to do + int modelist[], + int lefts[], int rights[], + STATS * heightstat, + int mode_threshold) { + int x; + int y; + int z; + float ratio; + int found_one_bigger = false; + int best_x_height = 0; + int best_asc = 0; + int num_in_best; + + for (x = 0; x < MODENUM; x++) { + for (y = 0; y < MODENUM; y++) { + /* Check for two modes */ + if (modelist[x] && modelist[y] && + heightstat->pile_count (modelist[x]) > mode_threshold && + (!textord_ocropus_mode || + std::min(rights[modelist[x]], rights[modelist[y]]) > + std::max(lefts[modelist[x]], lefts[modelist[y]]))) { + ratio = static_cast<float>(modelist[y]) / static_cast<float>(modelist[x]); + if (1.2 < ratio && ratio < 1.8) { + /* Two modes found */ + best_x_height = modelist[x]; + num_in_best = heightstat->pile_count (modelist[x]); + + /* Try to get one higher */ + do { + found_one_bigger = false; + for (z = 0; z < MODENUM; z++) { + if (modelist[z] == best_x_height + 1 && + (!textord_ocropus_mode || + std::min(rights[modelist[x]], rights[modelist[y]]) > + std::max(lefts[modelist[x]], lefts[modelist[y]]))) { + ratio = static_cast<float>(modelist[y]) / static_cast<float>(modelist[z]); + if ((1.2 < ratio && ratio < 1.8) && + /* Should be half of best */ + heightstat->pile_count (modelist[z]) > + num_in_best * 0.5) { + best_x_height++; + found_one_bigger = true; + break; + } + } + } + } + while (found_one_bigger); + + /* try to get a higher ascender */ + + best_asc = modelist[y]; + num_in_best = heightstat->pile_count (modelist[y]); + + /* Try to get one higher */ + do { + found_one_bigger = false; + for (z = 0; z < MODENUM; z++) { + if (modelist[z] > best_asc && + (!textord_ocropus_mode || + std::min(rights[modelist[x]], rights[modelist[y]]) > + std::max(lefts[modelist[x]], lefts[modelist[y]]))) { + ratio = static_cast<float>(modelist[z]) / static_cast<float>(best_x_height); + if ((1.2 < ratio && ratio < 
1.8) && + /* Should be half of best */ + heightstat->pile_count (modelist[z]) > + num_in_best * 0.5) { + best_asc = modelist[z]; + found_one_bigger = true; + break; + } + } + } + } + while (found_one_bigger); + + row->xheight = static_cast<float>(best_x_height); + row->ascrise = static_cast<float>(best_asc) - best_x_height; + return; + } + } + } + } + + best_x_height = modelist[0]; /* Single Mode found */ + num_in_best = heightstat->pile_count (best_x_height); + do { + /* Try to get one higher */ + found_one_bigger = false; + for (z = 1; z < MODENUM; z++) { + /* Should be half of best */ + if ((modelist[z] == best_x_height + 1) && + (heightstat->pile_count (modelist[z]) > num_in_best * 0.5)) { + best_x_height++; + found_one_bigger = true; + break; + } + } + } + while (found_one_bigger); + + row->ascrise = 0.0f; + row->xheight = static_cast<float>(best_x_height); + if (row->xheight == 0) + row->xheight = -1.0f; +} + +} // namespace tesseract diff --git a/tesseract/src/textord/oldbasel.h b/tesseract/src/textord/oldbasel.h new file mode 100644 index 00000000..0e25df0d --- /dev/null +++ b/tesseract/src/textord/oldbasel.h @@ -0,0 +1,164 @@ +/********************************************************************** + * File: oldbasel.h (Formerly oldbl.h) + * Description: A re-implementation of the old baseline algorithm. + * Author: Ray Smith + * + * (C) Copyright 1993, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#ifndef OLDBASEL_H +#define OLDBASEL_H + +#include "params.h" +#include "blobbox.h" + +namespace tesseract { + +extern BOOL_VAR_H (textord_oldbl_debug, false, +"Debug old baseline generation"); + +int get_blob_coords( //get boxes + TO_ROW* row, //row to use + int32_t lineheight, //block level + TBOX* blobcoords, //output boxes + bool& holed_line, //lost a lot of blobs + int& outcount //no of real blobs +); +void make_first_baseline ( //initial approximation +TBOX blobcoords[], /*blob bounding boxes */ +int blobcount, /*no of blobcoords */ +int xcoords[], /*coords for spline */ +int ycoords[], /*approximator */ +QSPLINE * spline, /*initial spline */ +QSPLINE * baseline, /*output spline */ +float jumplimit /*guess half descenders */ +); +void make_holed_baseline ( //initial approximation +TBOX blobcoords[], /*blob bounding boxes */ +int blobcount, /*no of blobcoords */ +QSPLINE * spline, /*initial spline */ +QSPLINE * baseline, /*output spline */ +float gradient //of line +); +int partition_line ( //partition blobs +TBOX blobcoords[], //bounding boxes +int blobcount, /*no of blobs on row */ +int *numparts, /*number of partitions */ +char partids[], /*partition no of each blob */ +int partsizes[], /*no in each partition */ +QSPLINE * spline, /*curve to fit to */ +float jumplimit, /*allowed delta change */ +float ydiffs[] /*diff from spline */ +); +void merge_oldbl_parts ( //partition blobs +TBOX blobcoords[], //bounding boxes +int blobcount, /*no of blobs on row */ +char partids[], /*partition no of each blob */ +int partsizes[], /*no in each partition */ +int biggestpart, //major partition +float jumplimit /*allowed delta change */ +); +int get_ydiffs ( //evaluate differences +TBOX blobcoords[], //bounding boxes +int blobcount, /*no of blobs */ +QSPLINE * spline, /*approximating spline */ +float ydiffs[] /*output */ +); +int choose_partition ( //select partition +float diff, /*diff from 
spline */ +float partdiffs[], /*diff on all parts */ +int lastpart, /*last assigned partition */ +float jumplimit, /*new part threshold */ +float* drift, +float* last_delta, +int *partcount /*no of partitions */ +); +int partition_coords ( //find relevant coords +TBOX blobcoords[], //bounding boxes +int blobcount, /*no of blobs in row */ +char partids[], /*partition no of each blob */ +int bestpart, /*best new partition */ +int xcoords[], /*points to work on */ +int ycoords[] /*points to work on */ +); +int segment_spline ( //make xstarts +TBOX blobcoords[], //boundign boxes +int blobcount, /*no of blobs in row */ +int xcoords[], /*points to work on */ +int ycoords[], /*points to work on */ +int degree, int pointcount, /*no of points */ +int xstarts[] //result +); +bool split_stepped_spline( //make xstarts + QSPLINE* baseline, //current shot + float jumplimit, //max step function + int* xcoords, /*points to work on */ + int* xstarts, //result + int& segments //no of segments +); +void insert_spline_point ( //get descenders +int xstarts[], //starts to shuffle +int segment, //insertion pt +int coord1, //coords to add +int coord2, int &segments //total segments +); +void find_lesser_parts ( //get descenders +TO_ROW * row, //row to process +TBOX blobcoords[], //bounding boxes +int blobcount, /*no of blobs */ +char partids[], /*partition of each blob */ +int partsizes[], /*size of each part */ +int partcount, /*no of partitions */ +int bestpart /*biggest partition */ +); + +void old_first_xheight ( //the wiseowl way +TO_ROW * row, /*current row */ +TBOX blobcoords[], /*blob bounding boxes */ +int initialheight, //initial guess +int blobcount, /*blobs in blobcoords */ +QSPLINE * baseline, /*established */ +float jumplimit /*min ascender height */ +); + +void make_first_xheight ( //find xheight +TO_ROW * row, /*current row */ +TBOX blobcoords[], /*blob bounding boxes */ +int lineheight, //initial guess +int init_lineheight, //block level guess +int blobcount, /*blobs in 
blobcoords */ +QSPLINE * baseline, /*established */ +float jumplimit /*min ascender height */ +); + +int *make_height_array ( //get array of heights +TBOX blobcoords[], /*blob bounding boxes */ +int blobcount, /*blobs in blobcoords */ +QSPLINE * baseline /*established */ +); + +void find_top_modes ( //get modes +STATS * stats, //stats to hack +int statnum, //no of piles +int modelist[], int modenum //no of modes to get +); + +void pick_x_height(TO_ROW * row, //row to do +int modelist[], +int lefts[], int rights[], +STATS * heightstat, +int mode_threshold); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/textord/pithsync.cpp b/tesseract/src/textord/pithsync.cpp new file mode 100644 index 00000000..462f0b3c --- /dev/null +++ b/tesseract/src/textord/pithsync.cpp @@ -0,0 +1,693 @@ +/********************************************************************** + * File: pithsync.cpp (Formerly pitsync2.c) + * Description: Code to find the optimum fixed pitch segmentation of some blobs. + * Author: Ray Smith + * + * (C) Copyright 1992, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include "pithsync.h" + +#include "makerow.h" +#include "pitsync1.h" +#include "topitch.h" +#include "tprintf.h" + +#include <cmath> +#include <cfloat> // for FLT_MAX +#include <vector> // for std::vector + +namespace tesseract { + +/********************************************************************** + * FPCUTPT::setup + * + * Constructor to make a new FPCUTPT. + **********************************************************************/ + +void FPCUTPT::setup( //constructor + FPCUTPT *cutpts, //predecessors + int16_t array_origin, //start coord + STATS *projection, //vertical occupation + int16_t zero_count, //official zero + int16_t pitch, //proposed pitch + int16_t x, //position + int16_t offset //dist to gap + ) { + //half of pitch + int16_t half_pitch = pitch / 2 - 1; + uint32_t lead_flag; //new flag + int32_t ind; //current position + + if (half_pitch > 31) + half_pitch = 31; + else if (half_pitch < 0) + half_pitch = 0; + lead_flag = 1 << half_pitch; + + pred = nullptr; + mean_sum = 0; + sq_sum = offset * offset; + cost = sq_sum; + faked = false; + terminal = false; + fake_count = 0; + xpos = x; + region_index = 0; + mid_cuts = 0; + if (x == array_origin) { + back_balance = 0; + fwd_balance = 0; + for (ind = 0; ind <= half_pitch; ind++) { + fwd_balance >>= 1; + if (projection->pile_count (ind) > zero_count) + fwd_balance |= lead_flag; + } + } + else { + back_balance = cutpts[x - 1 - array_origin].back_balance << 1; + back_balance &= lead_flag + (lead_flag - 1); + if (projection->pile_count (x) > zero_count) + back_balance |= 1; + fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1; + if (projection->pile_count (x + half_pitch) > zero_count) + fwd_balance |= lead_flag; + } +} + + +/********************************************************************** + * FPCUTPT::assign + * + * Constructor to make a new FPCUTPT. 
+ **********************************************************************/ + +void FPCUTPT::assign( //constructor + FPCUTPT* cutpts, //predecessors + int16_t array_origin, //start coord + int16_t x, //position + bool faking, //faking this one + bool mid_cut, //cheap cut. + int16_t offset, //dist to gap + STATS* projection, //vertical occupation + float projection_scale, //scaling + int16_t zero_count, //official zero + int16_t pitch, //proposed pitch + int16_t pitch_error //allowed tolerance +) { + int index; //test index + int balance_index; //for balance factor + int16_t balance_count; //ding factor + int16_t r_index; //test cut number + FPCUTPT *segpt; //segment point + int32_t dist; //from prev segment + double sq_dist; //squared distance + double mean; //mean pitch + double total; //total dists + double factor; //cost function + //half of pitch + int16_t half_pitch = pitch / 2 - 1; + uint32_t lead_flag; //new flag + + if (half_pitch > 31) + half_pitch = 31; + else if (half_pitch < 0) + half_pitch = 0; + lead_flag = 1 << half_pitch; + + back_balance = cutpts[x - 1 - array_origin].back_balance << 1; + back_balance &= lead_flag + (lead_flag - 1); + if (projection->pile_count (x) > zero_count) + back_balance |= 1; + fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1; + if (projection->pile_count (x + half_pitch) > zero_count) + fwd_balance |= lead_flag; + + xpos = x; + cost = FLT_MAX; + pred = nullptr; + faked = faking; + terminal = false; + region_index = 0; + fake_count = INT16_MAX; + for (index = x - pitch - pitch_error; index <= x - pitch + pitch_error; + index++) { + if (index >= array_origin) { + segpt = &cutpts[index - array_origin]; + dist = x - segpt->xpos; + if (!segpt->terminal && segpt->fake_count < INT16_MAX) { + balance_count = 0; + if (textord_balance_factor > 0) { + if (textord_fast_pitch_test) { + lead_flag = back_balance ^ segpt->fwd_balance; + balance_count = 0; + while (lead_flag != 0) { + balance_count++; + lead_flag &= lead_flag - 1; 
+ } + } + else { + for (balance_index = 0; + index + balance_index < x - balance_index; + balance_index++) + balance_count += + (projection->pile_count (index + balance_index) <= + zero_count) ^ (projection->pile_count (x - + balance_index) + <= zero_count); + } + balance_count = + static_cast<int16_t>(balance_count * textord_balance_factor / + projection_scale); + } + r_index = segpt->region_index + 1; + total = segpt->mean_sum + dist; + balance_count += offset; + sq_dist = + dist * dist + segpt->sq_sum + balance_count * balance_count; + mean = total / r_index; + factor = mean - pitch; + factor *= factor; + factor += sq_dist / (r_index) - mean * mean; + if (factor < cost && segpt->fake_count + faked <= fake_count) { + cost = factor; //find least cost + pred = segpt; //save path + mean_sum = total; + sq_sum = sq_dist; + fake_count = segpt->fake_count + faked; + mid_cuts = segpt->mid_cuts + mid_cut; + region_index = r_index; + } + } + } + } +} + + +/********************************************************************** + * FPCUTPT::assign_cheap + * + * Constructor to make a new FPCUTPT on the cheap. + **********************************************************************/ + +void FPCUTPT::assign_cheap( //constructor + FPCUTPT *cutpts, //predecessors + int16_t array_origin, //start coord + int16_t x, //position + bool faking, //faking this one + bool mid_cut, //cheap cut. 
+ int16_t offset, //dist to gap + STATS *projection, //vertical occupation + float projection_scale, //scaling + int16_t zero_count, //official zero + int16_t pitch, //proposed pitch + int16_t pitch_error //allowed tolerance + ) { + int index; //test index + int16_t balance_count; //ding factor + int16_t r_index; //test cut number + FPCUTPT *segpt; //segment point + int32_t dist; //from prev segment + double sq_dist; //squared distance + double mean; //mean pitch + double total; //total dists + double factor; //cost function + //half of pitch + int16_t half_pitch = pitch / 2 - 1; + uint32_t lead_flag; //new flag + + if (half_pitch > 31) + half_pitch = 31; + else if (half_pitch < 0) + half_pitch = 0; + lead_flag = 1 << half_pitch; + + back_balance = cutpts[x - 1 - array_origin].back_balance << 1; + back_balance &= lead_flag + (lead_flag - 1); + if (projection->pile_count (x) > zero_count) + back_balance |= 1; + fwd_balance = cutpts[x - 1 - array_origin].fwd_balance >> 1; + if (projection->pile_count (x + half_pitch) > zero_count) + fwd_balance |= lead_flag; + + xpos = x; + cost = FLT_MAX; + pred = nullptr; + faked = faking; + terminal = false; + region_index = 0; + fake_count = INT16_MAX; + index = x - pitch; + if (index >= array_origin) { + segpt = &cutpts[index - array_origin]; + dist = x - segpt->xpos; + if (!segpt->terminal && segpt->fake_count < INT16_MAX) { + balance_count = 0; + if (textord_balance_factor > 0) { + lead_flag = back_balance ^ segpt->fwd_balance; + balance_count = 0; + while (lead_flag != 0) { + balance_count++; + lead_flag &= lead_flag - 1; + } + balance_count = static_cast<int16_t>(balance_count * textord_balance_factor + / projection_scale); + } + r_index = segpt->region_index + 1; + total = segpt->mean_sum + dist; + balance_count += offset; + sq_dist = + dist * dist + segpt->sq_sum + balance_count * balance_count; + mean = total / r_index; + factor = mean - pitch; + factor *= factor; + factor += sq_dist / (r_index) - mean * mean; + cost = 
factor; //find least cost + pred = segpt; //save path + mean_sum = total; + sq_sum = sq_dist; + fake_count = segpt->fake_count + faked; + mid_cuts = segpt->mid_cuts + mid_cut; + region_index = r_index; + } + } +} + + +/********************************************************************** + * check_pitch_sync + * + * Construct the lattice of possible segmentation points and choose the + * optimal path. Return the optimal path only. + * The return value is a measure of goodness of the sync. + **********************************************************************/ + +double check_pitch_sync2( //find segmentation + BLOBNBOX_IT *blob_it, //blobs to do + int16_t blob_count, //no of blobs + int16_t pitch, //pitch estimate + int16_t pitch_error, //tolerance + STATS *projection, //vertical + int16_t projection_left, //edges //scale factor + int16_t projection_right, + float projection_scale, + int16_t &occupation_count, //no of occupied cells + FPSEGPT_LIST *seg_list, //output list + int16_t start, //start of good range + int16_t end //end of good range + ) { + bool faking; //illegal cut pt + bool mid_cut; //cheap cut pt. 
+ int16_t x; //current coord + int16_t blob_index; //blob number + int16_t left_edge; //of word + int16_t right_edge; //of word + int16_t array_origin; //x coord of array + int16_t offset; //dist to legal area + int16_t zero_count; //projection zero + int16_t best_left_x = 0; //for equals + int16_t best_right_x = 0; //right edge + TBOX this_box; //bounding box + TBOX next_box; //box of next blob + FPSEGPT *segpt; //segment point + double best_cost; //best path + double mean_sum; //computes result + FPCUTPT *best_end; //end of best path + int16_t best_fake; //best fake level + int16_t best_count; //no of cuts + BLOBNBOX_IT this_it; //copy iterator + FPSEGPT_IT seg_it = seg_list; //output iterator + + // tprintf("Computing sync on word of %d blobs with pitch %d\n", + // blob_count, pitch); + // if (blob_count==8 && pitch==27) + // projection->print(stdout,true); + zero_count = 0; + if (pitch < 3) + pitch = 3; //nothing ludicrous + if ((pitch - 3) / 2 < pitch_error) + pitch_error = (pitch - 3) / 2; + this_it = *blob_it; + this_box = box_next (&this_it);//get box + // left_edge=this_box.left(); //left of word + // right_edge=this_box.right(); + // for (blob_index=1;blob_index<blob_count;blob_index++) + // { + // this_box=box_next(&this_it); + // if (this_box.right()>right_edge) + // right_edge=this_box.right(); + // } + for (left_edge = projection_left; projection->pile_count (left_edge) == 0 + && left_edge < projection_right; left_edge++); + for (right_edge = projection_right; projection->pile_count (right_edge) == 0 + && right_edge > left_edge; right_edge--); + ASSERT_HOST (right_edge >= left_edge); + if (pitsync_linear_version >= 4) + return check_pitch_sync3 (projection_left, projection_right, zero_count, + pitch, pitch_error, projection, + projection_scale, occupation_count, seg_list, + start, end); + array_origin = left_edge - pitch; + // array of points + std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1); + for (x = array_origin; x < left_edge; 
x++) + //free cuts + cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, + zero_count, pitch, x, 0); + for (offset = 0; offset <= pitch_error; offset++, x++) + //not quite free + cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, + zero_count, pitch, x, offset); + + this_it = *blob_it; + best_cost = FLT_MAX; + best_end = nullptr; + this_box = box_next (&this_it);//first box + next_box = box_next (&this_it);//second box + blob_index = 1; + while (x < right_edge - pitch_error) { + if (x > this_box.right () + pitch_error && blob_index < blob_count) { + this_box = next_box; + next_box = box_next (&this_it); + blob_index++; + } + faking = false; + mid_cut = false; + if (x <= this_box.left ()) + offset = 0; + else if (x <= this_box.left () + pitch_error) + offset = x - this_box.left (); + else if (x >= this_box.right ()) + offset = 0; + else if (x >= next_box.left () && blob_index < blob_count) { + offset = x - next_box.left (); + if (this_box.right () - x < offset) + offset = this_box.right () - x; + } + else if (x >= this_box.right () - pitch_error) + offset = this_box.right () - x; + else if (x - this_box.left () > pitch * pitsync_joined_edge + && this_box.right () - x > pitch * pitsync_joined_edge) { + mid_cut = true; + offset = 0; + } + else { + faking = true; + offset = projection->pile_count (x); + } + cutpts[x - array_origin].assign (&cutpts[0], array_origin, x, + faking, mid_cut, offset, projection, + projection_scale, zero_count, pitch, + pitch_error); + x++; + } + + best_fake = INT16_MAX; + best_cost = INT32_MAX; + best_count = INT16_MAX; + while (x < right_edge + pitch) { + offset = x < right_edge ? 
right_edge - x : 0; + cutpts[x - array_origin].assign (&cutpts[0], array_origin, x, + false, false, offset, projection, + projection_scale, zero_count, pitch, + pitch_error); + cutpts[x - array_origin].terminal = true; + if (cutpts[x - array_origin].index () + + cutpts[x - array_origin].fake_count <= best_count + best_fake) { + if (cutpts[x - array_origin].fake_count < best_fake + || (cutpts[x - array_origin].fake_count == best_fake + && cutpts[x - array_origin].cost_function () < best_cost)) { + best_fake = cutpts[x - array_origin].fake_count; + best_cost = cutpts[x - array_origin].cost_function (); + best_left_x = x; + best_right_x = x; + best_count = cutpts[x - array_origin].index (); + } + else if (cutpts[x - array_origin].fake_count == best_fake + && x == best_right_x + 1 + && cutpts[x - array_origin].cost_function () == best_cost) { + //exactly equal + best_right_x = x; + } + } + x++; + } + ASSERT_HOST (best_fake < INT16_MAX); + + best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin]; + if (this_box.right () == textord_test_x + && this_box.top () == textord_test_y) { + for (x = left_edge - pitch; x < right_edge + pitch; x++) { + tprintf ("x=%d, C=%g, s=%g, sq=%g, prev=%d\n", + x, cutpts[x - array_origin].cost_function (), + cutpts[x - array_origin].sum (), + cutpts[x - array_origin].squares (), + cutpts[x - array_origin].previous ()->position ()); + } + } + occupation_count = -1; + do { + for (x = best_end->position () - pitch + pitch_error; + x < best_end->position () - pitch_error + && projection->pile_count (x) == 0; x++); + if (x < best_end->position () - pitch_error) + occupation_count++; + //copy it + segpt = new FPSEGPT (best_end); + seg_it.add_before_then_move (segpt); + best_end = best_end->previous (); + } + while (best_end != nullptr); + seg_it.move_to_last (); + mean_sum = seg_it.data ()->sum (); + mean_sum = mean_sum * mean_sum / best_count; + if (seg_it.data ()->squares () - mean_sum < 0) + tprintf ("Impossible sqsum=%g, mean=%g, 
total=%d\n", + seg_it.data ()->squares (), seg_it.data ()->sum (), best_count); + // tprintf("blob_count=%d, pitch=%d, sync=%g, occ=%d\n", + // blob_count,pitch,seg_it.data()->squares()-mean_sum, + // occupation_count); + return seg_it.data ()->squares () - mean_sum; +} + + +/********************************************************************** + * check_pitch_sync + * + * Construct the lattice of possible segmentation points and choose the + * optimal path. Return the optimal path only. + * The return value is a measure of goodness of the sync. + **********************************************************************/ + +double check_pitch_sync3( //find segmentation + int16_t projection_left, //edges //to be considered 0 + int16_t projection_right, + int16_t zero_count, + int16_t pitch, //pitch estimate + int16_t pitch_error, //tolerance + STATS *projection, //vertical + float projection_scale, //scale factor + int16_t &occupation_count, //no of occupied cells + FPSEGPT_LIST *seg_list, //output list + int16_t start, //start of good range + int16_t end //end of good range + ) { + bool faking; //illegal cut pt + bool mid_cut; //cheap cut pt. 
+ int16_t left_edge; //of word + int16_t right_edge; //of word + int16_t x; //current coord + int16_t array_origin; //x coord of array + int16_t offset; //dist to legal area + int16_t projection_offset; //from scaled projection + int16_t prev_zero; //previous zero dist + int16_t next_zero; //next zero dist + int16_t zero_offset; //scan window + int16_t best_left_x = 0; //for equals + int16_t best_right_x = 0; //right edge + FPSEGPT *segpt; //segment point + int minindex; //next input position + int test_index; //index to mins + double best_cost; //best path + double mean_sum; //computes result + FPCUTPT *best_end; //end of best path + int16_t best_fake; //best fake level + int16_t best_count; //no of cuts + FPSEGPT_IT seg_it = seg_list; //output iterator + + end = (end - start) % pitch; + if (pitch < 3) + pitch = 3; //nothing ludicrous + if ((pitch - 3) / 2 < pitch_error) + pitch_error = (pitch - 3) / 2; + //min dist of zero + zero_offset = static_cast<int16_t>(pitch * pitsync_joined_edge); + for (left_edge = projection_left; projection->pile_count (left_edge) == 0 + && left_edge < projection_right; left_edge++); + for (right_edge = projection_right; projection->pile_count (right_edge) == 0 + && right_edge > left_edge; right_edge--); + array_origin = left_edge - pitch; + // array of points + std::vector<FPCUTPT> cutpts(right_edge - left_edge + pitch * 2 + 1); + // local min results + std::vector<bool> mins(pitch_error * 2 + 1); + for (x = array_origin; x < left_edge; x++) + //free cuts + cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, + zero_count, pitch, x, 0); + prev_zero = left_edge - 1; + for (offset = 0; offset <= pitch_error; offset++, x++) + //not quite free + cutpts[x - array_origin].setup(&cutpts[0], array_origin, projection, + zero_count, pitch, x, offset); + + best_cost = FLT_MAX; + best_end = nullptr; + for (offset = -pitch_error, minindex = 0; offset < pitch_error; + offset++, minindex++) + mins[minindex] = projection->local_min (x 
+ offset); + next_zero = x + zero_offset + 1; + for (offset = next_zero - 1; offset >= x; offset--) { + if (projection->pile_count (offset) <= zero_count) { + next_zero = offset; + break; + } + } + while (x < right_edge - pitch_error) { + mins[minindex] = projection->local_min (x + pitch_error); + minindex++; + if (minindex > pitch_error * 2) + minindex = 0; + faking = false; + mid_cut = false; + offset = 0; + if (projection->pile_count (x) <= zero_count) { + prev_zero = x; + } + else { + for (offset = 1; offset <= pitch_error; offset++) + if (projection->pile_count (x + offset) <= zero_count + || projection->pile_count (x - offset) <= zero_count) + break; + } + if (offset > pitch_error) { + if (x - prev_zero > zero_offset && next_zero - x > zero_offset) { + for (offset = 0; offset <= pitch_error; offset++) { + test_index = minindex + pitch_error + offset; + if (test_index > pitch_error * 2) + test_index -= pitch_error * 2 + 1; + if (mins[test_index]) + break; + test_index = minindex + pitch_error - offset; + if (test_index > pitch_error * 2) + test_index -= pitch_error * 2 + 1; + if (mins[test_index]) + break; + } + } + if (offset > pitch_error) { + offset = projection->pile_count (x); + faking = true; + } + else { + projection_offset = + static_cast<int16_t>(projection->pile_count (x) / projection_scale); + if (projection_offset > offset) + offset = projection_offset; + mid_cut = true; + } + } + if ((start == 0 && end == 0) + || !textord_fast_pitch_test + || (x - projection_left - start) % pitch <= end) + cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, + faking, mid_cut, offset, projection, + projection_scale, zero_count, pitch, + pitch_error); + else + cutpts[x - array_origin].assign_cheap(&cutpts[0], array_origin, x, + faking, mid_cut, offset, + projection, projection_scale, + zero_count, pitch, + pitch_error); + x++; + if (next_zero < x || next_zero == x + zero_offset) + next_zero = x + zero_offset + 1; + if (projection->pile_count (x + 
zero_offset) <= zero_count) + next_zero = x + zero_offset; + } + + best_fake = INT16_MAX; + best_cost = INT32_MAX; + best_count = INT16_MAX; + while (x < right_edge + pitch) { + offset = x < right_edge ? right_edge - x : 0; + cutpts[x - array_origin].assign(&cutpts[0], array_origin, x, + false, false, offset, projection, + projection_scale, zero_count, pitch, + pitch_error); + cutpts[x - array_origin].terminal = true; + if (cutpts[x - array_origin].index () + + cutpts[x - array_origin].fake_count <= best_count + best_fake) { + if (cutpts[x - array_origin].fake_count < best_fake + || (cutpts[x - array_origin].fake_count == best_fake + && cutpts[x - array_origin].cost_function () < best_cost)) { + best_fake = cutpts[x - array_origin].fake_count; + best_cost = cutpts[x - array_origin].cost_function (); + best_left_x = x; + best_right_x = x; + best_count = cutpts[x - array_origin].index (); + } + else if (cutpts[x - array_origin].fake_count == best_fake + && x == best_right_x + 1 + && cutpts[x - array_origin].cost_function () == best_cost) { + //exactly equal + best_right_x = x; + } + } + x++; + } + ASSERT_HOST (best_fake < INT16_MAX); + + best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin]; + // for (x=left_edge-pitch;x<right_edge+pitch;x++) + // { + // tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n", + // x,cutpts[x-array_origin].cost_function(), + // cutpts[x-array_origin].sum(), + // cutpts[x-array_origin].squares(), + // cutpts[x-array_origin].previous()->position()); + // } + occupation_count = -1; + do { + for (x = best_end->position () - pitch + pitch_error; + x < best_end->position () - pitch_error + && projection->pile_count (x) == 0; x++); + if (x < best_end->position () - pitch_error) + occupation_count++; + //copy it + segpt = new FPSEGPT (best_end); + seg_it.add_before_then_move (segpt); + best_end = best_end->previous (); + } + while (best_end != nullptr); + seg_it.move_to_last (); + mean_sum = seg_it.data ()->sum (); + mean_sum = mean_sum * 
mean_sum / best_count; + if (seg_it.data ()->squares () - mean_sum < 0) + tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n", + seg_it.data ()->squares (), seg_it.data ()->sum (), best_count); + return seg_it.data ()->squares () - mean_sum; +} + +} // namespace tesseract diff --git a/tesseract/src/textord/pithsync.h b/tesseract/src/textord/pithsync.h new file mode 100644 index 00000000..f6309f19 --- /dev/null +++ b/tesseract/src/textord/pithsync.h @@ -0,0 +1,136 @@ +/********************************************************************** + * File: pithsync.h (Formerly pitsync2.h) + * Description: Code to find the optimum fixed pitch segmentation of some blobs. + * Author: Ray Smith + * + * (C) Copyright 1992, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#ifndef PITHSYNC_H +#define PITHSYNC_H + +#include "blobbox.h" +#include "params.h" +#include "statistc.h" + +namespace tesseract { + +class FPSEGPT_LIST; + +class FPCUTPT +{ + public: + FPCUTPT() = default; + void setup ( //start of cut + FPCUTPT cutpts[], //predecessors + int16_t array_origin, //start coord + STATS * projection, //occupation + int16_t zero_count, //official zero + int16_t pitch, //proposed pitch + int16_t x, //position + int16_t offset); //dist to gap + + void assign( //evaluate cut + FPCUTPT cutpts[], //predecessors + int16_t array_origin, //start coord + int16_t x, //position + bool faking, //faking this one + bool mid_cut, //doing free cut + int16_t offset, //extra cost dist + STATS* projection, //occupation + float projection_scale, //scaling + int16_t zero_count, //official zero + int16_t pitch, //proposed pitch + int16_t pitch_error); //allowed tolerance + + void assign_cheap ( //evaluate cut + FPCUTPT cutpts[], //predecessors + int16_t array_origin, //start coord + int16_t x, //position + bool faking, //faking this one + bool mid_cut, //doing free cut + int16_t offset, //extra cost dist + STATS * projection, //occupation + float projection_scale, //scaling + int16_t zero_count, //official zero + int16_t pitch, //proposed pitch + int16_t pitch_error); //allowed tolerance + + int32_t position() { // access func + return xpos; + } + double cost_function() { + return cost; + } + double squares() { + return sq_sum; + } + double sum() { + return mean_sum; + } + FPCUTPT *previous() { + return pred; + } + int16_t cheap_cuts() const { //no of mi cuts + return mid_cuts; + } + int16_t index() const { + return region_index; + } + + bool faked; //faked split point + bool terminal; //successful end + int16_t fake_count; //total fakes to here + + private: + int16_t region_index; //cut serial number + int16_t mid_cuts; //no of cheap cuts + int32_t xpos; //location + uint32_t 
back_balance; //proj backwards + uint32_t fwd_balance; //proj forwards + FPCUTPT *pred; //optimal previous + double mean_sum; //mean so far + double sq_sum; //summed distsances + double cost; //cost function +}; +double check_pitch_sync2( //find segmentation + BLOBNBOX_IT *blob_it, //blobs to do + int16_t blob_count, //no of blobs + int16_t pitch, //pitch estimate + int16_t pitch_error, //tolerance + STATS *projection, //vertical + int16_t projection_left, //edges //scale factor + int16_t projection_right, + float projection_scale, + int16_t &occupation_count, //no of occupied cells + FPSEGPT_LIST *seg_list, //output list + int16_t start, //start of good range + int16_t end //end of good range + ); +double check_pitch_sync3( //find segmentation + int16_t projection_left, //edges //to be considered 0 + int16_t projection_right, + int16_t zero_count, + int16_t pitch, //pitch estimate + int16_t pitch_error, //tolerance + STATS *projection, //vertical + float projection_scale, //scale factor + int16_t &occupation_count, //no of occupied cells + FPSEGPT_LIST *seg_list, //output list + int16_t start, //start of good range + int16_t end //end of good range + ); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/textord/pitsync1.cpp b/tesseract/src/textord/pitsync1.cpp new file mode 100644 index 00000000..ca46dc84 --- /dev/null +++ b/tesseract/src/textord/pitsync1.cpp @@ -0,0 +1,422 @@ +/********************************************************************** + * File: pitsync1.cpp (Formerly pitsync.c) + * Description: Code to find the optimum fixed pitch segmentation of some blobs. + * Author: Ray Smith + * + * (C) Copyright 1992, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "pitsync1.h" + +#include <cfloat> // for FLT_MAX +#include <cmath> + +namespace tesseract { + +ELISTIZE (FPSEGPT) CLISTIZE (FPSEGPT_LIST) + +INT_VAR(pitsync_linear_version, 6, "Use new fast algorithm"); +double_VAR(pitsync_joined_edge, 0.75, "Dist inside big blob for chopping"); +double_VAR(pitsync_offset_freecut_fraction, 0.25, + "Fraction of cut for free cuts"); +INT_VAR(pitsync_fake_depth, 1, "Max advance fake generation"); + +/********************************************************************** + * FPSEGPT::FPSEGPT + * + * Constructor to make a new FPSEGPT. + * The existing FPCUTPT is duplicated. + **********************************************************************/ + +FPSEGPT::FPSEGPT( //constructor + FPCUTPT *cutpt //create from new form + ) { + pred = nullptr; + mean_sum = cutpt->sum (); + sq_sum = cutpt->squares (); + cost = cutpt->cost_function (); + faked = cutpt->faked; + terminal = cutpt->terminal; + fake_count = cutpt->fake_count; + xpos = cutpt->position (); + mid_cuts = cutpt->cheap_cuts (); +} + + +/********************************************************************** + * FPSEGPT::FPSEGPT + * + * Constructor to make a new FPSEGPT. 
+ **********************************************************************/ + +FPSEGPT::FPSEGPT ( //constructor +int16_t x //position +):xpos (x) { + pred = nullptr; + mean_sum = 0; + sq_sum = 0; + cost = 0; + faked = false; + terminal = false; + fake_count = 0; + mid_cuts = 0; +} + + +/********************************************************************** + * FPSEGPT::FPSEGPT + * + * Constructor to make a new FPSEGPT. + **********************************************************************/ + +FPSEGPT::FPSEGPT ( //constructor +int16_t x, //position +bool faking, //faking this one +int16_t offset, //dist to gap +int16_t region_index, //segment number +int16_t pitch, //proposed pitch +int16_t pitch_error, //allowed tolerance +FPSEGPT_LIST * prev_list //previous segment +) +: fake_count(0), + xpos(x), + mean_sum(0.0), + sq_sum(0.0) +{ + int16_t best_fake; //on previous + FPSEGPT *segpt; //segment point + int32_t dist; //from prev segment + double sq_dist; //squared distance + double mean; //mean pitch + double total; //total dists + double factor; //cost function + FPSEGPT_IT pred_it = prev_list;//for previuos segment + + cost = FLT_MAX; + pred = nullptr; + faked = faking; + terminal = false; + best_fake = INT16_MAX; + mid_cuts = 0; + for (pred_it.mark_cycle_pt (); !pred_it.cycled_list (); pred_it.forward ()) { + segpt = pred_it.data (); + if (segpt->fake_count < best_fake) + best_fake = segpt->fake_count; + dist = x - segpt->xpos; + if (dist >= pitch - pitch_error && dist <= pitch + pitch_error + && !segpt->terminal) { + total = segpt->mean_sum + dist; + sq_dist = dist * dist + segpt->sq_sum + offset * offset; + //sum of squarees + mean = total / region_index; + factor = mean - pitch; + factor *= factor; + factor += sq_dist / (region_index) - mean * mean; + if (factor < cost) { + cost = factor; //find least cost + pred = segpt; //save path + mean_sum = total; + sq_sum = sq_dist; + fake_count = segpt->fake_count + faked; + } + } + } + if (fake_count > best_fake + 1) + 
pred = nullptr; //fail it +} + +/********************************************************************** + * check_pitch_sync + * + * Construct the lattice of possible segmentation points and choose the + * optimal path. Return the optimal path only. + * The return value is a measure of goodness of the sync. + **********************************************************************/ + +double check_pitch_sync( //find segmentation + BLOBNBOX_IT *blob_it, //blobs to do + int16_t blob_count, //no of blobs + int16_t pitch, //pitch estimate + int16_t pitch_error, //tolerance + STATS *projection, //vertical + FPSEGPT_LIST *seg_list //output list + ) { + int16_t x; //current coord + int16_t min_index; //blob number + int16_t max_index; //blob number + int16_t left_edge; //of word + int16_t right_edge; //of word + int16_t right_max; //max allowed x + int16_t min_x; //in this region + int16_t max_x; + int16_t region_index; + int16_t best_region_index = 0; //for best result + int16_t offset; //dist to legal area + int16_t left_best_x; //edge of good region + int16_t right_best_x; //right edge + TBOX min_box; //bounding box + TBOX max_box; //bounding box + TBOX next_box; //box of next blob + FPSEGPT *segpt; //segment point + FPSEGPT_LIST *segpts; //points in a segment + double best_cost; //best path + double mean_sum; //computes result + FPSEGPT *best_end; //end of best path + BLOBNBOX_IT min_it; //copy iterator + BLOBNBOX_IT max_it; //copy iterator + FPSEGPT_IT segpt_it; //iterator + //output segments + FPSEGPT_IT outseg_it = seg_list; + FPSEGPT_LIST_CLIST lattice; //list of lists + //region iterator + FPSEGPT_LIST_C_IT lattice_it = &lattice; + + // tprintf("Computing sync on word of %d blobs with pitch %d\n", + // blob_count, pitch); + // if (blob_count==8 && pitch==27) + // projection->print(stdout,true); + if (pitch < 3) + pitch = 3; //nothing ludicrous + if ((pitch - 3) / 2 < pitch_error) + pitch_error = (pitch - 3) / 2; + min_it = *blob_it; + min_box = box_next (&min_it); 
//get box + // if (blob_count==8 && pitch==27) + // tprintf("1st box at (%d,%d)->(%d,%d)\n", + // min_box.left(),min_box.bottom(), + // min_box.right(),min_box.top()); + //left of word + left_edge = min_box.left () + pitch_error; + for (min_index = 1; min_index < blob_count; min_index++) { + min_box = box_next (&min_it); + // if (blob_count==8 && pitch==27) + // tprintf("Box at (%d,%d)->(%d,%d)\n", + // min_box.left(),min_box.bottom(), + // min_box.right(),min_box.top()); + } + right_edge = min_box.right (); //end of word + max_x = left_edge; + //min permissible + min_x = max_x - pitch + pitch_error * 2 + 1; + right_max = right_edge + pitch - pitch_error - 1; + segpts = new FPSEGPT_LIST; //list of points + segpt_it.set_to_list (segpts); + for (x = min_x; x <= max_x; x++) { + segpt = new FPSEGPT (x); //make a new one + //put in list + segpt_it.add_after_then_move (segpt); + } + //first segment + lattice_it.add_before_then_move (segpts); + min_index = 0; + region_index = 1; + best_cost = FLT_MAX; + best_end = nullptr; + min_it = *blob_it; + min_box = box_next (&min_it); //first box + do { + left_best_x = -1; + right_best_x = -1; + segpts = new FPSEGPT_LIST; //list of points + segpt_it.set_to_list (segpts); + min_x += pitch - pitch_error;//next limits + max_x += pitch + pitch_error; + while (min_box.right () < min_x && min_index < blob_count) { + min_index++; + min_box = box_next (&min_it); + } + max_it = min_it; + max_index = min_index; + max_box = min_box; + next_box = box_next (&max_it); + for (x = min_x; x <= max_x && x <= right_max; x++) { + while (x < right_edge && max_index < blob_count + && x > max_box.right ()) { + max_index++; + max_box = next_box; + next_box = box_next (&max_it); + } + if (x <= max_box.left () + pitch_error + || x >= max_box.right () - pitch_error || x >= right_edge + || (max_index < blob_count - 1 && x >= next_box.left ()) + || (x - max_box.left () > pitch * pitsync_joined_edge + && max_box.right () - x > pitch * pitsync_joined_edge)) { + 
// || projection->local_min(x)) + if (x - max_box.left () > 0 + && x - max_box.left () <= pitch_error) + //dist to real break + offset = x - max_box.left (); + else if (max_box.right () - x > 0 + && max_box.right () - x <= pitch_error + && (max_index >= blob_count - 1 + || x < next_box.left ())) + offset = max_box.right () - x; + else + offset = 0; + // offset=pitsync_offset_freecut_fraction*projection->pile_count(x); + segpt = new FPSEGPT (x, false, offset, region_index, + pitch, pitch_error, lattice_it.data ()); + } + else { + offset = projection->pile_count (x); + segpt = new FPSEGPT (x, true, offset, region_index, + pitch, pitch_error, lattice_it.data ()); + } + if (segpt->previous () != nullptr) { + segpt_it.add_after_then_move (segpt); + if (x >= right_edge - pitch_error) { + segpt->terminal = true;//no more wanted + if (segpt->cost_function () < best_cost) { + best_cost = segpt->cost_function (); + //find least + best_end = segpt; + best_region_index = region_index; + left_best_x = x; + right_best_x = x; + } + else if (segpt->cost_function () == best_cost + && right_best_x == x - 1) + right_best_x = x; + } + } + else { + delete segpt; //no good + } + } + if (segpts->empty ()) { + if (best_end != nullptr) + break; //already found one + make_illegal_segment (lattice_it.data (), min_box, min_it, + region_index, pitch, pitch_error, segpts); + } + else { + if (right_best_x > left_best_x + 1) { + left_best_x = (left_best_x + right_best_x + 1) / 2; + for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list () + && segpt_it.data ()->position () != left_best_x; + segpt_it.forward ()); + if (segpt_it.data ()->position () == left_best_x) + //middle of region + best_end = segpt_it.data (); + } + } + //new segment + lattice_it.add_before_then_move (segpts); + region_index++; + } + while (min_x < right_edge); + ASSERT_HOST (best_end != nullptr);//must always find some + + for (lattice_it.mark_cycle_pt (); !lattice_it.cycled_list (); + lattice_it.forward ()) { + segpts = 
lattice_it.data (); + segpt_it.set_to_list (segpts); + // if (blob_count==8 && pitch==27) + // { + // for (segpt_it.mark_cycle_pt();!segpt_it.cycled_list();segpt_it.forward()) + // { + // segpt=segpt_it.data(); + // tprintf("At %d, (%x) cost=%g, m=%g, sq=%g, pred=%x\n", + // segpt->position(),segpt,segpt->cost_function(), + // segpt->sum(),segpt->squares(),segpt->previous()); + // } + // tprintf("\n"); + // } + for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list () + && segpt_it.data () != best_end; segpt_it.forward ()); + if (segpt_it.data () == best_end) { + //save good one + segpt = segpt_it.extract (); + outseg_it.add_before_then_move (segpt); + best_end = segpt->previous (); + } + } + ASSERT_HOST (best_end == nullptr); + ASSERT_HOST (!outseg_it.empty ()); + outseg_it.move_to_last (); + mean_sum = outseg_it.data ()->sum (); + mean_sum = mean_sum * mean_sum / best_region_index; + if (outseg_it.data ()->squares () - mean_sum < 0) + tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n", + outseg_it.data ()->squares (), outseg_it.data ()->sum (), + best_region_index); + lattice.deep_clear (); //shift the lot + return outseg_it.data ()->squares () - mean_sum; +} + + +/********************************************************************** + * make_illegal_segment + * + * Make a fake set of chop points due to having no legal places. 
+ **********************************************************************/ + +void make_illegal_segment( //find segmentation + FPSEGPT_LIST *prev_list, //previous segments + TBOX blob_box, //bounding box + BLOBNBOX_IT blob_it, //iterator + int16_t region_index, //number of segment + int16_t pitch, //pitch estimate + int16_t pitch_error, //tolerance + FPSEGPT_LIST *seg_list //output list + ) { + int16_t x; //current coord + int16_t min_x = 0; //in this region + int16_t max_x = 0; + int16_t offset; //dist to edge + FPSEGPT *segpt; //segment point + FPSEGPT *prevpt; //previous point + float best_cost; //best path + FPSEGPT_IT segpt_it = seg_list;//iterator + //previous points + FPSEGPT_IT prevpt_it = prev_list; + + best_cost = FLT_MAX; + for (prevpt_it.mark_cycle_pt (); !prevpt_it.cycled_list (); + prevpt_it.forward ()) { + prevpt = prevpt_it.data (); + if (prevpt->cost_function () < best_cost) { + //find least + best_cost = prevpt->cost_function (); + min_x = prevpt->position (); + max_x = min_x; //limits on coords + } + else if (prevpt->cost_function () == best_cost) { + max_x = prevpt->position (); + } + } + min_x += pitch - pitch_error; + max_x += pitch + pitch_error; + for (x = min_x; x <= max_x; x++) { + while (x > blob_box.right ()) { + blob_box = box_next (&blob_it); + } + offset = x - blob_box.left (); + if (blob_box.right () - x < offset) + offset = blob_box.right () - x; + segpt = new FPSEGPT (x, false, offset, + region_index, pitch, pitch_error, prev_list); + if (segpt->previous () != nullptr) { + ASSERT_HOST (offset >= 0); + fprintf (stderr, "made fake at %d\n", x); + //make one up + segpt_it.add_after_then_move (segpt); + segpt->faked = true; + segpt->fake_count++; + } + else + delete segpt; + } +} + +} // namespace tesseract diff --git a/tesseract/src/textord/pitsync1.h b/tesseract/src/textord/pitsync1.h new file mode 100644 index 00000000..310a6d8a --- /dev/null +++ b/tesseract/src/textord/pitsync1.h @@ -0,0 +1,125 @@ 
+/********************************************************************** + * File: pitsync1.h (Formerly pitsync.h) + * Description: Code to find the optimum fixed pitch segmentation of some blobs. + * Author: Ray Smith + * Created: Thu Nov 19 11:48:05 GMT 1992 + * + * (C) Copyright 1992, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef PITSYNC1_H +#define PITSYNC1_H + +#include "elst.h" +#include "clst.h" +#include "blobbox.h" +#include "params.h" +#include "statistc.h" +#include "pithsync.h" + +namespace tesseract { + +class FPSEGPT_LIST; + +class FPSEGPT : public ELIST_LINK +{ + public: + FPSEGPT() = default; + FPSEGPT( //constructor + int16_t x); //position + FPSEGPT( //constructor + int16_t x, //position + bool faking, //faking this one + int16_t offset, //extra cost dist + int16_t region_index, //segment number + int16_t pitch, //proposed pitch + int16_t pitch_error, //allowed tolerance + FPSEGPT_LIST *prev_list); //previous segment + FPSEGPT(FPCUTPT *cutpt); //build from new type + + int32_t position() { // access func + return xpos; + } + double cost_function() { + return cost; + } + double squares() { + return sq_sum; + } + double sum() { + return mean_sum; + } + FPSEGPT *previous() { + return pred; + } + int16_t cheap_cuts() const { //no of cheap cuts + return mid_cuts; + } + + bool faked; //faked split point + bool terminal; //successful end + 
int16_t fake_count; //total fakes to here + + private: + int16_t mid_cuts; //no of cheap cuts + int32_t xpos; //location + FPSEGPT *pred; //optimal previous + double mean_sum; //mean so far + double sq_sum; //summed distsances + double cost; //cost function +}; + +ELISTIZEH (FPSEGPT) CLISTIZEH (FPSEGPT_LIST) +extern +INT_VAR_H (pitsync_linear_version, 0, "Use new fast algorithm"); +extern +double_VAR_H (pitsync_joined_edge, 0.75, +"Dist inside big blob for chopping"); +extern +double_VAR_H (pitsync_offset_freecut_fraction, 0.25, +"Fraction of cut for free cuts"); +extern +INT_VAR_H (pitsync_fake_depth, 1, "Max advance fake generation"); +double check_pitch_sync( //find segmentation + BLOBNBOX_IT *blob_it, //blobs to do + int16_t blob_count, //no of blobs + int16_t pitch, //pitch estimate + int16_t pitch_error, //tolerance + STATS *projection, //vertical + FPSEGPT_LIST *seg_list //output list + ); +void make_illegal_segment( //find segmentation + FPSEGPT_LIST *prev_list, //previous segments + TBOX blob_box, //bounding box + BLOBNBOX_IT blob_it, //iterator + int16_t region_index, //number of segment + int16_t pitch, //pitch estimate + int16_t pitch_error, //tolerance + FPSEGPT_LIST *seg_list //output list + ); +int16_t vertical_torow_projection( //project whole row + TO_ROW *row, //row to do + STATS *projection //output + ); +void vertical_cblob_projection( //project outlines + C_BLOB *blob, //blob to project + STATS *stats //output + ); +void vertical_coutline_projection( //project outlines + C_OUTLINE *outline, //outline to project + STATS *stats //output + ); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/textord/scanedg.cpp b/tesseract/src/textord/scanedg.cpp new file mode 100644 index 00000000..fa0608cb --- /dev/null +++ b/tesseract/src/textord/scanedg.cpp @@ -0,0 +1,405 @@ +/********************************************************************** + * File: scanedg.cpp (Formerly scanedge.c) + * Description: Raster scanning crack based edge 
extractor. + * Author: Ray Smith + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "scanedg.h" + +#include "crakedge.h" +#include "edgloop.h" +#include "pdblock.h" + +#include "allheaders.h" + +#include <memory> // std::unique_ptr + +namespace tesseract { + +#define WHITE_PIX 1 /*thresholded colours */ +#define BLACK_PIX 0 +// Flips between WHITE_PIX and BLACK_PIX. +#define FLIP_COLOUR(pix) (1-(pix)) + +struct CrackPos { + CRACKEDGE** free_cracks; // Freelist for fast allocation. + int x; // Position of new edge. + int y; +}; + +static void free_crackedges(CRACKEDGE* start); + +static void join_edges(CRACKEDGE* edge1, CRACKEDGE* edge2, + CRACKEDGE** free_cracks, + C_OUTLINE_IT* outline_it); + +static void line_edges(int16_t x, int16_t y, int16_t xext, uint8_t uppercolour, + uint8_t* bwpos, + CRACKEDGE** prevline, CRACKEDGE** free_cracks, + C_OUTLINE_IT* outline_it); + +static void make_margins(PDBLK* block, BLOCK_LINE_IT* line_it, + uint8_t* pixels, uint8_t margin, + int16_t left, int16_t right, int16_t y); + +static CRACKEDGE* h_edge(int sign, CRACKEDGE* join, CrackPos* pos); +static CRACKEDGE* v_edge(int sign, CRACKEDGE* join, CrackPos* pos); + +/********************************************************************** + * block_edges + * + * Extract edges from a PDBLK. 
+ **********************************************************************/ + +void block_edges(Pix *t_pix, // thresholded image + PDBLK *block, // block in image + C_OUTLINE_IT* outline_it) { + ICOORD bleft; // bounding box + ICOORD tright; + BLOCK_LINE_IT line_it = block; // line iterator + + int width = pixGetWidth(t_pix); + int height = pixGetHeight(t_pix); + int wpl = pixGetWpl(t_pix); + // lines in progress + std::unique_ptr<CRACKEDGE*[]> ptrline(new CRACKEDGE*[width + 1]); + CRACKEDGE *free_cracks = nullptr; + + block->bounding_box(bleft, tright); // block box + ASSERT_HOST(tright.x() <= width); + ASSERT_HOST(tright.y() <= height); + int block_width = tright.x() - bleft.x(); + for (int x = block_width; x >= 0; x--) + ptrline[x] = nullptr; // no lines in progress + + std::unique_ptr<uint8_t[]> bwline(new uint8_t[width]); + + const uint8_t margin = WHITE_PIX; + + for (int y = tright.y() - 1; y >= bleft.y() - 1; y--) { + if (y >= bleft.y() && y < tright.y()) { + // Get the binary pixels from the image. + l_uint32* line = pixGetData(t_pix) + wpl * (height - 1 - y); + for (int x = 0; x < block_width; ++x) { + bwline[x] = GET_DATA_BIT(line, x + bleft.x()) ^ 1; + } + make_margins(block, &line_it, bwline.get(), margin, bleft.x(), tright.x(), y); + } else { + memset(bwline.get(), margin, block_width * sizeof(bwline[0])); + } + line_edges(bleft.x(), y, block_width, + margin, bwline.get(), ptrline.get(), &free_cracks, outline_it); + } + + free_crackedges(free_cracks); // really free them +} + + +/********************************************************************** + * make_margins + * + * Get an image line and set to margin non-text pixels. 
+ **********************************************************************/ + +static +void make_margins( //get a line + PDBLK *block, //block in image + BLOCK_LINE_IT *line_it, //for old style + uint8_t *pixels, //pixels to strip + uint8_t margin, //white-out pixel + int16_t left, //block edges + int16_t right, + int16_t y //line coord + ) { + ICOORDELT_IT seg_it; + int32_t start; //of segment + int16_t xext; //of segment + int xindex; //index to pixel + + if (block->poly_block () != nullptr) { + std::unique_ptr<PB_LINE_IT> lines(new PB_LINE_IT (block->poly_block ())); + const std::unique_ptr</*non-const*/ ICOORDELT_LIST> segments( + lines->get_line(y)); + if (!segments->empty ()) { + seg_it.set_to_list(segments.get()); + seg_it.mark_cycle_pt (); + start = seg_it.data ()->x (); + xext = seg_it.data ()->y (); + for (xindex = left; xindex < right; xindex++) { + if (xindex >= start && !seg_it.cycled_list ()) { + xindex = start + xext - 1; + seg_it.forward (); + start = seg_it.data ()->x (); + xext = seg_it.data ()->y (); + } + else + pixels[xindex - left] = margin; + } + } + else { + for (xindex = left; xindex < right; xindex++) + pixels[xindex - left] = margin; + } + } + else { + start = line_it->get_line (y, xext); + for (xindex = left; xindex < start; xindex++) + pixels[xindex - left] = margin; + for (xindex = start + xext; xindex < right; xindex++) + pixels[xindex - left] = margin; + } +} + +/********************************************************************** + * line_edges + * + * Scan a line for edges and update the edges in progress. + * When edges close into loops, send them for approximation. 
+ **********************************************************************/ + +static +void line_edges(int16_t x, // coord of line start + int16_t y, // coord of line + int16_t xext, // width of line + uint8_t uppercolour, // start of prev line + uint8_t * bwpos, // thresholded line + CRACKEDGE ** prevline, // edges in progress + CRACKEDGE **free_cracks, + C_OUTLINE_IT* outline_it) { + CrackPos pos = {free_cracks, x, y }; + int xmax; // max x coord + int prevcolour; // of previous pixel + CRACKEDGE *current; // current h edge + CRACKEDGE *newcurrent; // new h edge + + xmax = x + xext; // max allowable coord + prevcolour = uppercolour; // forced plain margin + current = nullptr; // nothing yet + + // do each pixel + for (; pos.x < xmax; pos.x++, prevline++) { + const int colour = *bwpos++; // current pixel + if (*prevline != nullptr) { + // changed above + // change colour + uppercolour = FLIP_COLOUR(uppercolour); + if (colour == prevcolour) { + if (colour == uppercolour) { + // finish a line + join_edges(current, *prevline, free_cracks, outline_it); + current = nullptr; // no edge now + } else { + // new horiz edge + current = h_edge(uppercolour - colour, *prevline, &pos); + } + *prevline = nullptr; // no change this time + } else { + if (colour == uppercolour) + *prevline = v_edge(colour - prevcolour, *prevline, &pos); + // 8 vs 4 connection + else if (colour == WHITE_PIX) { + join_edges(current, *prevline, free_cracks, outline_it); + current = h_edge(uppercolour - colour, nullptr, &pos); + *prevline = v_edge(colour - prevcolour, current, &pos); + } else { + newcurrent = h_edge(uppercolour - colour, *prevline, &pos); + *prevline = v_edge(colour - prevcolour, current, &pos); + current = newcurrent; // right going h edge + } + prevcolour = colour; // remember new colour + } + } else { + if (colour != prevcolour) { + *prevline = current = v_edge(colour - prevcolour, current, &pos); + prevcolour = colour; + } + if (colour != uppercolour) + current = h_edge(uppercolour - 
colour, current, &pos); + else + current = nullptr; // no edge now + } + } + if (current != nullptr) { + // out of block + if (*prevline != nullptr) { // got one to join to? + join_edges(current, *prevline, free_cracks, outline_it); + *prevline = nullptr; // tidy now + } else { + // fake vertical + *prevline = v_edge(FLIP_COLOUR(prevcolour)-prevcolour, current, &pos); + } + } else if (*prevline != nullptr) { + //continue fake + *prevline = v_edge(FLIP_COLOUR(prevcolour)-prevcolour, *prevline, &pos); + } +} + + +/********************************************************************** + * h_edge + * + * Create a new horizontal CRACKEDGE and join it to the given edge. + **********************************************************************/ + +static +CRACKEDGE *h_edge(int sign, // sign of edge + CRACKEDGE* join, // edge to join to + CrackPos* pos) { + CRACKEDGE *newpt; // return value + + if (*pos->free_cracks != nullptr) { + newpt = *pos->free_cracks; + *pos->free_cracks = newpt->next; // get one fast + } else { + newpt = new CRACKEDGE; + } + newpt->pos.set_y(pos->y + 1); // coords of pt + newpt->stepy = 0; // edge is horizontal + + if (sign > 0) { + newpt->pos.set_x(pos->x + 1); // start location + newpt->stepx = -1; + newpt->stepdir = 0; + } else { + newpt->pos.set_x(pos->x); // start location + newpt->stepx = 1; + newpt->stepdir = 2; + } + + if (join == nullptr) { + newpt->next = newpt; // ptrs to other ends + newpt->prev = newpt; + } else { + if (newpt->pos.x() + newpt->stepx == join->pos.x() + && newpt->pos.y() == join->pos.y()) { + newpt->prev = join->prev; // update other ends + newpt->prev->next = newpt; + newpt->next = join; // join up + join->prev = newpt; + } else { + newpt->next = join->next; // update other ends + newpt->next->prev = newpt; + newpt->prev = join; // join up + join->next = newpt; + } + } + return newpt; +} + + +/********************************************************************** + * v_edge + * + * Create a new vertical CRACKEDGE and 
join it to the given edge. + **********************************************************************/ + +static +CRACKEDGE *v_edge(int sign, // sign of edge + CRACKEDGE* join, + CrackPos* pos) { + CRACKEDGE *newpt; // return value + + if (*pos->free_cracks != nullptr) { + newpt = *pos->free_cracks; + *pos->free_cracks = newpt->next; // get one fast + } else { + newpt = new CRACKEDGE; + } + newpt->pos.set_x(pos->x); // coords of pt + newpt->stepx = 0; // edge is vertical + + if (sign > 0) { + newpt->pos.set_y(pos->y); // start location + newpt->stepy = 1; + newpt->stepdir = 3; + } else { + newpt->pos.set_y(pos->y + 1); // start location + newpt->stepy = -1; + newpt->stepdir = 1; + } + + if (join == nullptr) { + newpt->next = newpt; //ptrs to other ends + newpt->prev = newpt; + } else { + if (newpt->pos.x() == join->pos.x() + && newpt->pos.y() + newpt->stepy == join->pos.y()) { + newpt->prev = join->prev; // update other ends + newpt->prev->next = newpt; + newpt->next = join; // join up + join->prev = newpt; + } else { + newpt->next = join->next; // update other ends + newpt->next->prev = newpt; + newpt->prev = join; // join up + join->next = newpt; + } + } + return newpt; +} + + +/********************************************************************** + * join_edges + * + * Join 2 edges together. Send the outline for approximation when a + * closed loop is formed. 
+ **********************************************************************/ + +static +void join_edges(CRACKEDGE *edge1, // edges to join + CRACKEDGE *edge2, // no specific order + CRACKEDGE **free_cracks, + C_OUTLINE_IT* outline_it) { + if (edge1->pos.x() + edge1->stepx != edge2->pos.x() + || edge1->pos.y() + edge1->stepy != edge2->pos.y()) { + CRACKEDGE *tempedge = edge1; + edge1 = edge2; // swap around + edge2 = tempedge; + } + + if (edge1->next == edge2) { + // already closed + complete_edge(edge1, outline_it); + // attach freelist to end + edge1->prev->next = *free_cracks; + *free_cracks = edge1; // and free list + } else { + // update opposite ends + edge2->prev->next = edge1->next; + edge1->next->prev = edge2->prev; + edge1->next = edge2; // make joins + edge2->prev = edge1; + } +} + + +/********************************************************************** + * free_crackedges + * + * Really free the CRACKEDGEs by giving them back to delete. + **********************************************************************/ + +static void free_crackedges(CRACKEDGE *start) { + CRACKEDGE *current; // current edge to free + CRACKEDGE *next; // next one to free + + for (current = start; current != nullptr; current = next) { + next = current->next; + delete current; // delete them all + } +} + +} // namespace tesseract diff --git a/tesseract/src/textord/scanedg.h b/tesseract/src/textord/scanedg.h new file mode 100644 index 00000000..96bf6478 --- /dev/null +++ b/tesseract/src/textord/scanedg.h @@ -0,0 +1,38 @@ +/********************************************************************** + * File: scanedg.h (Formerly scanedge.h) + * Description: Raster scanning crack based edge extractor. + * Author: Ray Smith + * + * (C) Copyright 1991, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef SCANEDG_H +#define SCANEDG_H + +#include "params.h" +#include "scrollview.h" + +struct Pix; + +namespace tesseract { + +class C_OUTLINE_IT; +class PDBLK; + +void block_edges(Pix* t_image, // thresholded image + PDBLK* block, // block in image + C_OUTLINE_IT* outline_it); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/textord/sortflts.cpp b/tesseract/src/textord/sortflts.cpp new file mode 100644 index 00000000..01548e9f --- /dev/null +++ b/tesseract/src/textord/sortflts.cpp @@ -0,0 +1,81 @@ +/********************************************************************** + * File: sortflts.cpp (Formerly sfloats.c) + * Description: Code to maintain a sorted list of floats. + * Author: Ray Smith + * + * (C) Copyright 1993, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include "sortflts.h" + +namespace tesseract { + +ELISTIZE (SORTED_FLOAT) +/** + * @name SORTED_FLOATS::add + * + * Add a new entry to the sorted list of floats. + */ +void SORTED_FLOATS::add( //add new entry + float value, + int32_t key) { + auto *new_float = new SORTED_FLOAT (value, key); + + if (list.empty ()) + it.add_after_stay_put (new_float); + else { + it.move_to_first (); + while (!it.at_last () && it.data ()->entry < value) + it.forward (); + if (it.data ()->entry < value) + it.add_after_stay_put (new_float); + else + it.add_before_stay_put (new_float); + } +} + + +/** + * @name SORTED_FLOATS::remove + * + * Remove an entry from the sorted list of floats. + */ + +void SORTED_FLOATS::remove( //remove the entry + int32_t key) { + if (!list.empty ()) { + for (it.mark_cycle_pt (); !it.cycled_list (); it.forward ()) { + if (it.data ()->address == key) { + delete it.extract (); + return; + } + } + } +} + + +/** + * @name SORTED_FLOATS::operator[] + * + * Return the floating point value of the given index into the list. + */ + +float +SORTED_FLOATS::operator[] ( //get an entry +int32_t index //to list +) { + it.move_to_first (); + return it.data_relative (index)->entry; +} + +} // namespace tesseract diff --git a/tesseract/src/textord/sortflts.h b/tesseract/src/textord/sortflts.h new file mode 100644 index 00000000..710a7a3d --- /dev/null +++ b/tesseract/src/textord/sortflts.h @@ -0,0 +1,76 @@ +/********************************************************************** + * File: sortflts.h (Formerly sfloats.h) + * Description: Code to maintain a sorted list of floats. + * Author: Ray Smith + * + * (C) Copyright 1993, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef SORTFLTS_H +#define SORTFLTS_H + +#include "elst.h" + +namespace tesseract { + +class SORTED_FLOAT : public ELIST_LINK +{ + friend class SORTED_FLOATS; + + public: + SORTED_FLOAT() = default; + SORTED_FLOAT( //create one + float value, //value of entry + int32_t key) { //reference + entry = value; + address = key; + } + private: + float entry; //value of float + int32_t address; //key +}; + +ELISTIZEH (SORTED_FLOAT) +class SORTED_FLOATS +{ + public: + /** empty constructor */ + SORTED_FLOATS() { + it.set_to_list (&list); + } + /** + * add sample + * @param value sample float + * @param key retrieval key + */ + void add(float value, + int32_t key); + /** + * delete sample + * @param key key to delete + */ + void remove(int32_t key); + /** + * index to list + * @param index item to get + */ + float operator[] (int32_t index); + + private: + SORTED_FLOAT_LIST list; //list of floats + SORTED_FLOAT_IT it; //iterator built-in +}; + +} // namespace tesseract + +#endif diff --git a/tesseract/src/textord/strokewidth.cpp b/tesseract/src/textord/strokewidth.cpp new file mode 100644 index 00000000..6543c6ac --- /dev/null +++ b/tesseract/src/textord/strokewidth.cpp @@ -0,0 +1,2030 @@ +/////////////////////////////////////////////////////////////////////// +// File: strokewidth.cpp +// Description: Subclass of BBGrid to find uniformity of strokewidth. +// Author: Ray Smith +// +// (C) Copyright 2008, Google Inc. 
+// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "strokewidth.h" + +#include <algorithm> +#include <cmath> + +#include "blobbox.h" +#include "colpartition.h" +#include "colpartitiongrid.h" +#include "imagefind.h" +#include "linlsq.h" +#include "statistc.h" +#include "tabfind.h" +#include "textlineprojection.h" +#include "tordmain.h" // For SetBlobStrokeWidth. + +namespace tesseract { + +#ifndef GRAPHICS_DISABLED +static INT_VAR(textord_tabfind_show_strokewidths, 0, "Show stroke widths (ScrollView)"); +#else +static INT_VAR(textord_tabfind_show_strokewidths, 0, "Show stroke widths"); +#endif +static BOOL_VAR(textord_tabfind_only_strokewidths, false, "Only run stroke widths"); + +/** Allowed proportional change in stroke width to be the same font. */ +const double kStrokeWidthFractionTolerance = 0.125; +/** + * Allowed constant change in stroke width to be the same font. + * Really 1.5 pixels. + */ +const double kStrokeWidthTolerance = 1.5; +// Same but for CJK we are a bit more generous. +const double kStrokeWidthFractionCJK = 0.25; +const double kStrokeWidthCJK = 2.0; +// Radius in grid cells of search for broken CJK. Doesn't need to be very +// large as the grid size should be about the size of a character anyway. +const int kCJKRadius = 2; +// Max distance fraction of size to join close but broken CJK characters. 
+const double kCJKBrokenDistanceFraction = 0.25; +// Max number of components in a broken CJK character. +const int kCJKMaxComponents = 8; +// Max aspect ratio of CJK broken characters when put back together. +const double kCJKAspectRatio = 1.25; +// Max increase in aspect ratio of CJK broken characters when merged. +const double kCJKAspectRatioIncrease = 1.0625; +// Max multiple of the grid size that will be used in computing median CJKsize. +const int kMaxCJKSizeRatio = 5; +// Min fraction of blobs broken CJK to iterate and run it again. +const double kBrokenCJKIterationFraction = 0.125; +// Multiple of gridsize as x-padding for a search box for diacritic base +// characters. +const double kDiacriticXPadRatio = 7.0; +// Multiple of gridsize as y-padding for a search box for diacritic base +// characters. +const double kDiacriticYPadRatio = 1.75; +// Min multiple of diacritic height that a neighbour must be to be a +// convincing base character. +const double kMinDiacriticSizeRatio = 1.0625; +// Max multiple of a textline's median height as a threshold for the sum of +// a diacritic's farthest x and y distances (gap + size). +const double kMaxDiacriticDistanceRatio = 1.25; +// Max x-gap between a diacritic and its base char as a fraction of the height +// of the base char (allowing other blobs to fill the gap.) +const double kMaxDiacriticGapToBaseCharHeight = 1.0; +// Ratio between longest side of a line and longest side of a character. +// (neighbor_min > blob_min * kLineTrapShortest && +// neighbor_max < blob_max / kLineTrapLongest) +// => neighbor is a grapheme and blob is a line. +const int kLineTrapLongest = 4; +// Ratio between shortest side of a line and shortest side of a character. +const int kLineTrapShortest = 2; +// Max aspect ratio of the total box before CountNeighbourGaps +// decides immediately based on the aspect ratio. +const int kMostlyOneDirRatio = 3; +// Aspect ratio for a blob to be considered as line residue. 
+const double kLineResidueAspectRatio = 8.0; +// Padding ratio for line residue search box. +const int kLineResiduePadRatio = 3; +// Min multiple of neighbour size for a line residue to be genuine. +const double kLineResidueSizeRatio = 1.75; +// Aspect ratio filter for OSD. +const float kSizeRatioToReject = 2.0; +// Expansion factor for search box for good neighbours. +const double kNeighbourSearchFactor = 2.5; +// Factor of increase of overlap when adding diacritics to make an image noisy. +const double kNoiseOverlapGrowthFactor = 4.0; +// Fraction of the image size to add overlap when adding diacritics for an +// image to qualify as noisy. +const double kNoiseOverlapAreaFactor = 1.0 / 512; + +StrokeWidth::StrokeWidth(int gridsize, + const ICOORD& bleft, const ICOORD& tright) + : BlobGrid(gridsize, bleft, tright), nontext_map_(nullptr), projection_(nullptr), + denorm_(nullptr), grid_box_(bleft, tright), rerotation_(1.0f, 0.0f) { + leaders_win_ = nullptr; + widths_win_ = nullptr; + initial_widths_win_ = nullptr; + chains_win_ = nullptr; + diacritics_win_ = nullptr; + textlines_win_ = nullptr; + smoothed_win_ = nullptr; +} + +StrokeWidth::~StrokeWidth() { + if (widths_win_ != nullptr) { + #ifndef GRAPHICS_DISABLED + delete widths_win_->AwaitEvent(SVET_DESTROY); + #endif // !GRAPHICS_DISABLED + if (textord_tabfind_only_strokewidths) + exit(0); + delete widths_win_; + } + delete leaders_win_; + delete initial_widths_win_; + delete chains_win_; + delete textlines_win_; + delete smoothed_win_; + delete diacritics_win_; +} + +// Sets the neighbours member of the medium-sized blobs in the block. +// Searches on 4 sides of each blob for similar-sized, similar-strokewidth +// blobs and sets pointers to the good neighbours. +void StrokeWidth::SetNeighboursOnMediumBlobs(TO_BLOCK* block) { + // Run a preliminary strokewidth neighbour detection on the medium blobs. 
+ InsertBlobList(&block->blobs); + BLOBNBOX_IT blob_it(&block->blobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + SetNeighbours(false, false, blob_it.data()); + } + Clear(); +} + +// Sets the neighbour/textline writing direction members of the medium +// and large blobs with optional repair of broken CJK characters first. +// Repair of broken CJK is needed here because broken CJK characters +// can fool the textline direction detection algorithm. +void StrokeWidth::FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, + bool cjk_merge, + TO_BLOCK* input_block) { + // Setup the grid with the remaining (non-noise) blobs. + InsertBlobs(input_block); + // Repair broken CJK characters if needed. + while (cjk_merge && FixBrokenCJK(input_block)); + // Grade blobs by inspection of neighbours. + FindTextlineFlowDirection(pageseg_mode, false); + // Clear the grid ready for rotation or leader finding. + Clear(); +} + +// Helper to collect and count horizontal and vertical blobs from a list. +static void CollectHorizVertBlobs(BLOBNBOX_LIST* input_blobs, + int* num_vertical_blobs, + int* num_horizontal_blobs, + BLOBNBOX_CLIST* vertical_blobs, + BLOBNBOX_CLIST* horizontal_blobs, + BLOBNBOX_CLIST* nondescript_blobs) { + BLOBNBOX_C_IT v_it(vertical_blobs); + BLOBNBOX_C_IT h_it(horizontal_blobs); + BLOBNBOX_C_IT n_it(nondescript_blobs); + BLOBNBOX_IT blob_it(input_blobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + const TBOX& box = blob->bounding_box(); + float y_x = static_cast<float>(box.height()) / box.width(); + float x_y = 1.0f / y_x; + // Select a >= 1.0 ratio + float ratio = x_y > y_x ? x_y : y_x; + // If the aspect ratio is small and we want them for osd, save the blob. 
+ bool ok_blob = ratio <= kSizeRatioToReject; + if (blob->UniquelyVertical()) { + ++*num_vertical_blobs; + if (ok_blob) v_it.add_after_then_move(blob); + } else if (blob->UniquelyHorizontal()) { + ++*num_horizontal_blobs; + if (ok_blob) h_it.add_after_then_move(blob); + } else if (ok_blob) { + n_it.add_after_then_move(blob); + } + } +} + + +// Types all the blobs as vertical or horizontal text or unknown and +// returns true if the majority are vertical. +// If the blobs are rotated, it is necessary to call CorrectForRotation +// after rotating everything, otherwise the work done here will be enough. +// If osd_blobs is not null, a list of blobs from the dominant textline +// direction are returned for use in orientation and script detection. +bool StrokeWidth::TestVerticalTextDirection(double find_vertical_text_ratio, + TO_BLOCK* block, + BLOBNBOX_CLIST* osd_blobs) { + int vertical_boxes = 0; + int horizontal_boxes = 0; + // Count vertical normal and large blobs. + BLOBNBOX_CLIST vertical_blobs; + BLOBNBOX_CLIST horizontal_blobs; + BLOBNBOX_CLIST nondescript_blobs; + CollectHorizVertBlobs(&block->blobs, &vertical_boxes, &horizontal_boxes, + &vertical_blobs, &horizontal_blobs, &nondescript_blobs); + CollectHorizVertBlobs(&block->large_blobs, &vertical_boxes, &horizontal_boxes, + &vertical_blobs, &horizontal_blobs, &nondescript_blobs); + if (textord_debug_tabfind) + tprintf("TextDir hbox=%d vs vbox=%d, %dH, %dV, %dN osd blobs\n", + horizontal_boxes, vertical_boxes, + horizontal_blobs.length(), vertical_blobs.length(), + nondescript_blobs.length()); + if (osd_blobs != nullptr && vertical_boxes == 0 && horizontal_boxes == 0) { + // Only nondescript blobs available, so return those. 
+ BLOBNBOX_C_IT osd_it(osd_blobs); + osd_it.add_list_after(&nondescript_blobs); + return false; + } + int min_vert_boxes = static_cast<int>((vertical_boxes + horizontal_boxes) * + find_vertical_text_ratio); + if (vertical_boxes >= min_vert_boxes) { + if (osd_blobs != nullptr) { + BLOBNBOX_C_IT osd_it(osd_blobs); + osd_it.add_list_after(&vertical_blobs); + } + return true; + } else { + if (osd_blobs != nullptr) { + BLOBNBOX_C_IT osd_it(osd_blobs); + osd_it.add_list_after(&horizontal_blobs); + } + return false; + } +} + +// Corrects the data structures for the given rotation. +void StrokeWidth::CorrectForRotation(const FCOORD& rotation, + ColPartitionGrid* part_grid) { + Init(part_grid->gridsize(), part_grid->bleft(), part_grid->tright()); + grid_box_ = TBOX(bleft(), tright()); + rerotation_.set_x(rotation.x()); + rerotation_.set_y(-rotation.y()); +} + +// Finds leader partitions and inserts them into the given part_grid. +void StrokeWidth::FindLeaderPartitions(TO_BLOCK* block, + ColPartitionGrid* part_grid) { + Clear(); + // Find and isolate leaders in the noise list. + ColPartition_LIST leader_parts; + FindLeadersAndMarkNoise(block, &leader_parts); + // Setup the strokewidth grid with the block's remaining (non-noise) blobs. + InsertBlobList(&block->blobs); + // Mark blobs that have leader neighbours. + for (ColPartition_IT it(&leader_parts); !it.empty(); it.forward()) { + ColPartition* part = it.extract(); + part->ClaimBoxes(); + MarkLeaderNeighbours(part, LR_LEFT); + MarkLeaderNeighbours(part, LR_RIGHT); + part_grid->InsertBBox(true, true, part); + } +} + +// Finds and marks noise those blobs that look like bits of vertical lines +// that would otherwise screw up layout analysis. 
+void StrokeWidth::RemoveLineResidue(ColPartition_LIST* big_part_list) { + BlobGridSearch gsearch(this); + BLOBNBOX* bbox; + // For every vertical line-like bbox in the grid, search its neighbours + // to find the tallest, and if the original box is taller by sufficient + // margin, then call it line residue and delete it. + gsearch.StartFullSearch(); + while ((bbox = gsearch.NextFullSearch()) != nullptr) { + TBOX box = bbox->bounding_box(); + if (box.height() < box.width() * kLineResidueAspectRatio) + continue; + // Set up a rectangle search around the blob to find the size of its + // neighbours. + int padding = box.height() * kLineResiduePadRatio; + TBOX search_box = box; + search_box.pad(padding, padding); + bool debug = AlignedBlob::WithinTestRegion(2, box.left(), + box.bottom()); + // Find the largest object in the search box not equal to bbox. + BlobGridSearch rsearch(this); + int max_height = 0; + BLOBNBOX* n; + rsearch.StartRectSearch(search_box); + while ((n = rsearch.NextRectSearch()) != nullptr) { + if (n == bbox) continue; + TBOX nbox = n->bounding_box(); + if (nbox.height() > max_height) { + max_height = nbox.height(); + } + } + if (debug) { + tprintf("Max neighbour size=%d for candidate line box at:", max_height); + box.print(); + } + if (max_height * kLineResidueSizeRatio < box.height()) { + #ifndef GRAPHICS_DISABLED + if (leaders_win_ != nullptr) { + // We are debugging, so display deleted in pink blobs in the same + // window that we use to display leader detection. + leaders_win_->Pen(ScrollView::PINK); + leaders_win_->Rectangle(box.left(), box.bottom(), + box.right(), box.top()); + } + #endif // !GRAPHICS_DISABLED + ColPartition::MakeBigPartition(bbox, big_part_list); + } + } +} + +// Types all the blobs as vertical text or horizontal text or unknown and +// puts them into initial ColPartitions in the supplied part_grid. 
+// rerotation determines how to get back to the image coordinates from the +// blob coordinates (since they may have been rotated for vertical text). +// block is the single block for the whole page or rectangle to be OCRed. +// nontext_pix (full-size), is a binary mask used to prevent merges across +// photo/text boundaries. It is not kept beyond this function. +// denorm provides a mapping back to the image from the current blob +// coordinate space. +// projection provides a measure of textline density over the image and +// provides functions to assist with diacritic detection. It should be a +// pointer to a new TextlineProjection, and will be setup here. +// part_grid is the output grid of textline partitions. +// Large blobs that cause overlap are put in separate partitions and added +// to the big_parts list. +void StrokeWidth::GradeBlobsIntoPartitions( + PageSegMode pageseg_mode, const FCOORD& rerotation, TO_BLOCK* block, + Pix* nontext_pix, const DENORM* denorm, bool cjk_script, + TextlineProjection* projection, BLOBNBOX_LIST* diacritic_blobs, + ColPartitionGrid* part_grid, ColPartition_LIST* big_parts) { + nontext_map_ = nontext_pix; + projection_ = projection; + denorm_ = denorm; + // Clear and re Insert to take advantage of the tab stops in the blobs. + Clear(); + // Setup the strokewidth grid with the remaining non-noise, non-leader blobs. + InsertBlobs(block); + + // Run FixBrokenCJK() again if the page is CJK. 
+ if (cjk_script) { + FixBrokenCJK(block); + } + FindTextlineFlowDirection(pageseg_mode, false); + projection_->ConstructProjection(block, rerotation, nontext_map_); +#ifndef GRAPHICS_DISABLED + if (textord_tabfind_show_strokewidths) { + ScrollView* line_blobs_win = MakeWindow(0, 0, "Initial textline Blobs"); + projection_->PlotGradedBlobs(&block->blobs, line_blobs_win); + projection_->PlotGradedBlobs(&block->small_blobs, line_blobs_win); + } +#endif + projection_->MoveNonTextlineBlobs(&block->blobs, &block->noise_blobs); + projection_->MoveNonTextlineBlobs(&block->small_blobs, &block->noise_blobs); + // Clear and re Insert to take advantage of the removed diacritics. + Clear(); + InsertBlobs(block); + FCOORD skew; + FindTextlineFlowDirection(pageseg_mode, true); + PartitionFindResult r = + FindInitialPartitions(pageseg_mode, rerotation, true, block, + diacritic_blobs, part_grid, big_parts, &skew); + if (r == PFR_NOISE) { + tprintf("Detected %d diacritics\n", diacritic_blobs->length()); + // Noise was found, and removed. + Clear(); + InsertBlobs(block); + FindTextlineFlowDirection(pageseg_mode, true); + r = FindInitialPartitions(pageseg_mode, rerotation, false, block, + diacritic_blobs, part_grid, big_parts, &skew); + } + nontext_map_ = nullptr; + projection_ = nullptr; + denorm_ = nullptr; +} + +static void PrintBoxWidths(BLOBNBOX* neighbour) { + const TBOX& nbox = neighbour->bounding_box(); + tprintf("Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%1.f\n", + nbox.left(), nbox.bottom(), nbox.right(), nbox.top(), + neighbour->horz_stroke_width(), neighbour->vert_stroke_width(), + 2.0 * neighbour->cblob()->area()/neighbour->cblob()->perimeter()); +} + +/** Handles a click event in a display window. */ +void StrokeWidth::HandleClick(int x, int y) { + BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>::HandleClick(x, y); + // Run a radial search for blobs that overlap. 
+ BlobGridSearch radsearch(this); + radsearch.StartRadSearch(x, y, 1); + BLOBNBOX* neighbour; + FCOORD click(static_cast<float>(x), static_cast<float>(y)); + while ((neighbour = radsearch.NextRadSearch()) != nullptr) { + TBOX nbox = neighbour->bounding_box(); + if (nbox.contains(click) && neighbour->cblob() != nullptr) { + PrintBoxWidths(neighbour); + if (neighbour->neighbour(BND_LEFT) != nullptr) + PrintBoxWidths(neighbour->neighbour(BND_LEFT)); + if (neighbour->neighbour(BND_RIGHT) != nullptr) + PrintBoxWidths(neighbour->neighbour(BND_RIGHT)); + if (neighbour->neighbour(BND_ABOVE) != nullptr) + PrintBoxWidths(neighbour->neighbour(BND_ABOVE)); + if (neighbour->neighbour(BND_BELOW) != nullptr) + PrintBoxWidths(neighbour->neighbour(BND_BELOW)); + int gaps[BND_COUNT]; + neighbour->NeighbourGaps(gaps); + tprintf("Left gap=%d, right=%d, above=%d, below=%d, horz=%d, vert=%d\n" + "Good= %d %d %d %d\n", + gaps[BND_LEFT], gaps[BND_RIGHT], + gaps[BND_ABOVE], gaps[BND_BELOW], + neighbour->horz_possible(), + neighbour->vert_possible(), + neighbour->good_stroke_neighbour(BND_LEFT), + neighbour->good_stroke_neighbour(BND_RIGHT), + neighbour->good_stroke_neighbour(BND_ABOVE), + neighbour->good_stroke_neighbour(BND_BELOW)); + break; + } + } +} + +// Detects and marks leader dots/dashes. +// Leaders are horizontal chains of small or noise blobs that look +// monospace according to ColPartition::MarkAsLeaderIfMonospaced(). +// Detected leaders become the only occupants of the block->small_blobs list. +// Non-leader small blobs get moved to the blobs list. +// Non-leader noise blobs remain singletons in the noise list. +// All small and noise blobs in high density regions are marked BTFT_NONTEXT. +// block is the single block for the whole page or rectangle to be OCRed. +// leader_parts is the output. 
+void StrokeWidth::FindLeadersAndMarkNoise(TO_BLOCK* block, + ColPartition_LIST* leader_parts) { + InsertBlobList(&block->small_blobs); + InsertBlobList(&block->noise_blobs); + BlobGridSearch gsearch(this); + BLOBNBOX* bbox; + // For every bbox in the grid, set its neighbours. + gsearch.StartFullSearch(); + while ((bbox = gsearch.NextFullSearch()) != nullptr) { + SetNeighbours(true, false, bbox); + } + ColPartition_IT part_it(leader_parts); + gsearch.StartFullSearch(); + while ((bbox = gsearch.NextFullSearch()) != nullptr) { + if (bbox->flow() == BTFT_NONE) { + if (bbox->neighbour(BND_RIGHT) == nullptr && + bbox->neighbour(BND_LEFT) == nullptr) + continue; + // Put all the linked blobs into a ColPartition. + ColPartition* part = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1)); + BLOBNBOX* blob; + for (blob = bbox; blob != nullptr && blob->flow() == BTFT_NONE; + blob = blob->neighbour(BND_RIGHT)) + part->AddBox(blob); + for (blob = bbox->neighbour(BND_LEFT); blob != nullptr && + blob->flow() == BTFT_NONE; + blob = blob->neighbour(BND_LEFT)) + part->AddBox(blob); + if (part->MarkAsLeaderIfMonospaced()) + part_it.add_after_then_move(part); + else + delete part; + } + } +#ifndef GRAPHICS_DISABLED + if (textord_tabfind_show_strokewidths) { + leaders_win_ = DisplayGoodBlobs("LeaderNeighbours", 0, 0); + } +#endif + // Move any non-leaders from the small to the blobs list, as they are + // most likely dashes or broken characters. 
+ BLOBNBOX_IT blob_it(&block->blobs); + BLOBNBOX_IT small_it(&block->small_blobs); + for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) { + BLOBNBOX* blob = small_it.data(); + if (blob->flow() != BTFT_LEADER) { + if (blob->flow() == BTFT_NEIGHBOURS) + blob->set_flow(BTFT_NONE); + blob->ClearNeighbours(); + blob_it.add_to_end(small_it.extract()); + } + } + // Move leaders from the noise list to the small list, leaving the small + // list exclusively leaders, so they don't get processed further, + // and the remaining small blobs all in the noise list. + BLOBNBOX_IT noise_it(&block->noise_blobs); + for (noise_it.mark_cycle_pt(); !noise_it.cycled_list(); noise_it.forward()) { + BLOBNBOX* blob = noise_it.data(); + if (blob->flow() == BTFT_LEADER || blob->joined_to_prev()) { + small_it.add_to_end(noise_it.extract()); + } else if (blob->flow() == BTFT_NEIGHBOURS) { + blob->set_flow(BTFT_NONE); + blob->ClearNeighbours(); + } + } + // Clear the grid as we don't want the small stuff hanging around in it. + Clear(); +} + +/** Inserts the block blobs (normal and large) into this grid. + * Blobs remain owned by the block. */ +void StrokeWidth::InsertBlobs(TO_BLOCK* block) { + InsertBlobList(&block->blobs); + InsertBlobList(&block->large_blobs); +} + +// Checks the left or right side of the given leader partition and sets the +// (opposite) leader_on_right or leader_on_left flags for blobs +// that are next to the given side of the given leader partition. +void StrokeWidth::MarkLeaderNeighbours(const ColPartition* part, + LeftOrRight side) { + const TBOX& part_box = part->bounding_box(); + BlobGridSearch blobsearch(this); + // Search to the side of the leader for the nearest neighbour. + BLOBNBOX* best_blob = nullptr; + int best_gap = 0; + blobsearch.StartSideSearch(side == LR_LEFT ? 
part_box.left() + : part_box.right(), + part_box.bottom(), part_box.top()); + BLOBNBOX* blob; + while ((blob = blobsearch.NextSideSearch(side == LR_LEFT)) != nullptr) { + const TBOX& blob_box = blob->bounding_box(); + if (!blob_box.y_overlap(part_box)) + continue; + int x_gap = blob_box.x_gap(part_box); + if (x_gap > 2 * gridsize()) { + break; + } else if (best_blob == nullptr || x_gap < best_gap) { + best_blob = blob; + best_gap = x_gap; + } + } + if (best_blob != nullptr) { + if (side == LR_LEFT) + best_blob->set_leader_on_right(true); + else + best_blob->set_leader_on_left(true); + #ifndef GRAPHICS_DISABLED + if (leaders_win_ != nullptr) { + leaders_win_->Pen(side == LR_LEFT ? ScrollView::RED : ScrollView::GREEN); + const TBOX& blob_box = best_blob->bounding_box(); + leaders_win_->Rectangle(blob_box.left(), blob_box.bottom(), + blob_box.right(), blob_box.top()); + } + #endif // !GRAPHICS_DISABLED + } +} + +// Helper to compute the UQ of the square-ish CJK characters. +static int UpperQuartileCJKSize(int gridsize, BLOBNBOX_LIST* blobs) { + STATS sizes(0, gridsize * kMaxCJKSizeRatio); + BLOBNBOX_IT it(blobs); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + int width = blob->bounding_box().width(); + int height = blob->bounding_box().height(); + if (width <= height * kCJKAspectRatio && height < width * kCJKAspectRatio) + sizes.add(height, 1); + } + return static_cast<int>(sizes.ile(0.75f) + 0.5); +} + +// Fix broken CJK characters, using the fake joined blobs mechanism. +// Blobs are really merged, ie the master takes all the outlines and the +// others are deleted. +// Returns true if sufficient blobs are merged that it may be worth running +// again, due to a better estimate of character size. 
+bool StrokeWidth::FixBrokenCJK(TO_BLOCK* block) { + BLOBNBOX_LIST* blobs = &block->blobs; + int median_height = UpperQuartileCJKSize(gridsize(), blobs); + int max_dist = static_cast<int>(median_height * kCJKBrokenDistanceFraction); + int max_height = static_cast<int>(median_height * kCJKAspectRatio); + int num_fixed = 0; + BLOBNBOX_IT blob_it(blobs); + + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + if (blob->cblob() == nullptr || blob->cblob()->out_list()->empty()) + continue; + TBOX bbox = blob->bounding_box(); + bool debug = AlignedBlob::WithinTestRegion(3, bbox.left(), + bbox.bottom()); + if (debug) { + tprintf("Checking for Broken CJK (max size=%d):", max_height); + bbox.print(); + } + // Generate a list of blobs that overlap or are near enough to merge. + BLOBNBOX_CLIST overlapped_blobs; + AccumulateOverlaps(blob, debug, max_height, max_dist, + &bbox, &overlapped_blobs); + if (!overlapped_blobs.empty()) { + // There are overlapping blobs, so qualify them as being satisfactory + // before removing them from the grid and replacing them with the union. + // The final box must be roughly square. + if (bbox.width() > bbox.height() * kCJKAspectRatio || + bbox.height() > bbox.width() * kCJKAspectRatio) { + if (debug) { + tprintf("Bad final aspectratio:"); + bbox.print(); + } + continue; + } + // There can't be too many blobs to merge. + if (overlapped_blobs.length() >= kCJKMaxComponents) { + if (debug) + tprintf("Too many neighbours: %d\n", overlapped_blobs.length()); + continue; + } + // The strokewidths must match amongst the join candidates. 
+ BLOBNBOX_C_IT n_it(&overlapped_blobs); + for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) { + BLOBNBOX* neighbour = nullptr; + neighbour = n_it.data(); + if (!blob->MatchingStrokeWidth(*neighbour, kStrokeWidthFractionCJK, + kStrokeWidthCJK)) + break; + } + if (!n_it.cycled_list()) { + if (debug) { + tprintf("Bad stroke widths:"); + PrintBoxWidths(blob); + } + continue; // Not good enough. + } + + // Merge all the candidates into blob. + // We must remove blob from the grid and reinsert it after merging + // to maintain the integrity of the grid. + RemoveBBox(blob); + // Everything else will be calculated later. + for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) { + BLOBNBOX* neighbour = n_it.data(); + RemoveBBox(neighbour); + // Mark empty blob for deletion. + neighbour->set_region_type(BRT_NOISE); + blob->really_merge(neighbour); + if (rerotation_.x() != 1.0f || rerotation_.y() != 0.0f) { + blob->rotate_box(rerotation_); + } + } + InsertBBox(true, true, blob); + ++num_fixed; + if (debug) { + tprintf("Done! Final box:"); + bbox.print(); + } + } + } + // Count remaining blobs. + int num_remaining = 0; + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + if (blob->cblob() != nullptr && !blob->cblob()->out_list()->empty()) { + ++num_remaining; + } + } + // Permanently delete all the marked blobs after first removing all + // references in the neighbour members. + block->DeleteUnownedNoise(); + return num_fixed > num_remaining * kBrokenCJKIterationFraction; +} + +// Helper function to determine whether it is reasonable to merge the +// bbox and the nbox for repairing broken CJK. +// The distance apart must not exceed max_dist, the combined size must +// not exceed max_size, and the aspect ratio must either improve or at +// least not get worse by much. 
+static bool AcceptableCJKMerge(const TBOX& bbox, const TBOX& nbox, + bool debug, int max_size, int max_dist, + int* x_gap, int* y_gap) { + *x_gap = bbox.x_gap(nbox); + *y_gap = bbox.y_gap(nbox); + TBOX merged(nbox); + merged += bbox; + if (debug) { + tprintf("gaps = %d, %d, merged_box:", *x_gap, *y_gap); + merged.print(); + } + if (*x_gap <= max_dist && *y_gap <= max_dist && + merged.width() <= max_size && merged.height() <= max_size) { + // Close enough to call overlapping. Check aspect ratios. + double old_ratio = static_cast<double>(bbox.width()) / bbox.height(); + if (old_ratio < 1.0) old_ratio = 1.0 / old_ratio; + double new_ratio = static_cast<double>(merged.width()) / merged.height(); + if (new_ratio < 1.0) new_ratio = 1.0 / new_ratio; + if (new_ratio <= old_ratio * kCJKAspectRatioIncrease) + return true; + } + return false; +} + +// Collect blobs that overlap or are within max_dist of the input bbox. +// Return them in the list of blobs and expand the bbox to be the union +// of all the boxes. not_this is excluded from the search, as are blobs +// that cause the merged box to exceed max_size in either dimension. +void StrokeWidth::AccumulateOverlaps(const BLOBNBOX* not_this, bool debug, + int max_size, int max_dist, + TBOX* bbox, BLOBNBOX_CLIST* blobs) { + // While searching, nearests holds the nearest failed blob in each + // direction. When we have a nearest in each of the 4 directions, then + // the search is over, and at this point the final bbox must not overlap + // any of the nearests. + BLOBNBOX* nearests[BND_COUNT]; + for (auto & nearest : nearests) { + nearest = nullptr; + } + int x = (bbox->left() + bbox->right()) / 2; + int y = (bbox->bottom() + bbox->top()) / 2; + // Run a radial search for blobs that overlap or are sufficiently close. 
+ BlobGridSearch radsearch(this); + radsearch.StartRadSearch(x, y, kCJKRadius); + BLOBNBOX* neighbour; + while ((neighbour = radsearch.NextRadSearch()) != nullptr) { + if (neighbour == not_this) continue; + TBOX nbox = neighbour->bounding_box(); + int x_gap, y_gap; + if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, max_dist, + &x_gap, &y_gap)) { + // Close enough to call overlapping. Merge boxes. + *bbox += nbox; + blobs->add_sorted(SortByBoxLeft<BLOBNBOX>, true, neighbour); + if (debug) { + tprintf("Added:"); + nbox.print(); + } + // Since we merged, search the nearests, as some might now me mergeable. + for (int dir = 0; dir < BND_COUNT; ++dir) { + if (nearests[dir] == nullptr) continue; + nbox = nearests[dir]->bounding_box(); + if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, + max_dist, &x_gap, &y_gap)) { + // Close enough to call overlapping. Merge boxes. + *bbox += nbox; + blobs->add_sorted(SortByBoxLeft<BLOBNBOX>, true, nearests[dir]); + if (debug) { + tprintf("Added:"); + nbox.print(); + } + nearests[dir] = nullptr; + dir = -1; // Restart the search. + } + } + } else if (x_gap < 0 && x_gap <= y_gap) { + // A vertical neighbour. Record the nearest. + BlobNeighbourDir dir = nbox.top() > bbox->top() ? BND_ABOVE : BND_BELOW; + if (nearests[dir] == nullptr || + y_gap < bbox->y_gap(nearests[dir]->bounding_box())) { + nearests[dir] = neighbour; + } + } else if (y_gap < 0 && y_gap <= x_gap) { + // A horizontal neighbour. Record the nearest. + BlobNeighbourDir dir = nbox.left() > bbox->left() ? BND_RIGHT : BND_LEFT; + if (nearests[dir] == nullptr || + x_gap < bbox->x_gap(nearests[dir]->bounding_box())) { + nearests[dir] = neighbour; + } + } + // If all nearests are non-null, then we have finished. + if (nearests[BND_LEFT] && nearests[BND_RIGHT] && + nearests[BND_ABOVE] && nearests[BND_BELOW]) + break; + } + // Final overlap with a nearest is not allowed. 
+ for (auto & nearest : nearests) { + if (nearest == nullptr) continue; + const TBOX& nbox = nearest->bounding_box(); + if (debug) { + tprintf("Testing for overlap with:"); + nbox.print(); + } + if (bbox->overlap(nbox)) { + blobs->shallow_clear(); + if (debug) + tprintf("Final box overlaps nearest\n"); + return; + } + } +} + +// For each blob in this grid, Finds the textline direction to be horizontal +// or vertical according to distance to neighbours and 1st and 2nd order +// neighbours. Non-text tends to end up without a definite direction. +// Result is setting of the neighbours and vert_possible/horz_possible +// flags in the BLOBNBOXes currently in this grid. +// This function is called more than once if page orientation is uncertain, +// so display_if_debugging is true on the final call to display the results. +void StrokeWidth::FindTextlineFlowDirection(PageSegMode pageseg_mode, + bool display_if_debugging) { + BlobGridSearch gsearch(this); + BLOBNBOX* bbox; + // For every bbox in the grid, set its neighbours. + gsearch.StartFullSearch(); + while ((bbox = gsearch.NextFullSearch()) != nullptr) { + SetNeighbours(false, display_if_debugging, bbox); + } + // Where vertical or horizontal wins by a big margin, clarify it. + gsearch.StartFullSearch(); + while ((bbox = gsearch.NextFullSearch()) != nullptr) { + SimplifyObviousNeighbours(bbox); + } + // Now try to make the blobs only vertical or horizontal using neighbours. 
+ gsearch.StartFullSearch(); + while ((bbox = gsearch.NextFullSearch()) != nullptr) { + if (FindingVerticalOnly(pageseg_mode)) { + bbox->set_vert_possible(true); + bbox->set_horz_possible(false); + } else if (FindingHorizontalOnly(pageseg_mode)) { + bbox->set_vert_possible(false); + bbox->set_horz_possible(true); + } else { + SetNeighbourFlows(bbox); + } + } +#ifndef GRAPHICS_DISABLED + if ((textord_tabfind_show_strokewidths && display_if_debugging) || + textord_tabfind_show_strokewidths > 1) { + initial_widths_win_ = DisplayGoodBlobs("InitialStrokewidths", 400, 0); + } +#endif + // Improve flow direction with neighbours. + gsearch.StartFullSearch(); + while ((bbox = gsearch.NextFullSearch()) != nullptr) { + SmoothNeighbourTypes(pageseg_mode, false, bbox); + } + // Now allow reset of firm values to fix renegades. + gsearch.StartFullSearch(); + while ((bbox = gsearch.NextFullSearch()) != nullptr) { + SmoothNeighbourTypes(pageseg_mode, true, bbox); + } + // Repeat. + gsearch.StartFullSearch(); + while ((bbox = gsearch.NextFullSearch()) != nullptr) { + SmoothNeighbourTypes(pageseg_mode, true, bbox); + } +#ifndef GRAPHICS_DISABLED + if ((textord_tabfind_show_strokewidths && display_if_debugging) || + textord_tabfind_show_strokewidths > 1) { + widths_win_ = DisplayGoodBlobs("ImprovedStrokewidths", 800, 0); + } +#endif +} + +// Sets the neighbours and good_stroke_neighbours members of the blob by +// searching close on all 4 sides. +// When finding leader dots/dashes, there is a slightly different rule for +// what makes a good neighbour. +void StrokeWidth::SetNeighbours(bool leaders, bool activate_line_trap, + BLOBNBOX* blob) { + int line_trap_count = 0; + for (int dir = 0; dir < BND_COUNT; ++dir) { + auto bnd = static_cast<BlobNeighbourDir>(dir); + line_trap_count += FindGoodNeighbour(bnd, leaders, blob); + } + if (line_trap_count > 0 && activate_line_trap) { + // It looks like a line so isolate it by clearing its neighbours. 
+ blob->ClearNeighbours(); + const TBOX& box = blob->bounding_box(); + blob->set_region_type(box.width() > box.height() ? BRT_HLINE : BRT_VLINE); + } +} + + +// Sets the good_stroke_neighbours member of the blob if it has a +// GoodNeighbour on the given side. +// Also sets the neighbour in the blob, whether or not a good one is found. +// Returns the number of blobs in the nearby search area that would lead us to +// believe that this blob is a line separator. +// Leaders get extra special lenient treatment. +int StrokeWidth::FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, + BLOBNBOX* blob) { + // Search for neighbours that overlap vertically. + TBOX blob_box = blob->bounding_box(); + bool debug = AlignedBlob::WithinTestRegion(2, blob_box.left(), + blob_box.bottom()); + if (debug) { + tprintf("FGN in dir %d for blob:", dir); + blob_box.print(); + } + int top = blob_box.top(); + int bottom = blob_box.bottom(); + int left = blob_box.left(); + int right = blob_box.right(); + int width = right - left; + int height = top - bottom; + + // A trap to detect lines tests for the min dimension of neighbours + // being larger than a multiple of the min dimension of the line + // and the larger dimension being smaller than a fraction of the max + // dimension of the line. + int line_trap_max = std::max(width, height) / kLineTrapLongest; + int line_trap_min = std::min(width, height) * kLineTrapShortest; + int line_trap_count = 0; + + int min_good_overlap = (dir == BND_LEFT || dir == BND_RIGHT) + ? height / 2 : width / 2; + int min_decent_overlap = (dir == BND_LEFT || dir == BND_RIGHT) + ? height / 3 : width / 3; + if (leaders) + min_good_overlap = min_decent_overlap = 1; + + int search_pad = static_cast<int>( + sqrt(static_cast<double>(width * height)) * kNeighbourSearchFactor); + if (gridsize() > search_pad) + search_pad = gridsize(); + TBOX search_box = blob_box; + // Pad the search in the appropriate direction. 
+ switch (dir) { + case BND_LEFT: + search_box.set_left(search_box.left() - search_pad); + break; + case BND_RIGHT: + search_box.set_right(search_box.right() + search_pad); + break; + case BND_BELOW: + search_box.set_bottom(search_box.bottom() - search_pad); + break; + case BND_ABOVE: + search_box.set_top(search_box.top() + search_pad); + break; + case BND_COUNT: + return 0; + } + + BlobGridSearch rectsearch(this); + rectsearch.StartRectSearch(search_box); + BLOBNBOX* best_neighbour = nullptr; + double best_goodness = 0.0; + bool best_is_good = false; + BLOBNBOX* neighbour; + while ((neighbour = rectsearch.NextRectSearch()) != nullptr) { + TBOX nbox = neighbour->bounding_box(); + if (neighbour == blob) + continue; + int mid_x = (nbox.left() + nbox.right()) / 2; + if (mid_x < blob->left_rule() || mid_x > blob->right_rule()) + continue; // In a different column. + if (debug) { + tprintf("Neighbour at:"); + nbox.print(); + } + + // Last-minute line detector. There is a small upper limit to the line + // width accepted by the morphological line detector. + int n_width = nbox.width(); + int n_height = nbox.height(); + if (std::min(n_width, n_height) > line_trap_min && + std::max(n_width, n_height) < line_trap_max) + ++line_trap_count; + // Heavily joined text, such as Arabic may have very different sizes when + // looking at the maxes, but the heights may be almost identical, so check + // for a difference in height if looking sideways or width vertically. + if (TabFind::VeryDifferentSizes(std::max(n_width, n_height), + std::max(width, height)) && + (((dir == BND_LEFT || dir ==BND_RIGHT) && + TabFind::DifferentSizes(n_height, height)) || + ((dir == BND_BELOW || dir ==BND_ABOVE) && + TabFind::DifferentSizes(n_width, width)))) { + if (debug) tprintf("Bad size\n"); + continue; // Could be a different font size or non-text. + } + // Amount of vertical overlap between the blobs. 
+ int overlap; + // If the overlap is along the short side of the neighbour, and it + // is fully overlapped, then perp_overlap holds the length of the long + // side of the neighbour. A measure to include hyphens and dashes as + // legitimate neighbours. + int perp_overlap; + int gap; + if (dir == BND_LEFT || dir == BND_RIGHT) { + overlap = std::min(static_cast<int>(nbox.top()), top) - std::max(static_cast<int>(nbox.bottom()), bottom); + if (overlap == nbox.height() && nbox.width() > nbox.height()) + perp_overlap = nbox.width(); + else + perp_overlap = overlap; + gap = dir == BND_LEFT ? left - nbox.left() : nbox.right() - right; + if (gap <= 0) { + if (debug) tprintf("On wrong side\n"); + continue; // On the wrong side. + } + gap -= n_width; + } else { + overlap = std::min(static_cast<int>(nbox.right()), right) - std::max(static_cast<int>(nbox.left()), left); + if (overlap == nbox.width() && nbox.height() > nbox.width()) + perp_overlap = nbox.height(); + else + perp_overlap = overlap; + gap = dir == BND_BELOW ? bottom - nbox.bottom() : nbox.top() - top; + if (gap <= 0) { + if (debug) tprintf("On wrong side\n"); + continue; // On the wrong side. + } + gap -= n_height; + } + if (-gap > overlap) { + if (debug) tprintf("Overlaps wrong way\n"); + continue; // Overlaps the wrong way. + } + if (perp_overlap < min_decent_overlap) { + if (debug) tprintf("Doesn't overlap enough\n"); + continue; // Doesn't overlap enough. + } + bool bad_sizes = TabFind::DifferentSizes(height, n_height) && + TabFind::DifferentSizes(width, n_width); + bool is_good = overlap >= min_good_overlap && !bad_sizes && + blob->MatchingStrokeWidth(*neighbour, + kStrokeWidthFractionTolerance, + kStrokeWidthTolerance); + // Best is a fuzzy combination of gap, overlap and is good. + // Basically if you make one thing twice as good without making + // anything else twice as bad, then it is better. 
+ if (gap < 1) gap = 1; + double goodness = (1.0 + is_good) * overlap / gap; + if (debug) { + tprintf("goodness = %g vs best of %g, good=%d, overlap=%d, gap=%d\n", + goodness, best_goodness, is_good, overlap, gap); + } + if (goodness > best_goodness) { + best_neighbour = neighbour; + best_goodness = goodness; + best_is_good = is_good; + } + } + blob->set_neighbour(dir, best_neighbour, best_is_good); + return line_trap_count; +} + +// Helper to get a list of 1st-order neighbours. +static void ListNeighbours(const BLOBNBOX* blob, + BLOBNBOX_CLIST* neighbours) { + for (int dir = 0; dir < BND_COUNT; ++dir) { + auto bnd = static_cast<BlobNeighbourDir>(dir); + BLOBNBOX* neighbour = blob->neighbour(bnd); + if (neighbour != nullptr) { + neighbours->add_sorted(SortByBoxLeft<BLOBNBOX>, true, neighbour); + } + } +} + +// Helper to get a list of 1st and 2nd order neighbours. +static void List2ndNeighbours(const BLOBNBOX* blob, + BLOBNBOX_CLIST* neighbours) { + ListNeighbours(blob, neighbours); + for (int dir = 0; dir < BND_COUNT; ++dir) { + auto bnd = static_cast<BlobNeighbourDir>(dir); + BLOBNBOX* neighbour = blob->neighbour(bnd); + if (neighbour != nullptr) { + ListNeighbours(neighbour, neighbours); + } + } +} + +// Helper to get a list of 1st, 2nd and 3rd order neighbours. +static void List3rdNeighbours(const BLOBNBOX* blob, + BLOBNBOX_CLIST* neighbours) { + List2ndNeighbours(blob, neighbours); + for (int dir = 0; dir < BND_COUNT; ++dir) { + auto bnd = static_cast<BlobNeighbourDir>(dir); + BLOBNBOX* neighbour = blob->neighbour(bnd); + if (neighbour != nullptr) { + List2ndNeighbours(neighbour, neighbours); + } + } +} + +// Helper to count the evidence for verticalness or horizontalness +// in a list of neighbours. 
+static void CountNeighbourGaps(bool debug, BLOBNBOX_CLIST* neighbours, + int* pure_h_count, int* pure_v_count) { + if (neighbours->length() <= kMostlyOneDirRatio) + return; + BLOBNBOX_C_IT it(neighbours); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + int h_min, h_max, v_min, v_max; + blob->MinMaxGapsClipped(&h_min, &h_max, &v_min, &v_max); + if (debug) + tprintf("Hgaps [%d,%d], vgaps [%d,%d]:", h_min, h_max, v_min, v_max); + if (h_max < v_min || + blob->leader_on_left() || blob->leader_on_right()) { + // Horizontal gaps are clear winners. Count a pure horizontal. + ++*pure_h_count; + if (debug) tprintf("Horz at:"); + } else if (v_max < h_min) { + // Vertical gaps are clear winners. Clear a pure vertical. + ++*pure_v_count; + if (debug) tprintf("Vert at:"); + } else { + if (debug) tprintf("Neither at:"); + } + if (debug) + blob->bounding_box().print(); + } +} + +// Makes the blob to be only horizontal or vertical where evidence +// is clear based on gaps of 2nd order neighbours, or definite individual +// blobs. +void StrokeWidth::SetNeighbourFlows(BLOBNBOX* blob) { + if (blob->DefiniteIndividualFlow()) + return; + bool debug = AlignedBlob::WithinTestRegion(2, blob->bounding_box().left(), + blob->bounding_box().bottom()); + if (debug) { + tprintf("SetNeighbourFlows (current flow=%d, type=%d) on:", + blob->flow(), blob->region_type()); + blob->bounding_box().print(); + } + BLOBNBOX_CLIST neighbours; + List3rdNeighbours(blob, &neighbours); + // The number of pure horizontal and vertical neighbours. 
+ int pure_h_count = 0; + int pure_v_count = 0; + CountNeighbourGaps(debug, &neighbours, &pure_h_count, &pure_v_count); + if (debug) { + HandleClick(blob->bounding_box().left() + 1, + blob->bounding_box().bottom() + 1); + tprintf("SetFlows: h_count=%d, v_count=%d\n", + pure_h_count, pure_v_count); + } + if (!neighbours.empty()) { + blob->set_vert_possible(true); + blob->set_horz_possible(true); + if (pure_h_count > 2 * pure_v_count) { + // Horizontal gaps are clear winners. Clear vertical neighbours. + blob->set_vert_possible(false); + } else if (pure_v_count > 2 * pure_h_count) { + // Vertical gaps are clear winners. Clear horizontal neighbours. + blob->set_horz_possible(false); + } + } else { + // Lonely blob. Can't tell its flow direction. + blob->set_vert_possible(false); + blob->set_horz_possible(false); + } +} + + +// Helper to count the number of horizontal and vertical blobs in a list. +static void CountNeighbourTypes(BLOBNBOX_CLIST* neighbours, + int* pure_h_count, int* pure_v_count) { + BLOBNBOX_C_IT it(neighbours); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + if (blob->UniquelyHorizontal()) + ++*pure_h_count; + if (blob->UniquelyVertical()) + ++*pure_v_count; + } +} + +// Nullify the neighbours in the wrong directions where the direction +// is clear-cut based on a distance margin. Good for isolating vertical +// text from neighbouring horizontal text. +void StrokeWidth::SimplifyObviousNeighbours(BLOBNBOX* blob) { + // Case 1: We have text that is likely several characters, blurry and joined + // together. + if ((blob->bounding_box().width() > 3 * blob->area_stroke_width() && + blob->bounding_box().height() > 3 * blob->area_stroke_width())) { + // The blob is complex (not stick-like). + if (blob->bounding_box().width() > 4 * blob->bounding_box().height()) { + // Horizontal conjoined text. 
+ blob->set_neighbour(BND_ABOVE, nullptr, false); + blob->set_neighbour(BND_BELOW, nullptr, false); + return; + } + if (blob->bounding_box().height() > 4 * blob->bounding_box().width()) { + // Vertical conjoined text. + blob->set_neighbour(BND_LEFT, nullptr, false); + blob->set_neighbour(BND_RIGHT, nullptr, false); + return; + } + } + + // Case 2: This blob is likely a single character. + int margin = gridsize() / 2; + int h_min, h_max, v_min, v_max; + blob->MinMaxGapsClipped(&h_min, &h_max, &v_min, &v_max); + if ((h_max + margin < v_min && h_max < margin / 2) || + blob->leader_on_left() || blob->leader_on_right()) { + // Horizontal gaps are clear winners. Clear vertical neighbours. + blob->set_neighbour(BND_ABOVE, nullptr, false); + blob->set_neighbour(BND_BELOW, nullptr, false); + } else if (v_max + margin < h_min && v_max < margin / 2) { + // Vertical gaps are clear winners. Clear horizontal neighbours. + blob->set_neighbour(BND_LEFT, nullptr, false); + blob->set_neighbour(BND_RIGHT, nullptr, false); + } +} + +// Smoothes the vertical/horizontal type of the blob based on the +// 2nd-order neighbours. If reset_all is true, then all blobs are +// changed. Otherwise, only ambiguous blobs are processed. +void StrokeWidth::SmoothNeighbourTypes(PageSegMode pageseg_mode, bool reset_all, + BLOBNBOX* blob) { + if ((blob->vert_possible() && blob->horz_possible()) || reset_all) { + // There are both horizontal and vertical so try to fix it. + BLOBNBOX_CLIST neighbours; + List2ndNeighbours(blob, &neighbours); + // The number of pure horizontal and vertical neighbours. 
+ int pure_h_count = 0; + int pure_v_count = 0; + CountNeighbourTypes(&neighbours, &pure_h_count, &pure_v_count); + if (AlignedBlob::WithinTestRegion(2, blob->bounding_box().left(), + blob->bounding_box().bottom())) { + HandleClick(blob->bounding_box().left() + 1, + blob->bounding_box().bottom() + 1); + tprintf("pure_h=%d, pure_v=%d\n", + pure_h_count, pure_v_count); + } + if (pure_h_count > pure_v_count && !FindingVerticalOnly(pageseg_mode)) { + // Horizontal gaps are clear winners. Clear vertical neighbours. + blob->set_vert_possible(false); + blob->set_horz_possible(true); + } else if (pure_v_count > pure_h_count && + !FindingHorizontalOnly(pageseg_mode)) { + // Vertical gaps are clear winners. Clear horizontal neighbours. + blob->set_horz_possible(false); + blob->set_vert_possible(true); + } + } else if (AlignedBlob::WithinTestRegion(2, blob->bounding_box().left(), + blob->bounding_box().bottom())) { + HandleClick(blob->bounding_box().left() + 1, + blob->bounding_box().bottom() + 1); + tprintf("Clean on pass 3!\n"); + } +} + +// Partition creation. Accumulates vertical and horizontal text chains, +// puts the remaining blobs in as unknowns, and then merges/splits to +// minimize overlap and smoothes the types with neighbours and the color +// image if provided. rerotation is used to rotate the coordinate space +// back to the nontext_map_ image. +// If find_problems is true, detects possible noise pollution by the amount +// of partition overlap that is created by the diacritics. If excessive, the +// noise is separated out into diacritic blobs, and PFR_NOISE is returned. +// [TODO(rays): if the partition overlap is caused by heavy skew, deskews +// the components, saves the skew_angle and returns PFR_SKEW.] If the return +// is not PFR_OK, the job is incomplete, and FindInitialPartitions must be +// called again after cleaning up the partly done work. 
+PartitionFindResult StrokeWidth::FindInitialPartitions( + PageSegMode pageseg_mode, const FCOORD& rerotation, bool find_problems, + TO_BLOCK* block, BLOBNBOX_LIST* diacritic_blobs, + ColPartitionGrid* part_grid, ColPartition_LIST* big_parts, + FCOORD* skew_angle) { + if (!FindingHorizontalOnly(pageseg_mode)) FindVerticalTextChains(part_grid); + if (!FindingVerticalOnly(pageseg_mode)) FindHorizontalTextChains(part_grid); +#ifndef GRAPHICS_DISABLED + if (textord_tabfind_show_strokewidths) { + chains_win_ = MakeWindow(0, 400, "Initial text chains"); + part_grid->DisplayBoxes(chains_win_); + projection_->DisplayProjection(); + } +#endif + if (find_problems) { + // TODO(rays) Do something to find skew, set skew_angle and return if there + // is some. + } + part_grid->SplitOverlappingPartitions(big_parts); + EasyMerges(part_grid); + RemoveLargeUnusedBlobs(block, part_grid, big_parts); + TBOX grid_box(bleft(), tright()); + while (part_grid->GridSmoothNeighbours(BTFT_CHAIN, nontext_map_, grid_box, + rerotation)); + while (part_grid->GridSmoothNeighbours(BTFT_NEIGHBOURS, nontext_map_, + grid_box, rerotation)); + int pre_overlap = part_grid->ComputeTotalOverlap(nullptr); + TestDiacritics(part_grid, block); + MergeDiacritics(block, part_grid); + if (find_problems && diacritic_blobs != nullptr && + DetectAndRemoveNoise(pre_overlap, grid_box, block, part_grid, + diacritic_blobs)) { + return PFR_NOISE; + } +#ifndef GRAPHICS_DISABLED + if (textord_tabfind_show_strokewidths) { + textlines_win_ = MakeWindow(400, 400, "GoodTextline blobs"); + part_grid->DisplayBoxes(textlines_win_); + diacritics_win_ = DisplayDiacritics("Diacritics", 0, 0, block); + } +#endif + PartitionRemainingBlobs(pageseg_mode, part_grid); + part_grid->SplitOverlappingPartitions(big_parts); + EasyMerges(part_grid); + while (part_grid->GridSmoothNeighbours(BTFT_CHAIN, nontext_map_, grid_box, + rerotation)); + while (part_grid->GridSmoothNeighbours(BTFT_NEIGHBOURS, nontext_map_, + grid_box, rerotation)); + // Now 
eliminate strong stuff in a sea of the opposite. + while (part_grid->GridSmoothNeighbours(BTFT_STRONG_CHAIN, nontext_map_, + grid_box, rerotation)); +#ifndef GRAPHICS_DISABLED + if (textord_tabfind_show_strokewidths) { + smoothed_win_ = MakeWindow(800, 400, "Smoothed blobs"); + part_grid->DisplayBoxes(smoothed_win_); + } +#endif + return PFR_OK; +} + +// Detects noise by a significant increase in partition overlap from +// pre_overlap to now, and removes noise from the union of all the overlapping +// partitions, placing the blobs in diacritic_blobs. Returns true if any noise +// was found and removed. +bool StrokeWidth::DetectAndRemoveNoise(int pre_overlap, const TBOX& grid_box, + TO_BLOCK* block, + ColPartitionGrid* part_grid, + BLOBNBOX_LIST* diacritic_blobs) { + ColPartitionGrid* noise_grid = nullptr; + int post_overlap = part_grid->ComputeTotalOverlap(&noise_grid); + if (pre_overlap == 0) pre_overlap = 1; + BLOBNBOX_IT diacritic_it(diacritic_blobs); + if (noise_grid != nullptr) { + if (post_overlap > pre_overlap * kNoiseOverlapGrowthFactor && + post_overlap > grid_box.area() * kNoiseOverlapAreaFactor) { + // This is noisy enough to fix. +#ifndef GRAPHICS_DISABLED + if (textord_tabfind_show_strokewidths) { + ScrollView* noise_win = MakeWindow(1000, 500, "Noise Areas"); + noise_grid->DisplayBoxes(noise_win); + } +#endif + part_grid->DeleteNonLeaderParts(); + BLOBNBOX_IT blob_it(&block->noise_blobs); + ColPartitionGridSearch rsearch(noise_grid); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + blob->ClearNeighbours(); + if (!blob->IsDiacritic() || blob->owner() != nullptr) + continue; // Not a noise candidate. + TBOX search_box(blob->bounding_box()); + search_box.pad(gridsize(), gridsize()); + rsearch.StartRectSearch(search_box); + ColPartition* part = rsearch.NextRectSearch(); + if (part != nullptr) { + // Consider blob as possible noise. 
+ blob->set_owns_cblob(true); + blob->compute_bounding_box(); + diacritic_it.add_after_then_move(blob_it.extract()); + } + } + noise_grid->DeleteParts(); + delete noise_grid; + return true; + } + noise_grid->DeleteParts(); + delete noise_grid; + } + return false; +} + +// Helper verifies that blob's neighbour in direction dir is good to add to a +// vertical text chain by returning the neighbour if it is not null, not owned, +// and not uniquely horizontal, as well as its neighbour in the opposite +// direction is blob. +static BLOBNBOX* MutualUnusedVNeighbour(const BLOBNBOX* blob, + BlobNeighbourDir dir) { + BLOBNBOX* next_blob = blob->neighbour(dir); + if (next_blob == nullptr || next_blob->owner() != nullptr || + next_blob->UniquelyHorizontal()) + return nullptr; + if (next_blob->neighbour(DirOtherWay(dir)) == blob) + return next_blob; + return nullptr; +} + +// Finds vertical chains of text-like blobs and puts them in ColPartitions. +void StrokeWidth::FindVerticalTextChains(ColPartitionGrid* part_grid) { + // A PageSegMode that forces vertical textlines with the current rotation. + PageSegMode pageseg_mode = + rerotation_.y() == 0.0f ? PSM_SINGLE_BLOCK_VERT_TEXT : PSM_SINGLE_COLUMN; + BlobGridSearch gsearch(this); + BLOBNBOX* bbox; + gsearch.StartFullSearch(); + while ((bbox = gsearch.NextFullSearch()) != nullptr) { + // Only process boxes that have no horizontal hope and have not yet + // been included in a chain. + BLOBNBOX* blob; + if (bbox->owner() == nullptr && bbox->UniquelyVertical() && + (blob = MutualUnusedVNeighbour(bbox, BND_ABOVE)) != nullptr) { + // Put all the linked blobs into a ColPartition. 
+ ColPartition* part = new ColPartition(BRT_VERT_TEXT, ICOORD(0, 1)); + part->AddBox(bbox); + while (blob != nullptr) { + part->AddBox(blob); + blob = MutualUnusedVNeighbour(blob, BND_ABOVE); + } + blob = MutualUnusedVNeighbour(bbox, BND_BELOW); + while (blob != nullptr) { + part->AddBox(blob); + blob = MutualUnusedVNeighbour(blob, BND_BELOW); + } + CompletePartition(pageseg_mode, part, part_grid); + } + } +} + +// Helper verifies that blob's neighbour in direction dir is good to add to a +// horizontal text chain by returning the neighbour if it is not null, not +// owned, and not uniquely vertical, as well as its neighbour in the opposite +// direction is blob. +static BLOBNBOX* MutualUnusedHNeighbour(const BLOBNBOX* blob, + BlobNeighbourDir dir) { + BLOBNBOX* next_blob = blob->neighbour(dir); + if (next_blob == nullptr || next_blob->owner() != nullptr || + next_blob->UniquelyVertical()) + return nullptr; + if (next_blob->neighbour(DirOtherWay(dir)) == blob) + return next_blob; + return nullptr; +} + +// Finds horizontal chains of text-like blobs and puts them in ColPartitions. +void StrokeWidth::FindHorizontalTextChains(ColPartitionGrid* part_grid) { + // A PageSegMode that forces horizontal textlines with the current rotation. + PageSegMode pageseg_mode = + rerotation_.y() == 0.0f ? PSM_SINGLE_COLUMN : PSM_SINGLE_BLOCK_VERT_TEXT; + BlobGridSearch gsearch(this); + BLOBNBOX* bbox; + gsearch.StartFullSearch(); + while ((bbox = gsearch.NextFullSearch()) != nullptr) { + BLOBNBOX* blob; + if (bbox->owner() == nullptr && bbox->UniquelyHorizontal() && + (blob = MutualUnusedHNeighbour(bbox, BND_RIGHT)) != nullptr) { + // Put all the linked blobs into a ColPartition. 
+ ColPartition* part = new ColPartition(BRT_TEXT, ICOORD(0, 1)); + part->AddBox(bbox); + while (blob != nullptr) { + part->AddBox(blob); + blob = MutualUnusedHNeighbour(blob, BND_RIGHT); + } + blob = MutualUnusedHNeighbour(bbox, BND_LEFT); + while (blob != nullptr) { + part->AddBox(blob); + blob = MutualUnusedVNeighbour(blob, BND_LEFT); + } + CompletePartition(pageseg_mode, part, part_grid); + } + } +} + +// Finds diacritics and saves their base character in the blob. +// The objective is to move all diacritics to the noise_blobs list, so +// they don't mess up early textline finding/merging, or force splits +// on textlines that overlap a bit. Blobs that become diacritics must be +// either part of no ColPartition (nullptr owner) or in a small partition in +// which ALL the blobs are diacritics, in which case the partition is +// exploded (deleted) back to its blobs. +void StrokeWidth::TestDiacritics(ColPartitionGrid* part_grid, TO_BLOCK* block) { + BlobGrid small_grid(gridsize(), bleft(), tright()); + small_grid.InsertBlobList(&block->noise_blobs); + small_grid.InsertBlobList(&block->blobs); + int medium_diacritics = 0; + int small_diacritics = 0; + BLOBNBOX_IT small_it(&block->noise_blobs); + for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) { + BLOBNBOX* blob = small_it.data(); + if (blob->owner() == nullptr && !blob->IsDiacritic() && + DiacriticBlob(&small_grid, blob)) { + ++small_diacritics; + } + } + BLOBNBOX_IT blob_it(&block->blobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + if (blob->IsDiacritic()) { + small_it.add_to_end(blob_it.extract()); + continue; // Already a diacritic. 
+ } + ColPartition* part = blob->owner(); + if (part == nullptr && DiacriticBlob(&small_grid, blob)) { + ++medium_diacritics; + RemoveBBox(blob); + small_it.add_to_end(blob_it.extract()); + } else if (part != nullptr && !part->block_owned() && + part->boxes_count() < 3) { + // We allow blobs in small partitions to become diacritics if ALL the + // blobs in the partition qualify as we can then cleanly delete the + // partition, turn all the blobs in it to diacritics and they can be + // merged into the base character partition more easily than merging + // the partitions. + BLOBNBOX_C_IT box_it(part->boxes()); + for (box_it.mark_cycle_pt(); !box_it.cycled_list() && + DiacriticBlob(&small_grid, box_it.data()); + box_it.forward()); + if (box_it.cycled_list()) { + // They are all good. + while (!box_it.empty()) { + // Liberate the blob from its partition so it can be treated + // as a diacritic and merged explicitly with the base part. + // The blob is really owned by the block. The partition "owner" + // is nulled to allow the blob to get merged with its base character + // partition. + BLOBNBOX* box = box_it.extract(); + box->set_owner(nullptr); + box_it.forward(); + ++medium_diacritics; + // We remove the blob from the grid so it isn't found by subsequent + // searches where we might not want to include diacritics. + RemoveBBox(box); + } + // We only move the one blob to the small list here, but the others + // all get moved by the test at the top of the loop. 
+ small_it.add_to_end(blob_it.extract()); + part_grid->RemoveBBox(part); + delete part; + } + } else if (AlignedBlob::WithinTestRegion(2, blob->bounding_box().left(), + blob->bounding_box().bottom())) { + tprintf("Blob not available to be a diacritic at:"); + blob->bounding_box().print(); + } + } + if (textord_tabfind_show_strokewidths) { + tprintf("Found %d small diacritics, %d medium\n", + small_diacritics, medium_diacritics); + } +} + +// Searches this grid for an appropriately close and sized neighbour of the +// given [small] blob. If such a blob is found, the diacritic base is saved +// in the blob and true is returned. +// The small_grid is a secondary grid that contains the small/noise objects +// that are not in this grid, but may be useful for determining a connection +// between blob and its potential base character. (See DiacriticXGapFilled.) +bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob) { + if (BLOBNBOX::UnMergeableType(blob->region_type()) || + blob->region_type() == BRT_VERT_TEXT) + return false; + TBOX small_box(blob->bounding_box()); + bool debug = AlignedBlob::WithinTestRegion(2, small_box.left(), + small_box.bottom()); + if (debug) { + tprintf("Testing blob for diacriticness at:"); + small_box.print(); + } + int x = (small_box.left() + small_box.right()) / 2; + int y = (small_box.bottom() + small_box.top()) / 2; + int grid_x, grid_y; + GridCoords(x, y, &grid_x, &grid_y); + int height = small_box.height(); + // Setup a rectangle search to find its nearest base-character neighbour. + // We keep 2 different best candidates: + // best_x_overlap is a category of base characters that have an overlap in x + // (like a acute) in which we look for the least y-gap, computed using the + // projection to favor base characters in the same textline. 
+ // best_y_overlap is a category of base characters that have no x overlap, + // (nominally a y-overlap is preferrecd but not essential) in which we + // look for the least weighted sum of x-gap and y-gap, with x-gap getting + // a lower weight to catch quotes at the end of a textline. + // NOTE that x-gap and y-gap are measured from the nearest side of the base + // character to the FARTHEST side of the diacritic to allow small diacritics + // to be a reasonable distance away, but not big diacritics. + BLOBNBOX* best_x_overlap = nullptr; + BLOBNBOX* best_y_overlap = nullptr; + int best_total_dist = 0; + int best_y_gap = 0; + TBOX best_xbox; + // TODO(rays) the search box could be setup using the projection as a guide. + TBOX search_box(small_box); + int x_pad = IntCastRounded(gridsize() * kDiacriticXPadRatio); + int y_pad = IntCastRounded(gridsize() * kDiacriticYPadRatio); + search_box.pad(x_pad, y_pad); + BlobGridSearch rsearch(this); + rsearch.SetUniqueMode(true); + int min_height = height * kMinDiacriticSizeRatio; + rsearch.StartRectSearch(search_box); + BLOBNBOX* neighbour; + while ((neighbour = rsearch.NextRectSearch()) != nullptr) { + if (BLOBNBOX::UnMergeableType(neighbour->region_type()) || + neighbour == blob || neighbour->owner() == blob->owner()) + continue; + TBOX nbox = neighbour->bounding_box(); + if (neighbour->owner() == nullptr || neighbour->owner()->IsVerticalType() || + (neighbour->flow() != BTFT_CHAIN && + neighbour->flow() != BTFT_STRONG_CHAIN)) { + if (debug) { + tprintf("Neighbour not strong enough:"); + nbox.print(); + } + continue; // Diacritics must be attached to strong text. + } + if (nbox.height() < min_height) { + if (debug) { + tprintf("Neighbour not big enough:"); + nbox.print(); + } + continue; // Too small to be the base character. 
+ } + int x_gap = small_box.x_gap(nbox); + int y_gap = small_box.y_gap(nbox); + int total_distance = projection_->DistanceOfBoxFromBox(small_box, nbox, + true, denorm_, + debug); + if (debug) tprintf("xgap=%d, y=%d, total dist=%d\n", + x_gap, y_gap, total_distance); + if (total_distance > + neighbour->owner()->median_height() * kMaxDiacriticDistanceRatio) { + if (debug) { + tprintf("Neighbour with median size %d too far away:", + neighbour->owner()->median_height()); + neighbour->bounding_box().print(); + } + continue; // Diacritics must not be too distant. + } + if (x_gap <= 0) { + if (debug) { + tprintf("Computing reduced box for :"); + nbox.print(); + } + int left = small_box.left() - small_box.width(); + int right = small_box.right() + small_box.width(); + nbox = neighbour->BoundsWithinLimits(left, right); + y_gap = small_box.y_gap(nbox); + if (best_x_overlap == nullptr || y_gap < best_y_gap) { + best_x_overlap = neighbour; + best_xbox = nbox; + best_y_gap = y_gap; + if (debug) { + tprintf("New best:"); + nbox.print(); + } + } else if (debug) { + tprintf("Shrunken box doesn't win:"); + nbox.print(); + } + } else if (blob->ConfirmNoTabViolation(*neighbour)) { + if (best_y_overlap == nullptr || total_distance < best_total_dist) { + if (debug) { + tprintf("New best y overlap:"); + nbox.print(); + } + best_y_overlap = neighbour; + best_total_dist = total_distance; + } else if (debug) { + tprintf("New y overlap box doesn't win:"); + nbox.print(); + } + } else if (debug) { + tprintf("Neighbour wrong side of a tab:"); + nbox.print(); + } + } + if (best_x_overlap != nullptr && + (best_y_overlap == nullptr || + best_xbox.major_y_overlap(best_y_overlap->bounding_box()))) { + blob->set_diacritic_box(best_xbox); + blob->set_base_char_blob(best_x_overlap); + if (debug) { + tprintf("DiacriticBlob OK! 
(x-overlap:"); + small_box.print(); + best_xbox.print(); + } + return true; + } + if (best_y_overlap != nullptr && + DiacriticXGapFilled(small_grid, small_box, + best_y_overlap->bounding_box()) && + NoNoiseInBetween(small_box, best_y_overlap->bounding_box())) { + blob->set_diacritic_box(best_y_overlap->bounding_box()); + blob->set_base_char_blob(best_y_overlap); + if (debug) { + tprintf("DiacriticBlob OK! (y-overlap:"); + small_box.print(); + best_y_overlap->bounding_box().print(); + } + return true; + } + if (debug) { + tprintf("DiacriticBlob fails:"); + small_box.print(); + tprintf("Best x+y gap = %d, y = %d\n", best_total_dist, best_y_gap); + if (best_y_overlap != nullptr) { + tprintf("XGapFilled=%d, NoiseBetween=%d\n", + DiacriticXGapFilled(small_grid, small_box, + best_y_overlap->bounding_box()), + NoNoiseInBetween(small_box, best_y_overlap->bounding_box())); + } + } + return false; +} + +// Returns true if there is no gap between the base char and the diacritic +// bigger than a fraction of the height of the base char: +// Eg: line end.....' +// The quote is a long way from the end of the line, yet it needs to be a +// diacritic. To determine that the quote is not part of an image, or +// a different text block, we check for other marks in the gap between +// the base char and the diacritic. +// '<--Diacritic +// |---------| +// | |<-toobig-gap-> +// | Base |<ok gap> +// |---------| x<-----Dot occupying gap +// The grid is const really. +bool StrokeWidth::DiacriticXGapFilled(BlobGrid* grid, + const TBOX& diacritic_box, + const TBOX& base_box) { + // Since most gaps are small, use an iterative algorithm to search the gap. + int max_gap = IntCastRounded(base_box.height() * + kMaxDiacriticGapToBaseCharHeight); + TBOX occupied_box(base_box); + int diacritic_gap; + while ((diacritic_gap = diacritic_box.x_gap(occupied_box)) > max_gap) { + TBOX search_box(occupied_box); + if (diacritic_box.left() > search_box.right()) { + // We are looking right. 
+ search_box.set_left(search_box.right()); + search_box.set_right(search_box.left() + max_gap); + } else { + // We are looking left. + search_box.set_right(search_box.left()); + search_box.set_left(search_box.left() - max_gap); + } + BlobGridSearch rsearch(grid); + rsearch.StartRectSearch(search_box); + BLOBNBOX* neighbour; + while ((neighbour = rsearch.NextRectSearch()) != nullptr) { + const TBOX& nbox = neighbour->bounding_box(); + if (nbox.x_gap(diacritic_box) < diacritic_gap) { + if (nbox.left() < occupied_box.left()) + occupied_box.set_left(nbox.left()); + if (nbox.right() > occupied_box.right()) + occupied_box.set_right(nbox.right()); + break; + } + } + if (neighbour == nullptr) + return false; // Found a big gap. + } + return true; // The gap was filled. +} + +// Merges diacritics with the ColPartition of the base character blob. +void StrokeWidth::MergeDiacritics(TO_BLOCK* block, + ColPartitionGrid* part_grid) { + BLOBNBOX_IT small_it(&block->noise_blobs); + for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) { + BLOBNBOX* blob = small_it.data(); + if (blob->base_char_blob() != nullptr) { + ColPartition* part = blob->base_char_blob()->owner(); + // The base character must be owned by a partition and that partition + // must not be on the big_parts list (not block owned). + if (part != nullptr && !part->block_owned() && blob->owner() == nullptr && + blob->IsDiacritic()) { + // The partition has to be removed from the grid and reinserted + // because its bounding box may change. + part_grid->RemoveBBox(part); + part->AddBox(blob); + blob->set_region_type(part->blob_type()); + blob->set_flow(part->flow()); + blob->set_owner(part); + part_grid->InsertBBox(true, true, part); + } + // Set all base chars to nullptr before any blobs get deleted. 
+ blob->set_base_char_blob(nullptr); + } + } +} + +// Any blobs on the large_blobs list of block that are still unowned by a +// ColPartition, are probably drop-cap or vertically touching so the blobs +// are removed to the big_parts list and treated separately. +void StrokeWidth::RemoveLargeUnusedBlobs(TO_BLOCK* block, + ColPartitionGrid* part_grid, + ColPartition_LIST* big_parts) { + BLOBNBOX_IT large_it(&block->large_blobs); + for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) { + BLOBNBOX* blob = large_it.data(); + ColPartition* big_part = blob->owner(); + if (big_part == nullptr) { + // Large blobs should have gone into partitions by now if they are + // genuine characters, so move any unowned ones out to the big parts + // list. This will include drop caps and vertically touching characters. + ColPartition::MakeBigPartition(blob, big_parts); + } + } +} + +// All remaining unused blobs are put in individual ColPartitions. +void StrokeWidth::PartitionRemainingBlobs(PageSegMode pageseg_mode, + ColPartitionGrid* part_grid) { + BlobGridSearch gsearch(this); + BLOBNBOX* bbox; + int prev_grid_x = -1; + int prev_grid_y = -1; + BLOBNBOX_CLIST cell_list; + BLOBNBOX_C_IT cell_it(&cell_list); + bool cell_all_noise = true; + gsearch.StartFullSearch(); + while ((bbox = gsearch.NextFullSearch()) != nullptr) { + int grid_x = gsearch.GridX(); + int grid_y = gsearch.GridY(); + if (grid_x != prev_grid_x || grid_y != prev_grid_y) { + // New cell. Process old cell. 
+ MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid, + &cell_list); + cell_it.set_to_list(&cell_list); + prev_grid_x = grid_x; + prev_grid_y = grid_y; + cell_all_noise = true; + } + if (bbox->owner() == nullptr) { + cell_it.add_to_end(bbox); + if (bbox->flow() != BTFT_NONTEXT) + cell_all_noise = false; + } else { + cell_all_noise = false; + } + } + MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid, + &cell_list); +} + +// If combine, put all blobs in the cell_list into a single partition, otherwise +// put each one into its own partition. +void StrokeWidth::MakePartitionsFromCellList(PageSegMode pageseg_mode, + bool combine, + ColPartitionGrid* part_grid, + BLOBNBOX_CLIST* cell_list) { + if (cell_list->empty()) + return; + BLOBNBOX_C_IT cell_it(cell_list); + if (combine) { + BLOBNBOX* bbox = cell_it.extract(); + ColPartition* part = new ColPartition(bbox->region_type(), ICOORD(0, 1)); + part->AddBox(bbox); + part->set_flow(bbox->flow()); + for (cell_it.forward(); !cell_it.empty(); cell_it.forward()) { + part->AddBox(cell_it.extract()); + } + CompletePartition(pageseg_mode, part, part_grid); + } else { + for (; !cell_it.empty(); cell_it.forward()) { + BLOBNBOX* bbox = cell_it.extract(); + ColPartition* part = new ColPartition(bbox->region_type(), ICOORD(0, 1)); + part->set_flow(bbox->flow()); + part->AddBox(bbox); + CompletePartition(pageseg_mode, part, part_grid); + } + } +} + +// Helper function to finish setting up a ColPartition and insert into +// part_grid. +void StrokeWidth::CompletePartition(PageSegMode pageseg_mode, + ColPartition* part, + ColPartitionGrid* part_grid) { + part->ComputeLimits(); + TBOX box = part->bounding_box(); + bool debug = AlignedBlob::WithinTestRegion(2, box.left(), + box.bottom()); + int value = projection_->EvaluateColPartition(*part, denorm_, debug); + // Override value if pageseg_mode disagrees. + if (value > 0 && FindingVerticalOnly(pageseg_mode)) { + value = part->boxes_count() == 1 ? 
0 : -2; + } else if (value < 0 && FindingHorizontalOnly(pageseg_mode)) { + value = part->boxes_count() == 1 ? 0 : 2; + } + part->SetRegionAndFlowTypesFromProjectionValue(value); + part->ClaimBoxes(); + part_grid->InsertBBox(true, true, part); +} + +// Merge partitions where the merge appears harmless. +// As this +void StrokeWidth::EasyMerges(ColPartitionGrid* part_grid) { + using namespace std::placeholders; // for _1, _2 + part_grid->Merges( + std::bind(&StrokeWidth::OrientationSearchBox, this, _1, _2), + std::bind(&StrokeWidth::ConfirmEasyMerge, this, _1, _2)); +} + +// Compute a search box based on the orientation of the partition. +// Returns true if a suitable box can be calculated. +// Callback for EasyMerges. +bool StrokeWidth::OrientationSearchBox(ColPartition* part, TBOX* box) { + if (part->IsVerticalType()) { + box->set_top(box->top() + box->width()); + box->set_bottom(box->bottom() - box->width()); + } else { + box->set_left(box->left() - box->height()); + box->set_right(box->right() + box->height()); + } + return true; +} + +// Merge confirmation callback for EasyMerges. +bool StrokeWidth::ConfirmEasyMerge(const ColPartition* p1, + const ColPartition* p2) { + ASSERT_HOST(p1 != nullptr && p2 != nullptr); + ASSERT_HOST(!p1->IsEmpty() && !p2->IsEmpty()); + if ((p1->flow() == BTFT_NONTEXT && p2->flow() >= BTFT_CHAIN) || + (p1->flow() >= BTFT_CHAIN && p2->flow() == BTFT_NONTEXT)) + return false; // Don't merge confirmed image with text. + if ((p1->IsVerticalType() || p2->IsVerticalType()) && + p1->HCoreOverlap(*p2) <= 0 && + ((!p1->IsSingleton() && + !p2->IsSingleton()) || + !p1->bounding_box().major_overlap(p2->bounding_box()))) + return false; // Overlap must be in the text line. 
+ if ((p1->IsHorizontalType() || p2->IsHorizontalType()) && + p1->VCoreOverlap(*p2) <= 0 && + ((!p1->IsSingleton() && + !p2->IsSingleton()) || + (!p1->bounding_box().major_overlap(p2->bounding_box()) && + !p1->OKDiacriticMerge(*p2, false) && + !p2->OKDiacriticMerge(*p1, false)))) + return false; // Overlap must be in the text line. + if (!p1->ConfirmNoTabViolation(*p2)) + return false; + if (p1->flow() <= BTFT_NONTEXT && p2->flow() <= BTFT_NONTEXT) + return true; + return NoNoiseInBetween(p1->bounding_box(), p2->bounding_box()); +} + +// Returns true if there is no significant noise in between the boxes. +bool StrokeWidth::NoNoiseInBetween(const TBOX& box1, const TBOX& box2) const { + return ImageFind::BlankImageInBetween(box1, box2, grid_box_, rerotation_, + nontext_map_); +} + +#ifndef GRAPHICS_DISABLED + +/** Displays the blobs colored according to the number of good neighbours + * and the vertical/horizontal flow. + */ +ScrollView* StrokeWidth::DisplayGoodBlobs(const char* window_name, + int x, int y) { + auto window = MakeWindow(x, y, window_name); + // For every blob in the grid, display it. + window->Brush(ScrollView::NONE); + + // For every bbox in the grid, display it. 
+ BlobGridSearch gsearch(this); + gsearch.StartFullSearch(); + BLOBNBOX* bbox; + while ((bbox = gsearch.NextFullSearch()) != nullptr) { + const TBOX& box = bbox->bounding_box(); + int left_x = box.left(); + int right_x = box.right(); + int top_y = box.top(); + int bottom_y = box.bottom(); + int goodness = bbox->GoodTextBlob(); + BlobRegionType blob_type = bbox->region_type(); + if (bbox->UniquelyVertical()) + blob_type = BRT_VERT_TEXT; + if (bbox->UniquelyHorizontal()) + blob_type = BRT_TEXT; + BlobTextFlowType flow = bbox->flow(); + if (flow == BTFT_NONE) { + if (goodness == 0) + flow = BTFT_NEIGHBOURS; + else if (goodness == 1) + flow = BTFT_CHAIN; + else + flow = BTFT_STRONG_CHAIN; + } + window->Pen(BLOBNBOX::TextlineColor(blob_type, flow)); + window->Rectangle(left_x, bottom_y, right_x, top_y); + } + window->Update(); + return window; +} + +static void DrawDiacriticJoiner(const BLOBNBOX* blob, ScrollView* window) { + const TBOX& blob_box(blob->bounding_box()); + int top = std::max(static_cast<int>(blob_box.top()), blob->base_char_top()); + int bottom = std::min(static_cast<int>(blob_box.bottom()), blob->base_char_bottom()); + int x = (blob_box.left() + blob_box.right()) / 2; + window->Line(x, top, x, bottom); +} + +// Displays blobs colored according to whether or not they are diacritics. +ScrollView* StrokeWidth::DisplayDiacritics(const char* window_name, + int x, int y, TO_BLOCK* block) { + auto window = MakeWindow(x, y, window_name); + // For every blob in the grid, display it. + window->Brush(ScrollView::NONE); + + BLOBNBOX_IT it(&block->blobs); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + if (blob->IsDiacritic()) { + window->Pen(ScrollView::GREEN); + DrawDiacriticJoiner(blob, window); + } else { + window->Pen(blob->BoxColor()); + } + const TBOX& box = blob->bounding_box(); + window->Rectangle(box.left(), box. 
bottom(), box.right(), box.top()); + } + it.set_to_list(&block->noise_blobs); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + if (blob->IsDiacritic()) { + window->Pen(ScrollView::GREEN); + DrawDiacriticJoiner(blob, window); + } else { + window->Pen(ScrollView::WHITE); + } + const TBOX& box = blob->bounding_box(); + window->Rectangle(box.left(), box. bottom(), box.right(), box.top()); + } + window->Update(); + return window; +} + +#endif // !GRAPHICS_DISABLED + +} // namespace tesseract. diff --git a/tesseract/src/textord/strokewidth.h b/tesseract/src/textord/strokewidth.h new file mode 100644 index 00000000..81b07c55 --- /dev/null +++ b/tesseract/src/textord/strokewidth.h @@ -0,0 +1,355 @@ +/////////////////////////////////////////////////////////////////////// +// File: strokewidth.h +// Description: Subclass of BBGrid to find uniformity of strokewidth. +// Author: Ray Smith +// Created: Mon Mar 31 16:17:01 PST 2008 +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_TEXTORD_STROKEWIDTH_H_ +#define TESSERACT_TEXTORD_STROKEWIDTH_H_ + +#include "blobbox.h" // BlobNeighourDir. +#include "blobgrid.h" // Base class. 
+#include "colpartitiongrid.h" +#include "textlineprojection.h" + +class DENORM; +class ScrollView; +class TO_BLOCK; + +namespace tesseract { + +class ColPartition_LIST; +class TabFind; +class TextlineProjection; + +// Misc enums to clarify bool arguments for direction-controlling args. +enum LeftOrRight { + LR_LEFT, + LR_RIGHT +}; + +// Return value from FindInitialPartitions indicates detection of severe +// skew or noise. +enum PartitionFindResult { + PFR_OK, // Everything is OK. + PFR_SKEW, // Skew was detected and rotated. + PFR_NOISE // Noise was detected and removed. +}; + +/** + * The StrokeWidth class holds all the normal and large blobs. + * It is used to find good large blobs and move them to the normal blobs + * by virtue of having a reasonable strokewidth compatible neighbour. + */ +class StrokeWidth : public BlobGrid { + public: + StrokeWidth(int gridsize, const ICOORD& bleft, const ICOORD& tright); + ~StrokeWidth() override; + + // Sets the neighbours member of the medium-sized blobs in the block. + // Searches on 4 sides of each blob for similar-sized, similar-strokewidth + // blobs and sets pointers to the good neighbours. + void SetNeighboursOnMediumBlobs(TO_BLOCK* block); + + // Sets the neighbour/textline writing direction members of the medium + // and large blobs with optional repair of broken CJK characters first. + // Repair of broken CJK is needed here because broken CJK characters + // can fool the textline direction detection algorithm. + void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, + bool cjk_merge, + TO_BLOCK* input_block); + + // To save computation, the process of generating partitions is broken + // into the following 4 steps: + // TestVerticalTextDirection + // CorrectForRotation (used only if a rotation is to be applied) + // FindLeaderPartitions + // GradeBlobsIntoPartitions. + // These functions are all required, in sequence, except for + // CorrectForRotation, which is not needed if no rotation is applied. 
+ + // Types all the blobs as vertical or horizontal text or unknown and + // returns true if the majority are vertical. + // If the blobs are rotated, it is necessary to call CorrectForRotation + // after rotating everything, otherwise the work done here will be enough. + // If osd_blobs is not null, a list of blobs from the dominant textline + // direction are returned for use in orientation and script detection. + // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio. + bool TestVerticalTextDirection(double find_vertical_text_ratio, + TO_BLOCK* block, + BLOBNBOX_CLIST* osd_blobs); + + // Corrects the data structures for the given rotation. + void CorrectForRotation(const FCOORD& rerotation, + ColPartitionGrid* part_grid); + + // Finds leader partitions and inserts them into the give grid. + void FindLeaderPartitions(TO_BLOCK* block, + ColPartitionGrid* part_grid); + + // Finds and marks noise those blobs that look like bits of vertical lines + // that would otherwise screw up layout analysis. + void RemoveLineResidue(ColPartition_LIST* big_part_list); + + // Types all the blobs as vertical text or horizontal text or unknown and + // puts them into initial ColPartitions in the supplied part_grid. + // rerotation determines how to get back to the image coordinates from the + // blob coordinates (since they may have been rotated for vertical text). + // block is the single block for the whole page or rectangle to be OCRed. + // nontext_pix (full-size), is a binary mask used to prevent merges across + // photo/text boundaries. It is not kept beyond this function. + // denorm provides a mapping back to the image from the current blob + // coordinate space. + // projection provides a measure of textline density over the image and + // provides functions to assist with diacritic detection. It should be a + // pointer to a new TextlineProjection, and will be setup here. + // part_grid is the output grid of textline partitions. 
+ // Large blobs that cause overlap are put in separate partitions and added + // to the big_parts list. + void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, + const FCOORD& rerotation, TO_BLOCK* block, + Pix* nontext_pix, const DENORM* denorm, + bool cjk_script, TextlineProjection* projection, + BLOBNBOX_LIST* diacritic_blobs, + ColPartitionGrid* part_grid, + ColPartition_LIST* big_parts); + + // Handles a click event in a display window. + void HandleClick(int x, int y) override; + + private: + // Computes the noise_density_ by summing the number of elements in a + // neighbourhood of each grid cell. + void ComputeNoiseDensity(TO_BLOCK* block, TabFind* line_grid); + + // Detects and marks leader dots/dashes. + // Leaders are horizontal chains of small or noise blobs that look + // monospace according to ColPartition::MarkAsLeaderIfMonospaced(). + // Detected leaders become the only occupants of the block->small_blobs list. + // Non-leader small blobs get moved to the blobs list. + // Non-leader noise blobs remain singletons in the noise list. + // All small and noise blobs in high density regions are marked BTFT_NONTEXT. + // block is the single block for the whole page or rectangle to be OCRed. + // leader_parts is the output. + void FindLeadersAndMarkNoise(TO_BLOCK* block, + ColPartition_LIST* leader_parts); + + /** Inserts the block blobs (normal and large) into this grid. + * Blobs remain owned by the block. */ + void InsertBlobs(TO_BLOCK* block); + + // Fix broken CJK characters, using the fake joined blobs mechanism. + // Blobs are really merged, ie the master takes all the outlines and the + // others are deleted. + // Returns true if sufficient blobs are merged that it may be worth running + // again, due to a better estimate of character size. + bool FixBrokenCJK(TO_BLOCK* block); + + // Collect blobs that overlap or are within max_dist of the input bbox. 
+ // Return them in the list of blobs and expand the bbox to be the union + // of all the boxes. not_this is excluded from the search, as are blobs + // that cause the merged box to exceed max_size in either dimension. + void AccumulateOverlaps(const BLOBNBOX* not_this, bool debug, + int max_size, int max_dist, + TBOX* bbox, BLOBNBOX_CLIST* blobs); + + // For each blob in this grid, Finds the textline direction to be horizontal + // or vertical according to distance to neighbours and 1st and 2nd order + // neighbours. Non-text tends to end up without a definite direction. + // Result is setting of the neighbours and vert_possible/horz_possible + // flags in the BLOBNBOXes currently in this grid. + // This function is called more than once if page orientation is uncertain, + // so display_if_debugging is true on the final call to display the results. + void FindTextlineFlowDirection(PageSegMode pageseg_mode, + bool display_if_debugging); + + // Sets the neighbours and good_stroke_neighbours members of the blob by + // searching close on all 4 sides. + // When finding leader dots/dashes, there is a slightly different rule for + // what makes a good neighbour. + // If activate_line_trap, then line-like objects are found and isolated. + void SetNeighbours(bool leaders, bool activate_line_trap, BLOBNBOX* blob); + + // Sets the good_stroke_neighbours member of the blob if it has a + // GoodNeighbour on the given side. + // Also sets the neighbour in the blob, whether or not a good one is found. + // Return value is the number of neighbours in the line trap size range. + // Leaders get extra special lenient treatment. + int FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, BLOBNBOX* blob); + + // Makes the blob to be only horizontal or vertical where evidence + // is clear based on gaps of 2nd order neighbours. 
+ void SetNeighbourFlows(BLOBNBOX* blob); + + // Nullify the neighbours in the wrong directions where the direction + // is clear-cut based on a distance margin. Good for isolating vertical + // text from neighbouring horizontal text. + void SimplifyObviousNeighbours(BLOBNBOX* blob); + + // Smoothes the vertical/horizontal type of the blob based on the + // 2nd-order neighbours. If reset_all is true, then all blobs are + // changed. Otherwise, only ambiguous blobs are processed. + void SmoothNeighbourTypes(PageSegMode pageseg_mode, bool desperate, + BLOBNBOX* blob); + + // Checks the left or right side of the given leader partition and sets the + // (opposite) leader_on_right or leader_on_left flags for blobs + // that are next to the given side of the given leader partition. + void MarkLeaderNeighbours(const ColPartition* part, LeftOrRight side); + + // Partition creation. Accumulates vertical and horizontal text chains, + // puts the remaining blobs in as unknowns, and then merges/splits to + // minimize overlap and smoothes the types with neighbours and the color + // image if provided. rerotation is used to rotate the coordinate space + // back to the nontext_map_ image. + // If find_problems is true, detects possible noise pollution by the amount + // of partition overlap that is created by the diacritics. If excessive, the + // noise is separated out into diacritic blobs, and PFR_NOISE is returned. + // [TODO(rays): if the partition overlap is caused by heavy skew, deskews + // the components, saves the skew_angle and returns PFR_SKEW.] If the return + // is not PFR_OK, the job is incomplete, and FindInitialPartitions must be + // called again after cleaning up the partly done work. 
+ PartitionFindResult FindInitialPartitions(PageSegMode pageseg_mode, + const FCOORD& rerotation, + bool find_problems, TO_BLOCK* block, + BLOBNBOX_LIST* diacritic_blobs, + ColPartitionGrid* part_grid, + ColPartition_LIST* big_parts, + FCOORD* skew_angle); + // Detects noise by a significant increase in partition overlap from + // pre_overlap to now, and removes noise from the union of all the overlapping + // partitions, placing the blobs in diacritic_blobs. Returns true if any noise + // was found and removed. + bool DetectAndRemoveNoise(int pre_overlap, const TBOX& grid_box, + TO_BLOCK* block, ColPartitionGrid* part_grid, + BLOBNBOX_LIST* diacritic_blobs); + // Finds vertical chains of text-like blobs and puts them in ColPartitions. + void FindVerticalTextChains(ColPartitionGrid* part_grid); + // Finds horizontal chains of text-like blobs and puts them in ColPartitions. + void FindHorizontalTextChains(ColPartitionGrid* part_grid); + // Finds diacritics and saves their base character in the blob. + void TestDiacritics(ColPartitionGrid* part_grid, TO_BLOCK* block); + // Searches this grid for an appropriately close and sized neighbour of the + // given [small] blob. If such a blob is found, the diacritic base is saved + // in the blob and true is returned. + // The small_grid is a secondary grid that contains the small/noise objects + // that are not in this grid, but may be useful for determining a connection + // between blob and its potential base character. (See DiacriticXGapFilled.) + bool DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob); + // Returns true if there is no gap between the base char and the diacritic + // bigger than a fraction of the height of the base char: + // Eg: line end.....' + // The quote is a long way from the end of the line, yet it needs to be a + // diacritic. To determine that the quote is not part of an image, or + // a different text block, we check for other marks in the gap between + // the base char and the diacritic. 
+ // '<--Diacritic + // |---------| + // | |<-toobig-gap-> + // | Base |<ok gap> + // |---------| x<-----Dot occupying gap + // The grid is const really. + bool DiacriticXGapFilled(BlobGrid* grid, const TBOX& diacritic_box, + const TBOX& base_box); + // Merges diacritics with the ColPartition of the base character blob. + void MergeDiacritics(TO_BLOCK* block, ColPartitionGrid* part_grid); + // Any blobs on the large_blobs list of block that are still unowned by a + // ColPartition, are probably drop-cap or vertically touching so the blobs + // are removed to the big_parts list and treated separately. + void RemoveLargeUnusedBlobs(TO_BLOCK* block, + ColPartitionGrid* part_grid, + ColPartition_LIST* big_parts); + + // All remaining unused blobs are put in individual ColPartitions. + void PartitionRemainingBlobs(PageSegMode pageseg_mode, + ColPartitionGrid* part_grid); + + // If combine, put all blobs in the cell_list into a single partition, + // otherwise put each one into its own partition. + void MakePartitionsFromCellList(PageSegMode pageseg_mode, bool combine, + ColPartitionGrid* part_grid, + BLOBNBOX_CLIST* cell_list); + + // Helper function to finish setting up a ColPartition and insert into + // part_grid. + void CompletePartition(PageSegMode pageseg_mode, ColPartition* part, + ColPartitionGrid* part_grid); + + // Helper returns true if we are looking only for vertical textlines, + // taking into account any rotation that has been done. + bool FindingVerticalOnly(PageSegMode pageseg_mode) const { + if (rerotation_.y() == 0.0f) { + return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT; + } + return !PSM_ORIENTATION_ENABLED(pageseg_mode) && + pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT; + } + // Helper returns true if we are looking only for horizontal textlines, + // taking into account any rotation that has been done. 
+ bool FindingHorizontalOnly(PageSegMode pageseg_mode) const { + if (rerotation_.y() == 0.0f) { + return !PSM_ORIENTATION_ENABLED(pageseg_mode) && + pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT; + } + return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT; + } + + // Merge partitions where the merge appears harmless. + void EasyMerges(ColPartitionGrid* part_grid); + + // Compute a search box based on the orientation of the partition. + // Returns true if a suitable box can be calculated. + // Callback for EasyMerges. + bool OrientationSearchBox(ColPartition* part, TBOX* box); + + // Merge confirmation callback for EasyMerges. + bool ConfirmEasyMerge(const ColPartition* p1, const ColPartition* p2); + + // Returns true if there is no significant noise in between the boxes. + bool NoNoiseInBetween(const TBOX& box1, const TBOX& box2) const; + + // Displays the blobs colored according to the number of good neighbours + // and the vertical/horizontal flow. + ScrollView* DisplayGoodBlobs(const char* window_name, int x, int y); + + // Displays blobs colored according to whether or not they are diacritics. + ScrollView* DisplayDiacritics(const char* window_name, + int x, int y, TO_BLOCK* block); + + private: + // Image map of photo/noise areas on the page. Borrowed pointer (not owned.) + Pix* nontext_map_; + // Textline projection map. Borrowed pointer. + TextlineProjection* projection_; + // DENORM used by projection_ to get back to image coords. Borrowed pointer. + const DENORM* denorm_; + // Bounding box of the grid. + TBOX grid_box_; + // Rerotation to get back to the original image. + FCOORD rerotation_; + // Windows for debug display. + ScrollView* leaders_win_; + ScrollView* initial_widths_win_; + ScrollView* widths_win_; + ScrollView* chains_win_; + ScrollView* diacritics_win_; + ScrollView* textlines_win_; + ScrollView* smoothed_win_; +}; + +} // namespace tesseract. 
+ +#endif // TESSERACT_TEXTORD_STROKEWIDTH_H_ diff --git a/tesseract/src/textord/tabfind.cpp b/tesseract/src/textord/tabfind.cpp new file mode 100644 index 00000000..c88421c1 --- /dev/null +++ b/tesseract/src/textord/tabfind.cpp @@ -0,0 +1,1438 @@ +/////////////////////////////////////////////////////////////////////// +// File: tabfind.cpp +// Description: Subclass of BBGrid to find vertically aligned blobs. +// Author: Ray Smith +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "tabfind.h" +#include "alignedblob.h" +#include "colpartitiongrid.h" +#include "detlinefit.h" +#include "host.h" // for NearlyEqual +#include "linefind.h" + +#include <algorithm> + +namespace tesseract { + +// Multiple of box size to search for initial gaps. +const int kTabRadiusFactor = 5; +// Min and Max multiple of height to search vertically when extrapolating. +const int kMinVerticalSearch = 3; +const int kMaxVerticalSearch = 12; +const int kMaxRaggedSearch = 25; +// Minimum number of lines in a column width to make it interesting. +const int kMinLinesInColumn = 10; +// Minimum width of a column to be interesting. +const int kMinColumnWidth = 200; +// Minimum fraction of total column lines for a column to be interesting. 
+const double kMinFractionalLinesInColumn = 0.125; +// Fraction of height used as alignment tolerance for aligned tabs. +const double kAlignedFraction = 0.03125; +// Maximum gutter width (in absolute inch) that we care about +const double kMaxGutterWidthAbsolute = 2.00; +// Multiplier of gridsize for min gutter width of TT_MAYBE_RAGGED blobs. +const int kRaggedGutterMultiple = 5; +// Min aspect ratio of tall objects to be considered a separator line. +// (These will be ignored in searching the gutter for obstructions.) +const double kLineFragmentAspectRatio = 10.0; +// Min number of points to accept after evaluation. +const int kMinEvaluatedTabs = 3; +// Up to 30 degrees is allowed for rotations of diacritic blobs. +// Keep this value slightly larger than kCosSmallAngle in blobbox.cpp +// so that the assert there never fails. +const double kCosMaxSkewAngle = 0.866025; + +static BOOL_VAR(textord_tabfind_show_initialtabs, false, "Show tab candidates"); +static BOOL_VAR(textord_tabfind_show_finaltabs, false, "Show tab vectors"); + +TabFind::TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright, + TabVector_LIST* vlines, int vertical_x, int vertical_y, + int resolution) + : AlignedBlob(gridsize, bleft, tright), + resolution_(resolution), + image_origin_(0, tright.y() - 1), + v_it_(&vectors_) { + width_cb_ = nullptr; + v_it_.add_list_after(vlines); + SetVerticalSkewAndParallelize(vertical_x, vertical_y); + using namespace std::placeholders; // for _1 + width_cb_ = std::bind(&TabFind::CommonWidth, this, _1); +} + +TabFind::~TabFind() { +} + +///////////////// PUBLIC functions (mostly used by TabVector). ////////////// + +// Insert a list of blobs into the given grid (not necessarily this). +// If take_ownership is true, then the blobs are removed from the source list. +// See InsertBlob for the other arguments. 
+// It would seem to make more sense to swap this and grid, but this way +// around allows grid to not be derived from TabFind, eg a ColPartitionGrid, +// while the grid that provides the tab stops(this) has to be derived from +// TabFind. +void TabFind::InsertBlobsToGrid(bool h_spread, bool v_spread, + BLOBNBOX_LIST* blobs, + BBGrid<BLOBNBOX, BLOBNBOX_CLIST, + BLOBNBOX_C_IT>* grid) { + BLOBNBOX_IT blob_it(blobs); + int b_count = 0; + int reject_count = 0; + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); +// if (InsertBlob(true, true, blob, grid)) { + if (InsertBlob(h_spread, v_spread, blob, grid)) { + ++b_count; + } else { + ++reject_count; + } + } + if (textord_debug_tabfind) { + tprintf("Inserted %d blobs into grid, %d rejected.\n", + b_count, reject_count); + } +} + +// Insert a single blob into the given grid (not necessarily this). +// If h_spread, then all cells covered horizontally by the box are +// used, otherwise, just the bottom-left. Similarly for v_spread. +// A side effect is that the left and right rule edges of the blob are +// set according to the tab vectors in this (not grid). +bool TabFind::InsertBlob(bool h_spread, bool v_spread, BLOBNBOX* blob, + BBGrid<BLOBNBOX, BLOBNBOX_CLIST, + BLOBNBOX_C_IT>* grid) { + TBOX box = blob->bounding_box(); + blob->set_left_rule(LeftEdgeForBox(box, false, false)); + blob->set_right_rule(RightEdgeForBox(box, false, false)); + blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false)); + blob->set_right_crossing_rule(RightEdgeForBox(box, true, false)); + if (blob->joined_to_prev()) + return false; + grid->InsertBBox(h_spread, v_spread, blob); + return true; +} + +// Calls SetBlobRuleEdges for all the blobs in the given block. 
+void TabFind::SetBlockRuleEdges(TO_BLOCK* block) { + SetBlobRuleEdges(&block->blobs); + SetBlobRuleEdges(&block->small_blobs); + SetBlobRuleEdges(&block->noise_blobs); + SetBlobRuleEdges(&block->large_blobs); +} + +// Sets the left and right rule and crossing_rules for the blobs in the given +// list by fiding the next outermost tabvectors for each blob. +void TabFind::SetBlobRuleEdges(BLOBNBOX_LIST* blobs) { + BLOBNBOX_IT blob_it(blobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + TBOX box = blob->bounding_box(); + blob->set_left_rule(LeftEdgeForBox(box, false, false)); + blob->set_right_rule(RightEdgeForBox(box, false, false)); + blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false)); + blob->set_right_crossing_rule(RightEdgeForBox(box, true, false)); + } +} + +// Returns the gutter width of the given TabVector between the given y limits. +// Also returns x-shift to be added to the vector to clear any intersecting +// blobs. The shift is deducted from the returned gutter. +// If ignore_unmergeables is true, then blobs of UnMergeableType are +// ignored as if they don't exist. (Used for text on image.) +// max_gutter_width is used as the maximum width worth searching for in case +// there is nothing near the TabVector. +int TabFind::GutterWidth(int bottom_y, int top_y, const TabVector& v, + bool ignore_unmergeables, int max_gutter_width, + int* required_shift) { + bool right_to_left = v.IsLeftTab(); + int bottom_x = v.XAtY(bottom_y); + int top_x = v.XAtY(top_y); + int start_x = right_to_left ? 
std::max(top_x, bottom_x) : std::min(top_x, bottom_x); + BlobGridSearch sidesearch(this); + sidesearch.StartSideSearch(start_x, bottom_y, top_y); + int min_gap = max_gutter_width; + *required_shift = 0; + BLOBNBOX* blob = nullptr; + while ((blob = sidesearch.NextSideSearch(right_to_left)) != nullptr) { + const TBOX& box = blob->bounding_box(); + if (box.bottom() >= top_y || box.top() <= bottom_y) + continue; // Doesn't overlap enough. + if (box.height() >= gridsize() * 2 && + box.height() > box.width() * kLineFragmentAspectRatio) { + // Skip likely separator line residue. + continue; + } + if (ignore_unmergeables && BLOBNBOX::UnMergeableType(blob->region_type())) + continue; // Skip non-text if required. + int mid_y = (box.bottom() + box.top()) / 2; + // We use the x at the mid-y so that the required_shift guarantees + // to clear all the blobs on the tab-stop. If we use the min/max + // of x at top/bottom of the blob, then exactness would be required, + // which is not a good thing. + int tab_x = v.XAtY(mid_y); + int gap; + if (right_to_left) { + gap = tab_x - box.right(); + if (gap < 0 && box.left() - tab_x < *required_shift) + *required_shift = box.left() - tab_x; + } else { + gap = box.left() - tab_x; + if (gap < 0 && box.right() - tab_x > *required_shift) + *required_shift = box.right() - tab_x; + } + if (gap > 0 && gap < min_gap) + min_gap = gap; + } + // Result may be negative, in which case, this is a really bad tabstop. + return min_gap - abs(*required_shift); +} + +// Find the gutter width and distance to inner neighbour for the given blob. +void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height, + int max_gutter, bool left, + BLOBNBOX* bbox, int* gutter_width, + int* neighbour_gap) { + const TBOX& box = bbox->bounding_box(); + // The gutter and internal sides of the box. + int gutter_x = left ? box.left() : box.right(); + int internal_x = left ? 
box.right() : box.left(); + // On ragged edges, the gutter side of the box is away from the tabstop. + int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x; + *gutter_width = max_gutter; + // If the box is away from the tabstop, we need to increase + // the allowed gutter width. + if (tab_gap > 0) + *gutter_width += tab_gap; + bool debug = WithinTestRegion(2, box.left(), box.bottom()); + if (debug) + tprintf("Looking in gutter\n"); + // Find the nearest blob on the outside of the column. + BLOBNBOX* gutter_bbox = AdjacentBlob(bbox, left, + bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0, + *gutter_width, box.top(), box.bottom()); + if (gutter_bbox != nullptr) { + const TBOX& gutter_box = gutter_bbox->bounding_box(); + *gutter_width = left ? tab_x - gutter_box.right() + : gutter_box.left() - tab_x; + } + if (*gutter_width >= max_gutter) { + // If there is no box because a tab was in the way, get the tab coord. + TBOX gutter_box(box); + if (left) { + gutter_box.set_left(tab_x - max_gutter - 1); + gutter_box.set_right(tab_x - max_gutter); + int tab_gutter = RightEdgeForBox(gutter_box, true, false); + if (tab_gutter < tab_x - 1) + *gutter_width = tab_x - tab_gutter; + } else { + gutter_box.set_left(tab_x + max_gutter); + gutter_box.set_right(tab_x + max_gutter + 1); + int tab_gutter = LeftEdgeForBox(gutter_box, true, false); + if (tab_gutter > tab_x + 1) + *gutter_width = tab_gutter - tab_x; + } + } + if (*gutter_width > max_gutter) + *gutter_width = max_gutter; + // Now look for a neighbour on the inside. + if (debug) + tprintf("Looking for neighbour\n"); + BLOBNBOX* neighbour = AdjacentBlob(bbox, !left, + bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0, + *gutter_width, box.top(), box.bottom()); + int neighbour_edge = left ? 
RightEdgeForBox(box, true, false) + : LeftEdgeForBox(box, true, false); + if (neighbour != nullptr) { + const TBOX& n_box = neighbour->bounding_box(); + if (debug) { + tprintf("Found neighbour:"); + n_box.print(); + } + if (left && n_box.left() < neighbour_edge) + neighbour_edge = n_box.left(); + else if (!left && n_box.right() > neighbour_edge) + neighbour_edge = n_box.right(); + } + *neighbour_gap = left ? neighbour_edge - internal_x + : internal_x - neighbour_edge; +} + +// Return the x-coord that corresponds to the right edge for the given +// box. If there is a rule line to the right that vertically overlaps it, +// then return the x-coord of the rule line, otherwise return the right +// edge of the page. For details see RightTabForBox below. +int TabFind::RightEdgeForBox(const TBOX& box, bool crossing, bool extended) { + TabVector* v = RightTabForBox(box, crossing, extended); + return v == nullptr ? tright_.x() : v->XAtY((box.top() + box.bottom()) / 2); +} +// As RightEdgeForBox, but finds the left Edge instead. +int TabFind::LeftEdgeForBox(const TBOX& box, bool crossing, bool extended) { + TabVector* v = LeftTabForBox(box, crossing, extended); + return v == nullptr ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2); +} + +// This comment documents how this function works. +// For its purpose and arguments, see the comment in tabfind.h. +// TabVectors are stored sorted by perpendicular distance of middle from +// the global mean vertical vector. Since the individual vectors can have +// differing directions, their XAtY for a given y is not necessarily in the +// right order. Therefore the search has to be run with a margin. +// The middle of a vector that passes through (x,y) cannot be higher than +// halfway from y to the top, or lower than halfway from y to the bottom +// of the coordinate range; therefore, the search margin is the range of +// sort keys between these halfway points. 
// Any vector with a sort key greater
// than the upper margin must be to the right of x at y, and likewise any
// vector with a sort key less than the lower margin must pass to the left
// of x at y.
//
// Returns the chosen TabVector, or nullptr if the vector list is empty or
// no vector satisfies the overlap and position tests below.
TabVector* TabFind::RightTabForBox(const TBOX& box, bool crossing,
                                   bool extended) {
  if (v_it_.empty())
    return nullptr;
  int top_y = box.top();
  int bottom_y = box.bottom();
  int mid_y = (top_y + bottom_y) / 2;
  // With crossing set, the reference x is the horizontal middle of the box
  // rather than its right edge.
  int right = crossing ? (box.left() + box.right()) / 2 : box.right();
  int min_key, max_key;
  SetupTabSearch(right, mid_y, &min_key, &max_key);
  // Position the iterator at the first TabVector with sort_key >= min_key.
  // v_it_ is a member, so the search resumes from wherever the previous
  // call left it: first step back past min_key, then forward up to it.
  while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key)
    v_it_.backward();
  while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key)
    v_it_.forward();
  // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
  TabVector* best_v = nullptr;
  int best_x = -1;
  int key_limit = -1;
  do {
    TabVector* v = v_it_.data();
    int x = v->XAtY(mid_y);
    // ExtendedOverlap is only consulted when the caller asked for an
    // extended search and the plain VOverlap test failed.
    if (x >= right &&
        (v->VOverlap(top_y, bottom_y) > 0 ||
         (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
      if (best_v == nullptr || x < best_x) {
        best_v = v;
        best_x = x;
        // We can guarantee that no better vector can be found if the
        // sort key exceeds that of the best by max_key - min_key.
        key_limit = v->sort_key() + max_key - min_key;
      }
    }
    // Break when the search is done to avoid wrapping the iterator and
    // thereby potentially slowing the next search.
    if (v_it_.at_last() ||
        (best_v != nullptr && v->sort_key() > key_limit))
      break;  // Prevent restarting list for next call.
    v_it_.forward();
  } while (!v_it_.at_first());
  return best_v;
}

// As RightTabForBox, but finds the left TabVector instead.
// Returns nullptr if the vector list is empty or no vector satisfies the
// overlap and position tests below.
TabVector* TabFind::LeftTabForBox(const TBOX& box, bool crossing,
                                  bool extended) {
  if (v_it_.empty())
    return nullptr;
  int top_y = box.top();
  int bottom_y = box.bottom();
  int mid_y = (top_y + bottom_y) / 2;
  // With crossing set, the reference x is the horizontal middle of the box
  // rather than its left edge.
  int left = crossing ? (box.left() + box.right()) / 2 : box.left();
  int min_key, max_key;
  SetupTabSearch(left, mid_y, &min_key, &max_key);
  // Position the iterator at the last TabVector with sort_key <= max_key.
  // v_it_ is a member, so the search resumes from wherever the previous
  // call left it: first step forward past max_key, then back down to it.
  while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key)
    v_it_.forward();
  while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
    v_it_.backward();
  }
  // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
  TabVector* best_v = nullptr;
  int best_x = -1;
  int key_limit = -1;
  do {
    TabVector* v = v_it_.data();
    int x = v->XAtY(mid_y);
    // ExtendedOverlap is only consulted when the caller asked for an
    // extended search and the plain VOverlap test failed.
    if (x <= left &&
        (v->VOverlap(top_y, bottom_y) > 0 ||
         (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
      if (best_v == nullptr || x > best_x) {
        best_v = v;
        best_x = x;
        // We can guarantee that no better vector can be found if the
        // sort key is less than that of the best by max_key - min_key.
        key_limit = v->sort_key() - (max_key - min_key);
      }
    }
    // Break when the search is done to avoid wrapping the iterator and
    // thereby potentially slowing the next search.
    if (v_it_.at_first() ||
        (best_v != nullptr && v->sort_key() < key_limit))
      break;  // Prevent restarting list for next call.
    v_it_.backward();
  } while (!v_it_.at_last());
  return best_v;
}

// Return true if the given width is close to one of the common
// widths in column_widths_.
bool TabFind::CommonWidth(int width) {
  // column_widths_ entries are compared in units of kColumnWidthFactor.
  width /= kColumnWidthFactor;
  ICOORDELT_IT it(&column_widths_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    ICOORDELT* w = it.data();
    // Each entry is treated as the range [x, y], widened by 1 on each side.
    if (w->x() - 1 <= width && width <= w->y() + 1)
      return true;
  }
  return false;
}

// Return true if the sizes are more than a
// factor of 2 different.
+bool TabFind::DifferentSizes(int size1, int size2) { + return size1 > size2 * 2 || size2 > size1 * 2; +} + +// Return true if the sizes are more than a +// factor of 5 different. +bool TabFind::VeryDifferentSizes(int size1, int size2) { + return size1 > size2 * 5 || size2 > size1 * 5; +} + +///////////////// PROTECTED functions (used by ColumnFinder). ////////////// + +// Top-level function to find TabVectors in an input page block. +// Returns false if the detected skew angle is impossible. +// Applies the detected skew angle to deskew the tabs, blobs and part_grid. +bool TabFind::FindTabVectors(TabVector_LIST* hlines, + BLOBNBOX_LIST* image_blobs, TO_BLOCK* block, + int min_gutter_width, + double tabfind_aligned_gap_fraction, + ColPartitionGrid* part_grid, + FCOORD* deskew, FCOORD* reskew) { + ScrollView* tab_win = FindInitialTabVectors(image_blobs, min_gutter_width, + tabfind_aligned_gap_fraction, + block); + ComputeColumnWidths(tab_win, part_grid); + TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this); + SortVectors(); + CleanupTabs(); + if (!Deskew(hlines, image_blobs, block, deskew, reskew)) + return false; // Skew angle is too large. + part_grid->Deskew(*deskew); + ApplyTabConstraints(); + #ifndef GRAPHICS_DISABLED + if (textord_tabfind_show_finaltabs) { + tab_win = MakeWindow(640, 50, "FinalTabs"); + DisplayBoxes(tab_win); + DisplayTabs("FinalTabs", tab_win); + tab_win = DisplayTabVectors(tab_win); + } + #endif // !GRAPHICS_DISABLED + return true; +} + +// Top-level function to not find TabVectors in an input page block, +// but setup for single column mode. 
+void TabFind::DontFindTabVectors(BLOBNBOX_LIST* image_blobs, TO_BLOCK* block, + FCOORD* deskew, FCOORD* reskew) { + InsertBlobsToGrid(false, false, image_blobs, this); + InsertBlobsToGrid(true, false, &block->blobs, this); + deskew->set_x(1.0f); + deskew->set_y(0.0f); + reskew->set_x(1.0f); + reskew->set_y(0.0f); +} + +// Cleans up the lists of blobs in the block ready for use by TabFind. +// Large blobs that look like text are moved to the main blobs list. +// Main blobs that are superseded by the image blobs are deleted. +void TabFind::TidyBlobs(TO_BLOCK* block) { + BLOBNBOX_IT large_it = &block->large_blobs; + BLOBNBOX_IT blob_it = &block->blobs; + int b_count = 0; + for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) { + BLOBNBOX* large_blob = large_it.data(); + if (large_blob->owner() != nullptr) { + blob_it.add_to_end(large_it.extract()); + ++b_count; + } + } + if (textord_debug_tabfind) { + tprintf("Moved %d large blobs to normal list\n", + b_count); + #ifndef GRAPHICS_DISABLED + ScrollView* rej_win = MakeWindow(500, 300, "Image blobs"); + block->plot_graded_blobs(rej_win); + block->plot_noise_blobs(rej_win); + rej_win->Update(); + #endif // !GRAPHICS_DISABLED + } + block->DeleteUnownedNoise(); +} + +// Helper function to setup search limits for *TabForBox. +void TabFind::SetupTabSearch(int x, int y, int* min_key, int* max_key) { + int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2); + int key2 = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2); + *min_key = std::min(key1, key2); + *max_key = std::max(key1, key2); +} + +#ifndef GRAPHICS_DISABLED + +ScrollView* TabFind::DisplayTabVectors(ScrollView* tab_win) { + // For every vector, display it. + TabVector_IT it(&vectors_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + TabVector* vector = it.data(); + vector->Display(tab_win); + } + tab_win->Update(); + return tab_win; +} + +#endif + +// PRIVATE CODE. 
//
// First part of FindTabVectors, which may be used twice if the text
// is mostly of vertical alignment.
// Inserts the blobs into the grid, finds candidate tab boxes and tab
// vectors, merges, sorts and evaluates the vectors, and marks vertical text.
// Returns the window produced by FindTabBoxes (nullptr when no debug
// display was created).
ScrollView* TabFind::FindInitialTabVectors(BLOBNBOX_LIST* image_blobs,
                                           int min_gutter_width,
                                           double tabfind_aligned_gap_fraction,
                                           TO_BLOCK* block) {
#ifndef GRAPHICS_DISABLED
  if (textord_tabfind_show_initialtabs) {
    // Debug display of the vectors that exist before the tab search.
    ScrollView* line_win = MakeWindow(0, 0, "VerticalLines");
    line_win = DisplayTabVectors(line_win);
  }
#endif
  // Prepare the grid.
  if (image_blobs != nullptr)
    InsertBlobsToGrid(true, false, image_blobs, this);
  InsertBlobsToGrid(true, false, &block->blobs, this);
  ScrollView* initial_win = FindTabBoxes(min_gutter_width,
                                         tabfind_aligned_gap_fraction);
  FindAllTabVectors(min_gutter_width);

  TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this);
  SortVectors();
  EvaluateTabs();
#ifndef GRAPHICS_DISABLED
  if (textord_tabfind_show_initialtabs && initial_win != nullptr)
    initial_win = DisplayTabVectors(initial_win);
#endif
  MarkVerticalText();
  return initial_win;
}

#ifndef GRAPHICS_DISABLED

// Helper displays all the boxes in the given vector on the given window.
// Each box is drawn in the colour returned by its blob's BoxColor().
static void DisplayBoxVector(const GenericVector<BLOBNBOX*>& boxes,
                             ScrollView* win) {
  for (int i = 0; i < boxes.size(); ++i) {
    TBOX box = boxes[i]->bounding_box();
    int left_x = box.left();
    int right_x = box.right();
    int top_y = box.top();
    int bottom_y = box.bottom();
    ScrollView::Color box_color = boxes[i]->BoxColor();
    win->Pen(box_color);
    win->Rectangle(left_x, bottom_y, right_x, top_y);
  }
  win->Update();
}

#endif  // !GRAPHICS_DISABLED

// For each box in the grid, decide whether it is a candidate tab-stop,
// and if so add it to the left/right tab boxes.
// Returns the debug window if one was created, otherwise nullptr.
ScrollView* TabFind::FindTabBoxes(int min_gutter_width,
                                  double tabfind_aligned_gap_fraction) {
  left_tab_boxes_.clear();
  right_tab_boxes_.clear();
  // For every bbox in the grid, determine whether it uses a tab on an edge.
  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> gsearch(this);
  gsearch.StartFullSearch();
  BLOBNBOX* bbox;
  while ((bbox = gsearch.NextFullSearch()) != nullptr) {
    if (TestBoxForTabs(bbox, min_gutter_width, tabfind_aligned_gap_fraction)) {
      // If it is any kind of tab, insert it into the vectors.
      // A blob may be entered in both lists.
      if (bbox->left_tab_type() != TT_NONE)
        left_tab_boxes_.push_back(bbox);
      if (bbox->right_tab_type() != TT_NONE)
        right_tab_boxes_.push_back(bbox);
    }
  }
  // Sort left tabs by left and right by right to see the outermost one first
  // on a ragged tab.
  left_tab_boxes_.sort(SortByBoxLeft<BLOBNBOX>);
  right_tab_boxes_.sort(SortRightToLeft<BLOBNBOX>);
  ScrollView* tab_win = nullptr;
#ifndef GRAPHICS_DISABLED
  if (textord_tabfind_show_initialtabs) {
    tab_win = MakeWindow(0, 100, "InitialTabs");
    tab_win->Pen(ScrollView::BLUE);
    tab_win->Brush(ScrollView::NONE);
    // Display the left and right tab boxes.
    DisplayBoxVector(left_tab_boxes_, tab_win);
    DisplayBoxVector(right_tab_boxes_, tab_win);
    tab_win = DisplayTabs("Tabs", tab_win);
  }
#endif  // !GRAPHICS_DISABLED
  return tab_win;
}

// Examines the neighbours of bbox within a search radius and sets its left
// and right tab types to TT_MAYBE_ALIGNED, TT_MAYBE_RAGGED or TT_NONE.
// Returns true if at least one of the two sides is not TT_NONE.
bool TabFind::TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width,
                             double tabfind_aligned_gap_fraction) {
  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> radsearch(this);
  TBOX box = bbox->bounding_box();
  // If there are separator lines, get the column edges.
  int left_column_edge = bbox->left_rule();
  int right_column_edge = bbox->right_rule();
  // The edges of the bounding box of the blob being processed.
  int left_x = box.left();
  int right_x = box.right();
  int top_y = box.top();
  int bottom_y = box.bottom();
  int height = box.height();
  bool debug = WithinTestRegion(3, left_x, top_y);
  if (debug) {
    tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n",
            left_x, top_y, right_x, bottom_y,
            left_column_edge, right_column_edge);
  }
  // Compute a search radius based on a multiple of the height.
  // The radius is expressed in grid cells, rounded up.
  int radius = (height * kTabRadiusFactor + gridsize_ - 1) / gridsize_;
  radsearch.StartRadSearch((left_x + right_x)/2, (top_y + bottom_y)/2, radius);
  // In Vertical Page mode, once we have an estimate of the vertical line
  // spacing, the minimum amount of gutter space before a possible tab is
  // increased under the assumption that column partition is always larger
  // than line spacing.
  int min_spacing =
      static_cast<int>(height * tabfind_aligned_gap_fraction);
  if (min_gutter_width > min_spacing)
    min_spacing = min_gutter_width;
  int min_ragged_gutter = kRaggedGutterMultiple * gridsize();
  if (min_gutter_width > min_ragged_gutter)
    min_ragged_gutter = min_gutter_width;
  // target_right is the x that a neighbour's right edge must reach to count
  // as lying in the left gutter; target_left is the mirror image for the
  // right gutter.
  int target_right = left_x - min_spacing;
  int target_left = right_x + min_spacing;
  // We will be evaluating whether the left edge could be a left tab, and
  // whether the right edge could be a right tab.
  // A box can be a tab if its bool is_(left/right)_tab remains true, meaning
  // that no blobs have been found in the gutter during the radial search.
  // A box can also be a tab if there are objects in the gutter only above
  // or only below, and there are aligned objects on the opposite side, but
  // not too many unaligned objects. The maybe_(left/right)_tab_up counts
  // aligned objects above and negatively counts unaligned objects above,
  // and is set to -INT32_MAX if a gutter object is found above.
  // The other 3 maybe ints work similarly for the other sides.
  // These conditions are very strict, to minimize false positives, and really
  // only aligned tabs and outermost ragged tab blobs will qualify, so we
  // also have maybe_ragged_left/right with less stringent rules.
  // A blob that is maybe_ragged_left/right will be further qualified later,
  // using the min_ragged_gutter.
  bool is_left_tab = true;
  bool is_right_tab = true;
  bool maybe_ragged_left = true;
  bool maybe_ragged_right = true;
  int maybe_left_tab_up = 0;
  int maybe_right_tab_up = 0;
  int maybe_left_tab_down = 0;
  int maybe_right_tab_down = 0;
  // A leader on a side rules that side out completely before the search.
  if (bbox->leader_on_left()) {
    is_left_tab = false;
    maybe_ragged_left = false;
    maybe_left_tab_up = -INT32_MAX;
    maybe_left_tab_down = -INT32_MAX;
  }
  if (bbox->leader_on_right()) {
    is_right_tab = false;
    maybe_ragged_right = false;
    maybe_right_tab_up = -INT32_MAX;
    maybe_right_tab_down = -INT32_MAX;
  }
  int alignment_tolerance = static_cast<int>(resolution_ * kAlignedFraction);
  BLOBNBOX* neighbour = nullptr;
  while ((neighbour = radsearch.NextRadSearch()) != nullptr) {
    if (neighbour == bbox)
      continue;
    TBOX nbox = neighbour->bounding_box();
    int n_left = nbox.left();
    int n_right = nbox.right();
    if (debug)
      tprintf("Neighbour at (%d,%d)->(%d,%d)\n",
              n_left, nbox.bottom(), n_right, nbox.top());
    // If the neighbouring blob is the wrong side of a separator line, then it
    // "doesn't exist" as far as we are concerned.
    if (n_right > right_column_edge || n_left < left_column_edge ||
        left_x < neighbour->left_rule() || right_x > neighbour->right_rule())
      continue;  // Separator line in the way.
    int n_mid_x = (n_left + n_right) / 2;
    int n_mid_y = (nbox.top() + nbox.bottom()) / 2;
    // Left side: gutter intruder, aligned neighbour, or unaligned overlap.
    if (n_mid_x <= left_x && n_right >= target_right) {
      if (debug)
        tprintf("Not a left tab\n");
      is_left_tab = false;
      if (n_mid_y < top_y)
        maybe_left_tab_down = -INT32_MAX;
      if (n_mid_y > bottom_y)
        maybe_left_tab_up = -INT32_MAX;
    } else if (NearlyEqual(left_x, n_left, alignment_tolerance)) {
      if (debug)
        tprintf("Maybe a left tab\n");
      if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX)
        ++maybe_left_tab_up;
      if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX)
        ++maybe_left_tab_down;
    } else if (n_left < left_x && n_right >= left_x) {
      // Overlaps but not aligned so negative points on a maybe.
      if (debug)
        tprintf("Maybe Not a left tab\n");
      if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX)
        --maybe_left_tab_up;
      if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX)
        --maybe_left_tab_down;
    }
    // A y-overlapping neighbour that starts further left and reaches into
    // the left gutter rules out a ragged left edge.
    if (n_left < left_x && nbox.y_overlap(box) && n_right >= target_right) {
      maybe_ragged_left = false;
      if (debug)
        tprintf("Not a ragged left\n");
    }
    // Right side: the mirror image of the left-side tests above.
    if (n_mid_x >= right_x && n_left <= target_left) {
      if (debug)
        tprintf("Not a right tab\n");
      is_right_tab = false;
      if (n_mid_y < top_y)
        maybe_right_tab_down = -INT32_MAX;
      if (n_mid_y > bottom_y)
        maybe_right_tab_up = -INT32_MAX;
    } else if (NearlyEqual(right_x, n_right, alignment_tolerance)) {
      if (debug)
        tprintf("Maybe a right tab\n");
      if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX)
        ++maybe_right_tab_up;
      if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX)
        ++maybe_right_tab_down;
    } else if (n_right > right_x && n_left <= right_x) {
      // Overlaps but not aligned so negative points on a maybe.
      if (debug)
        tprintf("Maybe Not a right tab\n");
      if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX)
        --maybe_right_tab_up;
      if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX)
        --maybe_right_tab_down;
    }
    if (n_right > right_x && nbox.y_overlap(box) && n_left <= target_left) {
      maybe_ragged_right = false;
      if (debug)
        tprintf("Not a ragged right\n");
    }
    // Stop early once all four "maybe" counters have been ruled out.
    if (maybe_left_tab_down == -INT32_MAX && maybe_left_tab_up == -INT32_MAX &&
        maybe_right_tab_down == -INT32_MAX && maybe_right_tab_up == -INT32_MAX)
      break;
  }
  // Aligned takes precedence over ragged; ragged additionally requires the
  // min_ragged_gutter strip beside the blob to be confirmed empty.
  if (is_left_tab || maybe_left_tab_up > 1 || maybe_left_tab_down > 1) {
    bbox->set_left_tab_type(TT_MAYBE_ALIGNED);
  } else if (maybe_ragged_left && ConfirmRaggedLeft(bbox, min_ragged_gutter)) {
    bbox->set_left_tab_type(TT_MAYBE_RAGGED);
  } else {
    bbox->set_left_tab_type(TT_NONE);
  }
  if (is_right_tab || maybe_right_tab_up > 1 || maybe_right_tab_down > 1) {
    bbox->set_right_tab_type(TT_MAYBE_ALIGNED);
  } else if (maybe_ragged_right &&
             ConfirmRaggedRight(bbox, min_ragged_gutter)) {
    bbox->set_right_tab_type(TT_MAYBE_RAGGED);
  } else {
    bbox->set_right_tab_type(TT_NONE);
  }
  if (debug) {
    tprintf("Left result = %s, Right result=%s\n",
            bbox->left_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" :
            (bbox->left_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"),
            bbox->right_tab_type() == TT_MAYBE_ALIGNED ? "Aligned" :
            (bbox->right_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"));
  }
  return bbox->left_tab_type() != TT_NONE || bbox->right_tab_type() != TT_NONE;
}

// Returns true if there is nothing in the rectangle of width min_gutter to
// the left of bbox.
+bool TabFind::ConfirmRaggedLeft(BLOBNBOX* bbox, int min_gutter) { + TBOX search_box(bbox->bounding_box()); + search_box.set_right(search_box.left()); + search_box.set_left(search_box.left() - min_gutter); + return NothingYOverlapsInBox(search_box, bbox->bounding_box()); +} + +// Returns true if there is nothing in the rectangle of width min_gutter to +// the right of bbox. +bool TabFind::ConfirmRaggedRight(BLOBNBOX* bbox, int min_gutter) { + TBOX search_box(bbox->bounding_box()); + search_box.set_left(search_box.right()); + search_box.set_right(search_box.right() + min_gutter); + return NothingYOverlapsInBox(search_box, bbox->bounding_box()); +} + +// Returns true if there is nothing in the given search_box that vertically +// overlaps target_box other than target_box itself. +bool TabFind::NothingYOverlapsInBox(const TBOX& search_box, + const TBOX& target_box) { + BlobGridSearch rsearch(this); + rsearch.StartRectSearch(search_box); + BLOBNBOX* blob; + while ((blob = rsearch.NextRectSearch()) != nullptr) { + const TBOX& box = blob->bounding_box(); + if (box.y_overlap(target_box) && !(box == target_box)) + return false; + } + return true; +} + +void TabFind::FindAllTabVectors(int min_gutter_width) { + // A list of vectors that will be created in estimating the skew. + TabVector_LIST dummy_vectors; + // An estimate of the vertical direction, revised as more lines are added. + int vertical_x = 0; + int vertical_y = 1; + // Find an estimate of the vertical direction by finding some tab vectors. + // Slowly up the search size until we get some vectors. 
+ for (int search_size = kMinVerticalSearch; search_size < kMaxVerticalSearch; + search_size += kMinVerticalSearch) { + int vector_count = FindTabVectors(search_size, TA_LEFT_ALIGNED, + min_gutter_width, + &dummy_vectors, + &vertical_x, &vertical_y); + vector_count += FindTabVectors(search_size, TA_RIGHT_ALIGNED, + min_gutter_width, + &dummy_vectors, + &vertical_x, &vertical_y); + if (vector_count > 0) + break; + } + // Get rid of the test vectors and reset the types of the tabs. + dummy_vectors.clear(); + for (int i = 0; i < left_tab_boxes_.size(); ++i) { + BLOBNBOX* bbox = left_tab_boxes_[i]; + if (bbox->left_tab_type() == TT_CONFIRMED) + bbox->set_left_tab_type(TT_MAYBE_ALIGNED); + } + for (int i = 0; i < right_tab_boxes_.size(); ++i) { + BLOBNBOX* bbox = right_tab_boxes_[i]; + if (bbox->right_tab_type() == TT_CONFIRMED) + bbox->set_right_tab_type(TT_MAYBE_ALIGNED); + } + if (textord_debug_tabfind) { + tprintf("Beginning real tab search with vertical = %d,%d...\n", + vertical_x, vertical_y); + } + // Now do the real thing ,but keep the vectors in the dummy_vectors list + // until they are all done, so we don't get the tab vectors confused with + // the rule line vectors. + FindTabVectors(kMaxVerticalSearch, TA_LEFT_ALIGNED, min_gutter_width, + &dummy_vectors, &vertical_x, &vertical_y); + FindTabVectors(kMaxVerticalSearch, TA_RIGHT_ALIGNED, min_gutter_width, + &dummy_vectors, &vertical_x, &vertical_y); + FindTabVectors(kMaxRaggedSearch, TA_LEFT_RAGGED, min_gutter_width, + &dummy_vectors, &vertical_x, &vertical_y); + FindTabVectors(kMaxRaggedSearch, TA_RIGHT_RAGGED, min_gutter_width, + &dummy_vectors, &vertical_x, &vertical_y); + // Now add the vectors to the vectors_ list. + TabVector_IT v_it(&vectors_); + v_it.add_list_after(&dummy_vectors); + // Now use the summed (mean) vertical vector as the direction for everything. + SetVerticalSkewAndParallelize(vertical_x, vertical_y); +} + +// Helper for FindAllTabVectors finds the vectors of a particular type. 
+int TabFind::FindTabVectors(int search_size_multiple, TabAlignment alignment, + int min_gutter_width, TabVector_LIST* vectors, + int* vertical_x, int* vertical_y) { + TabVector_IT vector_it(vectors); + int vector_count = 0; + // Search the right or left tab boxes, looking for tab vectors. + bool right = alignment == TA_RIGHT_ALIGNED || alignment == TA_RIGHT_RAGGED; + const GenericVector<BLOBNBOX*>& boxes = right ? right_tab_boxes_ + : left_tab_boxes_; + for (int i = 0; i < boxes.size(); ++i) { + BLOBNBOX* bbox = boxes[i]; + if ((!right && bbox->left_tab_type() == TT_MAYBE_ALIGNED) || + (right && bbox->right_tab_type() == TT_MAYBE_ALIGNED)) { + TabVector* vector = FindTabVector(search_size_multiple, min_gutter_width, + alignment, + bbox, vertical_x, vertical_y); + if (vector != nullptr) { + ++vector_count; + vector_it.add_to_end(vector); + } + } + } + return vector_count; +} + +// Finds a vector corresponding to a tabstop running through the +// given box of the given alignment type. +// search_size_multiple is a multiple of height used to control +// the size of the search. +// vertical_x and y are updated with an estimate of the real +// vertical direction. (skew finding.) +// Returns nullptr if no decent tabstop can be found. +TabVector* TabFind::FindTabVector(int search_size_multiple, + int min_gutter_width, + TabAlignment alignment, + BLOBNBOX* bbox, + int* vertical_x, int* vertical_y) { + int height = std::max(static_cast<int>(bbox->bounding_box().height()), gridsize()); + AlignedBlobParams align_params(*vertical_x, *vertical_y, + height, + search_size_multiple, min_gutter_width, + resolution_, alignment); + // FindVerticalAlignment is in the parent (AlignedBlob) class. + return FindVerticalAlignment(align_params, bbox, vertical_x, vertical_y); +} + +// Set the vertical_skew_ member from the given vector and refit +// all vectors parallel to the skew vector. 
+void TabFind::SetVerticalSkewAndParallelize(int vertical_x, int vertical_y) { + // Fit the vertical vector into an ICOORD, which is 16 bit. + vertical_skew_.set_with_shrink(vertical_x, vertical_y); + if (textord_debug_tabfind) + tprintf("Vertical skew vector=(%d,%d)\n", + vertical_skew_.x(), vertical_skew_.y()); + v_it_.set_to_list(&vectors_); + for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) { + TabVector* v = v_it_.data(); + v->Fit(vertical_skew_, true); + } + // Now sort the vectors as their direction has potentially changed. + SortVectors(); +} + +// Sort all the current vectors using the given vertical direction vector. +void TabFind::SortVectors() { + vectors_.sort(TabVector::SortVectorsByKey); + v_it_.set_to_list(&vectors_); +} + +// Evaluate all the current tab vectors. +void TabFind::EvaluateTabs() { + TabVector_IT rule_it(&vectors_); + for (rule_it.mark_cycle_pt(); !rule_it.cycled_list(); rule_it.forward()) { + TabVector* tab = rule_it.data(); + if (!tab->IsSeparator()) { + tab->Evaluate(vertical_skew_, this); + if (tab->BoxCount() < kMinEvaluatedTabs) { + if (textord_debug_tabfind > 2) + tab->Print("Too few boxes"); + delete rule_it.extract(); + v_it_.set_to_list(&vectors_); + } else if (WithinTestRegion(3, tab->startpt().x(), tab->startpt().y())) { + tab->Print("Evaluated tab"); + } + } + } +} + +// Trace textlines from one side to the other of each tab vector, saving +// the most frequent column widths found in a list so that a given width +// can be tested for being a common width with a simple callback function. 
+void TabFind::ComputeColumnWidths(ScrollView* tab_win, + ColPartitionGrid* part_grid) { + #ifndef GRAPHICS_DISABLED + if (tab_win != nullptr) + tab_win->Pen(ScrollView::WHITE); + #endif // !GRAPHICS_DISABLED + // Accumulate column sections into a STATS + int col_widths_size = (tright_.x() - bleft_.x()) / kColumnWidthFactor; + STATS col_widths(0, col_widths_size + 1); + ApplyPartitionsToColumnWidths(part_grid, &col_widths); + #ifndef GRAPHICS_DISABLED + if (tab_win != nullptr) { + tab_win->Update(); + } + #endif // !GRAPHICS_DISABLED + if (textord_debug_tabfind > 1) + col_widths.print(); + // Now make a list of column widths. + MakeColumnWidths(col_widths_size, &col_widths); + // Turn the column width into a range. + ApplyPartitionsToColumnWidths(part_grid, nullptr); +} + +// Finds column width and: +// if col_widths is not null (pass1): +// pair-up tab vectors with existing ColPartitions and accumulate widths. +// else (pass2): +// find the largest real partition width for each recorded column width, +// to be used as the minimum acceptable width. +void TabFind::ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid, + STATS* col_widths) { + // For every ColPartition in the part_grid, add partners to the tabvectors + // and accumulate the column widths. 
+ ColPartitionGridSearch gsearch(part_grid); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + BLOBNBOX_C_IT blob_it(part->boxes()); + if (blob_it.empty()) + continue; + BLOBNBOX* left_blob = blob_it.data(); + blob_it.move_to_last(); + BLOBNBOX* right_blob = blob_it.data(); + TabVector* left_vector = LeftTabForBox(left_blob->bounding_box(), + true, false); + if (left_vector == nullptr || left_vector->IsRightTab()) + continue; + TabVector* right_vector = RightTabForBox(right_blob->bounding_box(), + true, false); + if (right_vector == nullptr || right_vector->IsLeftTab()) + continue; + + int line_left = left_vector->XAtY(left_blob->bounding_box().bottom()); + int line_right = right_vector->XAtY(right_blob->bounding_box().bottom()); + // Add to STATS of measurements if the width is significant. + int width = line_right - line_left; + if (col_widths != nullptr) { + AddPartnerVector(left_blob, right_blob, left_vector, right_vector); + if (width >= kMinColumnWidth) + col_widths->add(width / kColumnWidthFactor, 1); + } else { + width /= kColumnWidthFactor; + ICOORDELT_IT it(&column_widths_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ICOORDELT* w = it.data(); + if (NearlyEqual<int>(width, w->y(), 1)) { + int true_width = part->bounding_box().width() / kColumnWidthFactor; + if (true_width <= w->y() && true_width > w->x()) + w->set_x(true_width); + break; + } + } + } + } +} + +// Helper makes the list of common column widths in column_widths_ from the +// input col_widths. Destroys the content of col_widths by repeatedly +// finding the mode and erasing the peak. 
+void TabFind::MakeColumnWidths(int col_widths_size, STATS* col_widths) { + ICOORDELT_IT w_it(&column_widths_); + int total_col_count = col_widths->get_total(); + while (col_widths->get_total() > 0) { + int width = col_widths->mode(); + int col_count = col_widths->pile_count(width); + col_widths->add(width, -col_count); + // Get the entire peak. + for (int left = width - 1; left > 0 && + col_widths->pile_count(left) > 0; + --left) { + int new_count = col_widths->pile_count(left); + col_count += new_count; + col_widths->add(left, -new_count); + } + for (int right = width + 1; right < col_widths_size && + col_widths->pile_count(right) > 0; + ++right) { + int new_count = col_widths->pile_count(right); + col_count += new_count; + col_widths->add(right, -new_count); + } + if (col_count > kMinLinesInColumn && + col_count > kMinFractionalLinesInColumn * total_col_count) { + auto* w = new ICOORDELT(0, width); + w_it.add_after_then_move(w); + if (textord_debug_tabfind) + tprintf("Column of width %d has %d = %.2f%% lines\n", + width * kColumnWidthFactor, col_count, + 100.0 * col_count / total_col_count); + } + } +} + +// Mark blobs as being in a vertical text line where that is the case. +// Returns true if the majority of the image is vertical text lines. 
+void TabFind::MarkVerticalText() { + if (textord_debug_tabfind) + tprintf("Checking for vertical lines\n"); + BlobGridSearch gsearch(this); + gsearch.StartFullSearch(); + BLOBNBOX* blob = nullptr; + while ((blob = gsearch.NextFullSearch()) != nullptr) { + if (blob->region_type() < BRT_UNKNOWN) + continue; + if (blob->UniquelyVertical()) { + blob->set_region_type(BRT_VERT_TEXT); + } + } +} + +int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) { + TabVector_IT it(lines); + int prev_right = -1; + int max_gap = static_cast<int>(kMaxGutterWidthAbsolute * resolution_); + STATS gaps(0, max_gap); + STATS heights(0, max_gap); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + TabVector* v = it.data(); + TabVector* partner = v->GetSinglePartner(); + if (!v->IsLeftTab() || v->IsSeparator() || !partner) continue; + heights.add(partner->startpt().x() - v->startpt().x(), 1); + if (prev_right > 0 && v->startpt().x() > prev_right) { + gaps.add(v->startpt().x() - prev_right, 1); + } + prev_right = partner->startpt().x(); + } + if (textord_debug_tabfind) + tprintf("TabGutter total %d median_gap %.2f median_hgt %.2f\n", + gaps.get_total(), gaps.median(), heights.median()); + if (gaps.get_total() < kMinLinesInColumn) return 0; + return static_cast<int>(gaps.median()); +} + +// Find the next adjacent (looking to the left or right) blob on this text +// line, with the constraint that it must vertically significantly overlap +// the [top_y, bottom_y] range. +// If ignore_images is true, then blobs with aligned_text() < 0 are treated +// as if they do not exist. 
// Returns the chosen neighbour, or nullptr if the search ends without one.
BLOBNBOX* TabFind::AdjacentBlob(const BLOBNBOX* bbox,
                                bool look_left, bool ignore_images,
                                double min_overlap_fraction,
                                int gap_limit, int top_y, int bottom_y) {
  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> sidesearch(this);
  const TBOX& box = bbox->bounding_box();
  int left = box.left();
  int right = box.right();
  int mid_x = (left + right) / 2;
  sidesearch.StartSideSearch(mid_x, bottom_y, top_y);
  int best_gap = 0;
  bool debug = WithinTestRegion(3, left, bottom_y);
  BLOBNBOX* result = nullptr;
  BLOBNBOX* neighbour = nullptr;
  while ((neighbour = sidesearch.NextSideSearch(look_left)) != nullptr) {
    if (debug) {
      tprintf("Adjacent blob: considering box:");
      neighbour->bounding_box().print();
    }
    if (neighbour == bbox ||
        (ignore_images && neighbour->region_type() < BRT_UNKNOWN))
      continue;
    const TBOX& nbox = neighbour->bounding_box();
    int n_top_y = nbox.top();
    int n_bottom_y = nbox.bottom();
    int v_overlap = std::min(n_top_y, top_y) - std::max(n_bottom_y, bottom_y);
    int height = top_y - bottom_y;
    int n_height = n_top_y - n_bottom_y;
    // The overlap must exceed the given fraction of the smaller height; the
    // size comparison is skipped when min_overlap_fraction is exactly 0.
    if (v_overlap > min_overlap_fraction * std::min(height, n_height) &&
        (min_overlap_fraction == 0.0 || !DifferentSizes(height, n_height))) {
      int n_left = nbox.left();
      int n_right = nbox.right();
      int h_gap = std::max(n_left, left) - std::min(n_right, right);
      int n_mid_x = (n_left + n_right) / 2;
      // Only neighbours whose middle lies strictly on the searched side of
      // mid_x are considered.
      if (look_left == (n_mid_x < mid_x) && n_mid_x != mid_x) {
        if (h_gap > gap_limit) {
          // Hit a big gap before next tab so don't return anything.
          if (debug)
            tprintf("Giving up due to big gap = %d vs %d\n",
                    h_gap, gap_limit);
          return result;
        }
        if (h_gap > 0 && (look_left ? neighbour->right_tab_type()
                          : neighbour->left_tab_type()) >= TT_CONFIRMED) {
          // Hit a tab facing the wrong way. Stop in case we are crossing
          // the column boundary.
          if (debug)
            tprintf("Collision with like tab of type %d at %d,%d\n",
                    look_left ? neighbour->right_tab_type()
                              : neighbour->left_tab_type(),
                    n_left, nbox.bottom());
          return result;
        }
        // This is a good fit to the line. Continue with this
        // neighbour as the bbox if the best gap.
        if (result == nullptr || h_gap < best_gap) {
          if (debug)
            tprintf("Good result\n");
          result = neighbour;
          best_gap = h_gap;
        } else {
          // The new one is worse, so we probably already have the best result.
          return result;
        }
      } else if (debug) {
        tprintf("Wrong way\n");
      }
    } else if (debug) {
      tprintf("Insufficient overlap\n");
    }
  }
  // NOTE(review): this message is gated on a fresh WithinTestRegion test
  // using box.top(), not on the debug flag computed above from bottom_y.
  if (WithinTestRegion(3, left, box.top()))
    tprintf("Giving up due to end of search\n");
  return result;  // Hit the edge and found nothing.
}

// Add a bi-directional partner relationship between the left
// and the right. If one (or both) of the vectors is a separator,
// extend a nearby extendable vector or create a new one of the
// correct type, using the given left or right blob as a guide.
void TabFind::AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob,
                               TabVector* left, TabVector* right) {
  const TBOX& left_box = left_blob->bounding_box();
  const TBOX& right_box = right_blob->bounding_box();
  if (left->IsSeparator()) {
    // Try to find a nearby left edge to extend.
    TabVector* v = LeftTabForBox(left_box, true, true);
    if (v != nullptr && v != left && v->IsLeftTab() &&
        v->XAtY(left_box.top()) > left->XAtY(left_box.top())) {
      left = v;  // Found a good replacement.
      left->ExtendToBox(left_blob);
    } else {
      // Fake a vector.
      left = new TabVector(*left, TA_LEFT_RAGGED, vertical_skew_, left_blob);
      vectors_.add_sorted(TabVector::SortVectorsByKey, left);
      // The list has changed, so reset the member iterator to its start.
      v_it_.move_to_first();
    }
  }
  if (right->IsSeparator()) {
    // Try to find a nearby right edge to extend.
    if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
      tprintf("Box edge (%d,%d-%d)",
              right_box.right(), right_box.bottom(), right_box.top());
      right->Print(" looking for improvement for");
    }
    TabVector* v = RightTabForBox(right_box, true, true);
    if (v != nullptr && v != right && v->IsRightTab() &&
        v->XAtY(right_box.top()) < right->XAtY(right_box.top())) {
      right = v;  // Found a good replacement.
      right->ExtendToBox(right_blob);
      if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
        right->Print("Extended vector");
      }
    } else {
      // Fake a vector.
      right = new TabVector(*right, TA_RIGHT_RAGGED, vertical_skew_,
                            right_blob);
      vectors_.add_sorted(TabVector::SortVectorsByKey, right);
      // The list has changed, so reset the member iterator to its start.
      v_it_.move_to_first();
      if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
        right->Print("Created new vector");
      }
    }
  }
  // Record the relationship in both directions.
  left->AddPartner(right);
  right->AddPartner(left);
}

// Remove separators and unused tabs from the main vectors_ list
// to the dead_vectors_ list.
void TabFind::CleanupTabs() {
  // TODO(rays) Before getting rid of separators and unused vectors, it
  // would be useful to try moving ragged vectors outwards to see if this
  // allows useful extension. Could be combined with checking ends of partners.
  TabVector_IT it(&vectors_);
  TabVector_IT dead_it(&dead_vectors_);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    TabVector* v = it.data();
    if (v->IsSeparator() || v->Partnerless()) {
      dead_it.add_after_then_move(it.extract());
      // Reset the member iterator after removing an element from the list.
      v_it_.set_to_list(&vectors_);
    } else {
      v->FitAndEvaluateIfNeeded(vertical_skew_, this);
    }
  }
}

// Apply the given rotation to the given list of blobs.
void TabFind::RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs) {
  BLOBNBOX_IT it(blobs);
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
    it.data()->rotate_box(rotation);
  }
}

// Recreate the grid with deskewed BLOBNBOXes.
+// Returns false if the detected skew angle is impossible. +bool TabFind::Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs, + TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew) { + ComputeDeskewVectors(deskew, reskew); + if (deskew->x() < kCosMaxSkewAngle) + return false; + RotateBlobList(*deskew, image_blobs); + RotateBlobList(*deskew, &block->blobs); + RotateBlobList(*deskew, &block->small_blobs); + RotateBlobList(*deskew, &block->noise_blobs); + + // Rotate the horizontal vectors. The vertical vectors don't need + // rotating as they can just be refitted. + TabVector_IT h_it(hlines); + for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) { + TabVector* h = h_it.data(); + h->Rotate(*deskew); + } + TabVector_IT d_it(&dead_vectors_); + for (d_it.mark_cycle_pt(); !d_it.cycled_list(); d_it.forward()) { + TabVector* d = d_it.data(); + d->Rotate(*deskew); + } + SetVerticalSkewAndParallelize(0, 1); + // Rebuild the grid to the new size. + TBOX grid_box(bleft_, tright_); + grid_box.rotate_large(*deskew); + Init(gridsize(), grid_box.botleft(), grid_box.topright()); + InsertBlobsToGrid(false, false, image_blobs, this); + InsertBlobsToGrid(true, false, &block->blobs, this); + return true; +} + +// Flip the vertical and horizontal lines and rotate the grid ready +// for working on the rotated image. +// This also makes parameter adjustments for FindInitialTabVectors(). +void TabFind::ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate, + TabVector_LIST* horizontal_lines, + int* min_gutter_width) { + // Rotate the horizontal and vertical vectors and swap them over. + // Only the separators are kept and rotated; other tabs are used + // to estimate the gutter width then thrown away. 
+ TabVector_LIST ex_verticals; + TabVector_IT ex_v_it(&ex_verticals); + TabVector_LIST vlines; + TabVector_IT v_it(&vlines); + while (!v_it_.empty()) { + TabVector* v = v_it_.extract(); + if (v->IsSeparator()) { + v->Rotate(rotate); + ex_v_it.add_after_then_move(v); + } else { + v_it.add_after_then_move(v); + } + v_it_.forward(); + } + + // Adjust the min gutter width for better tabbox selection + // in 2nd call to FindInitialTabVectors(). + int median_gutter = FindMedianGutterWidth(&vlines); + if (median_gutter > *min_gutter_width) + *min_gutter_width = median_gutter; + + TabVector_IT h_it(horizontal_lines); + for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) { + TabVector* h = h_it.data(); + h->Rotate(rotate); + } + v_it_.add_list_after(horizontal_lines); + v_it_.move_to_first(); + h_it.set_to_list(horizontal_lines); + h_it.add_list_after(&ex_verticals); + + // Rebuild the grid to the new size. + TBOX grid_box(bleft(), tright()); + grid_box.rotate_large(rotate); + Init(gridsize(), grid_box.botleft(), grid_box.topright()); +} + +// Clear the grid and get rid of the tab vectors, but not separators, +// ready to start again. +void TabFind::Reset() { + v_it_.move_to_first(); + for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) { + if (!v_it_.data()->IsSeparator()) + delete v_it_.extract(); + } + Clear(); +} + +// Reflect the separator tab vectors and the grids in the y-axis. +// Can only be called after Reset! +void TabFind::ReflectInYAxis() { + TabVector_LIST temp_list; + TabVector_IT temp_it(&temp_list); + v_it_.move_to_first(); + // The TabVector list only contains vertical lines, but they need to be + // reflected and the list needs to be reversed, so they are still in + // sort_key order. 
+ while (!v_it_.empty()) { + TabVector* v = v_it_.extract(); + v_it_.forward(); + v->ReflectInYAxis(); + temp_it.add_before_then_move(v); + } + v_it_.add_list_after(&temp_list); + v_it_.move_to_first(); + // Reset this grid with reflected bounding boxes. + TBOX grid_box(bleft(), tright()); + int tmp = grid_box.left(); + grid_box.set_left(-grid_box.right()); + grid_box.set_right(-tmp); + Init(gridsize(), grid_box.botleft(), grid_box.topright()); +} + +// Compute the rotation required to deskew, and its inverse rotation. +void TabFind::ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew) { + double length = vertical_skew_ % vertical_skew_; + length = sqrt(length); + deskew->set_x(static_cast<float>(vertical_skew_.y() / length)); + deskew->set_y(static_cast<float>(vertical_skew_.x() / length)); + reskew->set_x(deskew->x()); + reskew->set_y(-deskew->y()); +} + +// Compute and apply constraints to the end positions of TabVectors so +// that where possible partners end at the same y coordinate. +void TabFind::ApplyTabConstraints() { + TabVector_IT it(&vectors_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + TabVector* v = it.data(); + v->SetupConstraints(); + } + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + TabVector* v = it.data(); + // With the first and last partner, we want a common bottom and top, + // respectively, and for each change of partner, we want a common + // top of first with bottom of next. + v->SetupPartnerConstraints(); + } + // TODO(rays) The back-to-back pairs should really be done like the + // front-to-front pairs, but there is no convenient way of producing the + // list of partners like there is with the front-to-front. + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + TabVector* v = it.data(); + if (!v->IsRightTab()) + continue; + // For each back-to-back pair of vectors, try for common top and bottom. 
+ TabVector_IT partner_it(it); + for (partner_it.forward(); !partner_it.at_first(); partner_it.forward()) { + TabVector* partner = partner_it.data(); + if (!partner->IsLeftTab() || !v->VOverlap(*partner)) + continue; + v->SetupPartnerConstraints(partner); + } + } + // Now actually apply the constraints to get common start/end points. + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + TabVector* v = it.data(); + if (!v->IsSeparator()) + v->ApplyConstraints(); + } + // TODO(rays) Where constraint application fails, it would be good to try + // checking the ends to see if they really should be moved. +} + +} // namespace tesseract. diff --git a/tesseract/src/textord/tabfind.h b/tesseract/src/textord/tabfind.h new file mode 100644 index 00000000..d16a533c --- /dev/null +++ b/tesseract/src/textord/tabfind.h @@ -0,0 +1,384 @@ +/////////////////////////////////////////////////////////////////////// +// File: tabfind.h +// Description: Subclass of BBGrid to find tabstops. +// Author: Ray Smith +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
//
///////////////////////////////////////////////////////////////////////

#ifndef TESSERACT_TEXTORD_TABFIND_H_
#define TESSERACT_TEXTORD_TABFIND_H_

#include <functional>  // for std::function
#include "alignedblob.h"
#include "tabvector.h"
#include "linefind.h"

class BLOBNBOX;
class BLOBNBOX_LIST;
class TO_BLOCK;
class ScrollView;
struct Pix;

namespace tesseract {

/** Callback type that takes an int (a width) and returns a bool. */
using WidthCallback = std::function<bool(int)>;

struct AlignedBlobParams;
class ColPartitionGrid;

/** Pixel resolution of column width estimates. */
const int kColumnWidthFactor = 20;

/**
 * The TabFind class contains code to find tab-stops and maintain the
 * vectors_ list of tab vectors.
 * Also provides an interface to find neighbouring blobs
 * in the grid of BLOBNBOXes that is used by multiple subclasses.
 * Searching is a complex operation because of the need to enforce
 * rule/separator lines, and tabstop boundaries, (when available), so
 * as the holder of the list of TabVectors this class provides the functions.
 */
class TESS_API TabFind : public AlignedBlob {
 public:
  TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright,
          TabVector_LIST* vlines, int vertical_x, int vertical_y,
          int resolution);
  ~TabFind() override;

  /**
   * Insert a list of blobs into the given grid (not necessarily this).
   * See InsertBlob for the other arguments.
   * It would seem to make more sense to swap this and grid, but this way
   * around allows grid to not be derived from TabFind, eg a ColPartitionGrid,
   * while the grid that provides the tab stops(this) has to be derived from
   * TabFind.
   */
  void InsertBlobsToGrid(bool h_spread, bool v_spread,
                         BLOBNBOX_LIST* blobs,
                         BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);

  /**
   * Insert a single blob into the given grid (not necessarily this).
   * If h_spread, then all cells covered horizontally by the box are
   * used, otherwise, just the bottom-left. Similarly for v_spread.
   * A side effect is that the left and right rule edges of the blob are
   * set according to the tab vectors in this (not grid).
   */
  bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX* blob,
                  BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);
  // Calls SetBlobRuleEdges for all the blobs in the given block.
  void SetBlockRuleEdges(TO_BLOCK* block);
  // Sets the left and right rule and crossing_rules for the blobs in the given
  // list by finding the next outermost tabvectors for each blob.
  void SetBlobRuleEdges(BLOBNBOX_LIST* blobs);

  // Returns the gutter width of the given TabVector between the given y limits.
  // Also returns x-shift to be added to the vector to clear any intersecting
  // blobs. The shift is deducted from the returned gutter.
  // If ignore_unmergeables is true, then blobs of UnMergeableType are
  // ignored as if they don't exist. (Used for text on image.)
  // max_gutter_width is used as the maximum width worth searching for in case
  // there is nothing near the TabVector.
  int GutterWidth(int bottom_y, int top_y, const TabVector& v,
                  bool ignore_unmergeables, int max_gutter_width,
                  int* required_shift);
  /**
   * Find the gutter width and distance to inner neighbour for the given blob.
   */
  void GutterWidthAndNeighbourGap(int tab_x, int mean_height,
                                  int max_gutter, bool left,
                                  BLOBNBOX* bbox, int* gutter_width,
                                  int* neighbour_gap);

  /**
   * Return the x-coord that corresponds to the right edge for the given
   * box. If there is a rule line to the right that vertically overlaps it,
   * then return the x-coord of the rule line, otherwise return the right
   * edge of the page. For details see RightTabForBox below.
   */
  int RightEdgeForBox(const TBOX& box, bool crossing, bool extended);
  /**
   * As RightEdgeForBox, but finds the left Edge instead.
   */
  int LeftEdgeForBox(const TBOX& box, bool crossing, bool extended);

  /**
   * Return the TabVector that corresponds to the right edge for the given
   * box. If there is a TabVector to the right that vertically overlaps it,
   * then return it, otherwise return nullptr. Note that Right and Left refer
   * to the position of the TabVector, not its type, ie RightTabForBox
   * returns the nearest TabVector to the right of the box, regardless of
   * its type.
   * If a TabVector crosses right through the box (as opposed to grazing one
   * edge or missing entirely), then crossing false will ignore such a line.
   * Crossing true will return the line for BOTH left and right edges.
   * If extended is true, then TabVectors are considered to extend to their
   * extended_start/end_y, otherwise, just the startpt_ and endpt_.
   * These functions make use of an internal iterator to the vectors_ list
   * for speed when used repeatedly on neighbouring boxes. The caveat is
   * that the iterator must be updated whenever the list is modified.
   */
  TabVector* RightTabForBox(const TBOX& box, bool crossing, bool extended);
  /**
   * As RightTabForBox, but finds the left TabVector instead.
   */
  TabVector* LeftTabForBox(const TBOX& box, bool crossing, bool extended);

  /**
   * Return true if the given width is close to one of the common
   * widths in column_widths_.
   */
  bool CommonWidth(int width);
  /**
   * Return true if the sizes are more than a
   * factor of 2 different.
   */
  static bool DifferentSizes(int size1, int size2);
  /**
   * Return true if the sizes are more than a
   * factor of 5 different.
   */
  static bool VeryDifferentSizes(int size1, int size2);

  /**
   * Return a callback for testing CommonWidth.
   * The returned value is a copy of the stored width_cb_.
   */
  WidthCallback WidthCB() {
    return width_cb_;
  }

  /**
   * Return the coords at which to draw the image backdrop.
   */
  const ICOORD& image_origin() const {
    return image_origin_;
  }

 protected:
  /**
   * Accessors. Each returns a pointer to the list owned by this object.
   */
  TabVector_LIST* vectors() {
    return &vectors_;
  }
  TabVector_LIST* dead_vectors() {
    return &dead_vectors_;
  }

  /**
   * Top-level function to find TabVectors in an input page block.
   * Returns false if the detected skew angle is impossible.
   * Applies the detected skew angle to deskew the tabs, blobs and part_grid.
   * tabfind_aligned_gap_fraction should be the value of parameter
   * textord_tabfind_aligned_gap_fraction
   */
  bool FindTabVectors(TabVector_LIST* hlines,
                      BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
                      int min_gutter_width, double tabfind_aligned_gap_fraction,
                      ColPartitionGrid* part_grid,
                      FCOORD* deskew, FCOORD* reskew);

  // Top-level function to not find TabVectors in an input page block,
  // but setup for single column mode.
  void DontFindTabVectors(BLOBNBOX_LIST* image_blobs,
                          TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew);

  // Cleans up the lists of blobs in the block ready for use by TabFind.
  // Large blobs that look like text are moved to the main blobs list.
  // Main blobs that are superseded by the image blobs are deleted.
  void TidyBlobs(TO_BLOCK* block);

  // Helper function to setup search limits for *TabForBox.
  void SetupTabSearch(int x, int y, int* min_key, int* max_key);

  /**
   * Display the tab vectors found in this grid.
   */
  ScrollView* DisplayTabVectors(ScrollView* tab_win);

  // First part of FindTabVectors, which may be used twice if the text
  // is mostly of vertical alignment. If find_vertical_text flag is
  // true, this finds vertical textlines in possibly rotated blob space.
  // In other words, when the page has mostly vertical lines and is rotated,
  // setting this to true will find horizontal lines on the page.
  // tabfind_aligned_gap_fraction should be the value of parameter
  // textord_tabfind_aligned_gap_fraction
  // NOTE(review): this declaration has no find_vertical_text parameter, so
  // the description above looks out of date - confirm against callers.
  ScrollView* FindInitialTabVectors(BLOBNBOX_LIST* image_blobs,
                                    int min_gutter_width,
                                    double tabfind_aligned_gap_fraction,
                                    TO_BLOCK* block);

  // Apply the given rotation to the given list of blobs.
  static void RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs);

  // Flip the vertical and horizontal lines and rotate the grid ready
  // for working on the rotated image.
  // The min_gutter_width will be adjusted to the median gutter width between
  // vertical tabs to set a better threshold for tabboxes in the 2nd pass.
  void ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate,
                            TabVector_LIST* horizontal_lines,
                            int* min_gutter_width);

  // Clear the grid and get rid of the tab vectors, but not separators,
  // ready to start again.
  void Reset();

  // Reflect the separator tab vectors and the grids in the y-axis.
  // Can only be called after Reset!
  void ReflectInYAxis();

 private:
  // For each box in the grid, decide whether it is a candidate tab-stop,
  // and if so add it to the left and right tab boxes.
  // tabfind_aligned_gap_fraction should be the value of parameter
  // textord_tabfind_aligned_gap_fraction
  ScrollView* FindTabBoxes(int min_gutter_width,
                           double tabfind_aligned_gap_fraction);

  // Return true if this box looks like a candidate tab stop, and set
  // the appropriate tab type(s) to TT_UNCONFIRMED.
  // tabfind_aligned_gap_fraction should be the value of parameter
  // textord_tabfind_aligned_gap_fraction
  bool TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width,
                      double tabfind_aligned_gap_fraction);

  // Returns true if there is nothing in the rectangle of width min_gutter to
  // the left of bbox.
  bool ConfirmRaggedLeft(BLOBNBOX* bbox, int min_gutter);
  // Returns true if there is nothing in the rectangle of width min_gutter to
  // the right of bbox.
  bool ConfirmRaggedRight(BLOBNBOX* bbox, int min_gutter);
  // Returns true if there is nothing in the given search_box that vertically
  // overlaps target_box other than target_box itself.
  bool NothingYOverlapsInBox(const TBOX& search_box, const TBOX& target_box);

  // Fills the list of TabVector with the tabstops found in the grid,
  // and estimates the logical vertical direction.
  void FindAllTabVectors(int min_gutter_width);
  // Helper for FindAllTabVectors finds the vectors of a particular type.
  int FindTabVectors(int search_size_multiple,
                     TabAlignment alignment,
                     int min_gutter_width,
                     TabVector_LIST* vectors,
                     int* vertical_x, int* vertical_y);
  // Finds a vector corresponding to a tabstop running through the
  // given box of the given alignment type.
  // search_size_multiple is a multiple of height used to control
  // the size of the search.
  // vertical_x and y are updated with an estimate of the real
  // vertical direction. (skew finding.)
  // Returns nullptr if no decent tabstop can be found.
  TabVector* FindTabVector(int search_size_multiple, int min_gutter_width,
                           TabAlignment alignment,
                           BLOBNBOX* bbox,
                           int* vertical_x, int* vertical_y);

  // Set the vertical_skew_ member from the given vector and refit
  // all vectors parallel to the skew vector.
  void SetVerticalSkewAndParallelize(int vertical_x, int vertical_y);

  // Sort all the current vectors using the vertical_skew_ vector.
  void SortVectors();

  // Evaluate all the current tab vectors.
  void EvaluateTabs();

  // Trace textlines from one side to the other of each tab vector, saving
  // the most frequent column widths found in a list so that a given width
  // can be tested for being a common width with a simple callback function.
  void ComputeColumnWidths(ScrollView* tab_win,
                           ColPartitionGrid* part_grid);

  // Finds column width and:
  // if col_widths is not null (pass1):
  //   pair-up tab vectors with existing ColPartitions and accumulate widths.
  // else (pass2):
  //   find the largest real partition width for each recorded column width,
  //   to be used as the minimum acceptable width.
  void ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid,
                                     STATS* col_widths);

  // Helper makes the list of common column widths in column_widths_ from the
  // input col_widths. Destroys the content of col_widths by repeatedly
  // finding the mode and erasing the peak.
  void MakeColumnWidths(int col_widths_size, STATS* col_widths);

  // Mark blobs as being in a vertical text line where that is the case.
  void MarkVerticalText();

  // Returns the median gutter width between pairs of matching tab vectors
  // assuming they are sorted left-to-right. If there are too few data
  // points (< kMinLinesInColumn), then 0 is returned.
  int FindMedianGutterWidth(TabVector_LIST* tab_vectors);

  // Find the next adjacent (to left or right) blob on this text line,
  // with the constraint that it must vertically significantly overlap
  // the [top_y, bottom_y] range.
  // If ignore_images is true, then blobs with region_type() < BRT_UNKNOWN
  // are treated as if they do not exist.
  BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox,
                         bool look_left, bool ignore_images,
                         double min_overlap_fraction,
                         int gap_limit, int top_y, int bottom_y);

  // Add a bi-directional partner relationship between the left
  // and the right. If one (or both) of the vectors is a separator,
  // extend a nearby extendable vector or create a new one of the
  // correct type, using the given left or right blob as a guide.
  void AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob,
                        TabVector* left, TabVector* right);

  /**
   * Remove separators and unused tabs from the main vectors_ list
   * to the dead_vectors_ list.
   */
  void CleanupTabs();

  /**
   * Deskew the tab vectors and blobs, computing the rotation and resetting
   * the stored vertical_skew_. The deskew inverse is returned in reskew.
   * Returns false if the detected skew angle is impossible.
   */
  bool Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
              TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew);

  // Compute the rotation required to deskew, and its inverse rotation.
  void ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew);

  /**
   * Compute and apply constraints to the end positions of TabVectors so
   * that where possible partners end at the same y coordinate.
   */
  void ApplyTabConstraints();

 protected:
  ICOORD vertical_skew_;          ///< Estimate of true vertical in this image.
  int resolution_;                ///< Of source image in pixels per inch.
 private:
  ICOORD image_origin_;           ///< Top-left of image in deskewed coords
  TabVector_LIST vectors_;        ///< List of rule line and tabstops.
  TabVector_IT v_it_;             ///< Iterator for searching vectors_.
  TabVector_LIST dead_vectors_;   ///< Separators and unpartnered tab vectors.
  // List of commonly occurring width ranges with x=min and y=max.
  ICOORDELT_LIST column_widths_;  ///< List of commonly occurring width ranges.
  /** Callback to test an int for being a common width. */
  WidthCallback width_cb_;
  // Sets of bounding boxes that are candidate tab stops.
  GenericVector<BLOBNBOX*> left_tab_boxes_;
  GenericVector<BLOBNBOX*> right_tab_boxes_;
};

}  // namespace tesseract.

#endif  // TESSERACT_TEXTORD_TABFIND_H_
diff --git a/tesseract/src/textord/tablefind.cpp b/tesseract/src/textord/tablefind.cpp
new file mode 100644
index 00000000..6326b858
--- /dev/null
+++ b/tesseract/src/textord/tablefind.cpp
@@ -0,0 +1,2088 @@
///////////////////////////////////////////////////////////////////////
// File: tablefind.cpp
// Description: Helper classes to find tables from ColPartitions.
// Author: Faisal Shafait (faisal.shafait@dfki.de)
//
// (C) Copyright 2009, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

#ifdef HAVE_CONFIG_H
#include "config_auto.h"
#endif

#include "tablefind.h"
#include <algorithm>
#include <cmath>

#include "allheaders.h"

#include "colpartitionset.h"
#include "tablerecog.h"

namespace tesseract {

// These numbers are used to calculate the global median stats.
// They just set an upper bound on the stats objects.
// Maximum vertical spacing between neighbor partitions.
const int kMaxVerticalSpacing = 500;
// Maximum width of a blob in a partition.
const int kMaxBlobWidth = 500;

// Minimum whitespace size to split a partition (measured as a multiple
// of a partition's median width).
const double kSplitPartitionSize = 2.0;
// To insert text, the partition must satisfy these size constraints
// in AllowTextPartition(). The idea is to filter noise partitions
// determined by the size compared to the global medians.
// TODO(nbeato): Need to find good numbers again.
const double kAllowTextHeight = 0.5;
const double kAllowTextWidth = 0.6;
const double kAllowTextArea = 0.8;
// The same thing applies to blobs (to filter noise).
// TODO(nbeato): These numbers are a shot in the dark...
// height and width are 0.5 * gridsize() in colfind.cpp
// area is a rough guess for the size of a period.
const double kAllowBlobHeight = 0.3;
const double kAllowBlobWidth = 0.4;
const double kAllowBlobArea = 0.05;

// Minimum number of components in a text partition. A partition having fewer
// components than that is more likely a data partition and is a candidate
// table cell.
const int kMinBoxesInTextPartition = 10;

// Maximum number of components that a data partition can have.
const int kMaxBoxesInDataPartition = 20;

// Maximum allowed gap in a text partition as a multiple of its median size.
const double kMaxGapInTextPartition = 4.0;

// Minimum value that the maximum gap in a text partition should have as a
// factor of its median size.
const double kMinMaxGapInTextPartition = 0.5;

// The amount of overlap that is "normal" for adjacent blobs in a text
// partition. This is used to calculate the gap between overlapping blobs.
const double kMaxBlobOverlapFactor = 4.0;

// Maximum x-height a table partition can have as a multiple of the global
// median x-height.
const double kMaxTableCellXheight = 2.0;

// Maximum line spacing between a table column header and column contents
// for merging the two (as a multiple of the partition's median_height).
const int kMaxColumnHeaderDistance = 4;

// Minimum ratio of num_table_partitions to num_text_partitions in a column
// block for it to be called a table column.
const double kTableColumnThreshold = 3.0;

// Search for horizontal ruling lines within the vertical margin as a
// multiple of grid size
// const int kRulingVerticalMargin = 3;

// Minimum overlap that a colpartition must have with a table region
// to become part of that table.
const double kMinOverlapWithTable = 0.6;

// Maximum side space (distance from column boundary) that a typical
// text-line in flowing text should have as a multiple of its x-height
// (Median size).
const int kSideSpaceMargin = 10;

// Fraction of the peak of x-projection of a table region to set the
// threshold for the x-projection histogram.
const double kSmallTableProjectionThreshold = 0.35;
const double kLargeTableProjectionThreshold = 0.45;
// Minimum number of rows required to look for more rows in the projection.
const int kLargeTableRowCount = 6;

// Minimum number of rows in a table.
const int kMinRowsInTable = 3;

// The amount of padding (multiplied by global_median_xheight_ during use)
// that is vertically added to the adjacent leader search during
// ColPartition marking.
const int kAdjacentLeaderSearchPadding = 2;

// Used when filtering false positives. When finding the last line
// of a paragraph (typically left-aligned), the previous line should have
// its center to the right of the last line by this scaled amount.
const double kParagraphEndingPreviousLineRatio = 1.3;

// The maximum amount of whitespace allowed left of a paragraph ending.
// Do not filter a ColPartition with more than this space left of it.
const double kMaxParagraphEndingLeftSpaceMultiple = 3.0;

// Used when filtering false positives. The last line of a paragraph
// should be preceded by a line that is predominantly text. This is the
// ratio of text to whitespace (to the right of the text) that is required
// for the previous line to be a text.
const double kMinParagraphEndingTextToWhitespaceRatio = 3.0;

// When counting table columns, this is the required gap between two columns
// (it is multiplied by global_median_xheight_).
const double kMaxXProjectionGapFactor = 2.0;

// Used for similarity in partitions using stroke width. Values copied
// from ColFind.cpp in Ray's CL.
+const double kStrokeWidthFractionalTolerance = 0.25; +const double kStrokeWidthConstantTolerance = 2.0; + +#ifndef GRAPHICS_DISABLED +static BOOL_VAR(textord_show_tables, false, "Show table regions (ScrollView)"); +static BOOL_VAR(textord_tablefind_show_mark, false, + "Debug table marking steps in detail (ScrollView)"); +static BOOL_VAR(textord_tablefind_show_stats, false, + "Show page stats used in table finding (ScrollView)"); +#endif +static BOOL_VAR(textord_tablefind_recognize_tables, false, + "Enables the table recognizer for table layout and filtering."); + +ELISTIZE(ColSegment) +CLISTIZE(ColSegment) + +// Templated helper function used to create destructor callbacks for the +// BBGrid::ClearGridData() method. +template <typename T> void DeleteObject(T *object) { + delete object; +} + +TableFinder::TableFinder() + : resolution_(0), + global_median_xheight_(0), + global_median_blob_width_(0), + global_median_ledding_(0), + left_to_right_language_(true) { +} + +TableFinder::~TableFinder() { + // ColPartitions and ColSegments created by this class for storage in grids + // need to be deleted explicitly. 
+ clean_part_grid_.ClearGridData(&DeleteObject<ColPartition>); + leader_and_ruling_grid_.ClearGridData(&DeleteObject<ColPartition>); + fragmented_text_grid_.ClearGridData(&DeleteObject<ColPartition>); + col_seg_grid_.ClearGridData(&DeleteObject<ColSegment>); + table_grid_.ClearGridData(&DeleteObject<ColSegment>); +} + +void TableFinder::set_left_to_right_language(bool order) { + left_to_right_language_ = order; +} + +void TableFinder::Init(int grid_size, const ICOORD& bottom_left, + const ICOORD& top_right) { + // Initialize clean partitions list and grid + clean_part_grid_.Init(grid_size, bottom_left, top_right); + leader_and_ruling_grid_.Init(grid_size, bottom_left, top_right); + fragmented_text_grid_.Init(grid_size, bottom_left, top_right); + col_seg_grid_.Init(grid_size, bottom_left, top_right); + table_grid_.Init(grid_size, bottom_left, top_right); +} + +// Copy cleaned partitions from part_grid_ to clean_part_grid_ and +// insert leaders and rulers into the leader_and_ruling_grid_ +void TableFinder::InsertCleanPartitions(ColPartitionGrid* grid, + TO_BLOCK* block) { + // Calculate stats. This lets us filter partitions in AllowTextPartition() + // and filter blobs in AllowBlob(). + SetGlobalSpacings(grid); + + // Iterate the ColPartitions in the grid. + ColPartitionGridSearch gsearch(grid); + gsearch.SetUniqueMode(true); + gsearch.StartFullSearch(); + ColPartition* part = nullptr; + while ((part = gsearch.NextFullSearch()) != nullptr) { + // Reject partitions with nothing useful inside of them. 
+ if (part->blob_type() == BRT_NOISE || part->bounding_box().area() <= 0) + continue; + ColPartition* clean_part = part->ShallowCopy(); + ColPartition* leader_part = nullptr; + if (part->IsLineType()) { + InsertRulingPartition(clean_part); + continue; + } + // Insert all non-text partitions to clean_parts + if (!part->IsTextType()) { + InsertImagePartition(clean_part); + continue; + } + // Insert text colpartitions after removing noisy components from them + // The leaders are split into a separate grid. + BLOBNBOX_CLIST* part_boxes = part->boxes(); + BLOBNBOX_C_IT pit(part_boxes); + for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) { + BLOBNBOX *pblob = pit.data(); + // Bad blobs... happens in UNLV set. + // news.3G1, page 17 (around x=6) + if (!AllowBlob(*pblob)) + continue; + if (pblob->flow() == BTFT_LEADER) { + if (leader_part == nullptr) { + leader_part = part->ShallowCopy(); + leader_part->set_flow(BTFT_LEADER); + } + leader_part->AddBox(pblob); + } else if (pblob->region_type() != BRT_NOISE) { + clean_part->AddBox(pblob); + } + } + clean_part->ComputeLimits(); + ColPartition* fragmented = clean_part->CopyButDontOwnBlobs(); + InsertTextPartition(clean_part); + SplitAndInsertFragmentedTextPartition(fragmented); + if (leader_part != nullptr) { + // TODO(nbeato): Note that ComputeLimits does not update the column + // information. So the leader may appear to span more columns than it + // really does later on when IsInSameColumnAs gets called to test + // for adjacent leaders. + leader_part->ComputeLimits(); + InsertLeaderPartition(leader_part); + } + } + + // Make the partition partners better for upper and lower neighbors. 
+ clean_part_grid_.FindPartitionPartners(); + clean_part_grid_.RefinePartitionPartners(false); +} + +// High level function to perform table detection +void TableFinder::LocateTables(ColPartitionGrid* grid, + ColPartitionSet** all_columns, + WidthCallback width_cb, + const FCOORD& reskew) { + // initialize spacing, neighbors, and columns + InitializePartitions(all_columns); + +#ifndef GRAPHICS_DISABLED + if (textord_show_tables) { + ScrollView* table_win = MakeWindow(0, 300, "Column Partitions & Neighbors"); + DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); + DisplayColPartitions(table_win, &leader_and_ruling_grid_, + ScrollView::AQUAMARINE); + DisplayColPartitionConnections(table_win, &clean_part_grid_, + ScrollView::ORANGE); + + table_win = MakeWindow(100, 300, "Fragmented Text"); + DisplayColPartitions(table_win, &fragmented_text_grid_, ScrollView::BLUE); + } +#endif // !GRAPHICS_DISABLED + + // mark, filter, and smooth candidate table partitions + MarkTablePartitions(); + + // Make single-column blocks from good_columns_ partitions. col_segments are + // moved to a grid later which takes the ownership + ColSegment_LIST column_blocks; + GetColumnBlocks(all_columns, &column_blocks); + // Set the ratio of candidate table partitions in each column + SetColumnsType(&column_blocks); + + // Move column segments to col_seg_grid_ + MoveColSegmentsToGrid(&column_blocks, &col_seg_grid_); + + // Detect split in column layout that might have occurred due to the + // presence of a table. In such a case, merge the corresponding columns. + GridMergeColumnBlocks(); + + // Group horizontally overlapping table partitions into table columns. + // table_columns created here get deleted at the end of this method. + ColSegment_LIST table_columns; + GetTableColumns(&table_columns); + + // Within each column, mark the range table regions occupy based on the + // table columns detected. 
table_regions are moved to a grid later which + // takes the ownership + ColSegment_LIST table_regions; + GetTableRegions(&table_columns, &table_regions); + +#ifndef GRAPHICS_DISABLED + if (textord_tablefind_show_mark) { + ScrollView* table_win = MakeWindow(1200, 300, "Table Columns and Regions"); + DisplayColSegments(table_win, &table_columns, ScrollView::DARK_TURQUOISE); + DisplayColSegments(table_win, &table_regions, ScrollView::YELLOW); + } +#endif // !GRAPHICS_DISABLED + + // Merge table regions across columns for tables spanning multiple + // columns + MoveColSegmentsToGrid(&table_regions, &table_grid_); + GridMergeTableRegions(); + + // Adjust table boundaries by including nearby horizontal lines and left + // out column headers + AdjustTableBoundaries(); + GridMergeTableRegions(); + + if (textord_tablefind_recognize_tables) { + // Remove false alarms consisting of a single column + DeleteSingleColumnTables(); + +#ifndef GRAPHICS_DISABLED + if (textord_show_tables) { + ScrollView* table_win = MakeWindow(1200, 300, "Detected Table Locations"); + DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); + DisplayColSegments(table_win, &table_columns, ScrollView::KHAKI); + table_grid_.DisplayBoxes(table_win); + } +#endif // !GRAPHICS_DISABLED + + // Find table grid structure and reject tables that are malformed. + RecognizeTables(); + GridMergeTableRegions(); + RecognizeTables(); + +#ifndef GRAPHICS_DISABLED + if (textord_show_tables) { + ScrollView* table_win = MakeWindow(1400, 600, "Recognized Tables"); + DisplayColPartitions(table_win, &clean_part_grid_, + ScrollView::BLUE, ScrollView::BLUE); + table_grid_.DisplayBoxes(table_win); + } +#endif // !GRAPHICS_DISABLED + } else { + // Remove false alarms consisting of a single column + // TODO(nbeato): verify this is a NOP after structured table rejection. + // Right now it isn't. If the recognize function is doing what it is + // supposed to do, this function is obsolete. 
+ DeleteSingleColumnTables(); + +#ifndef GRAPHICS_DISABLED + if (textord_show_tables) { + ScrollView* table_win = MakeWindow(1500, 300, "Detected Tables"); + DisplayColPartitions(table_win, &clean_part_grid_, + ScrollView::BLUE, ScrollView::BLUE); + table_grid_.DisplayBoxes(table_win); + } +#endif // !GRAPHICS_DISABLED + } + + // Merge all colpartitions in table regions to make them a single + // colpartition and revert types of isolated table cells not + // assigned to any table to their original types. + MakeTableBlocks(grid, all_columns, width_cb); +} +// All grids have the same dimensions. The clean_part_grid_ sizes are set from +// the part_grid_ that is passed to InsertCleanPartitions, which was the same as +// the grid that is the base of ColumnFinder. Just return the clean_part_grid_ +// dimensions instead of duplicated memory. +int TableFinder::gridsize() const { + return clean_part_grid_.gridsize(); +} +int TableFinder::gridwidth() const { + return clean_part_grid_.gridwidth(); +} +int TableFinder::gridheight() const { + return clean_part_grid_.gridheight(); +} +const ICOORD& TableFinder::bleft() const { + return clean_part_grid_.bleft(); +} +const ICOORD& TableFinder::tright() const { + return clean_part_grid_.tright(); +} + +void TableFinder::InsertTextPartition(ColPartition* part) { + ASSERT_HOST(part != nullptr); + if (AllowTextPartition(*part)) { + clean_part_grid_.InsertBBox(true, true, part); + } else { + delete part; + } +} +void TableFinder::InsertFragmentedTextPartition(ColPartition* part) { + ASSERT_HOST(part != nullptr); + if (AllowTextPartition(*part)) { + fragmented_text_grid_.InsertBBox(true, true, part); + } else { + delete part; + } +} +void TableFinder::InsertLeaderPartition(ColPartition* part) { + ASSERT_HOST(part != nullptr); + if (!part->IsEmpty() && part->bounding_box().area() > 0) { + leader_and_ruling_grid_.InsertBBox(true, true, part); + } else { + delete part; + } +} +void TableFinder::InsertRulingPartition(ColPartition* part) { 
+ leader_and_ruling_grid_.InsertBBox(true, true, part); +} +void TableFinder::InsertImagePartition(ColPartition* part) { + // NOTE: If images are placed into a different grid in the future, + // the function SetPartitionSpacings needs to be updated. It should + // be the only thing that cares about image partitions. + clean_part_grid_.InsertBBox(true, true, part); +} + +// Splits a partition into its "words". The splits happen +// at locations with wide inter-blob spacing. This is useful +// because it allows the table recognize to "cut through" the +// text lines on the page. The assumption is that a table +// will have several lines with similar overlapping whitespace +// whereas text will not have this type of property. +// Note: The code Assumes that blobs are sorted by the left side x! +// This will not work (as well) if the blobs are sorted by center/right. +void TableFinder::SplitAndInsertFragmentedTextPartition(ColPartition* part) { + ASSERT_HOST(part != nullptr); + // Bye bye empty partitions! + if (part->boxes()->empty()) { + delete part; + return; + } + + // The AllowBlob function prevents this. + ASSERT_HOST(part->median_width() > 0); + const double kThreshold = part->median_width() * kSplitPartitionSize; + + ColPartition* right_part = part; + bool found_split = true; + while (found_split) { + found_split = false; + BLOBNBOX_C_IT box_it(right_part->boxes()); + // Blobs are sorted left side first. If blobs overlap, + // the previous blob may have a "more right" right side. + // Account for this by always keeping the largest "right" + // so far. + int previous_right = INT32_MIN; + + // Look for the next split in the partition. + for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) { + const TBOX& box = box_it.data()->bounding_box(); + if (previous_right != INT32_MIN && + box.left() - previous_right > kThreshold) { + // We have a split position. Split the partition in two pieces. 
+ // Insert the left piece in the grid and keep processing the right. + int mid_x = (box.left() + previous_right) / 2; + ColPartition* left_part = right_part; + right_part = left_part->SplitAt(mid_x); + + InsertFragmentedTextPartition(left_part); + found_split = true; + break; + } + + // The right side of the previous blobs. + previous_right = std::max(previous_right, static_cast<int>(box.right())); + } + } + // When a split is not found, the right part is minimized + // as much as possible, so process it. + InsertFragmentedTextPartition(right_part); +} + +// Some simple criteria to filter out now. We want to make sure the +// average blob size in the partition is consistent with the +// global page stats. +// The area metric will almost always pass for multi-blob partitions. +// It is useful when filtering out noise caused by an isolated blob. +bool TableFinder::AllowTextPartition(const ColPartition& part) const { + const double kHeightRequired = global_median_xheight_ * kAllowTextHeight; + const double kWidthRequired = global_median_blob_width_ * kAllowTextWidth; + const int median_area = global_median_xheight_ * global_median_blob_width_; + const double kAreaPerBlobRequired = median_area * kAllowTextArea; + // Keep comparisons strictly greater to disallow 0! + return part.median_height() > kHeightRequired && + part.median_width() > kWidthRequired && + part.bounding_box().area() > kAreaPerBlobRequired * part.boxes_count(); +} + +// Same as above, applied to blobs. Keep in mind that +// leaders, commas, and periods are important in tables. 
+bool TableFinder::AllowBlob(const BLOBNBOX& blob) const { + const TBOX& box = blob.bounding_box(); + const double kHeightRequired = global_median_xheight_ * kAllowBlobHeight; + const double kWidthRequired = global_median_blob_width_ * kAllowBlobWidth; + const int median_area = global_median_xheight_ * global_median_blob_width_; + const double kAreaRequired = median_area * kAllowBlobArea; + // Keep comparisons strictly greater to disallow 0! + return box.height() > kHeightRequired && + box.width() > kWidthRequired && + box.area() > kAreaRequired; +} + +// TODO(nbeato): The grid that makes the window doesn't seem to matter. +// The only downside is that window messages will be caught by +// clean_part_grid_ instead of a useful object. This is a temporary solution +// for the debug windows created by the TableFinder. +#ifndef GRAPHICS_DISABLED +ScrollView* TableFinder::MakeWindow(int x, int y, const char* window_name) { + return clean_part_grid_.MakeWindow(x, y, window_name); +} +#endif + +// Make single-column blocks from good_columns_ partitions. +void TableFinder::GetColumnBlocks(ColPartitionSet** all_columns, + ColSegment_LIST* column_blocks) { + for (int i = 0; i < gridheight(); ++i) { + ColPartitionSet* columns = all_columns[i]; + if (columns != nullptr) { + ColSegment_LIST new_blocks; + // Get boxes from the current vertical position on the grid + columns->GetColumnBoxes(i * gridsize(), (i+1) * gridsize(), &new_blocks); + // Merge the new_blocks boxes into column_blocks if they are well-aligned + GroupColumnBlocks(&new_blocks, column_blocks); + } + } +} + +// Merge column segments into the current list if they are well aligned. 
+void TableFinder::GroupColumnBlocks(ColSegment_LIST* new_blocks, + ColSegment_LIST* column_blocks) { + ColSegment_IT src_it(new_blocks); + ColSegment_IT dest_it(column_blocks); + // iterate through the source list + for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { + ColSegment* src_seg = src_it.data(); + const TBOX& src_box = src_seg->bounding_box(); + bool match_found = false; + // iterate through the destination list to find a matching column block + for (dest_it.mark_cycle_pt(); !dest_it.cycled_list(); dest_it.forward()) { + ColSegment* dest_seg = dest_it.data(); + TBOX dest_box = dest_seg->bounding_box(); + if (ConsecutiveBoxes(src_box, dest_box)) { + // If matching block is found, insert the current block into it + // and delete the source block. + dest_seg->InsertBox(src_box); + match_found = true; + delete src_it.extract(); + break; + } + } + // If no match is found, just append the source block to column_blocks + if (!match_found) { + dest_it.add_after_then_move(src_it.extract()); + } + } +} + +// are the two boxes immediate neighbors along the vertical direction +bool TableFinder::ConsecutiveBoxes(const TBOX &b1, const TBOX &b2) { + int x_margin = 20; + int y_margin = 5; + return (abs(b1.left() - b2.left()) < x_margin) && + (abs(b1.right() - b2.right()) < x_margin) && + (abs(b1.top()-b2.bottom()) < y_margin || + abs(b2.top()-b1.bottom()) < y_margin); +} + +// Set up info for clean_part_grid_ partitions to be valid during detection +// code. +void TableFinder::InitializePartitions(ColPartitionSet** all_columns) { + FindNeighbors(); + SetPartitionSpacings(&clean_part_grid_, all_columns); + SetGlobalSpacings(&clean_part_grid_); +} + +// Set left, right and top, bottom spacings of each colpartition. +void TableFinder::SetPartitionSpacings(ColPartitionGrid* grid, + ColPartitionSet** all_columns) { + // Iterate the ColPartitions in the grid. 
+ ColPartitionGridSearch gsearch(grid); + gsearch.StartFullSearch(); + ColPartition* part = nullptr; + while ((part = gsearch.NextFullSearch()) != nullptr) { + ColPartitionSet* columns = all_columns[gsearch.GridY()]; + TBOX box = part->bounding_box(); + int y = part->MidY(); + ColPartition* left_column = columns->ColumnContaining(box.left(), y); + ColPartition* right_column = columns->ColumnContaining(box.right(), y); + // set distance from left column as space to the left + if (left_column) { + int left_space = std::max(0, box.left() - left_column->LeftAtY(y)); + part->set_space_to_left(left_space); + } + // set distance from right column as space to the right + if (right_column) { + int right_space = std::max(0, right_column->RightAtY(y) - box.right()); + part->set_space_to_right(right_space); + } + + // Look for images that may be closer. + // NOTE: used to be part_grid_, might cause issues now + ColPartitionGridSearch hsearch(grid); + hsearch.StartSideSearch(box.left(), box.bottom(), box.top()); + ColPartition* neighbor = nullptr; + while ((neighbor = hsearch.NextSideSearch(true)) != nullptr) { + if (neighbor->type() == PT_PULLOUT_IMAGE || + neighbor->type() == PT_FLOWING_IMAGE || + neighbor->type() == PT_HEADING_IMAGE) { + int right = neighbor->bounding_box().right(); + if (right < box.left()) { + int space = std::min(box.left() - right, part->space_to_left()); + part->set_space_to_left(space); + } + } + } + hsearch.StartSideSearch(box.left(), box.bottom(), box.top()); + neighbor = nullptr; + while ((neighbor = hsearch.NextSideSearch(false)) != nullptr) { + if (neighbor->type() == PT_PULLOUT_IMAGE || + neighbor->type() == PT_FLOWING_IMAGE || + neighbor->type() == PT_HEADING_IMAGE) { + int left = neighbor->bounding_box().left(); + if (left > box.right()) { + int space = std::min(left - box.right(), part->space_to_right()); + part->set_space_to_right(space); + } + } + } + + ColPartition* upper_part = part->SingletonPartner(true); + if (upper_part) { + int space 
= std::max(0, static_cast<int>(upper_part->bounding_box().bottom() - + part->bounding_box().bottom())); + part->set_space_above(space); + } else { + // TODO(nbeato): What constitutes a good value? + // 0 is the default value when not set, explicitly noting it needs to + // be something else. + part->set_space_above(INT32_MAX); + } + + ColPartition* lower_part = part->SingletonPartner(false); + if (lower_part) { + int space = std::max(0, static_cast<int>(part->bounding_box().bottom() - + lower_part->bounding_box().bottom())); + part->set_space_below(space); + } else { + // TODO(nbeato): What constitutes a good value? + // 0 is the default value when not set, explicitly noting it needs to + // be something else. + part->set_space_below(INT32_MAX); + } + } +} + +// Set spacing and closest neighbors above and below a given colpartition. +void TableFinder::SetVerticalSpacing(ColPartition* part) { + TBOX box = part->bounding_box(); + int top_range = std::min(box.top() + kMaxVerticalSpacing, static_cast<int>(tright().y())); + int bottom_range = std::max(box.bottom() - kMaxVerticalSpacing, static_cast<int>(bleft().y())); + box.set_top(top_range); + box.set_bottom(bottom_range); + + TBOX part_box = part->bounding_box(); + // Start a rect search + GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> + rectsearch(&clean_part_grid_); + rectsearch.StartRectSearch(box); + ColPartition* neighbor; + int min_space_above = kMaxVerticalSpacing; + int min_space_below = kMaxVerticalSpacing; + ColPartition* above_neighbor = nullptr; + ColPartition* below_neighbor = nullptr; + while ((neighbor = rectsearch.NextRectSearch()) != nullptr) { + if (neighbor == part) + continue; + TBOX neighbor_box = neighbor->bounding_box(); + if (neighbor_box.major_x_overlap(part_box)) { + int gap = abs(part->median_bottom() - neighbor->median_bottom()); + // If neighbor is below current partition + if (neighbor_box.top() < part_box.bottom() && + gap < min_space_below) { + min_space_below = gap; + 
below_neighbor = neighbor; + } // If neighbor is above current partition + else if (part_box.top() < neighbor_box.bottom() && + gap < min_space_above) { + min_space_above = gap; + above_neighbor = neighbor; + } + } + } + part->set_space_above(min_space_above); + part->set_space_below(min_space_below); + part->set_nearest_neighbor_above(above_neighbor); + part->set_nearest_neighbor_below(below_neighbor); +} + +// Set global spacing and x-height estimates +void TableFinder::SetGlobalSpacings(ColPartitionGrid* grid) { + STATS xheight_stats(0, kMaxVerticalSpacing + 1); + STATS width_stats(0, kMaxBlobWidth + 1); + STATS ledding_stats(0, kMaxVerticalSpacing + 1); + // Iterate the ColPartitions in the grid. + ColPartitionGridSearch gsearch(grid); + gsearch.SetUniqueMode(true); + gsearch.StartFullSearch(); + ColPartition* part = nullptr; + while ((part = gsearch.NextFullSearch()) != nullptr) { + // TODO(nbeato): HACK HACK HACK! medians are equal to partition length. + // ComputeLimits needs to get called somewhere outside of TableFinder + // to make sure the partitions are properly initialized. + // When this is called, SmoothPartitionPartners dies in an assert after + // table find runs. Alternative solution. + // part->ComputeLimits(); + if (part->IsTextType()) { + // xheight_stats.add(part->median_height(), part->boxes_count()); + // width_stats.add(part->median_width(), part->boxes_count()); + + // This loop can be removed when above issues are fixed. + // Replace it with the 2 lines commented out above. 
+ BLOBNBOX_C_IT it(part->boxes()); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + xheight_stats.add(it.data()->bounding_box().height(), 1); + width_stats.add(it.data()->bounding_box().width(), 1); + } + + ledding_stats.add(part->space_above(), 1); + ledding_stats.add(part->space_below(), 1); + } + } + // Set estimates based on median of statistics obtained + set_global_median_xheight(static_cast<int>(xheight_stats.median() + 0.5)); + set_global_median_blob_width(static_cast<int>(width_stats.median() + 0.5)); + set_global_median_ledding(static_cast<int>(ledding_stats.median() + 0.5)); + #ifndef GRAPHICS_DISABLED + if (textord_tablefind_show_stats) { + const char* kWindowName = "X-height (R), X-width (G), and ledding (B)"; + ScrollView* stats_win = MakeWindow(500, 10, kWindowName); + xheight_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::RED); + width_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::GREEN); + ledding_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::BLUE); + } + #endif // !GRAPHICS_DISABLED +} + +void TableFinder::set_global_median_xheight(int xheight) { + global_median_xheight_ = xheight; +} +void TableFinder::set_global_median_blob_width(int width) { + global_median_blob_width_ = width; +} +void TableFinder::set_global_median_ledding(int ledding) { + global_median_ledding_ = ledding; +} + +void TableFinder::FindNeighbors() { + ColPartitionGridSearch gsearch(&clean_part_grid_); + gsearch.StartFullSearch(); + ColPartition* part = nullptr; + while ((part = gsearch.NextFullSearch()) != nullptr) { + // TODO(nbeato): Rename this function, meaning is different now. + // IT is finding nearest neighbors its own way + //SetVerticalSpacing(part); + + ColPartition* upper = part->SingletonPartner(true); + if (upper) + part->set_nearest_neighbor_above(upper); + + ColPartition* lower = part->SingletonPartner(false); + if (lower) + part->set_nearest_neighbor_below(lower); + } +} + +// High level interface. 
Input is an unmarked ColPartitionGrid +// (namely, clean_part_grid_). Partitions are identified using local +// information and filter/smoothed. The function exit should contain +// a good sampling of the table partitions. +void TableFinder::MarkTablePartitions() { + MarkPartitionsUsingLocalInformation(); +#ifndef GRAPHICS_DISABLED + if (textord_tablefind_show_mark) { + ScrollView* table_win = MakeWindow(300, 300, "Initial Table Partitions"); + DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); + DisplayColPartitions(table_win, &leader_and_ruling_grid_, + ScrollView::AQUAMARINE); + } +#endif + FilterFalseAlarms(); +#ifndef GRAPHICS_DISABLED + if (textord_tablefind_show_mark) { + ScrollView* table_win = MakeWindow(600, 300, "Filtered Table Partitions"); + DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); + DisplayColPartitions(table_win, &leader_and_ruling_grid_, + ScrollView::AQUAMARINE); + } +#endif + SmoothTablePartitionRuns(); +#ifndef GRAPHICS_DISABLED + if (textord_tablefind_show_mark) { + ScrollView* table_win = MakeWindow(900, 300, "Smoothed Table Partitions"); + DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); + DisplayColPartitions(table_win, &leader_and_ruling_grid_, + ScrollView::AQUAMARINE); + } +#endif + FilterFalseAlarms(); +#ifndef GRAPHICS_DISABLED + if (textord_tablefind_show_mark || textord_show_tables) { + ScrollView* table_win = MakeWindow(900, 300, "Final Table Partitions"); + DisplayColPartitions(table_win, &clean_part_grid_, ScrollView::BLUE); + DisplayColPartitions(table_win, &leader_and_ruling_grid_, + ScrollView::AQUAMARINE); + } +#endif +} + +// These types of partitions are marked as table partitions: +// 1- Partitions that have at lease one large gap between words +// 2- Partitions that consist of only one word (no significant gap +// between components) +// 3- Partitions that vertically overlap with other partitions within the +// same column. 
+// 4- Partitions with leaders before/after them. +void TableFinder::MarkPartitionsUsingLocalInformation() { + // Iterate the ColPartitions in the grid. + GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> + gsearch(&clean_part_grid_); + gsearch.StartFullSearch(); + ColPartition* part = nullptr; + while ((part = gsearch.NextFullSearch()) != nullptr) { + if (!part->IsTextType()) // Only consider text partitions + continue; + // Only consider partitions in dominant font size or smaller + if (part->median_height() > kMaxTableCellXheight * global_median_xheight_) + continue; + // Mark partitions with a large gap, or no significant gap as + // table partitions. + // Comments: It produces several false alarms at: + // - last line of a paragraph (fixed) + // - single word section headings + // - page headers and footers + // - numbered equations + // - line drawing regions + // TODO(faisal): detect and fix above-mentioned cases + if (HasWideOrNoInterWordGap(part) || + HasLeaderAdjacent(*part)) { + part->set_table_type(); + } + } +} + +// Check if the partition has at least one large gap between words or no +// significant gap at all +bool TableFinder::HasWideOrNoInterWordGap(ColPartition* part) const { + // Should only get text partitions. + ASSERT_HOST(part->IsTextType()); + // Blob access + BLOBNBOX_CLIST* part_boxes = part->boxes(); + BLOBNBOX_C_IT it(part_boxes); + // Check if this is a relatively small partition (such as a single word) + if (part->bounding_box().width() < + kMinBoxesInTextPartition * part->median_height() && + part_boxes->length() < kMinBoxesInTextPartition) + return true; + + // Variables used to compute inter-blob spacing. + int current_x0 = -1; + int current_x1 = -1; + int previous_x1 = -1; + // Stores the maximum gap detected. + int largest_partition_gap_found = -1; + // Text partition gap limits. If this is text (and not a table), + // there should be at least one gap larger than min_gap and no gap + // larger than max_gap. 
+ const double max_gap = kMaxGapInTextPartition * part->median_height(); + const double min_gap = kMinMaxGapInTextPartition * part->median_height(); + + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + current_x0 = blob->bounding_box().left(); + current_x1 = blob->bounding_box().right(); + if (previous_x1 != -1) { + int gap = current_x0 - previous_x1; + + // TODO(nbeato): Boxes may overlap? Huh? + // For example, mag.3B 8003_033.3B.tif in UNLV data. The titles/authors + // on the top right of the page are filtered out with this line. + // Note 2: Iterating over blobs in a partition, so we are looking for + // spacing between the words. + if (gap < 0) { + // More likely case, the blobs slightly overlap. This can happen + // with diacritics (accents) or broken alphabet symbols (characters). + // Merge boxes together by taking max of right sides. + if (-gap < part->median_height() * kMaxBlobOverlapFactor) { + previous_x1 = std::max(previous_x1, current_x1); + continue; + } + // Extreme case, blobs overlap significantly in the same partition... + // This should not happen often (if at all), but it does. + // TODO(nbeato): investigate cases when this happens. + else { + // The behavior before was to completely ignore this case. + } + } + + // If a large enough gap is found, mark it as a table cell (return true) + if (gap > max_gap) + return true; + if (gap > largest_partition_gap_found) + largest_partition_gap_found = gap; + } + previous_x1 = current_x1; + } + // Since no large gap was found, return false if the partition is too + // long to be a data cell + if (part->bounding_box().width() > + kMaxBoxesInDataPartition * part->median_height() || + part_boxes->length() > kMaxBoxesInDataPartition) + return false; + + // A partition may be a single blob. In this case, it's an isolated symbol + // or non-text (such as a ruling or image). + // Detect these as table partitions? Shouldn't this be case by case? 
+ // The behavior before was to ignore this, making max_partition_gap < 0 + // and implicitly return true. Just making it explicit. + if (largest_partition_gap_found == -1) + return true; + + // return true if the maximum gap found is smaller than the minimum allowed + // max_gap in a text partition. This indicates that there is no significant + // space in the partition, hence it is likely a single word. + return largest_partition_gap_found < min_gap; +} + +// A criteria for possible tables is that a table may have leaders +// between data cells. An aggressive solution to find such tables is to +// explicitly mark partitions that have adjacent leaders. +// Note that this includes overlapping leaders. However, it does not +// include leaders in different columns on the page. +// Possible false-positive will include lists, such as a table of contents. +// As these arise, the aggressive nature of this search may need to be +// trimmed down. +bool TableFinder::HasLeaderAdjacent(const ColPartition& part) { + if (part.flow() == BTFT_LEADER) + return true; + // Search range is left and right bounded by an offset of the + // median xheight. This offset is to allow some tolerance to the + // the leaders on the page in the event that the alignment is still + // a bit off. + const TBOX& box = part.bounding_box(); + const int search_size = kAdjacentLeaderSearchPadding * global_median_xheight_; + const int top = box.top() + search_size; + const int bottom = box.bottom() - search_size; + ColPartitionGridSearch hsearch(&leader_and_ruling_grid_); + for (int direction = 0; direction < 2; ++direction) { + bool right_to_left = (direction == 0); + int x = right_to_left ? box.right() : box.left(); + hsearch.StartSideSearch(x, bottom, top); + ColPartition* leader = nullptr; + while ((leader = hsearch.NextSideSearch(right_to_left)) != nullptr) { + // The leader could be a horizontal ruling in the grid. + // Make sure it is actually a leader. 
+ if (leader->flow() != BTFT_LEADER) + continue; + // This should not happen, they are in different grids. + ASSERT_HOST(&part != leader); + // Make sure the leader shares a page column with the partition, + // otherwise we are spreading across columns. + if (!part.IsInSameColumnAs(*leader)) + break; + // There should be a significant vertical overlap + if (!leader->VSignificantCoreOverlap(part)) + continue; + // Leader passed all tests, so it is adjacent. + return true; + } + } + // No leaders are adjacent to the given partition. + return false; +} + +// Filter individual text partitions marked as table partitions +// consisting of paragraph endings, small section headings, and +// headers and footers. +void TableFinder::FilterFalseAlarms() { + FilterParagraphEndings(); + FilterHeaderAndFooter(); + // TODO(nbeato): Fully justified text as non-table? +} + +void TableFinder::FilterParagraphEndings() { + // Detect last line of paragraph + // Iterate the ColPartitions in the grid. + ColPartitionGridSearch gsearch(&clean_part_grid_); + gsearch.StartFullSearch(); + ColPartition* part = nullptr; + while ((part = gsearch.NextFullSearch()) != nullptr) { + if (part->type() != PT_TABLE) + continue; // Consider only table partitions + + // Paragraph ending should have flowing text above it. + ColPartition* upper_part = part->nearest_neighbor_above(); + if (!upper_part) + continue; + if (upper_part->type() != PT_FLOWING_TEXT) + continue; + if (upper_part->bounding_box().width() < + 2 * part->bounding_box().width()) + continue; + // Check if its the last line of a paragraph. + // In most cases, a paragraph ending should be left-aligned to text line + // above it. Sometimes, it could be a 2 line paragraph, in which case + // the line above it is indented. + // To account for that, check if the partition center is to + // the left of the one above it. 
+ int mid = (part->bounding_box().left() + part->bounding_box().right()) / 2; + int upper_mid = (upper_part->bounding_box().left() + + upper_part->bounding_box().right()) / 2; + int current_spacing = 0; // spacing of the current line to margin + int upper_spacing = 0; // spacing of the previous line to the margin + if (left_to_right_language_) { + // Left to right languages, use mid - left to figure out the distance + // the middle is from the left margin. + int left = std::min(part->bounding_box().left(), + upper_part->bounding_box().left()); + current_spacing = mid - left; + upper_spacing = upper_mid - left; + } else { + // Right to left languages, use right - mid to figure out the distance + // the middle is from the right margin. + int right = std::max(part->bounding_box().right(), + upper_part->bounding_box().right()); + current_spacing = right - mid; + upper_spacing = right - upper_mid; + } + if (current_spacing * kParagraphEndingPreviousLineRatio > upper_spacing) + continue; + + // Paragraphs should have similar fonts. + if (!part->MatchingSizes(*upper_part) || + !part->MatchingStrokeWidth(*upper_part, kStrokeWidthFractionalTolerance, + kStrokeWidthConstantTolerance)) { + continue; + } + + // The last line of a paragraph should be left aligned. + // TODO(nbeato): This would be untrue if the text was right aligned. + // How often is that? + if (part->space_to_left() > + kMaxParagraphEndingLeftSpaceMultiple * part->median_height()) + continue; + // The line above it should be right aligned (assuming justified format). + // Since we can't assume justified text, we compare whitespace to text. + // The above line should have majority spanning text (or the current + // line could have fit on the previous line). So compare + // whitespace to text. 
+ if (upper_part->bounding_box().width() < + kMinParagraphEndingTextToWhitespaceRatio * upper_part->space_to_right()) + continue; + + // Ledding above the line should be less than ledding below + if (part->space_above() >= part->space_below() || + part->space_above() > 2 * global_median_ledding_) + continue; + + // If all checks failed, it is probably text. + part->clear_table_type(); + } +} + +void TableFinder::FilterHeaderAndFooter() { + // Consider top-most text colpartition as header and bottom most as footer + ColPartition* header = nullptr; + ColPartition* footer = nullptr; + int max_top = INT32_MIN; + int min_bottom = INT32_MAX; + ColPartitionGridSearch gsearch(&clean_part_grid_); + gsearch.StartFullSearch(); + ColPartition* part = nullptr; + while ((part = gsearch.NextFullSearch()) != nullptr) { + if (!part->IsTextType()) + continue; // Consider only text partitions + int top = part->bounding_box().top(); + int bottom = part->bounding_box().bottom(); + if (top > max_top) { + max_top = top; + header = part; + } + if (bottom < min_bottom) { + min_bottom = bottom; + footer = part; + } + } + if (header) + header->clear_table_type(); + if (footer) + footer->clear_table_type(); +} + +// Mark all ColPartitions as table cells that have a table cell above +// and below them +// TODO(faisal): This is too aggressive at the moment. The method needs to +// consider spacing and alignment as well. Detection of false alarm table cells +// should also be done as part of it. +void TableFinder::SmoothTablePartitionRuns() { + // Iterate the ColPartitions in the grid. 
+ ColPartitionGridSearch gsearch(&clean_part_grid_); + gsearch.StartFullSearch(); + ColPartition* part = nullptr; + while ((part = gsearch.NextFullSearch()) != nullptr) { + if (part->type() >= PT_TABLE || part->type() == PT_UNKNOWN) + continue; // Consider only text partitions + ColPartition* upper_part = part->nearest_neighbor_above(); + ColPartition* lower_part = part->nearest_neighbor_below(); + if (!upper_part || !lower_part) + continue; + if (upper_part->type() == PT_TABLE && lower_part->type() == PT_TABLE) + part->set_table_type(); + } + + // Pass 2, do the opposite. If both the upper and lower neighbors + // exist and are not tables, this probably shouldn't be a table. + gsearch.StartFullSearch(); + part = nullptr; + while ((part = gsearch.NextFullSearch()) != nullptr) { + if (part->type() != PT_TABLE) + continue; // Consider only text partitions + ColPartition* upper_part = part->nearest_neighbor_above(); + ColPartition* lower_part = part->nearest_neighbor_below(); + + // table can't be by itself + if ((upper_part && upper_part->type() != PT_TABLE) && + (lower_part && lower_part->type() != PT_TABLE)) { + part->clear_table_type(); + } + } +} + +// Set the type of a column segment based on the ratio of table to text cells +void TableFinder::SetColumnsType(ColSegment_LIST* column_blocks) { + ColSegment_IT it(column_blocks); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColSegment* seg = it.data(); + TBOX box = seg->bounding_box(); + int num_table_cells = 0; + int num_text_cells = 0; + GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> + rsearch(&clean_part_grid_); + rsearch.SetUniqueMode(true); + rsearch.StartRectSearch(box); + ColPartition* part = nullptr; + while ((part = rsearch.NextRectSearch()) != nullptr) { + if (part->type() == PT_TABLE) { + num_table_cells++; + } else if (part->type() == PT_FLOWING_TEXT) { + num_text_cells++; + } + } + // If a column block has no text or table partition in it, it is not needed + // for 
table detection. + if (!num_table_cells && !num_text_cells) { + delete it.extract(); + } else { + seg->set_num_table_cells(num_table_cells); + seg->set_num_text_cells(num_text_cells); + // set column type based on the ratio of table to text cells + seg->set_type(); + } + } +} + +// Move column blocks to grid +void TableFinder::MoveColSegmentsToGrid(ColSegment_LIST *segments, + ColSegmentGrid *col_seg_grid) { + ColSegment_IT it(segments); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColSegment* seg = it.extract(); + col_seg_grid->InsertBBox(true, true, seg); + } +} + +// Merge column blocks if a split is detected due to the presence of a +// table. A text block is considered split if it has multiple +// neighboring blocks above/below it, and at least one of the +// neighboring blocks is of table type (has a high density of table +// partitions). In this case neighboring blocks in the direction +// (above/below) of the table block are merged with the text block. + +// Comment: This method does not handle split due to a full page table +// since table columns in this case do not have a text column on which +// split decision can be based. +void TableFinder::GridMergeColumnBlocks() { + int margin = gridsize(); + + // Iterate the Column Blocks in the grid. + GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> + gsearch(&col_seg_grid_); + gsearch.StartFullSearch(); + ColSegment* seg; + while ((seg = gsearch.NextFullSearch()) != nullptr) { + if (seg->type() != COL_TEXT) + continue; // only consider text blocks for split detection + bool neighbor_found = false; + bool modified = false; // Modified at least once + // keep expanding current box as long as neighboring table columns + // are found above or below it. 
+ do { + TBOX box = seg->bounding_box(); + // slightly expand the search region vertically + int top_range = std::min(box.top() + margin, static_cast<int>(tright().y())); + int bottom_range = std::max(box.bottom() - margin, static_cast<int>(bleft().y())); + box.set_top(top_range); + box.set_bottom(bottom_range); + neighbor_found = false; + GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> + rectsearch(&col_seg_grid_); + rectsearch.StartRectSearch(box); + ColSegment* neighbor = nullptr; + while ((neighbor = rectsearch.NextRectSearch()) != nullptr) { + if (neighbor == seg) + continue; + const TBOX& neighbor_box = neighbor->bounding_box(); + // If the neighbor box significantly overlaps with the current + // box (due to the expansion of the current box in the + // previous iteration of this loop), remove the neighbor box + // and expand the current box to include it. + if (neighbor_box.overlap_fraction(box) >= 0.9) { + seg->InsertBox(neighbor_box); + modified = true; + rectsearch.RemoveBBox(); + gsearch.RepositionIterator(); + delete neighbor; + continue; + } + // Only expand if the neighbor box is of table type + if (neighbor->type() != COL_TABLE) + continue; + // Insert the neighbor box into the current column block + if (neighbor_box.major_x_overlap(box) && + !box.contains(neighbor_box)) { + seg->InsertBox(neighbor_box); + neighbor_found = true; + modified = true; + rectsearch.RemoveBBox(); + gsearch.RepositionIterator(); + delete neighbor; + } + } + } while (neighbor_found); + if (modified) { + // Because the box has changed, it has to be removed first. + gsearch.RemoveBBox(); + col_seg_grid_.InsertBBox(true, true, seg); + gsearch.RepositionIterator(); + } + } +} + +// Group horizontally overlapping table partitions into table columns. +// TODO(faisal): This is too aggressive at the moment. The method should +// consider more attributes to group table partitions together. 
Some common +// errors are: +// 1- page number is merged with a table column above it even +// if there is a large vertical gap between them. +// 2- column headers go on to catch one of the columns arbitrarily +// 3- an isolated noise blob near page top or bottom merges with the table +// column below/above it +// 4- cells from two vertically adjacent tables merge together to make a +// single column resulting in merging of the two tables +void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) { + ColSegment_IT it(table_columns); + // Iterate the ColPartitions in the grid. + GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> + gsearch(&clean_part_grid_); + gsearch.StartFullSearch(); + ColPartition* part; + while ((part = gsearch.NextFullSearch()) != nullptr) { + if (part->inside_table_column() || part->type() != PT_TABLE) + continue; // prevent a partition to be assigned to multiple columns + const TBOX& box = part->bounding_box(); + auto* col = new ColSegment(); + col->InsertBox(box); + part->set_inside_table_column(true); + // Start a search below the current cell to find bottom neighbours + // Note: a full search will always process things above it first, so + // this should be starting at the highest cell and working its way down. 
+ GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> + vsearch(&clean_part_grid_); + vsearch.StartVerticalSearch(box.left(), box.right(), box.bottom()); + ColPartition* neighbor = nullptr; + bool found_neighbours = false; + while ((neighbor = vsearch.NextVerticalSearch(true)) != nullptr) { + // only consider neighbors not assigned to any column yet + if (neighbor->inside_table_column()) + continue; + // Horizontal lines should not break the flow + if (neighbor->IsHorizontalLine()) + continue; + // presence of a non-table neighbor marks the end of current + // table column + if (neighbor->type() != PT_TABLE) + break; + // add the neighbor partition to the table column + const TBOX& neighbor_box = neighbor->bounding_box(); + col->InsertBox(neighbor_box); + neighbor->set_inside_table_column(true); + found_neighbours = true; + } + if (found_neighbours) { + it.add_after_then_move(col); + } else { + part->set_inside_table_column(false); + delete col; + } + } +} + +// Mark regions in a column that are x-bounded by the column boundaries and +// y-bounded by the table columns' projection on the y-axis as table regions +void TableFinder::GetTableRegions(ColSegment_LIST* table_columns, + ColSegment_LIST* table_regions) { + ColSegment_IT cit(table_columns); + ColSegment_IT rit(table_regions); + // Iterate through column blocks + GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> + gsearch(&col_seg_grid_); + gsearch.StartFullSearch(); + ColSegment* part; + int page_height = tright().y() - bleft().y(); + ASSERT_HOST(page_height > 0); + // create a bool array to hold projection on y-axis + bool* table_region = new bool[page_height]; + while ((part = gsearch.NextFullSearch()) != nullptr) { + const TBOX& part_box = part->bounding_box(); + // reset the projection array + for (int i = 0; i < page_height; i++) { + table_region[i] = false; + } + // iterate through all table columns to find regions in the current + // page column block + cit.move_to_first(); + for 
(cit.mark_cycle_pt(); !cit.cycled_list(); cit.forward()) { + TBOX col_box = cit.data()->bounding_box(); + // find intersection region of table column and page column + TBOX intersection_box = col_box.intersection(part_box); + // project table column on the y-axis + for (int i = intersection_box.bottom(); i < intersection_box.top(); i++) { + table_region[i - bleft().y()] = true; + } + } + // set x-limits of table regions to page column width + TBOX current_table_box; + current_table_box.set_left(part_box.left()); + current_table_box.set_right(part_box.right()); + // go through the y-axis projection to find runs of table + // regions. Each run makes one table region. + for (int i = 1; i < page_height; i++) { + // detect start of a table region + if (!table_region[i - 1] && table_region[i]) { + current_table_box.set_bottom(i + bleft().y()); + } + // TODO(nbeato): Is it guaranteed that the last row is not a table region? + // detect end of a table region + if (table_region[i - 1] && !table_region[i]) { + current_table_box.set_top(i + bleft().y()); + if (!current_table_box.null_box()) { + auto* seg = new ColSegment(); + seg->InsertBox(current_table_box); + rit.add_after_then_move(seg); + } + } + } + } + delete[] table_region; +} + +// Merge table regions corresponding to tables spanning multiple columns if +// there is a colpartition (horizontal ruling line or normal text) that +// touches both regions. +// TODO(faisal): A rare error occurs if there are two horizontally adjacent +// tables with aligned ruling lines. In this case, line finder returns a +// single line and hence the tables get merged together +void TableFinder::GridMergeTableRegions() { + // Iterate the table regions in the grid. 
+ GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> + gsearch(&table_grid_); + gsearch.StartFullSearch(); + ColSegment* seg = nullptr; + while ((seg = gsearch.NextFullSearch()) != nullptr) { + bool neighbor_found = false; + bool modified = false; // Modified at least once + do { + // Start a rectangle search x-bounded by the image and y by the table + const TBOX& box = seg->bounding_box(); + TBOX search_region(box); + search_region.set_left(bleft().x()); + search_region.set_right(tright().x()); + neighbor_found = false; + GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> + rectsearch(&table_grid_); + rectsearch.StartRectSearch(search_region); + ColSegment* neighbor = nullptr; + while ((neighbor = rectsearch.NextRectSearch()) != nullptr) { + if (neighbor == seg) + continue; + const TBOX& neighbor_box = neighbor->bounding_box(); + // Check if a neighbor box has a large overlap with the table + // region. This may happen as a result of merging two table + // regions in the previous iteration. + if (neighbor_box.overlap_fraction(box) >= 0.9) { + seg->InsertBox(neighbor_box); + rectsearch.RemoveBBox(); + gsearch.RepositionIterator(); + delete neighbor; + modified = true; + continue; + } + // Check if two table regions belong together based on a common + // horizontal ruling line + if (BelongToOneTable(box, neighbor_box)) { + seg->InsertBox(neighbor_box); + neighbor_found = true; + modified = true; + rectsearch.RemoveBBox(); + gsearch.RepositionIterator(); + delete neighbor; + } + } + } while (neighbor_found); + if (modified) { + // Because the box has changed, it has to be removed first. + gsearch.RemoveBBox(); + table_grid_.InsertBBox(true, true, seg); + gsearch.RepositionIterator(); + } + } +} + +// Decide if two table regions belong to one table based on a common +// horizontal ruling line or another colpartition +bool TableFinder::BelongToOneTable(const TBOX &box1, const TBOX &box2) { + // Check the obvious case. 
Most likely not true because overlapping boxes + // should already be merged, but seems like a good thing to do in case things + // change. + if (box1.overlap(box2)) + return true; + // Check for ColPartitions spanning both table regions + TBOX bbox = box1.bounding_union(box2); + // Start a rect search on bbox + GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> + rectsearch(&clean_part_grid_); + rectsearch.StartRectSearch(bbox); + ColPartition* part = nullptr; + while ((part = rectsearch.NextRectSearch()) != nullptr) { + const TBOX& part_box = part->bounding_box(); + // return true if a colpartition spanning both table regions is found + if (part_box.overlap(box1) && part_box.overlap(box2) && + !part->IsImageType()) + return true; + } + return false; +} + +// Adjust table boundaries by: +// - building a tight bounding box around all ColPartitions contained in it. +// - expanding table boundaries to include all colpartitions that overlap the +// table by more than half of their area +// - expanding table boundaries to include nearby horizontal rule lines +// - expanding table vertically to include left out column headers +// TODO(faisal): Expansion of table boundaries is quite aggressive. It usually +// makes following errors: +// 1- horizontal lines consisting of underlines are included in the table if +// they are close enough +// 2- horizontal lines originating from noise tend to get merged with a table +// near the top of the page +// 3- the criteria for including horizontal lines is very generous. 
Many times +// horizontal lines separating headers and footers get merged with a +// single-column table in a multi-column page thereby including text +// from the neighboring column inside the table +// 4- the criteria for including left out column headers also tends to +// occasionally include text-lines above the tables, typically from +// table caption +void TableFinder::AdjustTableBoundaries() { + // Iterate the table regions in the grid + ColSegment_CLIST adjusted_tables; + ColSegment_C_IT it(&adjusted_tables); + ColSegmentGridSearch gsearch(&table_grid_); + gsearch.StartFullSearch(); + ColSegment* table = nullptr; + while ((table = gsearch.NextFullSearch()) != nullptr) { + const TBOX& table_box = table->bounding_box(); + TBOX grown_box = table_box; + GrowTableBox(table_box, &grown_box); + // To prevent a table from expanding again, do not insert the + // modified box back to the grid. Instead move it to a list and + // and remove it from the grid. The list is moved later back to the grid. + if (!grown_box.null_box()) { + auto* col = new ColSegment(); + col->InsertBox(grown_box); + it.add_after_then_move(col); + } + gsearch.RemoveBBox(); + delete table; + } + // clear table grid to move final tables in it + // TODO(nbeato): table_grid_ should already be empty. The above loop + // removed everything. Maybe just assert it is empty? + table_grid_.Clear(); + it.move_to_first(); + // move back final tables to table_grid_ + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColSegment* seg = it.extract(); + table_grid_.InsertBBox(true, true, seg); + } +} + +void TableFinder::GrowTableBox(const TBOX& table_box, TBOX* result_box) { + // TODO(nbeato): The growing code is a bit excessive right now. + // By removing these lines, the partitions considered need + // to have some overlap or be special cases. These lines could + // be added again once a check is put in place to make sure that + // growing tables don't stomp on a lot of non-table partitions. 
+ + // search for horizontal ruling lines within the vertical margin + // int vertical_margin = kRulingVerticalMargin * gridsize(); + TBOX search_box = table_box; + // int top = MIN(search_box.top() + vertical_margin, tright().y()); + // int bottom = MAX(search_box.bottom() - vertical_margin, bleft().y()); + // search_box.set_top(top); + // search_box.set_bottom(bottom); + + GrowTableToIncludePartials(table_box, search_box, result_box); + GrowTableToIncludeLines(table_box, search_box, result_box); + IncludeLeftOutColumnHeaders(result_box); +} + +// Grow a table by increasing the size of the box to include +// partitions with significant overlap with the table. +void TableFinder::GrowTableToIncludePartials(const TBOX& table_box, + const TBOX& search_range, + TBOX* result_box) { + // Rulings are in a different grid, so search 2 grids for rulings, text, + // and table partitions that are not entirely within the new box. + for (int i = 0; i < 2; ++i) { + ColPartitionGrid* grid = (i == 0) ? &fragmented_text_grid_ : + &leader_and_ruling_grid_; + ColPartitionGridSearch rectsearch(grid); + rectsearch.StartRectSearch(search_range); + ColPartition* part = nullptr; + while ((part = rectsearch.NextRectSearch()) != nullptr) { + // Only include text and table types. + if (part->IsImageType()) + continue; + const TBOX& part_box = part->bounding_box(); + // Include partition in the table if more than half of it + // is covered by the table + if (part_box.overlap_fraction(table_box) > kMinOverlapWithTable) { + *result_box = result_box->bounding_union(part_box); + continue; + } + } + } +} + +// Grow a table by expanding to the extents of significantly +// overlapping lines. 
+void TableFinder::GrowTableToIncludeLines(const TBOX& table_box, + const TBOX& search_range, + TBOX* result_box) { + ColPartitionGridSearch rsearch(&leader_and_ruling_grid_); + rsearch.SetUniqueMode(true); + rsearch.StartRectSearch(search_range); + ColPartition* part = nullptr; + while ((part = rsearch.NextRectSearch()) != nullptr) { + // TODO(nbeato) This should also do vertical, but column + // boundaries are breaking things. This function needs to be + // updated to allow vertical lines as well. + if (!part->IsLineType()) + continue; + // Avoid the following function call if the result of the + // function is irrelevant. + const TBOX& part_box = part->bounding_box(); + if (result_box->contains(part_box)) + continue; + // Include a partially overlapping horizontal line only if the + // extra ColPartitions that will be included due to expansion + // have large side spacing w.r.t. columns containing them. + if (HLineBelongsToTable(*part, table_box)) + *result_box = result_box->bounding_union(part_box); + // TODO(nbeato): Vertical + } +} + +// Checks whether the horizontal line belong to the table by looking at the +// side spacing of extra ColParitions that will be included in the table +// due to expansion +bool TableFinder::HLineBelongsToTable(const ColPartition& part, + const TBOX& table_box) { + if (!part.IsHorizontalLine()) + return false; + const TBOX& part_box = part.bounding_box(); + if (!part_box.major_x_overlap(table_box)) + return false; + // Do not consider top-most horizontal line since it usually + // originates from noise. + // TODO(nbeato): I had to comment this out because the ruling grid doesn't + // have neighbors solved. + // if (!part.nearest_neighbor_above()) + // return false; + const TBOX bbox = part_box.bounding_union(table_box); + // In the "unioned table" box (the table extents expanded by the line), + // keep track of how many partitions have significant padding to the left + // and right. 
If more than half of the partitions covered by the new table + // have significant spacing, the line belongs to the table and the table + // grows to include all of the partitions. + int num_extra_partitions = 0; + int extra_space_to_right = 0; + int extra_space_to_left = 0; + // Rulings are in a different grid, so search 2 grids for rulings, text, + // and table partitions that are introduced by the new box. + for (int i = 0; i < 2; ++i) { + ColPartitionGrid* grid = (i == 0) ? &clean_part_grid_ : + &leader_and_ruling_grid_; + // Start a rect search on bbox + ColPartitionGridSearch rectsearch(grid); + rectsearch.SetUniqueMode(true); + rectsearch.StartRectSearch(bbox); + ColPartition* extra_part = nullptr; + while ((extra_part = rectsearch.NextRectSearch()) != nullptr) { + // ColPartition already in table + const TBOX& extra_part_box = extra_part->bounding_box(); + if (extra_part_box.overlap_fraction(table_box) > kMinOverlapWithTable) + continue; + // Non-text ColPartitions do not contribute + if (extra_part->IsImageType()) + continue; + // Consider this partition. + num_extra_partitions++; + // presence of a table cell is a strong hint, so just increment the scores + // without looking at the spacing. 
+ if (extra_part->type() == PT_TABLE || extra_part->IsLineType()) { + extra_space_to_right++; + extra_space_to_left++; + continue; + } + int space_threshold = kSideSpaceMargin * part.median_height(); + if (extra_part->space_to_right() > space_threshold) + extra_space_to_right++; + if (extra_part->space_to_left() > space_threshold) + extra_space_to_left++; + } + } + // tprintf("%d %d %d\n", + // num_extra_partitions,extra_space_to_right,extra_space_to_left); + return (extra_space_to_right > num_extra_partitions / 2) || + (extra_space_to_left > num_extra_partitions / 2); +} + +// Look for isolated column headers above the given table box and +// include them in the table +void TableFinder::IncludeLeftOutColumnHeaders(TBOX* table_box) { + // Start a search above the current table to look for column headers + ColPartitionGridSearch vsearch(&clean_part_grid_); + vsearch.StartVerticalSearch(table_box->left(), table_box->right(), + table_box->top()); + ColPartition* neighbor = nullptr; + ColPartition* previous_neighbor = nullptr; + while ((neighbor = vsearch.NextVerticalSearch(false)) != nullptr) { + // Max distance to find a table heading. 
+ const int max_distance = kMaxColumnHeaderDistance * + neighbor->median_height(); + int table_top = table_box->top(); + const TBOX& box = neighbor->bounding_box(); + // Do not continue if the next box is way above + if (box.bottom() - table_top > max_distance) + break; + // Unconditionally include partitions of type TABLE or LINE + // TODO(faisal): add some reasonable conditions here + if (neighbor->type() == PT_TABLE || neighbor->IsLineType()) { + table_box->set_top(box.top()); + previous_neighbor = nullptr; + continue; + } + // If there are two text partitions, one above the other, without a table + // cell on their left or right side, consider them a barrier and quit + if (previous_neighbor == nullptr) { + previous_neighbor = neighbor; + } else { + const TBOX& previous_box = previous_neighbor->bounding_box(); + if (!box.major_y_overlap(previous_box)) + break; + } + } +} + +// Remove false alarms consisting of a single column based on their +// projection on the x-axis. Projection of a real table on the x-axis +// should have at least one zero-valley larger than the global median +// x-height of the page. 
+void TableFinder::DeleteSingleColumnTables() { + int page_width = tright().x() - bleft().x(); + ASSERT_HOST(page_width > 0); + // create an integer array to hold projection on x-axis + int* table_xprojection = new int[page_width]; + // Iterate through all tables in the table grid + GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> + table_search(&table_grid_); + table_search.StartFullSearch(); + ColSegment* table; + while ((table = table_search.NextFullSearch()) != nullptr) { + TBOX table_box = table->bounding_box(); + // reset the projection array + for (int i = 0; i < page_width; i++) { + table_xprojection[i] = 0; + } + // Start a rect search on table_box + GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> + rectsearch(&clean_part_grid_); + rectsearch.SetUniqueMode(true); + rectsearch.StartRectSearch(table_box); + ColPartition* part; + while ((part = rectsearch.NextRectSearch()) != nullptr) { + if (!part->IsTextType()) + continue; // Do not consider non-text partitions + if (part->flow() == BTFT_LEADER) + continue; // Assume leaders are in tables + TBOX part_box = part->bounding_box(); + // Do not consider partitions partially covered by the table + if (part_box.overlap_fraction(table_box) < kMinOverlapWithTable) + continue; + BLOBNBOX_CLIST* part_boxes = part->boxes(); + BLOBNBOX_C_IT pit(part_boxes); + + // Make sure overlapping blobs don't artificially inflate the number + // of rows in the table. This happens frequently with things such as + // decimals and split characters. Do this by assuming the column + // partition is sorted mostly left to right and just clip + // bounding boxes by the previous box's extent. 
+ int next_position_to_write = 0; + + for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) { + BLOBNBOX *pblob = pit.data(); + // ignore blob height for the purpose of projection since we + // are only interested in finding valleys + int xstart = pblob->bounding_box().left(); + int xend = pblob->bounding_box().right(); + + xstart = std::max(xstart, next_position_to_write); + for (int i = xstart; i < xend; i++) + table_xprojection[i - bleft().x()]++; + next_position_to_write = xend; + } + } + // Find largest valley between two reasonable peaks in the table + if (!GapInXProjection(table_xprojection, page_width)) { + table_search.RemoveBBox(); + delete table; + } + } + delete[] table_xprojection; +} + +// Return true if at least one gap larger than the global x-height +// exists in the horizontal projection +bool TableFinder::GapInXProjection(int* xprojection, int length) { + // Find peak value of the histogram + int peak_value = 0; + for (int i = 0; i < length; i++) { + if (xprojection[i] > peak_value) { + peak_value = xprojection[i]; + } + } + // Peak value represents the maximum number of horizontally + // overlapping colpartitions, so this can be considered as the + // number of rows in the table + if (peak_value < kMinRowsInTable) + return false; + double projection_threshold = kSmallTableProjectionThreshold * peak_value; + if (peak_value >= kLargeTableRowCount) + projection_threshold = kLargeTableProjectionThreshold * peak_value; + // Threshold the histogram + for (int i = 0; i < length; i++) { + xprojection[i] = (xprojection[i] >= projection_threshold) ? 
1 : 0; + } + // Find the largest run of zeros between two ones + int largest_gap = 0; + int run_start = -1; + for (int i = 1; i < length; i++) { + // detect start of a run of zeros + if (xprojection[i - 1] && !xprojection[i]) { + run_start = i; + } + // detect end of a run of zeros and update the value of largest gap + if (run_start != -1 && !xprojection[i - 1] && xprojection[i]) { + int gap = i - run_start; + if (gap > largest_gap) + largest_gap = gap; + run_start = -1; + } + } + return largest_gap > kMaxXProjectionGapFactor * global_median_xheight_; +} + +// Given the location of a table "guess", try to overlay a cellular +// grid in the location, adjusting the boundaries. +// TODO(nbeato): Falsely introduces: +// -headers/footers (not any worse, too much overlap destroys cells) +// -page numbers (not worse, included because maximize margins) +// -equations (nicely fit into a celluar grid, but more sparsely) +// -figures (random text box, also sparse) +// -small left-aligned text areas with overlapping positioned whitespace +// (rejected before) +// Overall, this just needs some more work. +void TableFinder::RecognizeTables() { + ScrollView* table_win = nullptr; +#ifndef GRAPHICS_DISABLED + if (textord_show_tables) { + table_win = MakeWindow(0, 0, "Table Structure"); + DisplayColPartitions(table_win, &fragmented_text_grid_, + ScrollView::BLUE, ScrollView::LIGHT_BLUE); + // table_grid_.DisplayBoxes(table_win); + } +#endif + + TableRecognizer recognizer; + recognizer.Init(); + recognizer.set_line_grid(&leader_and_ruling_grid_); + recognizer.set_text_grid(&fragmented_text_grid_); + recognizer.set_max_text_height(global_median_xheight_ * 2.0); + recognizer.set_min_height(1.5 * gridheight()); + // Loop over all of the tables and try to fit them. + // Store the good tables here. 
+ ColSegment_CLIST good_tables; + ColSegment_C_IT good_it(&good_tables); + + ColSegmentGridSearch gsearch(&table_grid_); + gsearch.StartFullSearch(); + ColSegment* found_table = nullptr; + while ((found_table = gsearch.NextFullSearch()) != nullptr) { + gsearch.RemoveBBox(); + + // The goal is to make the tables persistent in a list. + // When that happens, this will move into the search loop. + const TBOX& found_box = found_table->bounding_box(); + StructuredTable* table_structure = recognizer.RecognizeTable(found_box); + + // Process a table. Good tables are inserted into the grid again later on + // We can't change boxes in the grid while it is running a search. + if (table_structure != nullptr) { +#ifndef GRAPHICS_DISABLED + if (textord_show_tables) { + table_structure->Display(table_win, ScrollView::LIME_GREEN); + } +#endif + found_table->set_bounding_box(table_structure->bounding_box()); + delete table_structure; + good_it.add_after_then_move(found_table); + } else { + delete found_table; + } + } + // TODO(nbeato): MERGE!! There is awesome info now available for merging. + + // At this point, the grid is empty. We can safely insert the good tables + // back into grid. + for (good_it.mark_cycle_pt(); !good_it.cycled_list(); good_it.forward()) + table_grid_.InsertBBox(true, true, good_it.extract()); +} + +#ifndef GRAPHICS_DISABLED + +// Displays the column segments in some window. 
+void TableFinder::DisplayColSegments(ScrollView* win, + ColSegment_LIST *segments, + ScrollView::Color color) { + win->Pen(color); + win->Brush(ScrollView::NONE); + ColSegment_IT it(segments); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + ColSegment* col = it.data(); + const TBOX& box = col->bounding_box(); + int left_x = box.left(); + int right_x = box.right(); + int top_y = box.top(); + int bottom_y = box.bottom(); + win->Rectangle(left_x, bottom_y, right_x, top_y); + } + win->UpdateWindow(); +} + +// Displays the colpartitions using a new coloring on an existing window. +// Note: This method is only for debug purpose during development and +// would not be part of checked in code +void TableFinder::DisplayColPartitions(ScrollView* win, + ColPartitionGrid* grid, + ScrollView::Color default_color, + ScrollView::Color table_color) { + ScrollView::Color color = default_color; + // Iterate the ColPartitions in the grid. + GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> + gsearch(grid); + gsearch.StartFullSearch(); + ColPartition* part = nullptr; + while ((part = gsearch.NextFullSearch()) != nullptr) { + color = default_color; + if (part->type() == PT_TABLE) + color = table_color; + + const TBOX& box = part->bounding_box(); + int left_x = box.left(); + int right_x = box.right(); + int top_y = box.top(); + int bottom_y = box.bottom(); + win->Brush(ScrollView::NONE); + win->Pen(color); + win->Rectangle(left_x, bottom_y, right_x, top_y); + } + win->UpdateWindow(); +} + +void TableFinder::DisplayColPartitions(ScrollView* win, + ColPartitionGrid* grid, + ScrollView::Color default_color) { + DisplayColPartitions(win, grid, default_color, ScrollView::YELLOW); +} + +void TableFinder::DisplayColPartitionConnections( + ScrollView* win, + ColPartitionGrid* grid, + ScrollView::Color color) { + // Iterate the ColPartitions in the grid. 
+ GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> + gsearch(grid); + gsearch.StartFullSearch(); + ColPartition* part = nullptr; + while ((part = gsearch.NextFullSearch()) != nullptr) { + const TBOX& box = part->bounding_box(); + int left_x = box.left(); + int right_x = box.right(); + int top_y = box.top(); + int bottom_y = box.bottom(); + + ColPartition* upper_part = part->nearest_neighbor_above(); + if (upper_part) { + const TBOX& upper_box = upper_part->bounding_box(); + int mid_x = (left_x + right_x) / 2; + int mid_y = (top_y + bottom_y) / 2; + int other_x = (upper_box.left() + upper_box.right()) / 2; + int other_y = (upper_box.top() + upper_box.bottom()) / 2; + win->Brush(ScrollView::NONE); + win->Pen(color); + win->Line(mid_x, mid_y, other_x, other_y); + } + ColPartition* lower_part = part->nearest_neighbor_below(); + if (lower_part) { + const TBOX& lower_box = lower_part->bounding_box(); + int mid_x = (left_x + right_x) / 2; + int mid_y = (top_y + bottom_y) / 2; + int other_x = (lower_box.left() + lower_box.right()) / 2; + int other_y = (lower_box.top() + lower_box.bottom()) / 2; + win->Brush(ScrollView::NONE); + win->Pen(color); + win->Line(mid_x, mid_y, other_x, other_y); + } + } + win->UpdateWindow(); +} + +#endif + +// Merge all colpartitions in table regions to make them a single +// colpartition and revert types of isolated table cells not +// assigned to any table to their original types. 
+void TableFinder::MakeTableBlocks(ColPartitionGrid* grid, + ColPartitionSet** all_columns, + WidthCallback width_cb) { + // Since we have table blocks already, remove table tags from all + // colpartitions + GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> + gsearch(grid); + gsearch.StartFullSearch(); + ColPartition* part = nullptr; + + while ((part = gsearch.NextFullSearch()) != nullptr) { + if (part->type() == PT_TABLE) { + part->clear_table_type(); + } + } + // Now make a single colpartition out of each table block and remove + // all colpartitions contained within a table + GridSearch<ColSegment, ColSegment_CLIST, ColSegment_C_IT> + table_search(&table_grid_); + table_search.StartFullSearch(); + ColSegment* table; + while ((table = table_search.NextFullSearch()) != nullptr) { + const TBOX& table_box = table->bounding_box(); + // Start a rect search on table_box + GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> + rectsearch(grid); + rectsearch.StartRectSearch(table_box); + ColPartition* part; + ColPartition* table_partition = nullptr; + while ((part = rectsearch.NextRectSearch()) != nullptr) { + // Do not consider image partitions + if (!part->IsTextType()) + continue; + TBOX part_box = part->bounding_box(); + // Include partition in the table if more than half of it + // is covered by the table + if (part_box.overlap_fraction(table_box) > kMinOverlapWithTable) { + rectsearch.RemoveBBox(); + if (table_partition) { + table_partition->Absorb(part, width_cb); + } else { + table_partition = part; + } + } + } + // Insert table colpartition back to part_grid_ + if (table_partition) { + // To match the columns used when transforming to blocks, the new table + // partition must have its first and last column set at the grid y that + // corresponds to its bottom. 
+ const TBOX& table_box = table_partition->bounding_box(); + int grid_x, grid_y; + grid->GridCoords(table_box.left(), table_box.bottom(), &grid_x, &grid_y); + table_partition->SetPartitionType(resolution_, all_columns[grid_y]); + table_partition->set_table_type(); + table_partition->set_blob_type(BRT_TEXT); + table_partition->set_flow(BTFT_CHAIN); + table_partition->SetBlobTypes(); + grid->InsertBBox(true, true, table_partition); + } + } +} + +//////// ColSegment code +//////// +ColSegment::ColSegment() + : ELIST_LINK(), + num_table_cells_(0), + num_text_cells_(0), + type_(COL_UNKNOWN) { +} + +// Provides a color for BBGrid to draw the rectangle. +ScrollView::Color ColSegment::BoxColor() const { + const ScrollView::Color kBoxColors[PT_COUNT] = { + ScrollView::YELLOW, + ScrollView::BLUE, + ScrollView::YELLOW, + ScrollView::MAGENTA, + }; + return kBoxColors[type_]; +} + +// Insert a box into this column segment +void ColSegment::InsertBox(const TBOX& other) { + bounding_box_ = bounding_box_.bounding_union(other); +} + +// Set column segment type based on the ratio of text and table partitions +// in it. +void ColSegment::set_type() { + if (num_table_cells_ > kTableColumnThreshold * num_text_cells_) + type_ = COL_TABLE; + else if (num_text_cells_ > num_table_cells_) + type_ = COL_TEXT; + else + type_ = COL_MIXED; +} + +} // namespace tesseract. diff --git a/tesseract/src/textord/tablefind.h b/tesseract/src/textord/tablefind.h new file mode 100644 index 00000000..dc6ff932 --- /dev/null +++ b/tesseract/src/textord/tablefind.h @@ -0,0 +1,427 @@ +/////////////////////////////////////////////////////////////////////// +// File: tablefind.h +// Description: Helper classes to find tables from ColPartitions. +// Author: Faisal Shafait (faisal.shafait@dfki.de) +// +// (C) Copyright 2009, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_TEXTORD_TABLEFIND_H_ +#define TESSERACT_TEXTORD_TABLEFIND_H_ + +#include "colpartitiongrid.h" +#include "elst.h" +#include "rect.h" + +namespace tesseract { + +// Possible types for a column segment. +enum ColSegType { + COL_UNKNOWN, + COL_TEXT, + COL_TABLE, + COL_MIXED, + COL_COUNT +}; + +class ColPartitionSet; + +// ColSegment holds rectangular blocks that represent segmentation of a page +// into regions containing single column text/table. +class ColSegment; +ELISTIZEH(ColSegment) +CLISTIZEH(ColSegment) + +class ColSegment : public ELIST_LINK { + public: + ColSegment(); + ~ColSegment() = default; + + // Simple accessors and mutators + const TBOX& bounding_box() const { + return bounding_box_; + } + + void set_top(int y) { + bounding_box_.set_top(y); + } + + void set_bottom(int y) { + bounding_box_.set_bottom(y); + } + + void set_left(int x) { + bounding_box_.set_left(x); + } + + void set_right(int x) { + bounding_box_.set_right(x); + } + + void set_bounding_box(const TBOX& other) { + bounding_box_ = other; + } + + int get_num_table_cells() const { + return num_table_cells_; + } + + // set the number of table colpartitions covered by the bounding_box_ + void set_num_table_cells(int n) { + num_table_cells_ = n; + } + + int get_num_text_cells() const { + return num_text_cells_; + } + + // set the number of text colpartitions covered by the bounding_box_ + void set_num_text_cells(int n) { + num_text_cells_ = n; + } + + ColSegType type() const { + return type_; 
+ } + + // set the type of the block based on the ratio of table to text + // colpartitions covered by it. + void set_type(); + + // Provides a color for BBGrid to draw the rectangle. + ScrollView::Color BoxColor() const; + + // Insert a rectangle into bounding_box_ + void InsertBox(const TBOX& other); + + private: + TBOX bounding_box_; // bounding box + int num_table_cells_; + int num_text_cells_; + ColSegType type_; +}; + +// Typedef BBGrid of ColSegments +using ColSegmentGrid = BBGrid<ColSegment, + ColSegment_CLIST, + ColSegment_C_IT>; +using ColSegmentGridSearch = GridSearch<ColSegment, + ColSegment_CLIST, + ColSegment_C_IT>; + +// TableFinder is a utility class to find a set of tables given a set of +// ColPartitions and Columns. The TableFinder will mark candidate ColPartitions +// based on research in "Table Detection in Heterogeneous Documents". +// Usage flow is as follows: +// TableFinder finder; +// finder.InsertCleanPartitions(/* grid info */) +// finder.LocateTables(/* ColPartitions and Columns */); +// finder.Update TODO(nbeato) +class TESS_API TableFinder { + public: + // Constructor is simple initializations + TableFinder(); + ~TableFinder(); + + // Set the resolution of the connected components in ppi. + void set_resolution(int resolution) { + resolution_ = resolution; + } + // Change the reading order. Initially it is left to right. + void set_left_to_right_language(bool order); + + // Initialize + void Init(int grid_size, const ICOORD& bottom_left, const ICOORD& top_right); + + // Copy cleaned partitions from ColumnFinder's part_grid_ to this + // clean_part_grid_ and insert dot-like noise into period_grid_. + // It resizes the grids in this object to the dimensions of grid. + void InsertCleanPartitions(ColPartitionGrid* grid, TO_BLOCK* block); + + // High level function to perform table detection + // Finds tables and updates the grid object with new partitions for the + // tables. The columns and width callbacks are used to merge tables. 
+ // The reskew argument is only used to write the tables to the out.png + // if that feature is enabled. + void LocateTables(ColPartitionGrid* grid, + ColPartitionSet** columns, + WidthCallback width_cb, + const FCOORD& reskew); + + protected: + // Access for the grid dimensions. + // The results will not be correct until InsertCleanPartitions + // has been called. The values are taken from the grid passed as an argument + // to that function. + int gridsize() const; + int gridwidth() const; + int gridheight() const; + const ICOORD& bleft() const; + const ICOORD& tright() const; + + // Makes a window for debugging, see BBGrid + ScrollView* MakeWindow(int x, int y, const char* window_name); + + //////// Functions to insert objects from the grid into the table finder. + //////// In all cases, ownership is transferred to the table finder. + // Inserts text into the table finder. + void InsertTextPartition(ColPartition* part); + void InsertFragmentedTextPartition(ColPartition* part); + void InsertLeaderPartition(ColPartition* part); + void InsertRulingPartition(ColPartition* part); + void InsertImagePartition(ColPartition* part); + void SplitAndInsertFragmentedTextPartition(ColPartition* part); + bool AllowTextPartition(const ColPartition& part) const; + bool AllowBlob(const BLOBNBOX& blob) const; + + //////// Functions that manipulate ColPartitions in the part_grid_ ///// + //////// to find tables. + //////// + + // Utility function to move segments to col_seg_grid + // Note: Move includes ownership, + // so segments will be be owned by col_seg_grid + void MoveColSegmentsToGrid(ColSegment_LIST* segments, + ColSegmentGrid* col_seg_grid); + + //////// Set up code to run during table detection to correctly + //////// initialize variables on column partitions that are used later. + //////// + + // Initialize the grid and partitions + void InitializePartitions(ColPartitionSet** all_columns); + + // Set left, right and top, bottom spacings of each colpartition. 
+ // Left/right spacings are w.r.t the column boundaries + // Top/bottom spacings are w.r.t. previous and next colpartitions + static void SetPartitionSpacings(ColPartitionGrid* grid, + ColPartitionSet** all_columns); + + // Set spacing and closest neighbors above and below a given colpartition. + void SetVerticalSpacing(ColPartition* part); + + // Set global spacing estimates. This function is dependent on the + // partition spacings. So make sure SetPartitionSpacings is called + // on the same grid before this. + void SetGlobalSpacings(ColPartitionGrid* grid); + // Access to the global median xheight. The xheight is the height + // of a lowercase 'x' character on the page. This can be viewed as the + // average height of a lowercase letter in a textline. As a result + // it is used to make assumptions about spacing between words and + // table cells. + void set_global_median_xheight(int xheight); + // Access to the global median blob width. The width is useful + // when deciding if a partition is noise. + void set_global_median_blob_width(int width); + // Access to the global median ledding. The ledding is the distance between + // two adjacent text lines. This value can be used to get a rough estimate + // for the amount of space between two lines of text. As a result, it + // is used to calculate appropriate spacing between adjacent rows of text. + void set_global_median_ledding(int ledding); + + // Updates the nearest neighbors for each ColPartition in clean_part_grid_. + // The neighbors are most likely SingletonPartner calls after the neighbors + // are assigned. This is hear until it is decided to remove the + // nearest_neighbor code in ColPartition + void FindNeighbors(); + + //////// Functions to mark candidate column partitions as tables. + //////// Tables are marked as described in + //////// Table Detection in Heterogeneous Documents (2010, Shafait & Smith) + //////// + + // High level function to mark partitions as table rows/cells. 
+ // When this function is done, the column partitions in clean_part_grid_ + // should mostly be marked as tables. + void MarkTablePartitions(); + // Marks partitions given a local view of a single partition + void MarkPartitionsUsingLocalInformation(); + /////// Heuristics for local marking + // Check if the partition has at least one large gap between words or no + // significant gap at all + // TODO(nbeato): Make const, prevented because blobnbox array access + bool HasWideOrNoInterWordGap(ColPartition* part) const; + // Checks if a partition is adjacent to leaders on the page + bool HasLeaderAdjacent(const ColPartition& part); + // Filter individual text partitions marked as table partitions + // consisting of paragraph endings, small section headings, and + // headers and footers. + void FilterFalseAlarms(); + void FilterParagraphEndings(); + void FilterHeaderAndFooter(); + // Mark all ColPartitions as table cells that have a table cell above + // and below them + void SmoothTablePartitionRuns(); + + //////// Functions to create bounding boxes (ColSegment) objects for + //////// the columns on the page. The columns are not necessarily + //////// vertical lines, meaning if tab stops strongly suggests that + //////// a column changes horizontal position, as in the case below, + //////// The ColSegment objects will respect that after processing. + //////// + //////// _____________ + //////// Ex. | | | + //////// |_____|______| 5 boxes: 2 on this line + //////// | | | | 3 on this line + //////// |___|____|___| + //////// + + // Get Column segments from best_columns_ + void GetColumnBlocks(ColPartitionSet** columns, + ColSegment_LIST *col_segments); + + // Group Column segments into consecutive single column regions. 
+ void GroupColumnBlocks(ColSegment_LIST *current_segments, + ColSegment_LIST *col_segments); + + // Check if two boxes are consecutive within the same column + bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2); + + // Set the ratio of candidate table partitions in each column + void SetColumnsType(ColSegment_LIST* col_segments); + + // Merge Column Blocks that were split due to the presence of a table + void GridMergeColumnBlocks(); + + //////// Functions to turn marked ColPartitions into candidate tables + //////// using a modified T-Recs++ algorithm described in + //////// Applying The T-Recs Table Recognition System + //////// To The Business Letter Domain (2001, Kieninger & Dengel) + //////// + + // Merge partititons cells into table columns + // Differs from paper by just looking at marked table partitions + // instead of similarity metric. + // Modified section 4.1 of paper. + void GetTableColumns(ColSegment_LIST *table_columns); + + // Finds regions within a column that potentially contain a table. + // Ie, the table columns from GetTableColumns are turned into boxes + // that span the entire page column (using ColumnBlocks found in + // earlier functions) in the x direction and the min/max extent of + // overlapping table columns in the y direction. + // Section 4.2 of paper. + void GetTableRegions(ColSegment_LIST *table_columns, + ColSegment_LIST *table_regions); + + + //////// Functions to "patch up" found tables + //////// + + // Merge table regions corresponding to tables spanning multiple columns + void GridMergeTableRegions(); + bool BelongToOneTable(const TBOX &box1, const TBOX &box2); + + // Adjust table boundaries by building a tight bounding box around all + // ColPartitions contained in it. + void AdjustTableBoundaries(); + + // Grows a table to include partitions that are partially covered + // by the table. This includes lines and text. It does not include + // noise or images. + // On entry, result_box is the minimum size of the result. 
The results of the + // function will union the actual result with result_box. + void GrowTableBox(const TBOX& table_box, TBOX* result_box); + // Grow a table by increasing the size of the box to include + // partitions with significant overlap with the table. + void GrowTableToIncludePartials(const TBOX& table_box, + const TBOX& search_range, + TBOX* result_box); + // Grow a table by expanding to the extents of significantly + // overlapping lines. + void GrowTableToIncludeLines(const TBOX& table_box, const TBOX& search_range, + TBOX* result_box); + // Checks whether the horizontal line belong to the table by looking at the + // side spacing of extra ColParitions that will be included in the table + // due to expansion + bool HLineBelongsToTable(const ColPartition& part, const TBOX& table_box); + + // Look for isolated column headers above the given table box and + // include them in the table + void IncludeLeftOutColumnHeaders(TBOX* table_box); + + // Remove false alarms consisting of a single column + void DeleteSingleColumnTables(); + + // Return true if at least one gap larger than the global x-height + // exists in the horizontal projection + bool GapInXProjection(int* xprojection, int length); + + //////// Recognize the tables. + //////// + // This function will run the table recognizer and try to find better + // bounding boxes. The structures of the tables never leave this function + // right now. It just tries to prune and merge tables based on info it + // has available. + void RecognizeTables(); + + //////// Debugging functions. Render different structures to GUI + //////// for visual debugging / intuition. + //////// + + // Displays Colpartitions marked as table row. Overlays them on top of + // part_grid_. + void DisplayColSegments(ScrollView* win, ColSegment_LIST *cols, + ScrollView::Color color); + + // Displays the colpartitions using a new coloring on an existing window. 
+ // Note: This method is only for debug purpose during development and + // would not be part of checked in code + void DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid, + ScrollView::Color text_color, + ScrollView::Color table_color); + void DisplayColPartitions(ScrollView* win, ColPartitionGrid* grid, + ScrollView::Color default_color); + void DisplayColPartitionConnections(ScrollView* win, + ColPartitionGrid* grid, + ScrollView::Color default_color); + + // Merge all colpartitions in table regions to make them a single + // colpartition and revert types of isolated table cells not + // assigned to any table to their original types. + void MakeTableBlocks(ColPartitionGrid* grid, + ColPartitionSet** columns, + WidthCallback width_cb); + + ///////////////////////////////////////////////// + // Useful objects used during table find process. + ///////////////////////////////////////////////// + // Resolution of the connected components in ppi. + int resolution_; + // Estimate of median x-height over the page + int global_median_xheight_; + // Estimate of the median blob width on the page + int global_median_blob_width_; + // Estimate of median leading on the page + int global_median_ledding_; + // Grid to hold cleaned colpartitions after removing all + // colpartitions that consist of only noise blobs, and removing + // noise blobs from remaining colpartitions. + ColPartitionGrid clean_part_grid_; + // Grid contains the leaders and ruling lines. + ColPartitionGrid leader_and_ruling_grid_; + // Grid contains the broken down column partitions. It can be thought + // of as a "word" grid. However, it usually doesn't break apart text lines. + // It does break apart table data (most of the time). + ColPartitionGrid fragmented_text_grid_; + // Grid of page column blocks + ColSegmentGrid col_seg_grid_; + // Grid of detected tables + ColSegmentGrid table_grid_; + // The reading order of text. Defaults to true, for languages such as English. 
+ bool left_to_right_language_; +}; + +} // namespace tesseract. + +#endif // TESSERACT_TEXTORD_TABLEFIND_H_ diff --git a/tesseract/src/textord/tablerecog.cpp b/tesseract/src/textord/tablerecog.cpp new file mode 100644 index 00000000..af565891 --- /dev/null +++ b/tesseract/src/textord/tablerecog.cpp @@ -0,0 +1,1067 @@ +/////////////////////////////////////////////////////////////////////// +// File: tablerecog.cpp +// Description: Helper class to help structure table areas. Given an bounding +// box from TableFinder, the TableRecognizer should give a +// StructuredTable (maybe a list in the future) of "good" tables +// in that area. +// Author: Nicholas Beato +// Created: Friday, Aug. 20, 2010 +// +// (C) Copyright 2009, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "tablerecog.h" + +#include <algorithm> + +namespace tesseract { + +// The amount of space required between the ColPartitions in 2 columns +// of a non-lined table as a multiple of the median width. +const double kHorizontalSpacing = 0.30; +// The amount of space required between the ColPartitions in 2 rows +// of a non-lined table as multiples of the median height. +const double kVerticalSpacing = -0.2; +// The number of cells that the grid lines may intersect. +// See FindCellSplitLocations for explanation. 
+const int kCellSplitRowThreshold = 0; +const int kCellSplitColumnThreshold = 0; +// For "lined tables", the number of required lines. Currently a guess. +const int kLinedTableMinVerticalLines = 3; +const int kLinedTableMinHorizontalLines = 3; +// Number of columns required, as a fraction of the most columns found. +// None of these are tweaked at all. +const double kRequiredColumns = 0.7; +// The tolerance for comparing margins of potential tables. +const double kMarginFactor = 1.1; +// The first and last row should be consistent cell height. +// This factor is the first and last row cell height max. +const double kMaxRowSize = 2.5; +// Number of filled columns required to form a strong table row. +// For small tables, this is an absolute number. +const double kGoodRowNumberOfColumnsSmall[] = { 2, 2, 2, 2, 2, 3, 3 }; +const int kGoodRowNumberOfColumnsSmallSize = + sizeof(kGoodRowNumberOfColumnsSmall) / sizeof(double) - 1; +// For large tables, it is a relative number +const double kGoodRowNumberOfColumnsLarge = 0.7; +// The amount of area that must be covered in a cell by ColPartitions to +// be considered "filled" +const double kMinFilledArea = 0.35; + +//////// +//////// StructuredTable Class +//////// + +StructuredTable::StructuredTable() + : text_grid_(nullptr), + line_grid_(nullptr), + is_lined_(false), + space_above_(0), + space_below_(0), + space_left_(0), + space_right_(0), + median_cell_height_(0), + median_cell_width_(0), + max_text_height_(INT32_MAX) { +} + +void StructuredTable::Init() { +} + +void StructuredTable::set_text_grid(ColPartitionGrid* text_grid) { + text_grid_ = text_grid; +} +void StructuredTable::set_line_grid(ColPartitionGrid* line_grid) { + line_grid_ = line_grid; +} +void StructuredTable::set_max_text_height(int height) { + max_text_height_ = height; +} +bool StructuredTable::is_lined() const { + return is_lined_; +} +int StructuredTable::row_count() const { + return cell_y_.size() == 0 ? 
0 : cell_y_.size() - 1; +} +int StructuredTable::column_count() const { + return cell_x_.size() == 0 ? 0 : cell_x_.size() - 1; +} +int StructuredTable::cell_count() const { + return row_count() * column_count(); +} +void StructuredTable::set_bounding_box(const TBOX& box) { + bounding_box_ = box; +} +const TBOX& StructuredTable::bounding_box() const { + return bounding_box_; +} +int StructuredTable::median_cell_height() { + return median_cell_height_; +} +int StructuredTable::median_cell_width() { + return median_cell_width_; +} +int StructuredTable::row_height(int row) const { + ASSERT_HOST(0 <= row && row < row_count()); + return cell_y_[row + 1] - cell_y_[row]; +} +int StructuredTable::column_width(int column) const { + ASSERT_HOST(0 <= column && column < column_count()); + return cell_x_[column + 1] - cell_x_[column]; +} +int StructuredTable::space_above() const { + return space_above_; +} +int StructuredTable::space_below() const { + return space_below_; +} + +// At this point, we know that the lines are contained +// by the box (by FindLinesBoundingBox). +// So try to find the cell structure and make sure it works out. +// The assumption is that all lines span the table. If this +// assumption fails, the VerifyLinedTable method will +// abort the lined table. The TableRecognizer will fall +// back on FindWhitespacedStructure. +bool StructuredTable::FindLinedStructure() { + ClearStructure(); + + // Search for all of the lines in the current box. + // Update the cellular structure with the exact lines. + ColPartitionGridSearch box_search(line_grid_); + box_search.SetUniqueMode(true); + box_search.StartRectSearch(bounding_box_); + ColPartition* line = nullptr; + + while ((line = box_search.NextRectSearch()) != nullptr) { + if (line->IsHorizontalLine()) + cell_y_.push_back(line->MidY()); + if (line->IsVerticalLine()) + cell_x_.push_back(line->MidX()); + } + + // HasSignificantLines should guarantee cells. 
+ // Because that code is a different class, just gracefully + // return false. This could be an assert. + if (cell_x_.size() < 3 || cell_y_.size() < 3) + return false; + + cell_x_.sort(); + cell_y_.sort(); + + // Remove duplicates that may have occurred due to split lines. + cell_x_.compact_sorted(); + cell_y_.compact_sorted(); + + // The border should be the extents of line boxes, not middle. + cell_x_[0] = bounding_box_.left(); + cell_x_[cell_x_.size() - 1] = bounding_box_.right(); + cell_y_[0] = bounding_box_.bottom(); + cell_y_[cell_y_.size() - 1] = bounding_box_.top(); + + // Remove duplicates that may have occurred due to moving the borders. + cell_x_.compact_sorted(); + cell_y_.compact_sorted(); + + CalculateMargins(); + CalculateStats(); + is_lined_ = VerifyLinedTableCells(); + return is_lined_; +} + +// Finds the cellular structure given a particular box. +bool StructuredTable::FindWhitespacedStructure() { + ClearStructure(); + FindWhitespacedColumns(); + FindWhitespacedRows(); + + if (!VerifyWhitespacedTable()) { + return false; + } else { + bounding_box_.set_left(cell_x_[0]); + bounding_box_.set_right(cell_x_[cell_x_.size() - 1]); + bounding_box_.set_bottom(cell_y_[0]); + bounding_box_.set_top(cell_y_[cell_y_.size() - 1]); + AbsorbNearbyLines(); + CalculateMargins(); + CalculateStats(); + return true; + } +} + +// Tests if a partition fits inside the table structure. +// Partitions must fully span a grid line in order to intersect it. +// This means that a partition does not intersect a line +// that it "just" touches. This is mainly because the assumption +// throughout the code is that "0" distance is a very very small space. 
+bool StructuredTable::DoesPartitionFit(const ColPartition& part) const { + const TBOX& box = part.bounding_box(); + for (int i = 0; i < cell_x_.size(); ++i) + if (box.left() < cell_x_[i] && cell_x_[i] < box.right()) + return false; + for (int i = 0; i < cell_y_.size(); ++i) + if (box.bottom() < cell_y_[i] && cell_y_[i] < box.top()) + return false; + return true; +} + +// Checks if a sub-table has multiple data cells filled. +int StructuredTable::CountFilledCells() { + return CountFilledCells(0, row_count() - 1, 0, column_count() - 1); +} +int StructuredTable::CountFilledCellsInRow(int row) { + return CountFilledCells(row, row, 0, column_count() - 1); +} +int StructuredTable::CountFilledCellsInColumn(int column) { + return CountFilledCells(0, row_count() - 1, column, column); +} +int StructuredTable::CountFilledCells(int row_start, int row_end, + int column_start, int column_end) { + ASSERT_HOST(0 <= row_start && row_start <= row_end && row_end < row_count()); + ASSERT_HOST(0 <= column_start && column_start <= column_end && + column_end < column_count()); + int cell_count = 0; + TBOX cell_box; + for (int row = row_start; row <= row_end; ++row) { + cell_box.set_bottom(cell_y_[row]); + cell_box.set_top(cell_y_[row + 1]); + for (int col = column_start; col <= column_end; ++col) { + cell_box.set_left(cell_x_[col]); + cell_box.set_right(cell_x_[col + 1]); + if (CountPartitions(cell_box) > 0) + ++cell_count; + } + } + return cell_count; +} + +// Makes sure that at least one cell in a row has substantial area filled. +// This can filter out large whitespace caused by growing tables too far +// and page numbers. +bool StructuredTable::VerifyRowFilled(int row) { + for (int i = 0; i < column_count(); ++i) { + double area_filled = CalculateCellFilledPercentage(row, i); + if (area_filled >= kMinFilledArea) + return true; + } + return false; +} + +// Finds the filled area in a cell. +// Assume ColPartitions do not overlap for simplicity (even though they do). 
+double StructuredTable::CalculateCellFilledPercentage(int row, int column) { + ASSERT_HOST(0 <= row && row <= row_count()); + ASSERT_HOST(0 <= column && column <= column_count()); + const TBOX kCellBox(cell_x_[column], cell_y_[row], + cell_x_[column + 1], cell_y_[row + 1]); + ASSERT_HOST(!kCellBox.null_box()); + + ColPartitionGridSearch gsearch(text_grid_); + gsearch.SetUniqueMode(true); + gsearch.StartRectSearch(kCellBox); + double area_covered = 0; + ColPartition* text = nullptr; + while ((text = gsearch.NextRectSearch()) != nullptr) { + if (text->IsTextType()) + area_covered += text->bounding_box().intersection(kCellBox).area(); + } + const int32_t current_area = kCellBox.area(); + if (current_area == 0) { + return 1.0; + } + return std::min(1.0, area_covered / current_area); +} + +#ifndef GRAPHICS_DISABLED + +void StructuredTable::Display(ScrollView* window, ScrollView::Color color) { + window->Brush(ScrollView::NONE); + window->Pen(color); + window->Rectangle(bounding_box_.left(), bounding_box_.bottom(), + bounding_box_.right(), bounding_box_.top()); + for (int i = 0; i < cell_x_.size(); i++) { + window->Line(cell_x_[i], bounding_box_.bottom(), + cell_x_[i], bounding_box_.top()); + } + for (int i = 0; i < cell_y_.size(); i++) { + window->Line(bounding_box_.left(), cell_y_[i], + bounding_box_.right(), cell_y_[i]); + } + window->UpdateWindow(); +} + +#endif + +// Clear structure information. +void StructuredTable::ClearStructure() { + cell_x_.clear(); + cell_y_.clear(); + is_lined_ = false; + space_above_ = 0; + space_below_ = 0; + space_left_ = 0; + space_right_ = 0; + median_cell_height_ = 0; + median_cell_width_ = 0; +} + +// When a table has lines, the lines should not intersect any partitions. +// The following function makes sure the previous assumption is met. +bool StructuredTable::VerifyLinedTableCells() { + // Function only called when lines exist. 
+ ASSERT_HOST(cell_y_.size() >= 2 && cell_x_.size() >= 2); + for (int i = 0; i < cell_y_.size(); ++i) { + if (CountHorizontalIntersections(cell_y_[i]) > 0) + return false; + } + for (int i = 0; i < cell_x_.size(); ++i) { + if (CountVerticalIntersections(cell_x_[i]) > 0) + return false; + } + return true; +} + +// TODO(nbeato): Could be much better than this. +// Examples: +// - Caclulate the percentage of filled cells. +// - Calculate the average number of ColPartitions per cell. +// - Calculate the number of cells per row with partitions. +// - Check if ColPartitions in adjacent cells are similar. +// - Check that all columns are at least a certain width. +// - etc. +bool StructuredTable::VerifyWhitespacedTable() { + // criteria for a table, must be at least 2x3 or 3x2 + return row_count() >= 2 && column_count() >= 2 && cell_count() >= 6; +} + +// Finds vertical splits in the ColPartitions of text_grid_ by considering +// all possible "good" guesses. A good guess is just the left/right sides of +// the partitions, since these locations will uniquely define where the +// extremal values where the splits can occur. The split happens +// in the middle of the two nearest partitions. +void StructuredTable::FindWhitespacedColumns() { + // Set of the extents of all partitions on the page. + GenericVector<int> left_sides; + GenericVector<int> right_sides; + + // Look at each text partition. We want to find the partitions + // that have extremal left/right sides. These will give us a basis + // for the table columns. 
+ ColPartitionGridSearch gsearch(text_grid_); + gsearch.SetUniqueMode(true); + gsearch.StartRectSearch(bounding_box_); + ColPartition* text = nullptr; + while ((text = gsearch.NextRectSearch()) != nullptr) { + if (!text->IsTextType()) + continue; + + ASSERT_HOST(text->bounding_box().left() < text->bounding_box().right()); + int spacing = static_cast<int>(text->median_width() * + kHorizontalSpacing / 2.0 + 0.5); + left_sides.push_back(text->bounding_box().left() - spacing); + right_sides.push_back(text->bounding_box().right() + spacing); + } + // It causes disaster below, so avoid it! + if (left_sides.size() == 0 || right_sides.size() == 0) + return; + + // Since data may be inserted in grid order, we sort the left/right sides. + left_sides.sort(); + right_sides.sort(); + + // At this point, in the "merged list", we expect to have a left side, + // followed by either more left sides or a right side. The last number + // should be a right side. We find places where the splits occur by looking + // for "valleys". If we want to force gap sizes or allow overlap, change + // the spacing above. If you want to let lines "slice" partitions as long + // as it is infrequent, change the following function. + FindCellSplitLocations(left_sides, right_sides, kCellSplitColumnThreshold, + &cell_x_); +} + +// Finds horizontal splits in the ColPartitions of text_grid_ by considering +// all possible "good" guesses. A good guess is just the bottom/top sides of +// the partitions, since these locations will uniquely define where the +// extremal values where the splits can occur. The split happens +// in the middle of the two nearest partitions. +void StructuredTable::FindWhitespacedRows() { + // Set of the extents of all partitions on the page. + GenericVector<int> bottom_sides; + GenericVector<int> top_sides; + // We will be "shrinking" partitions, so keep the min/max around to + // make sure the bottom/top lines do not intersect text. 
+ int min_bottom = INT32_MAX; + int max_top = INT32_MIN; + + // Look at each text partition. We want to find the partitions + // that have extremal bottom/top sides. These will give us a basis + // for the table rows. Because the textlines can be skewed and close due + // to warping, the height of the partitions is toned down a little bit. + ColPartitionGridSearch gsearch(text_grid_); + gsearch.SetUniqueMode(true); + gsearch.StartRectSearch(bounding_box_); + ColPartition* text = nullptr; + while ((text = gsearch.NextRectSearch()) != nullptr) { + if (!text->IsTextType()) + continue; + + ASSERT_HOST(text->bounding_box().bottom() < text->bounding_box().top()); + min_bottom = std::min(min_bottom, static_cast<int>(text->bounding_box().bottom())); + max_top = std::max(max_top, static_cast<int>(text->bounding_box().top())); + + // Ignore "tall" text partitions, as these are usually false positive + // vertical text or multiple lines pulled together. + if (text->bounding_box().height() > max_text_height_) + continue; + + int spacing = static_cast<int>(text->bounding_box().height() * + kVerticalSpacing / 2.0 + 0.5); + int bottom = text->bounding_box().bottom() - spacing; + int top = text->bounding_box().top() + spacing; + // For horizontal text, the factor can be negative. This should + // probably cause a warning or failure. I haven't actually checked if + // it happens. + if (bottom >= top) + continue; + + bottom_sides.push_back(bottom); + top_sides.push_back(top); + } + // It causes disaster below, so avoid it! + if (bottom_sides.size() == 0 || top_sides.size() == 0) + return; + + // Since data may be inserted in grid order, we sort the bottom/top sides. + bottom_sides.sort(); + top_sides.sort(); + + // At this point, in the "merged list", we expect to have a bottom side, + // followed by either more bottom sides or a top side. The last number + // should be a top side. We find places where the splits occur by looking + // for "valleys". 
If we want to force gap sizes or allow overlap, change + // the spacing above. If you want to let lines "slice" partitions as long + // as it is infrequent, change the following function. + FindCellSplitLocations(bottom_sides, top_sides, kCellSplitRowThreshold, + &cell_y_); + + // Recover the min/max correctly since it was shifted. + cell_y_[0] = min_bottom; + cell_y_[cell_y_.size() - 1] = max_top; +} + +void StructuredTable::CalculateMargins() { + space_above_ = INT32_MAX; + space_below_ = INT32_MAX; + space_right_ = INT32_MAX; + space_left_ = INT32_MAX; + UpdateMargins(text_grid_); + UpdateMargins(line_grid_); +} +// Finds the nearest partition in grid to the table +// boundaries and updates the margin. +void StructuredTable::UpdateMargins(ColPartitionGrid* grid) { + int below = FindVerticalMargin(grid, bounding_box_.bottom(), true); + space_below_ = std::min(space_below_, below); + int above = FindVerticalMargin(grid, bounding_box_.top(), false); + space_above_ = std::min(space_above_, above); + int left = FindHorizontalMargin(grid, bounding_box_.left(), true); + space_left_ = std::min(space_left_, left); + int right = FindHorizontalMargin(grid, bounding_box_.right(), false); + space_right_ = std::min(space_right_, right); +} +int StructuredTable::FindVerticalMargin(ColPartitionGrid* grid, int border, + bool decrease) const { + ColPartitionGridSearch gsearch(grid); + gsearch.SetUniqueMode(true); + gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(), + border); + ColPartition* part = nullptr; + while ((part = gsearch.NextVerticalSearch(decrease)) != nullptr) { + if (!part->IsTextType() && !part->IsHorizontalLine()) + continue; + int distance = decrease ? 
border - part->bounding_box().top() + : part->bounding_box().bottom() - border; + if (distance >= 0) + return distance; + } + return INT32_MAX; +} +int StructuredTable::FindHorizontalMargin(ColPartitionGrid* grid, int border, + bool decrease) const { + ColPartitionGridSearch gsearch(grid); + gsearch.SetUniqueMode(true); + gsearch.StartSideSearch(border, bounding_box_.bottom(), bounding_box_.top()); + ColPartition* part = nullptr; + while ((part = gsearch.NextSideSearch(decrease)) != nullptr) { + if (!part->IsTextType() && !part->IsVerticalLine()) + continue; + int distance = decrease ? border - part->bounding_box().right() + : part->bounding_box().left() - border; + if (distance >= 0) + return distance; + } + return INT32_MAX; +} + +void StructuredTable::CalculateStats() { + const int kMaxCellHeight = 1000; + const int kMaxCellWidth = 1000; + STATS height_stats(0, kMaxCellHeight + 1); + STATS width_stats(0, kMaxCellWidth + 1); + + for (int i = 0; i < row_count(); ++i) + height_stats.add(row_height(i), column_count()); + for (int i = 0; i < column_count(); ++i) + width_stats.add(column_width(i), row_count()); + + median_cell_height_ = static_cast<int>(height_stats.median() + 0.5); + median_cell_width_ = static_cast<int>(width_stats.median() + 0.5); +} + +// Looks for grid lines near the current bounding box and +// grows the bounding box to include them if no intersections +// will occur as a result. This is necessary because the margins +// are calculated relative to the closest line/text. If the +// line isn't absorbed, the margin will be the distance to the line. +void StructuredTable::AbsorbNearbyLines() { + ColPartitionGridSearch gsearch(line_grid_); + gsearch.SetUniqueMode(true); + + // Is the closest line above good? Loop multiple times for tables with + // multi-line (sometimes 2) borders. Limit the number of lines by + // making sure they stay within a table cell or so. 
+ ColPartition* line = nullptr; + gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(), + bounding_box_.top()); + while ((line = gsearch.NextVerticalSearch(false)) != nullptr) { + if (!line->IsHorizontalLine()) + break; + TBOX text_search(bounding_box_.left(), bounding_box_.top() + 1, + bounding_box_.right(), line->MidY()); + if (text_search.height() > median_cell_height_ * 2) + break; + if (CountPartitions(text_search) > 0) + break; + bounding_box_.set_top(line->MidY()); + } + // As above, is the closest line below good? + line = nullptr; + gsearch.StartVerticalSearch(bounding_box_.left(), bounding_box_.right(), + bounding_box_.bottom()); + while ((line = gsearch.NextVerticalSearch(true)) != nullptr) { + if (!line->IsHorizontalLine()) + break; + TBOX text_search(bounding_box_.left(), line->MidY(), + bounding_box_.right(), bounding_box_.bottom() - 1); + if (text_search.height() > median_cell_height_ * 2) + break; + if (CountPartitions(text_search) > 0) + break; + bounding_box_.set_bottom(line->MidY()); + } + // TODO(nbeato): vertical lines +} + + +// This function will find all "0 valleys" (of any length) given two +// arrays. The arrays are the mins and maxes of partitions (either +// left and right or bottom and top). Since the min/max lists are generated +// with pairs of increasing integers, we can make some assumptions in +// the function about ordering of the overall list, which are shown in the +// asserts. +// The algorithm works as follows: +// While there are numbers to process, take the smallest number. +// If it is from the min_list, increment the "hill" counter. +// Otherwise, decrement the "hill" counter. +// In the process of doing this, keep track of "crossing" the +// desired height. +// The first/last items are extremal values of the list and known. +// NOTE: This function assumes the lists are sorted! 
+void StructuredTable::FindCellSplitLocations(const GenericVector<int>& min_list, + const GenericVector<int>& max_list, + int max_merged, + GenericVector<int>* locations) { + locations->clear(); + ASSERT_HOST(min_list.size() == max_list.size()); + if (min_list.size() == 0) + return; + ASSERT_HOST(min_list.get(0) < max_list.get(0)); + ASSERT_HOST(min_list.get(min_list.size() - 1) < + max_list.get(max_list.size() - 1)); + + locations->push_back(min_list.get(0)); + int min_index = 0; + int max_index = 0; + int stacked_partitions = 0; + int last_cross_position = INT32_MAX; + // max_index will expire after min_index. + // However, we can't "increase" the hill size if min_index expired. + // So finish processing when min_index expires. + while (min_index < min_list.size()) { + // Increase the hill count. + if (min_list[min_index] < max_list[max_index]) { + ++stacked_partitions; + if (last_cross_position != INT32_MAX && + stacked_partitions > max_merged) { + int mid = (last_cross_position + min_list[min_index]) / 2; + locations->push_back(mid); + last_cross_position = INT32_MAX; + } + ++min_index; + } else { + // Decrease the hill count. + --stacked_partitions; + if (last_cross_position == INT32_MAX && + stacked_partitions <= max_merged) { + last_cross_position = max_list[max_index]; + } + ++max_index; + } + } + locations->push_back(max_list.get(max_list.size() - 1)); +} + +// Counts the number of partitions in the table +// box that intersection the given x value. +int StructuredTable::CountVerticalIntersections(int x) { + int count = 0; + // Make a small box to keep the search time down. 
+ const int kGridSize = text_grid_->gridsize(); + TBOX vertical_box = bounding_box_; + vertical_box.set_left(x - kGridSize); + vertical_box.set_right(x + kGridSize); + + ColPartitionGridSearch gsearch(text_grid_); + gsearch.SetUniqueMode(true); + gsearch.StartRectSearch(vertical_box); + ColPartition* text = nullptr; + while ((text = gsearch.NextRectSearch()) != nullptr) { + if (!text->IsTextType()) + continue; + const TBOX& box = text->bounding_box(); + if (box.left() < x && x < box.right()) + ++count; + } + return count; +} + +// Counts the number of partitions in the table +// box that intersection the given y value. +int StructuredTable::CountHorizontalIntersections(int y) { + int count = 0; + // Make a small box to keep the search time down. + const int kGridSize = text_grid_->gridsize(); + TBOX horizontal_box = bounding_box_; + horizontal_box.set_bottom(y - kGridSize); + horizontal_box.set_top(y + kGridSize); + + ColPartitionGridSearch gsearch(text_grid_); + gsearch.SetUniqueMode(true); + gsearch.StartRectSearch(horizontal_box); + ColPartition* text = nullptr; + while ((text = gsearch.NextRectSearch()) != nullptr) { + if (!text->IsTextType()) + continue; + + const TBOX& box = text->bounding_box(); + if (box.bottom() < y && y < box.top()) + ++count; + } + return count; +} + +// Counts how many text partitions are in this box. +// This is used to count partitons in cells, as that can indicate +// how "strong" a potential table row/column (or even full table) actually is. 
+int StructuredTable::CountPartitions(const TBOX& box) { + ColPartitionGridSearch gsearch(text_grid_); + gsearch.SetUniqueMode(true); + gsearch.StartRectSearch(box); + int count = 0; + ColPartition* text = nullptr; + while ((text = gsearch.NextRectSearch()) != nullptr) { + if (text->IsTextType()) + ++count; + } + return count; +} + +//////// +//////// TableRecognizer Class +//////// + +TableRecognizer::TableRecognizer() + : text_grid_(nullptr), + line_grid_(nullptr), + min_height_(0), + min_width_(0), + max_text_height_(INT32_MAX) { +} + +TableRecognizer::~TableRecognizer() { +} + +void TableRecognizer::Init() { +} + +void TableRecognizer::set_text_grid(ColPartitionGrid* text_grid) { + text_grid_ = text_grid; +} +void TableRecognizer::set_line_grid(ColPartitionGrid* line_grid) { + line_grid_ = line_grid; +} +void TableRecognizer::set_min_height(int height) { + min_height_ = height; +} +void TableRecognizer::set_min_width(int width) { + min_width_ = width; +} +void TableRecognizer::set_max_text_height(int height) { + max_text_height_ = height; +} + +StructuredTable* TableRecognizer::RecognizeTable(const TBOX& guess) { + auto* table = new StructuredTable(); + table->Init(); + table->set_text_grid(text_grid_); + table->set_line_grid(line_grid_); + table->set_max_text_height(max_text_height_); + + // Try to solve this simple case, a table with *both* + // vertical and horizontal lines. + if (RecognizeLinedTable(guess, table)) + return table; + + // Fallback to whitespace if that failed. + // TODO(nbeato): Break this apart to take advantage of horizontal + // lines or vertical lines when present. + if (RecognizeWhitespacedTable(guess, table)) + return table; + + // No table found... 
+ delete table; + return nullptr; +} + +bool TableRecognizer::RecognizeLinedTable(const TBOX& guess_box, + StructuredTable* table) { + if (!HasSignificantLines(guess_box)) + return false; + TBOX line_bound = guess_box; + if (!FindLinesBoundingBox(&line_bound)) + return false; + table->set_bounding_box(line_bound); + return table->FindLinedStructure(); +} + +// Quick implementation. Just count the number of lines in the box. +// A better implementation would counter intersections and look for connected +// components. It could even go as far as finding similar length lines. +// To account for these possible issues, the VerifyLinedTableCells function +// will reject lined tables that cause intersections with text on the page. +// TODO(nbeato): look for "better" lines +bool TableRecognizer::HasSignificantLines(const TBOX& guess) { + ColPartitionGridSearch box_search(line_grid_); + box_search.SetUniqueMode(true); + box_search.StartRectSearch(guess); + ColPartition* line = nullptr; + int vertical_count = 0; + int horizontal_count = 0; + + while ((line = box_search.NextRectSearch()) != nullptr) { + if (line->IsHorizontalLine()) + ++horizontal_count; + if (line->IsVerticalLine()) + ++vertical_count; + } + + return vertical_count >= kLinedTableMinVerticalLines && + horizontal_count >= kLinedTableMinHorizontalLines; +} + +// Given a bounding box with a bunch of horizontal / vertical lines, +// we just find the extents of all of these lines iteratively. +// The box will be at least as large as guess. This +// could possibly be a bad assumption. +// It is guaranteed to halt in at least O(n * gridarea) where n +// is the number of lines. +// The assumption is that growing the box iteratively will add lines +// several times, but eventually we'll find the extents. +// +// For tables, the approach is a bit aggressive, a single line (which could be +// noise or a column ruling) can destroy the table inside. +// +// TODO(nbeato): This is a quick first implementation. 
+// A better implementation would actually look for consistency +// in extents of the lines and find the extents using lines +// that clearly describe the table. This would allow the +// lines to "vote" for height/width. An approach like +// this would solve issues with page layout rulings. +// I haven't looked for these issues yet, so I can't even +// say they happen confidently. +bool TableRecognizer::FindLinesBoundingBox(TBOX* bounding_box) { + // The first iteration will tell us if there are lines + // present and shrink the box to a minimal iterative size. + if (!FindLinesBoundingBoxIteration(bounding_box)) + return false; + + // Keep growing until the area of the table stabilizes. + // The box can only get bigger, increasing area. + bool changed = true; + while (changed) { + changed = false; + int old_area = bounding_box->area(); + bool check = FindLinesBoundingBoxIteration(bounding_box); + // At this point, the function will return true. + ASSERT_HOST(check); + ASSERT_HOST(bounding_box->area() >= old_area); + changed = (bounding_box->area() > old_area); + } + + return true; +} + +bool TableRecognizer::FindLinesBoundingBoxIteration(TBOX* bounding_box) { + // Search for all of the lines in the current box, keeping track of extents. + ColPartitionGridSearch box_search(line_grid_); + box_search.SetUniqueMode(true); + box_search.StartRectSearch(*bounding_box); + ColPartition* line = nullptr; + bool first_line = true; + + while ((line = box_search.NextRectSearch()) != nullptr) { + if (line->IsLineType()) { + if (first_line) { + // The first iteration can shrink the box. + *bounding_box = line->bounding_box(); + first_line = false; + } else { + *bounding_box += line->bounding_box(); + } + } + } + return !first_line; +} + +// The goal of this function is to move the table boundaries around and find +// a table that maximizes the whitespace around the table while maximizing +// the cellular structure. 
As a result, it gets confused by headers, footers, +// and merged columns (text that crosses columns). There is a tolerance +// that allows a few partitions to count towards potential cell merges. +// It's the max_merged parameter to FindPartitionLocations. +// It can work, but it needs some false positive remove on boundaries. +// For now, the grid structure must not intersect any partitions. +// Also, small tolerance is added to the horizontal lines for tightly packed +// tables. The tolerance is added by adjusting the bounding boxes of the +// partitions (in FindHorizontalPartitions). The current implementation +// only adjusts the vertical extents of the table. +// +// Also note. This was hacked at a lot. It could probably use some +// more hacking at to find a good set of border conditions and then a +// nice clean up. +bool TableRecognizer::RecognizeWhitespacedTable(const TBOX& guess_box, + StructuredTable* table) { + TBOX best_box = guess_box; // Best borders known. + int best_below = 0; // Margin size above best table. + int best_above = 0; // Margin size below best table. + TBOX adjusted = guess_box; // The search box. + + // We assume that the guess box is somewhat accurate, so we don't allow + // the adjusted border to pass half of the guessed area. This prevents + // "negative" tables from forming. + const int kMidGuessY = (guess_box.bottom() + guess_box.top()) / 2; + // Keeps track of the most columns in an accepted table. The resulting table + // may be less than the max, but we don't want to stray too far. + int best_cols = 0; + // Make sure we find a good border. + bool found_good_border = false; + + // Find the bottom of the table by trying a few different locations. For + // each location, the top, left, and right are fixed. We start the search + // in a smaller table to favor best_cols getting a good estimate sooner. 
+ int last_bottom = INT32_MAX; + int bottom = NextHorizontalSplit(guess_box.left(), guess_box.right(), + kMidGuessY - min_height_ / 2, true); + int top = NextHorizontalSplit(guess_box.left(), guess_box.right(), + kMidGuessY + min_height_ / 2, false); + adjusted.set_top(top); + + // Headers/footers can be spaced far from everything. + // Make sure that the space below is greater than the space above + // the lowest row. + int previous_below = 0; + const int kMaxChances = 10; + int chances = kMaxChances; + while (bottom != last_bottom) { + adjusted.set_bottom(bottom); + + if (adjusted.height() >= min_height_) { + // Try to fit the grid on the current box. We give it a chance + // if the number of columns didn't significantly drop. + table->set_bounding_box(adjusted); + if (table->FindWhitespacedStructure() && + table->column_count() >= best_cols * kRequiredColumns) { + if (false && IsWeakTableRow(table, 0)) { + // Currently buggy, but was looking promising so disabled. + --chances; + } else { + // We favor 2 things, + // 1- Adding rows that have partitioned data. + // 2- Better margins (to find header/footer). + // For better tables, we just look for multiple cells in the + // bottom row with data in them. + // For margins, the space below the last row should + // be better than a table with the last row removed. 
+ chances = kMaxChances; + double max_row_height = kMaxRowSize * table->median_cell_height(); + if ((table->space_below() * kMarginFactor >= best_below && + table->space_below() >= previous_below) || + (table->CountFilledCellsInRow(0) > 1 && + table->row_height(0) < max_row_height)) { + best_box.set_bottom(bottom); + best_below = table->space_below(); + best_cols = std::max(table->column_count(), best_cols); + found_good_border = true; + } + } + previous_below = table->space_below(); + } else { + --chances; + } + } + if (chances <= 0) + break; + + last_bottom = bottom; + bottom = NextHorizontalSplit(guess_box.left(), guess_box.right(), + last_bottom, true); + } + if (!found_good_border) + return false; + + // TODO(nbeato) comments: follow modified code above... put it in a function! + found_good_border = false; + int last_top = INT32_MIN; + top = NextHorizontalSplit(guess_box.left(), guess_box.right(), + kMidGuessY + min_height_ / 2, false); + int previous_above = 0; + chances = kMaxChances; + + adjusted.set_bottom(best_box.bottom()); + while (last_top != top) { + adjusted.set_top(top); + if (adjusted.height() >= min_height_) { + table->set_bounding_box(adjusted); + if (table->FindWhitespacedStructure() && + table->column_count() >= best_cols * kRequiredColumns) { + int last_row = table->row_count() - 1; + if (false && IsWeakTableRow(table, last_row)) { + // Currently buggy, but was looking promising so disabled. 
+ --chances; + } else { + chances = kMaxChances; + double max_row_height = kMaxRowSize * table->median_cell_height(); + if ((table->space_above() * kMarginFactor >= best_above && + table->space_above() >= previous_above) || + (table->CountFilledCellsInRow(last_row) > 1 && + table->row_height(last_row) < max_row_height)) { + best_box.set_top(top); + best_above = table->space_above(); + best_cols = std::max(table->column_count(), best_cols); + found_good_border = true; + } + } + previous_above = table->space_above(); + } else { + --chances; + } + } + if (chances <= 0) + break; + + last_top = top; + top = NextHorizontalSplit(guess_box.left(), guess_box.right(), + last_top, false); + } + + if (!found_good_border) + return false; + + // If we get here, this shouldn't happen. It can be an assert, but + // I haven't tested it enough to make it crash things. + if (best_box.null_box()) + return false; + + // Given the best locations, fit the box to those locations. + table->set_bounding_box(best_box); + return table->FindWhitespacedStructure(); +} + +// Finds the closest value to y that can safely cause a horizontal +// split in the partitions. +// This function has been buggy and not as reliable as I would've +// liked. I suggest finding all of the splits using the +// FindPartitionLocations once and then just keeping the results +// of that function cached somewhere. 
+int TableRecognizer::NextHorizontalSplit(int left, int right, int y, + bool top_to_bottom) { + ColPartitionGridSearch gsearch(text_grid_); + gsearch.SetUniqueMode(true); + gsearch.StartVerticalSearch(left, right, y); + ColPartition* text = nullptr; + int last_y = y; + while ((text = gsearch.NextVerticalSearch(top_to_bottom)) != nullptr) { + if (!text->IsTextType() || !text->IsHorizontalType()) + continue; + if (text->bounding_box().height() > max_text_height_) + continue; + + const TBOX& text_box = text->bounding_box(); + if (top_to_bottom && (last_y >= y || last_y <= text_box.top())) { + last_y = std::min(last_y, static_cast<int>(text_box.bottom())); + continue; + } + if (!top_to_bottom && (last_y <= y || last_y >= text_box.bottom())) { + last_y = std::max(last_y, static_cast<int>(text_box.top())); + continue; + } + + return last_y; + } + // If none is found, we at least want to preserve the min/max, + // which defines the overlap of y with the last partition in the grid. + return last_y; +} + +// Code is buggy right now. It is disabled in the calling function. +// It seems like sometimes the row that is passed in is not correct +// sometimes (like a phantom row is introduced). There's something going +// on in the cell_y_ data member before this is called... not certain. 
+bool TableRecognizer::IsWeakTableRow(StructuredTable* table, int row) { + if (!table->VerifyRowFilled(row)) + return false; + + double threshold = 0.0; + if (table->column_count() > kGoodRowNumberOfColumnsSmallSize) + threshold = table->column_count() * kGoodRowNumberOfColumnsLarge; + else + threshold = kGoodRowNumberOfColumnsSmall[table->column_count()]; + + return table->CountFilledCellsInRow(row) < threshold; +} + +} // namespace tesseract diff --git a/tesseract/src/textord/tablerecog.h b/tesseract/src/textord/tablerecog.h new file mode 100644 index 00000000..eb8f0543 --- /dev/null +++ b/tesseract/src/textord/tablerecog.h @@ -0,0 +1,378 @@ +/////////////////////////////////////////////////////////////////////// +// File: tablerecog.h +// Description: Functions to detect structure of tables. +// Author: Nicholas Beato +// Created: Aug 17, 2010 +// +// (C) Copyright 2010, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TABLERECOG_H_ +#define TABLERECOG_H_ + +#include "colpartitiongrid.h" +#include "genericvector.h" + +namespace tesseract { + +// There are 2 classes in this file. They have 2 different purposes. +// - StructuredTable contains the methods to find the structure given +// a specific bounding box and grow that structure. +// - TableRecognizer contains the methods to adjust the possible positions +// of a table without worrying about structure. 
+// +// To use these classes, the assumption is that the TableFinder will +// have a guess of the location of a table (or possibly over/undersegmented +// tables). The TableRecognizer is responsible for finding the table boundaries +// at a high level. The StructuredTable class is responsible for determining +// the structure of the table and trying to maximize its bounds while retaining +// the structure. +// (The latter part is not implemented yet, but that was the goal). +// +// While on the boundary discussion, keep in mind that this is a first pass. +// There should eventually be some things like internal structure checks, +// and, more importantly, surrounding text flow checks. +// + +// Usage: +// The StructuredTable class contains methods to query a potential table. +// It has functions to find structure, count rows, find ColPartitions that +// intersect gridlines, etc. It is not meant to blindly find a table. It +// is meant to start with a known table location and enhance it. +// Example: +// ColPartitionGrid text_grid, line_grid; // init +// TBOX table_box; // known location of the table +// +// StructuredTable table; +// table.Init(); // construction code +// table.set_text_grid(/* text */); // These 2 grids can be the same! +// table.set_line_grid(/* lines */); +// table.set_max_text_height(10); // Filter vertical and tall text. +// // IMPORTANT! The table needs to be told where it is! +// table.set_bounding_box(table_box); // Set initial table location. +// if (table.FindWhitespacedStructure()) { +// // process table +// table.column_count(); // number of columns +// table.row_count(); // number of rows +// table.cell_count(); // number of cells +// table.bounding_box(); // updated bounding box +// // etc. +// } +// +class TESS_API StructuredTable { + public: + StructuredTable(); + ~StructuredTable() = default; + + // Initialization code. Must be called after the constructor. + void Init(); + + // Sets the grids used by the table.
These can be changed between + // calls to Recognize. They are treated as read-only data. + void set_text_grid(ColPartitionGrid* text); + void set_line_grid(ColPartitionGrid* lines); + // Filters text partitions that are ridiculously tall to prevent + // merging rows. + void set_max_text_height(int height); + + // Basic accessors. Some are treated as attributes despite having indirect + // representation. + bool is_lined() const; + int row_count() const; + int column_count() const; + int cell_count() const; + void set_bounding_box(const TBOX& box); + const TBOX& bounding_box() const; + int median_cell_height(); + int median_cell_width(); + int row_height(int row) const; + int column_width(int column) const; + int space_above() const; + int space_below() const; + + // Given enough horizontal and vertical lines in a region, create this table + // based on the structure given by the lines. Return true if it worked out. + // Code assumes the lines exist. It is the caller's responsibility to check + // for lines and find an appropriate bounding box. + bool FindLinedStructure(); + + // The main subroutine for finding generic table structure. The function + // finds the grid structure in the given box. Returns true if a good grid + // exists, implying that "this" table is valid. + bool FindWhitespacedStructure(); + + //////// + //////// Functions to query table info. + //////// + + // Returns true if inserting part into the table does not cause any + // cell merges. + bool DoesPartitionFit(const ColPartition& part) const; + // Checks if a sub-table has multiple data cells filled. + int CountFilledCells(); + int CountFilledCellsInRow(int row); + int CountFilledCellsInColumn(int column); + int CountFilledCells(int row_start, int row_end, + int column_start, int column_end); + + // Makes sure that at least one cell in a row has substantial area filled. + // This can filter out large whitespace caused by growing tables too far + // and page numbers.
+ // (currently bugged for some reason). + bool VerifyRowFilled(int row); + // Finds the filled area in a cell. + double CalculateCellFilledPercentage(int row, int column); + + // Debug display, draws the table in the given color. If the table is not + // valid, the table and "best" grid lines are still drawn in the given color. + void Display(ScrollView* window, ScrollView::Color color); + + protected: + // Clear the structure information. + void ClearStructure(); + + //////// + //////// Lined tables + //////// + + // Verifies the lines do not intersect partitions. This happens when + // the lines are in column boundaries and extend the full page. As a result, + // the grid lines go through column text. The condition is detectable. + bool VerifyLinedTableCells(); + + //////// + //////// Tables with whitespace + //////// + + // This is the function to change if you want to filter resulting tables + // better. Right now it just checks for a minimum cell count and such. + // You could add things like maximum number of ColPartitions per cell or + // similar. + bool VerifyWhitespacedTable(); + // Find the columns of a table using whitespace. + void FindWhitespacedColumns(); + // Find the rows of a table using whitespace. + void FindWhitespacedRows(); + + //////// + //////// Functions to provide information about the table. + //////// + + // Calculates the whitespace around the table using the table boundary and + // the supplied grids (set_text_grid and set_line_grid). + void CalculateMargins(); + // Update the table margins with the supplied grid. This is + // only called by CalculateMargins to use multiple grid sources. + void UpdateMargins(ColPartitionGrid* grid); + int FindVerticalMargin(ColPartitionGrid* grid, int start_x, + bool decrease) const; + int FindHorizontalMargin(ColPartitionGrid* grid, int start_y, + bool decrease) const; + // Calculates stats on the table, namely the median cell height and width.
+ void CalculateStats(); + + //////// + //////// Functions to try to "fix" some table errors. + //////// + + // Given a whitespaced table, this looks for bordering lines that might + // be page layout boxes around the table. It is necessary to get the margins + // correct on the table. If the lines are not joined, the margins will be + // the distance to the line, which is not right. + void AbsorbNearbyLines(); + + // Nice utility function for finding partition gaps. You feed it a sorted + // list of all of the mins/maxes of the partitions in the table, and it gives + // you the gaps (middle). This works for both vertical and horizontal + // gaps. + // + // If you want to allow slight overlap in the division and the partitions, + // just scale down the partitions before inserting them in the list. + // Likewise, you can force at least some space between partitions. + // This trick is how the horizontal partitions are done (since the page + // skew could make it hard to find splits in the text). + // + // As a result, "0 distance" between closest partitions causes a gap. + // This is not a programmatic assumption. It is intentional and simplifies + // things. + // + // "max_merged" indicates both the minimum number of stacked partitions + // to cause a cell (add 1 to it), and the maximum number of partitions that + // a grid line can intersect. For example, if max_merged is 0, then lines + // are inserted wherever space exists between partitions. If it is 2, + // lines may intersect 2 partitions at most, but you also need at least + // 2 partitions to generate a line. + static void FindCellSplitLocations(const GenericVector<int>& min_list, + const GenericVector<int>& max_list, + int max_merged, + GenericVector<int>* locations); + + //////// + //////// Utility function for table queries + //////// + + // Counts the number of ColPartitions that intersect vertical cell + // division at this x value. Used by VerifyLinedTableCells.
+ int CountVerticalIntersections(int x); + int CountHorizontalIntersections(int y); + + // Counts how many text partitions are in this box. + int CountPartitions(const TBOX& box); + + //////// + //////// Data members. + //////// + + // Input data, used as read only data to make decisions. + ColPartitionGrid* text_grid_; // Text ColPartitions + ColPartitionGrid* line_grid_; // Line ColPartitions + // Table structure. + // bounding box is a convenient external representation. + // cell_x_ and cell_y_ indicate the grid lines. + TBOX bounding_box_; // Bounding box + GenericVector<int> cell_x_; // Locations of vertical divisions (sorted) + GenericVector<int> cell_y_; // Locations of horizontal divisions (sorted) + bool is_lined_; // Is the table backed up by a line structure + // Table margins, set via CalculateMargins + int space_above_; + int space_below_; + int space_left_; + int space_right_; + int median_cell_height_; + int median_cell_width_; + // Filters, used to prevent awkward partitions from destroying structure. + int max_text_height_; +}; + +class TESS_API TableRecognizer { + public: + TableRecognizer(); + ~TableRecognizer(); + + // Initialization code. Must be called after the constructor. + void Init(); + + //////// + //////// Pre-recognize methods to set initial table constraints. + //////// + + // Sets the grids used by the table. These can be changed between + // calls to RecognizeTable. They are treated as read-only data. + void set_text_grid(ColPartitionGrid* text); + void set_line_grid(ColPartitionGrid* lines); + // Sets some additional constraints on the table. + void set_min_height(int height); + void set_min_width(int width); + // Filters text partitions that are ridiculously tall to prevent + // merging rows. Note that "filters" refers to allowing horizontal + // cells to slice through them on the premise that they were + // merged text rows during previous layout.
+ void set_max_text_height(int height); + + // Given a guess location, the RecognizeTable function will try to find a + // structured grid in the area. On success, it will return a new + // StructuredTable (and assumes you will delete it). Otherwise, + // nullptr is returned. + // + // Keep in mind, this may "overgrow" or "undergrow" the size of guess. + // Ideally, there is either a one-to-one correspondence between + // the guess and table or no table at all. This is not the best of + // assumptions right now, but was made to try to keep things simple in + // the first pass. + // + // If a line structure is available on the page in the given region, + // the table will use the linear structure as it is. + // Otherwise, it will try to maximize the whitespace around it while keeping + // a grid structure. This is somewhat working. + // + // Since the combination of adjustments can get high, effort was + // originally made to keep the number of adjustments linear in the number + // of partitions. The underlying structure finding code used to be + // much more complex. I don't know how necessary this constraint is anymore. + // The evaluation of a possible table is kept within O(nlogn) in the size of + // the table (where size is the number of partitions in the table). + // As a result, the algorithm is capable of O(n^2 log n). Depending + // on the grid search size, it may be higher. + // + // Last note: it is possible to just try all partition boundaries at a high + // level O(n^4) and do a verification scheme (at least O(nlogn)). If there + // are 200 partitions on a page, this could be too costly. Effort could go + // into pruning the search, but I opted for something quicker. I'm confident + // that the independent adjustments can get similar results and keep the + // complexity down. However, the other approach could work without using + // TableFinder at all if it is fast enough. It comes down to properly + // deciding what is a table.
The code currently relies on TableFinder's + // guess to the location of a table for that. + StructuredTable* RecognizeTable(const TBOX& guess_box); + + protected: + //////// + //////// Lined tables + //////// + + // Returns true if the given box has a lined table within it. The + // table argument will be updated with the table if the table exists. + bool RecognizeLinedTable(const TBOX& guess_box, StructuredTable* table); + // Returns true if the given box has a large number of horizontal and + // vertical lines present. If so, we assume the extent of these lines + // uniquely defines a table and find that table via SolveLinedTable. + bool HasSignificantLines(const TBOX& guess); + + // Given enough horizontal and vertical lines in a region, find a bounding + // box that encloses all of them (as well as newly introduced lines). + // The bounding box is the smallest box that encloses the lines in guess + // without having any lines sticking out of it. + // bounding_box is an in/out parameter. + // On input, it is the extents of the box to search. + // On output, it is the resulting bounding box. + bool FindLinesBoundingBox(TBOX* bounding_box); + // Iteration in above search. + // bounding_box is an in/out parameter. + // On input, it is the extents of the box to search. + // On output, it is the resulting bounding box. + bool FindLinesBoundingBoxIteration(TBOX* bounding_box); + + //////// + //////// Generic "whitespaced" tables + //////// + + // Returns true if the given box has a whitespaced table within it. The + // table argument will be updated if the table exists. Also note + // that this method will fail if the guess_box center is not + // mostly within the table. + bool RecognizeWhitespacedTable(const TBOX& guess_box, StructuredTable* table); + + // Finds the location of a horizontal split relative to y. + // This function is mostly unused now. If the SolveWhitespacedTable + // changes much, it can be removed.
Note, it isn't really as reliable + // as I thought. I went with alternatives for most of the other uses. + int NextHorizontalSplit(int left, int right, int y, bool top_to_bottom); + + // Indicates that a table row is weak. This means that it has + // many missing data cells or very large cell heights compared + // to the rest of the table. + static bool IsWeakTableRow(StructuredTable* table, int row); + + // Input data, used as read only data to make decisions. + ColPartitionGrid* text_grid_; // Text ColPartitions + ColPartitionGrid* line_grid_; // Line ColPartitions + // Table constraints, a "good" table must satisfy these. + int min_height_; + int min_width_; + // Filters, used to prevent awkward partitions from destroying structure. + int max_text_height_; // Horizontal lines may intersect taller text. +}; + +} // namespace tesseract + +#endif /* TABLERECOG_H_ */ diff --git a/tesseract/src/textord/tabvector.cpp b/tesseract/src/textord/tabvector.cpp new file mode 100644 index 00000000..95e75a38 --- /dev/null +++ b/tesseract/src/textord/tabvector.cpp @@ -0,0 +1,982 @@ +/////////////////////////////////////////////////////////////////////// +// File: tabvector.cpp +// Description: Class to hold a near-vertical vector representing a tab-stop. +// Author: Ray Smith +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+// +/////////////////////////////////////////////////////////////////////// + +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "tabvector.h" +#include "blobbox.h" +#include "colfind.h" +#include "colpartitionset.h" +#include "detlinefit.h" +#include "statistc.h" + +#include <algorithm> + +namespace tesseract { + +// Multiple of the mean box height used as the maximum gutter for the +// evaluation search. +const int kGutterMultiple = 4; +// Multiple of neighbour gap that we expect the gutter gap to be at minimum. +// Also the multiple of the mean box height used as the maximum gutter for +// ragged tabs. +const int kGutterToNeighbourRatio = 3; +// Pixel distance for tab vectors to be considered the same. +const int kSimilarVectorDist = 10; +// Pixel distance for ragged tab vectors to be considered the same if there +// is nothing in the overlap box +const int kSimilarRaggedDist = 50; +// Max multiple of height to allow filling in between blobs when evaluating. +const int kMaxFillinMultiple = 11; +// Min fraction of mean gutter size to allow a gutter on a good tab blob. +const double kMinGutterFraction = 0.5; +// Multiple of 1/n lines as a minimum gutter in evaluation. +const double kLineCountReciprocal = 4.0; +// Constant add-on for minimum gutter for aligned tabs. +const double kMinAlignedGutter = 0.25; +// Constant add-on for minimum gutter for ragged tabs. +const double kMinRaggedGutter = 1.5; + +double_VAR(textord_tabvector_vertical_gap_fraction, 0.5, + "max fraction of mean blob width allowed for vertical gaps in vertical text"); + +double_VAR(textord_tabvector_vertical_box_ratio, 0.5, + "Fraction of box matches required to declare a line vertical"); + +ELISTIZE(TabConstraint) + +// Create a constraint for the top or bottom of this TabVector.
+void TabConstraint::CreateConstraint(TabVector* vector, bool is_top) { + auto* constraint = new TabConstraint(vector, is_top); + auto* constraints = new TabConstraint_LIST; + TabConstraint_IT it(constraints); + it.add_to_end(constraint); /* A fresh one-element list is installed on the vector below. */ + if (is_top) + vector->set_top_constraints(constraints); + else + vector->set_bottom_constraints(constraints); +} + +// Test to see if the constraints are compatible enough to merge. +// Identical lists are reported as incompatible; otherwise the lists are +// compatible when the intersection of all their ranges is non-empty. +bool TabConstraint::CompatibleConstraints(TabConstraint_LIST* list1, + TabConstraint_LIST* list2) { + if (list1 == list2) + return false; + int y_min = -INT32_MAX; + int y_max = INT32_MAX; + if (textord_debug_tabfind > 3) + tprintf("Testing constraint compatibility\n"); + GetConstraints(list1, &y_min, &y_max); + GetConstraints(list2, &y_min, &y_max); + if (textord_debug_tabfind > 3) + tprintf("Resulting range = [%d,%d]\n", y_min, y_max); + return y_max >= y_min; +} + +// Merge the lists of constraints and update the TabVector pointers. +// The second list is deleted. Does nothing if both lists are the same list. +void TabConstraint::MergeConstraints(TabConstraint_LIST* list1, + TabConstraint_LIST* list2) { + if (list1 == list2) + return; + TabConstraint_IT it(list2); + if (textord_debug_tabfind > 3) + tprintf("Merging constraints\n"); + // The vectors of all constraints on list2 are now going to be on list1. + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + TabConstraint* constraint = it.data(); + if (textord_debug_tabfind> 3) + constraint->vector_->Print("Merge"); + if (constraint->is_top_) + constraint->vector_->set_top_constraints(list1); + else + constraint->vector_->set_bottom_constraints(list1); + } + it = list1; + it.add_list_before(list2); + delete list2; +} + +// Set all the tops and bottoms as appropriate to the midpoint of the +// constrained range. Delete all the constraints and list.
+void TabConstraint::ApplyConstraints(TabConstraint_LIST* constraints) { + int y_min = -INT32_MAX; + int y_max = INT32_MAX; + GetConstraints(constraints, &y_min, &y_max); + int y = (y_min + y_max) / 2; /* Midpoint of the intersected range. */ + TabConstraint_IT it(constraints); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + TabConstraint* constraint = it.data(); + TabVector* v = constraint->vector_; + if (constraint->is_top_) { + v->SetYEnd(y); + v->set_top_constraints(nullptr); + } else { + v->SetYStart(y); + v->set_bottom_constraints(nullptr); + } + } + delete constraints; +} + +/* The allowed range is [endpt y, extended_ymax] for a top constraint and [extended_ymin, startpt y] for a bottom constraint. */ +TabConstraint::TabConstraint(TabVector* vector, bool is_top) + : vector_(vector), is_top_(is_top) { + if (is_top) { + y_min_ = vector->endpt().y(); + y_max_ = vector->extended_ymax(); + } else { + y_max_ = vector->startpt().y(); + y_min_ = vector->extended_ymin(); + } +} + +// Get the max of the mins and the min of the maxes. +// *y_min and *y_max are in/out and are only ever tightened. +void TabConstraint::GetConstraints(TabConstraint_LIST* constraints, + int* y_min, int* y_max) { + TabConstraint_IT it(constraints); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + TabConstraint* constraint = it.data(); + if (textord_debug_tabfind > 3) { + tprintf("Constraint is [%d,%d]", constraint->y_min_, constraint->y_max_); + constraint->vector_->Print(" for"); + } + *y_min = std::max(*y_min, constraint->y_min_); + *y_max = std::min(*y_max, constraint->y_max_); + } +} + +ELIST2IZE(TabVector) +CLISTIZE(TabVector) + +// The constructor is private. See the bottom of the file... + + +// Public factory to build a TabVector from a list of boxes. +// The TabVector will be of the given alignment type. +// The input vertical vector is used in fitting, and the output +// vertical_x, vertical_y have the resulting line vector, weighted by its +// box count, added to them if the alignment is not ragged. +// The extended_start_y and extended_end_y are the maximum possible +// extension to the line segment that can be used to align with others.
+// The input CLIST of BLOBNBOX good_points is consumed and taken over. +// Returns nullptr (after deleting the new vector) if the line fit fails. +TabVector* TabVector::FitVector(TabAlignment alignment, ICOORD vertical, + int extended_start_y, int extended_end_y, + BLOBNBOX_CLIST* good_points, + int* vertical_x, int* vertical_y) { + auto* vector = new TabVector(extended_start_y, extended_end_y, + alignment, good_points); + if (!vector->Fit(vertical, false)) { + delete vector; + return nullptr; + } + if (!vector->IsRagged()) { + vertical = vector->endpt_ - vector->startpt_; + int weight = vector->BoxCount(); + *vertical_x += vertical.x() * weight; + *vertical_y += vertical.y() * weight; + } + return vector; +} + +// Build a ragged TabVector by copying another's direction, shifting it +// to match the given blob, and making its initial extent the height +// of the blob, but its extended bounds from the bounds of the original. +TabVector::TabVector(const TabVector& src, TabAlignment alignment, + const ICOORD& vertical_skew, BLOBNBOX* blob) + : extended_ymin_(src.extended_ymin_), extended_ymax_(src.extended_ymax_), + needs_refit_(true), needs_evaluation_(true), + alignment_(alignment) { + BLOBNBOX_C_IT it(&boxes_); + it.add_to_end(blob); + TBOX box = blob->bounding_box(); + if (IsLeftTab()) { + startpt_ = box.botleft(); + endpt_ = box.topleft(); + } else { + startpt_ = box.botright(); + endpt_ = box.topright(); + } + sort_key_ = SortKey(vertical_skew, + (startpt_.x() + endpt_.x()) / 2, + (startpt_.y() + endpt_.y()) / 2); + if (textord_debug_tabfind > 3) + Print("Constructed a new tab vector:"); +} + +// Copies basic attributes of a tab vector for simple operations. +// Copies things such as startpt, endpt, range. +// Does not copy things such as partners, boxes, or constraints. +// This is useful if you only need vector information for processing, such +// as in the table detection code.
+TabVector* TabVector::ShallowCopy() const { + auto* copy = new TabVector(); + copy->startpt_ = startpt_; + copy->endpt_ = endpt_; + copy->alignment_ = alignment_; + copy->extended_ymax_ = extended_ymax_; + copy->extended_ymin_ = extended_ymin_; + copy->intersects_other_lines_ = intersects_other_lines_; + return copy; +} + +// Extend this vector to include the supplied blob if it doesn't +// already have it. The blob is inserted before the first box whose top is +// at or above its own top, or after the last box if there is none, and +// needs_refit_ is set whenever a blob is added. +void TabVector::ExtendToBox(BLOBNBOX* new_blob) { + TBOX new_box = new_blob->bounding_box(); + BLOBNBOX_C_IT it(&boxes_); + if (!it.empty()) { + BLOBNBOX* blob = it.data(); + TBOX box = blob->bounding_box(); + while (!it.at_last() && box.top() <= new_box.top()) { + if (blob == new_blob) + return; // We have it already. + it.forward(); + blob = it.data(); + box = blob->bounding_box(); + } + // The loop stops at the last box without testing it, so test it here; + // otherwise a blob that is already last in the list gets added twice. + if (blob == new_blob) + return; // We have it already. + if (box.top() >= new_box.top()) { + it.add_before_stay_put(new_blob); + needs_refit_ = true; + return; + } + } + needs_refit_ = true; + it.add_after_stay_put(new_blob); +} + +// Set the ycoord of the start and move the xcoord to match. +void TabVector::SetYStart(int start_y) { + startpt_.set_x(XAtY(start_y)); + startpt_.set_y(start_y); +} +// Set the ycoord of the end and move the xcoord to match. +void TabVector::SetYEnd(int end_y) { + endpt_.set_x(XAtY(end_y)); + endpt_.set_y(end_y); +} + +// Rotate the ends by the given vector. Auto flip start and end if needed. +void TabVector::Rotate(const FCOORD& rotation) { + startpt_.rotate(rotation); + endpt_.rotate(rotation); + int dx = endpt_.x() - startpt_.x(); + int dy = endpt_.y() - startpt_.y(); + if ((dy < 0 && abs(dy) > abs(dx)) || (dx < 0 && abs(dx) > abs(dy))) { + // Need to flip start/end. + ICOORD tmp = startpt_; + startpt_ = endpt_; + endpt_ = tmp; + } +} + +// Setup the initial constraints, being the limits of +// the vector and the extended ends.
+void TabVector::SetupConstraints() { + TabConstraint::CreateConstraint(this, false); /* bottom */ + TabConstraint::CreateConstraint(this, true); /* top */ +} + +// Setup the constraints between the partners of this TabVector. +void TabVector::SetupPartnerConstraints() { + // With the first and last partner, we want a common bottom and top, + // respectively, and for each change of partner, we want a common + // top of first with bottom of next. + TabVector_C_IT it(&partners_); + TabVector* prev_partner = nullptr; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + TabVector* partner = it.data(); + // A partner missing either constraint list is reported and skipped. + if (partner->top_constraints_ == nullptr || + partner->bottom_constraints_ == nullptr) { + partner->Print("Impossible: has no constraints"); + Print("This vector has it as a partner"); + continue; + } + if (prev_partner == nullptr) { + // This is the first partner, so common bottom. + if (TabConstraint::CompatibleConstraints(bottom_constraints_, + partner->bottom_constraints_)) + TabConstraint::MergeConstraints(bottom_constraints_, + partner->bottom_constraints_); + } else { + // We need prev top to be common with partner bottom. + if (TabConstraint::CompatibleConstraints(prev_partner->top_constraints_, + partner->bottom_constraints_)) + TabConstraint::MergeConstraints(prev_partner->top_constraints_, + partner->bottom_constraints_); + } + prev_partner = partner; + if (it.at_last()) { + // This is the last partner, so common top. + if (TabConstraint::CompatibleConstraints(top_constraints_, + partner->top_constraints_)) + TabConstraint::MergeConstraints(top_constraints_, + partner->top_constraints_); + } + } +} + +// Setup the constraints between this and its partner. +// Bottoms are merged with bottoms and tops with tops, where compatible.
+void TabVector::SetupPartnerConstraints(TabVector* partner) { + if (TabConstraint::CompatibleConstraints(bottom_constraints_, + partner->bottom_constraints_)) + TabConstraint::MergeConstraints(bottom_constraints_, + partner->bottom_constraints_); + if (TabConstraint::CompatibleConstraints(top_constraints_, + partner->top_constraints_)) + TabConstraint::MergeConstraints(top_constraints_, + partner->top_constraints_); +} + +// Use the constraints to modify the top and bottom. +// Only non-null constraint lists are applied. +void TabVector::ApplyConstraints() { + if (top_constraints_ != nullptr) + TabConstraint::ApplyConstraints(top_constraints_); + if (bottom_constraints_ != nullptr) + TabConstraint::ApplyConstraints(bottom_constraints_); +} + +// Merge close tab vectors of the same side that overlap. +// Each vector is compared with the vectors that follow it in the list; on +// the first match it is extracted from the list and merged into the later one. +void TabVector::MergeSimilarTabVectors(const ICOORD& vertical, + TabVector_LIST* vectors, + BlobGrid* grid) { + TabVector_IT it1(vectors); + for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) { + TabVector* v1 = it1.data(); + TabVector_IT it2(it1); + for (it2.forward(); !it2.at_first(); it2.forward()) { + TabVector* v2 = it2.data(); + if (v2->SimilarTo(vertical, *v1, grid)) { + // Merge into the forward one, in case the combined vector now + // overlaps one in between. + if (textord_debug_tabfind) { + v2->Print("Merging"); + v1->Print("by deleting"); + } + v2->MergeWith(vertical, it1.extract()); + if (textord_debug_tabfind) { + v2->Print("Producing"); + } + ICOORD merged_vector = v2->endpt(); + merged_vector -= v2->startpt(); + if (textord_debug_tabfind && abs(merged_vector.x()) > 100) { + v2->Print("Garbage result of merge?"); + } + break; + } + } + } +} + +// Return true if this vector is the same side, overlaps, and close +// enough to the other to be merged.
+bool TabVector::SimilarTo(const ICOORD& vertical, + const TabVector& other, BlobGrid* grid) const { + if ((IsRightTab() && other.IsRightTab()) || + (IsLeftTab() && other.IsLeftTab())) { + // If they don't overlap, at least in extensions, then there is no chance. + if (ExtendedOverlap(other.extended_ymax_, other.extended_ymin_) < 0) + return false; + // A fast approximation to the scale factor of the sort_key_. + int v_scale = abs(vertical.y()); + if (v_scale == 0) + v_scale = 1; + // If they are close enough, then OK. + if (sort_key_ + kSimilarVectorDist * v_scale >= other.sort_key_ && + sort_key_ - kSimilarVectorDist * v_scale <= other.sort_key_) + return true; + // Ragged tabs get a bigger threshold. + if (!IsRagged() || !other.IsRagged() || + sort_key_ + kSimilarRaggedDist * v_scale < other.sort_key_ || + sort_key_ - kSimilarRaggedDist * v_scale > other.sort_key_) + return false; + if (grid == nullptr) { + // There is nothing else to test! + return true; + } + // If there is nothing in the rectangle between the vector that is going to + // move, and the place it is moving to, then they can be merged. + // Setup a vertical search for any blob. + const TabVector* mover = (IsRightTab() && + sort_key_ < other.sort_key_) ? this : &other; + int top_y = mover->endpt_.y(); + int bottom_y = mover->startpt_.y(); + int left = std::min(mover->XAtY(top_y), mover->XAtY(bottom_y)); + int right = std::max(mover->XAtY(top_y), mover->XAtY(bottom_y)); + int shift = abs(sort_key_ - other.sort_key_) / v_scale; + if (IsRightTab()) { + right += shift; + } else { + left -= shift; + } + + GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(grid); + vsearch.StartVerticalSearch(left, right, top_y); + BLOBNBOX* blob; + while ((blob = vsearch.NextVerticalSearch(true)) != nullptr) { + const TBOX& box = blob->bounding_box(); + // NOTE(review): if top_y >= bottom_y, as the names suggest, the first + // test below is true for any box reaching above bottom_y, which ends + // the search on the first such blob; confirm the comparison directions. + if (box.top() > bottom_y) + return true; // Nothing found. + if (box.bottom() < top_y) + continue; // Doesn't overlap.
+ int left_at_box = XAtY(box.bottom()); + int right_at_box = left_at_box; + if (IsRightTab()) + right_at_box += shift; + else + left_at_box -= shift; + if (std::min(right_at_box, static_cast<int>(box.right())) > std::max(left_at_box, static_cast<int>(box.left()))) + return false; /* The blob overlaps the shifted horizontal span. */ + } + return true; // Nothing found. + } + return false; +} + +// Eat the other TabVector into this and delete it. +// The extended range becomes the union of both, a ragged other passes on its +// alignment, the boxes are merged in order of bottom edge and the line refit. +// NOTE(review): it1.data() is read unconditionally, so this appears to assume +// that boxes_ is not empty; confirm against callers. +void TabVector::MergeWith(const ICOORD& vertical, TabVector* other) { + extended_ymin_ = std::min(extended_ymin_, other->extended_ymin_); + extended_ymax_ = std::max(extended_ymax_, other->extended_ymax_); + if (other->IsRagged()) { + alignment_ = other->alignment_; + } + // Merge sort the two lists of boxes. + BLOBNBOX_C_IT it1(&boxes_); + BLOBNBOX_C_IT it2(&other->boxes_); + while (!it2.empty()) { + BLOBNBOX* bbox2 = it2.extract(); + it2.forward(); + TBOX box2 = bbox2->bounding_box(); + BLOBNBOX* bbox1 = it1.data(); + TBOX box1 = bbox1->bounding_box(); + while (box1.bottom() < box2.bottom() && !it1.at_last()) { + it1.forward(); + bbox1 = it1.data(); + box1 = bbox1->bounding_box(); + } + if (box1.bottom() < box2.bottom()) { + it1.add_to_end(bbox2); + } else if (bbox1 != bbox2) { + it1.add_before_stay_put(bbox2); + } + } + Fit(vertical, true); + other->Delete(this); +} + +// Add a new element to the list of partner TabVectors. +// Partners must be added in order of increasing y coordinate of the text line +// that makes them partners. +// Groups of identical partners are merged into one. +void TabVector::AddPartner(TabVector* partner) { + if (IsSeparator() || partner->IsSeparator()) + return; + TabVector_C_IT it(&partners_); + if (!it.empty()) { + it.move_to_last(); + if (it.data() == partner) + return; + } + it.add_after_then_move(partner); +} + +// Return true if other is a partner of this.
+bool TabVector::IsAPartner(const TabVector* other) { + TabVector_C_IT it(&partners_); /* Linear scan comparing pointers. */ + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + if (it.data() == other) + return true; + } + return false; +} + +// These names must be synced with the TabAlignment enum in tabvector.h. +// Print() indexes this table directly with alignment_. +static const char* const kAlignmentNames[] = { + "Left Aligned", + "Left Ragged", + "Center", + "Right Aligned", + "Right Ragged", + "Separator" +}; + +// Print basic information about this tab vector. +void TabVector::Print(const char* prefix) { + tprintf( + "%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d," + " partners=%d\n", + prefix, kAlignmentNames[alignment_], startpt_.x(), startpt_.y(), + endpt_.x(), endpt_.y(), mean_width_, percent_score_, sort_key_, + boxes_.length(), partners_.length()); +} + +// Print basic information about this tab vector and every box in it. +void TabVector::Debug(const char* prefix) { + Print(prefix); + BLOBNBOX_C_IT it(&boxes_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* bbox = it.data(); + const TBOX& box = bbox->bounding_box(); + tprintf("Box at (%d,%d)->(%d,%d)\n", + box.left(), box.bottom(), box.right(), box.top()); + } +} + +#ifndef GRAPHICS_DISABLED + +// Draw this tabvector in place in the given window.
+void TabVector::Display(ScrollView* tab_win) { + if (textord_debug_printable) + tab_win->Pen(ScrollView::BLUE); + else if (alignment_ == TA_LEFT_ALIGNED) + tab_win->Pen(ScrollView::LIME_GREEN); + else if (alignment_ == TA_LEFT_RAGGED) + tab_win->Pen(ScrollView::DARK_GREEN); + else if (alignment_ == TA_RIGHT_ALIGNED) + tab_win->Pen(ScrollView::PINK); + else if (alignment_ == TA_RIGHT_RAGGED) + tab_win->Pen(ScrollView::CORAL); + else + tab_win->Pen(ScrollView::WHITE); + tab_win->Line(startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y()); + tab_win->Pen(ScrollView::GREY); /* The extensions to extended_ymin_/extended_ymax_ are drawn in grey. */ + tab_win->Line(startpt_.x(), startpt_.y(), startpt_.x(), extended_ymin_); + tab_win->Line(endpt_.x(), extended_ymax_, endpt_.x(), endpt_.y()); + char score_buf[64]; + snprintf(score_buf, sizeof(score_buf), "%d", percent_score_); /* percent_score_ is written at the start point. */ + tab_win->TextAttributes("Times", 50, false, false, false); + tab_win->Text(startpt_.x(), startpt_.y(), score_buf); +} + +#endif + +// Refit the line and/or re-evaluate the vector if the dirty flags are set. +void TabVector::FitAndEvaluateIfNeeded(const ICOORD& vertical, + TabFind* finder) { + if (needs_refit_) + Fit(vertical, true); + if (needs_evaluation_) + Evaluate(vertical, finder); +} + +// Evaluate the vector in terms of coverage of its length by good-looking +// box edges. A good looking box is one where its nearest neighbour on the +// inside is nearer than half the distance its nearest neighbour on the +// outside of the putative column. Bad boxes are removed from the line. +// A second pass then further filters boxes by requiring that the gutter +// width be a minimum fraction of the mean gutter along the line. +void TabVector::Evaluate(const ICOORD& vertical, TabFind* finder) { + bool debug = false; + needs_evaluation_ = false; + int length = endpt_.y() - startpt_.y(); + if (length == 0 || boxes_.empty()) { + percent_score_ = 0; + Print("Zero length in evaluate"); + return; + } + // Compute the mean box height.
+ BLOBNBOX_C_IT it(&boxes_); + int mean_height = 0; + int height_count = 0; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* bbox = it.data(); + const TBOX& box = bbox->bounding_box(); + int height = box.height(); + mean_height += height; + ++height_count; + } + if (height_count > 0) mean_height /= height_count; + int max_gutter = kGutterMultiple * mean_height; + if (IsRagged()) { + // Ragged edges face a tougher test in that the gap must always be within + // the height of the blob. + max_gutter = kGutterToNeighbourRatio * mean_height; + } + + STATS gutters(0, max_gutter + 1); + // Evaluate the boxes for their goodness, calculating the coverage as we go. + // Remove boxes that are not good and shorten the list to the first and + // last good boxes. + int num_deleted_boxes = 0; + bool text_on_image = false; + int good_length = 0; + const TBOX* prev_good_box = nullptr; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* bbox = it.data(); + const TBOX& box = bbox->bounding_box(); + int mid_y = (box.top() + box.bottom()) / 2; + if (TabFind::WithinTestRegion(2, XAtY(box.bottom()), box.bottom())) { + if (!debug) { + tprintf("After already deleting %d boxes, ", num_deleted_boxes); + Print("Starting evaluation"); + } + debug = true; + } + // A good box is one where the nearest neighbour on the inside is closer + // than half the distance to the nearest neighbour on the outside + // (of the putative column). + bool left = IsLeftTab(); + int tab_x = XAtY(mid_y); + int gutter_width; + int neighbour_gap; + finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left, + bbox, &gutter_width, &neighbour_gap); + if (debug) { + tprintf("Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n", + box.left(), box.bottom(), box.right(), box.top(), + gutter_width, neighbour_gap); + } + // Now we can make the test. + if (neighbour_gap * kGutterToNeighbourRatio <= gutter_width) { + // A good box contributes its height to the good_length. 
+ good_length += box.top() - box.bottom(); + gutters.add(gutter_width, 1); + // Two good boxes together contribute the gap between them + // to the good_length as well, as long as the gap is not + // too big. + if (prev_good_box != nullptr) { + int vertical_gap = box.bottom() - prev_good_box->top(); + double size1 = sqrt(static_cast<double>(prev_good_box->area())); + double size2 = sqrt(static_cast<double>(box.area())); + if (vertical_gap < kMaxFillinMultiple * std::min(size1, size2)) + good_length += vertical_gap; + if (debug) { + tprintf("Box and prev good, gap=%d, target %g, goodlength=%d\n", + vertical_gap, kMaxFillinMultiple * std::min(size1, size2), + good_length); + } + } else { + // Adjust the start to the first good box. + SetYStart(box.bottom()); + } + prev_good_box = &box; + if (bbox->flow() == BTFT_TEXT_ON_IMAGE) + text_on_image = true; + } else { + // Get rid of boxes that are not good. + if (debug) { + tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n", + box.left(), box.bottom(), box.right(), box.top(), + gutter_width, neighbour_gap); + } + it.extract(); + ++num_deleted_boxes; + } + } + if (debug) { + Print("Evaluating:"); + } + // If there are any good boxes, do it again, except this time get rid of + // boxes that have a gutter that is a small fraction of the mean gutter. + // This filters out ends that run into a coincidental gap in the text. + int search_top = endpt_.y(); + int search_bottom = startpt_.y(); + int median_gutter = IntCastRounded(gutters.median()); + if (gutters.get_total() > 0) { + prev_good_box = nullptr; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* bbox = it.data(); + const TBOX& box = bbox->bounding_box(); + int mid_y = (box.top() + box.bottom()) / 2; + // A good box is one where the gutter width is at least some constant + // fraction of the mean gutter width. 
+ bool left = IsLeftTab(); + int tab_x = XAtY(mid_y); + int max_gutter = kGutterMultiple * mean_height; + if (IsRagged()) { + // Ragged edges face a tougher test in that the gap must always be + // within the height of the blob. + max_gutter = kGutterToNeighbourRatio * mean_height; + } + int gutter_width; + int neighbour_gap; + finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left, + bbox, &gutter_width, &neighbour_gap); + // Now we can make the test. + if (gutter_width >= median_gutter * kMinGutterFraction) { + if (prev_good_box == nullptr) { + // Adjust the start to the first good box. + SetYStart(box.bottom()); + search_bottom = box.top(); + } + prev_good_box = &box; + search_top = box.bottom(); + } else { + // Get rid of boxes that are not good. + if (debug) { + tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n", + box.left(), box.bottom(), box.right(), box.top(), + gutter_width, median_gutter); + } + it.extract(); + ++num_deleted_boxes; + } + } + } + // If there has been a good box, adjust the end. + if (prev_good_box != nullptr) { + SetYEnd(prev_good_box->top()); + // Compute the percentage of the vector that is occupied by good boxes. + int length = endpt_.y() - startpt_.y(); + percent_score_ = 100 * good_length / length; + if (num_deleted_boxes > 0) { + needs_refit_ = true; + FitAndEvaluateIfNeeded(vertical, finder); + if (boxes_.empty()) + return; + } + // Test the gutter over the whole vector, instead of just at the boxes. + int required_shift; + if (search_bottom > search_top) { + search_bottom = startpt_.y(); + search_top = endpt_.y(); + } + double min_gutter_width = kLineCountReciprocal / boxes_.length(); + min_gutter_width += IsRagged() ? 
kMinRaggedGutter : kMinAlignedGutter; + min_gutter_width *= mean_height; + int max_gutter_width = IntCastRounded(min_gutter_width) + 1; + if (median_gutter > max_gutter_width) + max_gutter_width = median_gutter; + int gutter_width = finder->GutterWidth(search_bottom, search_top, *this, + text_on_image, max_gutter_width, + &required_shift); + if (gutter_width < min_gutter_width) { + if (debug) { + tprintf("Rejecting bad tab Vector with %d gutter vs %g min\n", + gutter_width, min_gutter_width); + } + boxes_.shallow_clear(); + percent_score_ = 0; + } else if (debug) { + tprintf("Final gutter %d, vs limit of %g, required shift = %d\n", + gutter_width, min_gutter_width, required_shift); + } + } else { + // There are no good boxes left, so score is 0. + percent_score_ = 0; + } + + if (debug) { + Print("Evaluation complete:"); + } +} + +// (Re)Fit a line to the stored points. Returns false if the line +// is degenerate. Althougth the TabVector code mostly doesn't care about the +// direction of lines, XAtY would give silly results for a horizontal line. +// The class is mostly aimed at use for vertical lines representing +// horizontal tab stops. +bool TabVector::Fit(ICOORD vertical, bool force_parallel) { + needs_refit_ = false; + if (boxes_.empty()) { + // Don't refit something with no boxes, as that only happens + // in Evaluate, and we don't want to end up with a zero vector. + if (!force_parallel) + return false; + // If we are forcing parallel, then we just need to set the sort_key_. + ICOORD midpt = startpt_; + midpt += endpt_; + midpt /= 2; + sort_key_ = SortKey(vertical, midpt.x(), midpt.y()); + return startpt_.y() != endpt_.y(); + } + if (!force_parallel && !IsRagged()) { + // Use a fitted line as the vertical. + DetLineFit linepoints; + BLOBNBOX_C_IT it(&boxes_); + // Fit a line to all the boxes in the list. 
+ for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* bbox = it.data(); + const TBOX& box = bbox->bounding_box(); + int x1 = IsRightTab() ? box.right() : box.left(); + ICOORD boxpt(x1, box.bottom()); + linepoints.Add(boxpt); + if (it.at_last()) { + ICOORD top_pt(x1, box.top()); + linepoints.Add(top_pt); + } + } + linepoints.Fit(&startpt_, &endpt_); + if (startpt_.y() != endpt_.y()) { + vertical = endpt_; + vertical -= startpt_; + } + } + int start_y = startpt_.y(); + int end_y = endpt_.y(); + sort_key_ = IsLeftTab() ? INT32_MAX : -INT32_MAX; + BLOBNBOX_C_IT it(&boxes_); + // Choose a line parallel to the vertical such that all boxes are on the + // correct side of it. + mean_width_ = 0; + int width_count = 0; + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* bbox = it.data(); + const TBOX& box = bbox->bounding_box(); + mean_width_ += box.width(); + ++width_count; + int x1 = IsRightTab() ? box.right() : box.left(); + // Test both the bottom and the top, as one will be more extreme, depending + // on the direction of skew. + int bottom_y = box.bottom(); + int top_y = box.top(); + int key = SortKey(vertical, x1, bottom_y); + if (IsLeftTab() == (key < sort_key_)) { + sort_key_ = key; + startpt_ = ICOORD(x1, bottom_y); + } + key = SortKey(vertical, x1, top_y); + if (IsLeftTab() == (key < sort_key_)) { + sort_key_ = key; + startpt_ = ICOORD(x1, top_y); + } + if (it.at_first()) + start_y = bottom_y; + if (it.at_last()) + end_y = top_y; + } + if (width_count > 0) { + mean_width_ = (mean_width_ + width_count - 1) / width_count; + } + endpt_ = startpt_ + vertical; + needs_evaluation_ = true; + if (start_y != end_y) { + // Set the ends of the vector to fully include the first and last blobs. 
+ startpt_.set_x(XAtY(vertical, sort_key_, start_y)); + startpt_.set_y(start_y); + endpt_.set_x(XAtY(vertical, sort_key_, end_y)); + endpt_.set_y(end_y); + return true; + } + return false; +} + +// Returns the singleton partner if there is one, or nullptr otherwise. +TabVector* TabVector::GetSinglePartner() { + if (!partners_.singleton()) + return nullptr; + TabVector_C_IT partner_it(&partners_); + TabVector* partner = partner_it.data(); + return partner; +} + +// Return the partner of this TabVector if the vector qualifies as +// being a vertical text line, otherwise nullptr. +TabVector* TabVector::VerticalTextlinePartner() { + if (!partners_.singleton()) + return nullptr; + TabVector_C_IT partner_it(&partners_); + TabVector* partner = partner_it.data(); + BLOBNBOX_C_IT box_it1(&boxes_); + BLOBNBOX_C_IT box_it2(&partner->boxes_); + // Count how many boxes are also in the other list. + // At the same time, gather the mean width and median vertical gap. + if (textord_debug_tabfind > 1) { + Print("Testing for vertical text"); + partner->Print(" partner"); + } + int num_matched = 0; + int num_unmatched = 0; + int total_widths = 0; + int width = startpt().x() - partner->startpt().x(); + if (width < 0) + width = -width; + STATS gaps(0, width * 2); + BLOBNBOX* prev_bbox = nullptr; + box_it2.mark_cycle_pt(); + for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) { + BLOBNBOX* bbox = box_it1.data(); + TBOX box = bbox->bounding_box(); + if (prev_bbox != nullptr) { + gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1); + } + while (!box_it2.cycled_list() && box_it2.data() != bbox && + box_it2.data()->bounding_box().bottom() < box.bottom()) { + box_it2.forward(); + } + if (!box_it2.cycled_list() && box_it2.data() == bbox && + bbox->region_type() >= BRT_UNKNOWN && + (prev_bbox == nullptr || prev_bbox->region_type() >= BRT_UNKNOWN)) + ++num_matched; + else + ++num_unmatched; + total_widths += box.width(); + prev_bbox = bbox; + } + if (num_unmatched + 
num_matched == 0) return nullptr; + double avg_width = total_widths * 1.0 / (num_unmatched + num_matched); + double max_gap = textord_tabvector_vertical_gap_fraction * avg_width; + int min_box_match = static_cast<int>((num_matched + num_unmatched) * + textord_tabvector_vertical_box_ratio); + bool is_vertical = (gaps.get_total() > 0 && + num_matched >= min_box_match && + gaps.median() <= max_gap); + if (textord_debug_tabfind > 1) { + tprintf("gaps=%d, matched=%d, unmatched=%d, min_match=%d " + "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n", + gaps.get_total(), num_matched, num_unmatched, min_box_match, + gaps.median(), avg_width, max_gap, is_vertical?"Yes":"No"); + } + return (is_vertical) ? partner : nullptr; +} + +// The constructor is private. +TabVector::TabVector(int extended_ymin, int extended_ymax, + TabAlignment alignment, BLOBNBOX_CLIST* boxes) + : extended_ymin_(extended_ymin), extended_ymax_(extended_ymax), + sort_key_(0), percent_score_(0), mean_width_(0), + needs_refit_(true), needs_evaluation_(true), alignment_(alignment), + top_constraints_(nullptr), bottom_constraints_(nullptr) { + BLOBNBOX_C_IT it(&boxes_); + it.add_list_after(boxes); +} + +// Delete this, but first, repoint all the partners to point to +// replacement. If replacement is nullptr, then partner relationships +// are removed. +void TabVector::Delete(TabVector* replacement) { + TabVector_C_IT it(&partners_); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + TabVector* partner = it.data(); + TabVector_C_IT p_it(&partner->partners_); + // If partner already has replacement in its list, then make + // replacement null, and just remove this TabVector when we find it. 
+ TabVector* partner_replacement = replacement; + for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) { + TabVector* p_partner = p_it.data(); + if (p_partner == partner_replacement) { + partner_replacement = nullptr; + break; + } + } + // Remove all references to this, and replace with replacement if not nullptr. + for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) { + TabVector* p_partner = p_it.data(); + if (p_partner == this) { + p_it.extract(); + if (partner_replacement != nullptr) + p_it.add_before_stay_put(partner_replacement); + } + } + if (partner_replacement != nullptr) { + partner_replacement->AddPartner(partner); + } + } + delete this; +} + + +} // namespace tesseract. diff --git a/tesseract/src/textord/tabvector.h b/tesseract/src/textord/tabvector.h new file mode 100644 index 00000000..ce7464b8 --- /dev/null +++ b/tesseract/src/textord/tabvector.h @@ -0,0 +1,429 @@ +/////////////////////////////////////////////////////////////////////// +// File: tabvector.h +// Description: Class to hold a near-vertical vector representing a tab-stop. +// Author: Ray Smith +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_TEXTORD_TABVECTOR_H_ +#define TESSERACT_TEXTORD_TABVECTOR_H_ + +#include "blobgrid.h" +#include "clst.h" +#include "elst.h" +#include "elst2.h" +#include "rect.h" +#include "bbgrid.h" + +#include <algorithm> + +class BLOBNBOX; +class ScrollView; + +namespace tesseract { + + +extern double_VAR_H(textord_tabvector_vertical_gap_fraction, 0.5, + "Max fraction of mean blob width allowed for vertical gaps in vertical text"); +extern double_VAR_H(textord_tabvector_vertical_box_ratio, 0.5, + "Fraction of box matches required to declare a line vertical"); + +// The alignment type that a tab vector represents. +// Keep this enum synced with kAlignmentNames in tabvector.cpp. +enum TabAlignment { + TA_LEFT_ALIGNED, + TA_LEFT_RAGGED, + TA_CENTER_JUSTIFIED, + TA_RIGHT_ALIGNED, + TA_RIGHT_RAGGED, + TA_SEPARATOR, + TA_COUNT +}; + +// Forward declarations. The classes use their own list types, so we +// need to make the list types first. +class TabFind; +class TabVector; +class TabConstraint; + +ELIST2IZEH(TabVector) +CLISTIZEH(TabVector) +ELISTIZEH(TabConstraint) + +// TabConstraint is a totally self-contained class to maintain +// a list of [min,max] constraints, each referring to a TabVector. +// The constraints are manipulated through static methods that act +// on a list of constraints. The list itself is cooperatively owned +// by the TabVectors of the constraints on the list and managed +// by implicit reference counting via the elements of the list. +class TabConstraint : public ELIST_LINK { + public: + // This empty constructor is here only so that the class can be ELISTIZED. + // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier + // and eliminate CLASSNAME##_copier. + TabConstraint() = default; + + // Create a constraint for the top or bottom of this TabVector. 
+ static void CreateConstraint(TabVector* vector, bool is_top); + + // Test to see if the constraints are compatible enough to merge. + static bool CompatibleConstraints(TabConstraint_LIST* list1, + TabConstraint_LIST* list2); + + // Merge the lists of constraints and update the TabVector pointers. + // The second list is deleted. + static void MergeConstraints(TabConstraint_LIST* list1, + TabConstraint_LIST* list2); + + // Set all the tops and bottoms as appropriate to a mean of the + // constrained range. Delete all the constraints and list. + static void ApplyConstraints(TabConstraint_LIST* constraints); + + private: + TabConstraint(TabVector* vector, bool is_top); + + // Get the max of the mins and the min of the maxes. + static void GetConstraints(TabConstraint_LIST* constraints, + int* y_min, int* y_max); + + // The TabVector this constraint applies to. + TabVector* vector_; + // If true then we refer to the top of the vector_. + bool is_top_; + // The allowed range of this vector_. + int y_min_; + int y_max_; +}; + +// Class to hold information about a single vector +// that represents a tab stop or a rule line. +class TabVector : public ELIST2_LINK { + public: + // TODO(rays) fix this in elst.h line 1076, where it should use the + // copy constructor instead of operator=. + TabVector() = default; + ~TabVector() = default; + + // Public factory to build a TabVector from a list of boxes. + // The TabVector will be of the given alignment type. + // The input vertical vector is used in fitting, and the output + // vertical_x, vertical_y have the resulting line vector added to them + // if the alignment is not ragged. + // The extended_start_y and extended_end_y are the maximum possible + // extension to the line segment that can be used to align with others. + // The input CLIST of BLOBNBOX good_points is consumed and taken over. 
+ static TabVector* FitVector(TabAlignment alignment, ICOORD vertical, + int extended_start_y, int extended_end_y, + BLOBNBOX_CLIST* good_points, + int* vertical_x, int* vertical_y); + + // Build a ragged TabVector by copying another's direction, shifting it + // to match the given blob, and making its initial extent the height + // of the blob, but its extended bounds from the bounds of the original. + TabVector(const TabVector& src, TabAlignment alignment, + const ICOORD& vertical_skew, BLOBNBOX* blob); + + // Copies basic attributes of a tab vector for simple operations. + // Copies things such startpt, endpt, range, width. + // Does not copy things such as partners, boxes, or constraints. + // This is useful if you only need vector information for processing, such + // as in the table detection code. + TabVector* ShallowCopy() const; + + // Simple accessors. + const ICOORD& startpt() const { + return startpt_; + } + const ICOORD& endpt() const { + return endpt_; + } + int extended_ymax() const { + return extended_ymax_; + } + int extended_ymin() const { + return extended_ymin_; + } + int sort_key() const { + return sort_key_; + } + int mean_width() const { + return mean_width_; + } + void set_top_constraints(TabConstraint_LIST* constraints) { + top_constraints_ = constraints; + } + void set_bottom_constraints(TabConstraint_LIST* constraints) { + bottom_constraints_ = constraints; + } + TabVector_CLIST* partners() { + return &partners_; + } + void set_startpt(const ICOORD& start) { + startpt_ = start; + } + void set_endpt(const ICOORD& end) { + endpt_ = end; + } + bool intersects_other_lines() const { + return intersects_other_lines_; + } + void set_intersects_other_lines(bool value) { + intersects_other_lines_ = value; + } + + // Inline quasi-accessors that require some computation. + + // Compute the x coordinate at the given y coordinate. 
+ int XAtY(int y) const { + int height = endpt_.y() - startpt_.y(); + if (height != 0) + return (y - startpt_.y()) * (endpt_.x() - startpt_.x()) / height + + startpt_.x(); + else + return startpt_.x(); + } + + // Compute the vertical overlap with the other TabVector. + int VOverlap(const TabVector& other) const { + return std::min(other.endpt_.y(), endpt_.y()) - + std::max(other.startpt_.y(), startpt_.y()); + } + // Compute the vertical overlap with the given y bounds. + int VOverlap(int top_y, int bottom_y) const { + return std::min(top_y, static_cast<int>(endpt_.y())) - std::max(bottom_y, static_cast<int>(startpt_.y())); + } + // Compute the extended vertical overlap with the given y bounds. + int ExtendedOverlap(int top_y, int bottom_y) const { + return std::min(top_y, extended_ymax_) - std::max(bottom_y, extended_ymin_); + } + + // Return true if this is a left tab stop, either aligned, or ragged. + bool IsLeftTab() const { + return alignment_ == TA_LEFT_ALIGNED || alignment_ == TA_LEFT_RAGGED; + } + // Return true if this is a right tab stop, either aligned, or ragged. + bool IsRightTab() const { + return alignment_ == TA_RIGHT_ALIGNED || alignment_ == TA_RIGHT_RAGGED; + } + // Return true if this is a separator. + bool IsSeparator() const { + return alignment_ == TA_SEPARATOR; + } + // Return true if this is a center aligned tab stop. + bool IsCenterTab() const { + return alignment_ == TA_CENTER_JUSTIFIED; + } + // Return true if this is a ragged tab top, either left or right. + bool IsRagged() const { + return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED; + } + + // Return true if this vector is to the left of the other in terms + // of sort_key_. + bool IsLeftOf(const TabVector& other) const { + return sort_key_ < other.sort_key_; + } + + // Return true if the vector has no partners. + bool Partnerless() { + return partners_.empty(); + } + + // Return the number of tab boxes in this vector. 
+ int BoxCount() { + return boxes_.length(); + } + + // Lock the vector from refits by clearing the boxes_ list. + void Freeze() { + boxes_.shallow_clear(); + } + + // Flip x and y on the ends so a vector can be created from flipped input. + void XYFlip() { + int x = startpt_.y(); + startpt_.set_y(startpt_.x()); + startpt_.set_x(x); + x = endpt_.y(); + endpt_.set_y(endpt_.x()); + endpt_.set_x(x); + } + + // Reflect the tab vector in the y-axis. + void ReflectInYAxis() { + startpt_.set_x(-startpt_.x()); + endpt_.set_x(-endpt_.x()); + sort_key_ = -sort_key_; + if (alignment_ == TA_LEFT_ALIGNED) + alignment_ = TA_RIGHT_ALIGNED; + else if (alignment_ == TA_RIGHT_ALIGNED) + alignment_ = TA_LEFT_ALIGNED; + if (alignment_ == TA_LEFT_RAGGED) + alignment_ = TA_RIGHT_RAGGED; + else if (alignment_ == TA_RIGHT_RAGGED) + alignment_ = TA_LEFT_RAGGED; + } + + // Separate function to compute the sort key for a given coordinate pair. + static int SortKey(const ICOORD& vertical, int x, int y) { + ICOORD pt(x, y); + return pt * vertical; + } + + // Return the x at the given y for the given sort key. + static int XAtY(const ICOORD& vertical, int sort_key, int y) { + if (vertical.y() != 0) + return (vertical.x() * y + sort_key) / vertical.y(); + else + return sort_key; + } + + // Sort function for E2LIST::sort to sort by sort_key_. + static int SortVectorsByKey(const void* v1, const void* v2) { + const TabVector* tv1 = *static_cast<const TabVector* const*>(v1); + const TabVector* tv2 = *static_cast<const TabVector* const*>(v2); + return tv1->sort_key_ - tv2->sort_key_; + } + + // More complex members. + + // Extend this vector to include the supplied blob if it doesn't + // already have it. + void ExtendToBox(BLOBNBOX* blob); + + // Set the ycoord of the start and move the xcoord to match. + void SetYStart(int start_y); + // Set the ycoord of the end and move the xcoord to match. + void SetYEnd(int end_y); + + // Rotate the ends by the given vector. 
+ void Rotate(const FCOORD& rotation); + + // Setup the initial constraints, being the limits of + // the vector and the extended ends. + void SetupConstraints(); + + // Setup the constraints between the partners of this TabVector. + void SetupPartnerConstraints(); + + // Setup the constraints between this and its partner. + void SetupPartnerConstraints(TabVector* partner); + + // Use the constraints to modify the top and bottom. + void ApplyConstraints(); + + // Merge close tab vectors of the same side that overlap. + static void MergeSimilarTabVectors(const ICOORD& vertical, + TabVector_LIST* vectors, BlobGrid* grid); + + // Return true if this vector is the same side, overlaps, and close + // enough to the other to be merged. + bool SimilarTo(const ICOORD& vertical, + const TabVector& other, BlobGrid* grid) const; + + // Eat the other TabVector into this and delete it. + void MergeWith(const ICOORD& vertical, TabVector* other); + + // Add a new element to the list of partner TabVectors. + // Partners must be added in order of increasing y coordinate of the text line + // that makes them partners. + // Groups of identical partners are merged into one. + void AddPartner(TabVector* partner); + + // Return true if other is a partner of this. + bool IsAPartner(const TabVector* other); + + // Print basic information about this tab vector. + void Print(const char* prefix); + + // Print basic information about this tab vector and every box in it. + void Debug(const char* prefix); + + // Draw this tabvector in place in the given window. + void Display(ScrollView* tab_win); + + // Refit the line and/or re-evaluate the vector if the dirty flags are set. + void FitAndEvaluateIfNeeded(const ICOORD& vertical, TabFind* finder); + + // Evaluate the vector in terms of coverage of its length by good-looking + // box edges. 
A good looking box is one where its nearest neighbour on the + // inside is nearer than half the distance its nearest neighbour on the + // outside of the putative column. Bad boxes are removed from the line. + // A second pass then further filters boxes by requiring that the gutter + // width be a minimum fraction of the mean gutter along the line. + void Evaluate(const ICOORD& vertical, TabFind* finder); + + // (Re)Fit a line to the stored points. Returns false if the line + // is degenerate. Althougth the TabVector code mostly doesn't care about the + // direction of lines, XAtY would give silly results for a horizontal line. + // The class is mostly aimed at use for vertical lines representing + // horizontal tab stops. + bool Fit(ICOORD vertical, bool force_parallel); + + // Return the partner of this TabVector if the vector qualifies as + // being a vertical text line, otherwise nullptr. + TabVector* VerticalTextlinePartner(); + + // Return the matching tabvector if there is exactly one partner, or + // nullptr otherwise. This can be used after matching is done, eg. by + // VerticalTextlinePartner(), without checking if the line is vertical. + TabVector* GetSinglePartner(); + + private: + // Constructor is private as the static factory is the external way + // to build a TabVector. + TabVector(int extended_ymin, int extended_ymax, + TabAlignment alignment, BLOBNBOX_CLIST* boxes); + + // Delete this, but first, repoint all the partners to point to + // replacement. If replacement is nullptr, then partner relationships + // are removed. + void Delete(TabVector* replacement); + + private: + // The bottom of the tab line. + ICOORD startpt_; + // The top of the tab line. + ICOORD endpt_; + // The lowest y that the vector might extend to. + int extended_ymin_ = 0; + // The highest y that the vector might extend to. + int extended_ymax_ = 0; + // Perpendicular distance of vector from a given vertical for sorting. + int sort_key_ = 0; + // Result of Evaluate 0-100. 
Coverage of line with good boxes. + int percent_score_ = 0; + // The mean width of the blobs. Meaningful only for separator lines. + int mean_width_ = 0; + // True if the boxes_ list has been modified, so a refit is needed. + bool needs_refit_ = false; + // True if a fit has been done, so re-evaluation is needed. + bool needs_evaluation_ = false; + // True if a separator line intersects at least 2 other lines. + bool intersects_other_lines_ = false; + // The type of this TabVector. + TabAlignment alignment_ = TA_LEFT_ALIGNED; + // The list of boxes whose edges are aligned at this TabVector. + BLOBNBOX_CLIST boxes_; + // List of TabVectors that have a connection with this via a text line. + TabVector_CLIST partners_; + // Constraints used to resolve the exact location of the top and bottom + // of the tab line. + TabConstraint_LIST* top_constraints_ = nullptr; + TabConstraint_LIST* bottom_constraints_ = nullptr; +}; + +} // namespace tesseract. + +#endif // TESSERACT_TEXTORD_TABVECTOR_H_ diff --git a/tesseract/src/textord/textlineprojection.cpp b/tesseract/src/textord/textlineprojection.cpp new file mode 100644 index 00000000..e52abaa0 --- /dev/null +++ b/tesseract/src/textord/textlineprojection.cpp @@ -0,0 +1,779 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "textlineprojection.h" +#include "allheaders.h" +#include "bbgrid.h" // Base class. +#include "blobbox.h" // BlobNeighourDir. +#include "blobs.h" +#include "colpartition.h" +#include "normalis.h" + +#include <algorithm> + +// Padding factor to use on definitely oriented blobs +const int kOrientedPadFactor = 8; +// Padding factor to use on not definitely oriented blobs. +const int kDefaultPadFactor = 2; +// Penalty factor for going away from the line center. +const int kWrongWayPenalty = 4; +// Ratio between parallel gap and perpendicular gap used to measure total +// distance of a box from a target box in curved textline space. +// parallel-gap is treated more favorably by this factor to allow catching +// quotes and elipsis at the end of textlines. +const int kParaPerpDistRatio = 4; +// Multiple of scale_factor_ that the inter-line gap must be before we start +// padding the increment box perpendicular to the text line. +const int kMinLineSpacingFactor = 4; +// Maximum tab-stop overrun for horizontal padding, in projection pixels. +const int kMaxTabStopOverrun = 6; + +namespace tesseract { + +TextlineProjection::TextlineProjection(int resolution) + : x_origin_(0), y_origin_(0), pix_(nullptr) { + // The projection map should be about 100 ppi, whatever the input. + scale_factor_ = IntCastRounded(resolution / 100.0); + if (scale_factor_ < 1) scale_factor_ = 1; +} +TextlineProjection::~TextlineProjection() { + pixDestroy(&pix_); +} + +// Build the projection profile given the input_block containing lists of +// blobs, a rotation to convert to image coords, +// and a full-resolution nontext_map, marking out areas to avoid. +// During construction, we have the following assumptions: +// The rotation is a multiple of 90 degrees, ie no deskew yet. +// The blobs have had their left and right rules set to also limit +// the range of projection. 
+void TextlineProjection::ConstructProjection(TO_BLOCK* input_block, + const FCOORD& rotation, + Pix* nontext_map) { + pixDestroy(&pix_); + TBOX image_box(0, 0, pixGetWidth(nontext_map), pixGetHeight(nontext_map)); + x_origin_ = 0; + y_origin_ = image_box.height(); + int width = (image_box.width() + scale_factor_ - 1) / scale_factor_; + int height = (image_box.height() + scale_factor_ - 1) / scale_factor_; + + pix_ = pixCreate(width, height, 8); + ProjectBlobs(&input_block->blobs, rotation, image_box, nontext_map); + ProjectBlobs(&input_block->large_blobs, rotation, image_box, nontext_map); + Pix* final_pix = pixBlockconv(pix_, 1, 1); +// Pix* final_pix = pixBlockconv(pix_, 2, 2); + pixDestroy(&pix_); + pix_ = final_pix; +} + +#ifndef GRAPHICS_DISABLED + +// Display the blobs in the window colored according to textline quality. +void TextlineProjection::PlotGradedBlobs(BLOBNBOX_LIST* blobs, + ScrollView* win) { + BLOBNBOX_IT it(blobs); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + const TBOX& box = blob->bounding_box(); + bool bad_box = BoxOutOfHTextline(box, nullptr, false); + if (blob->UniquelyVertical()) + win->Pen(ScrollView::YELLOW); + else + win->Pen(bad_box ? ScrollView::RED : ScrollView::BLUE); + win->Rectangle(box.left(), box.bottom(), box.right(), box.top()); + } + win->Update(); +} + +#endif // !GRAPHICS_DISABLED + +// Moves blobs that look like they don't sit well on a textline from the +// input blobs list to the output small_blobs list. +// This gets them away from initial textline finding to stop diacritics +// from forming incorrect textlines. (Introduced mainly to fix Thai.) 
+void TextlineProjection::MoveNonTextlineBlobs( + BLOBNBOX_LIST* blobs, BLOBNBOX_LIST* small_blobs) const { + BLOBNBOX_IT it(blobs); + BLOBNBOX_IT small_it(small_blobs); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + BLOBNBOX* blob = it.data(); + const TBOX& box = blob->bounding_box(); + bool debug = AlignedBlob::WithinTestRegion(2, box.left(), + box.bottom()); + if (BoxOutOfHTextline(box, nullptr, debug) && !blob->UniquelyVertical()) { + blob->ClearNeighbours(); + small_it.add_to_end(it.extract()); + } + } +} + +#ifndef GRAPHICS_DISABLED + +// Create a window and display the projection in it. +void TextlineProjection::DisplayProjection() const { + int width = pixGetWidth(pix_); + int height = pixGetHeight(pix_); + Pix* pixc = pixCreate(width, height, 32); + int src_wpl = pixGetWpl(pix_); + int col_wpl = pixGetWpl(pixc); + uint32_t* src_data = pixGetData(pix_); + uint32_t* col_data = pixGetData(pixc); + for (int y = 0; y < height; ++y, src_data += src_wpl, col_data += col_wpl) { + for (int x = 0; x < width; ++x) { + int pixel = GET_DATA_BYTE(src_data, x); + l_uint32 result; + if (pixel <= 17) + composeRGBPixel(0, 0, pixel * 15, &result); + else if (pixel <= 145) + composeRGBPixel(0, (pixel - 17) * 2, 255, &result); + else + composeRGBPixel((pixel - 145) * 2, 255, 255, &result); + col_data[x] = result; + } + } + auto* win = new ScrollView("Projection", 0, 0, + width, height, width, height); + win->Image(pixc, 0, 0); + win->Update(); + pixDestroy(&pixc); +} + +#endif // !GRAPHICS_DISABLED + +// Compute the distance of the box from the partition using curved projection +// space. As DistanceOfBoxFromBox, except that the direction is taken from +// the ColPartition and the median bounds of the ColPartition are used as +// the to_box. 
+int TextlineProjection::DistanceOfBoxFromPartition(const TBOX& box, + const ColPartition& part, + const DENORM* denorm, + bool debug) const { + // Compute a partition box that uses the median top/bottom of the blobs + // within and median left/right for vertical. + TBOX part_box = part.bounding_box(); + if (part.IsHorizontalType()) { + part_box.set_top(part.median_top()); + part_box.set_bottom(part.median_bottom()); + } else { + part_box.set_left(part.median_left()); + part_box.set_right(part.median_right()); + } + // Now use DistanceOfBoxFromBox to make the actual calculation. + return DistanceOfBoxFromBox(box, part_box, part.IsHorizontalType(), + denorm, debug); +} + +// Compute the distance from the from_box to the to_box using curved +// projection space. Separation that involves a decrease in projection +// density (moving from the from_box to the to_box) is weighted more heavily +// than constant density, and an increase is weighted less. +// If horizontal_textline is true, then curved space is used vertically, +// as for a diacritic on the edge of a textline. +// The projection uses original image coords, so denorm is used to get +// back to the image coords from box/part space. +// How the calculation works: Think of a diacritic near a textline. +// Distance is measured from the far side of the from_box to the near side of +// the to_box. Shown is the horizontal textline case. +// |------^-----| +// | from | box | +// |------|-----| +// perpendicular | +// <------v-------->|--------------------| +// parallel | to box | +// |--------------------| +// Perpendicular distance uses "curved space" See VerticalDistance below. +// Parallel distance is linear. +// Result is perpendicular_gap + parallel_gap / kParaPerpDistRatio. 
+int TextlineProjection::DistanceOfBoxFromBox(const TBOX& from_box, + const TBOX& to_box, + bool horizontal_textline, + const DENORM* denorm, + bool debug) const { + // The parallel_gap is the horizontal gap between a horizontal textline and + // the box. Analogous for vertical. + int parallel_gap = 0; + // start_pt is the box end of the line to be modified for curved space. + TPOINT start_pt; + // end_pt is the partition end of the line to be modified for curved space. + TPOINT end_pt; + if (horizontal_textline) { + parallel_gap = from_box.x_gap(to_box) + from_box.width(); + start_pt.x = (from_box.left() + from_box.right()) / 2; + end_pt.x = start_pt.x; + if (from_box.top() - to_box.top() >= to_box.bottom() - from_box.bottom()) { + start_pt.y = from_box.top(); + end_pt.y = std::min(to_box.top(), start_pt.y); + } else { + start_pt.y = from_box.bottom(); + end_pt.y = std::max(to_box.bottom(), start_pt.y); + } + } else { + parallel_gap = from_box.y_gap(to_box) + from_box.height(); + if (from_box.right() - to_box.right() >= to_box.left() - from_box.left()) { + start_pt.x = from_box.right(); + end_pt.x = std::min(to_box.right(), start_pt.x); + } else { + start_pt.x = from_box.left(); + end_pt.x = std::max(to_box.left(), start_pt.x); + } + start_pt.y = (from_box.bottom() + from_box.top()) / 2; + end_pt.y = start_pt.y; + } + // The perpendicular gap is the max vertical distance gap out of: + // top of from_box to to_box top and bottom of from_box to to_box bottom. + // This value is then modified for curved projection space. + // Analogous for vertical. + int perpendicular_gap = 0; + // If start_pt == end_pt, then the from_box lies entirely within the to_box + // (in the perpendicular direction), so we don't need to calculate the + // perpendicular_gap. + if (start_pt.x != end_pt.x || start_pt.y != end_pt.y) { + if (denorm != nullptr) { + // Denormalize the start and end. 
+ denorm->DenormTransform(nullptr, start_pt, &start_pt); + denorm->DenormTransform(nullptr, end_pt, &end_pt); + } + if (abs(start_pt.y - end_pt.y) >= abs(start_pt.x - end_pt.x)) { + perpendicular_gap = VerticalDistance(debug, start_pt.x, start_pt.y, + end_pt.y); + } else { + perpendicular_gap = HorizontalDistance(debug, start_pt.x, end_pt.x, + start_pt.y); + } + } + // The parallel_gap weighs less than the perpendicular_gap. + return perpendicular_gap + parallel_gap / kParaPerpDistRatio; +} + +// Compute the distance between (x, y1) and (x, y2) using the rule that +// a decrease in textline density is weighted more heavily than an increase. +// The coordinates are in source image space, ie processed by any denorm +// already, but not yet scaled by scale_factor_. +// Going from the outside of a textline to the inside should measure much +// less distance than going from the inside of a textline to the outside. +// How it works: +// An increase is cheap (getting closer to a textline). +// Constant costs unity. +// A decrease is expensive (getting further from a textline). +// Pixels in projection map Counted distance +// 2 +// 3 1/x +// 3 1 +// 2 x +// 5 1/x +// 7 1/x +// Total: 1 + x + 3/x where x = kWrongWayPenalty. +int TextlineProjection::VerticalDistance(bool debug, int x, + int y1, int y2) const { + x = ImageXToProjectionX(x); + y1 = ImageYToProjectionY(y1); + y2 = ImageYToProjectionY(y2); + if (y1 == y2) return 0; + int wpl = pixGetWpl(pix_); + int step = y1 < y2 ? 
1 : -1; + uint32_t* data = pixGetData(pix_) + y1 * wpl; + wpl *= step; + int prev_pixel = GET_DATA_BYTE(data, x); + int distance = 0; + int right_way_steps = 0; + for (int y = y1; y != y2; y += step) { + data += wpl; + int pixel = GET_DATA_BYTE(data, x); + if (debug) + tprintf("At (%d,%d), pix = %d, prev=%d\n", + x, y + step, pixel, prev_pixel); + if (pixel < prev_pixel) + distance += kWrongWayPenalty; + else if (pixel > prev_pixel) + ++right_way_steps; + else + ++distance; + prev_pixel = pixel; + } + return distance * scale_factor_ + + right_way_steps * scale_factor_ / kWrongWayPenalty; +} + +// Compute the distance between (x1, y) and (x2, y) using the rule that +// a decrease in textline density is weighted more heavily than an increase. +int TextlineProjection::HorizontalDistance(bool debug, int x1, int x2, + int y) const { + x1 = ImageXToProjectionX(x1); + x2 = ImageXToProjectionX(x2); + y = ImageYToProjectionY(y); + if (x1 == x2) return 0; + int wpl = pixGetWpl(pix_); + int step = x1 < x2 ? 1 : -1; + uint32_t* data = pixGetData(pix_) + y * wpl; + int prev_pixel = GET_DATA_BYTE(data, x1); + int distance = 0; + int right_way_steps = 0; + for (int x = x1; x != x2; x += step) { + int pixel = GET_DATA_BYTE(data, x + step); + if (debug) + tprintf("At (%d,%d), pix = %d, prev=%d\n", + x + step, y, pixel, prev_pixel); + if (pixel < prev_pixel) + distance += kWrongWayPenalty; + else if (pixel > prev_pixel) + ++right_way_steps; + else + ++distance; + prev_pixel = pixel; + } + return distance * scale_factor_ + + right_way_steps * scale_factor_ / kWrongWayPenalty; +} + +// Returns true if the blob appears to be outside of a textline. +// Such blobs are potentially diacritics (even if large in Thai) and should +// be kept away from initial textline finding. 
+bool TextlineProjection::BoxOutOfHTextline(const TBOX& box, + const DENORM* denorm, + bool debug) const { + int grad1 = 0; + int grad2 = 0; + EvaluateBoxInternal(box, denorm, debug, &grad1, &grad2, nullptr, nullptr); + int worst_result = std::min(grad1, grad2); + int total_result = grad1 + grad2; + if (total_result >= 6) return false; // Strongly in textline. + // Medium strength: if either gradient is negative, it is likely outside + // the body of the textline. + if (worst_result < 0) + return true; + return false; +} + +// Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below, +// but uses the median top/bottom for horizontal and median left/right for +// vertical instead of the bounding box edges. +// Evaluates for both horizontal and vertical and returns the best result, +// with a positive value for horizontal and a negative value for vertical. +int TextlineProjection::EvaluateColPartition(const ColPartition& part, + const DENORM* denorm, + bool debug) const { + if (part.IsSingleton()) + return EvaluateBox(part.bounding_box(), denorm, debug); + // Test vertical orientation. + TBOX box = part.bounding_box(); + // Use the partition median for left/right. + box.set_left(part.median_left()); + box.set_right(part.median_right()); + int vresult = EvaluateBox(box, denorm, debug); + + // Test horizontal orientation. + box = part.bounding_box(); + // Use the partition median for top/bottom. + box.set_top(part.median_top()); + box.set_bottom(part.median_bottom()); + int hresult = EvaluateBox(box, denorm, debug); + if (debug) { + tprintf("Partition hresult=%d, vresult=%d from:", hresult, vresult); + part.bounding_box().print(); + part.Print(); + } + return hresult >= -vresult ? 
hresult : vresult; +} + +// Computes the mean projection gradients over the horizontal and vertical +// edges of the box: +// -h-h-h-h-h-h +// |------------| mean=htop -v|+v--------+v|-v +// |+h+h+h+h+h+h| -v|+v +v|-v +// | | -v|+v +v|-v +// | box | -v|+v box +v|-v +// | | -v|+v +v|-v +// |+h+h+h+h+h+h| -v|+v +v|-v +// |------------| mean=hbot -v|+v--------+v|-v +// -h-h-h-h-h-h +// mean=vleft mean=vright +// +// Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number +// for a horizontal textline, a negative number for a vertical textline, +// and near zero for undecided. Undecided is most likely non-text. +// All the gradients are truncated to remain non-negative, since negative +// horizontal gradients don't give any indication of being vertical and +// vice versa. +// Additional complexity: The coordinates have to be transformed to original +// image coordinates with denorm (if not null), scaled to match the projection +// pix, and THEN step out 2 pixels each way from the edge to compute the +// gradient, and tries 3 positions, each measuring the gradient over a +// 4-pixel spread: (+3/-1), (+2/-2), (+1/-3). This complexity is handled by +// several layers of helpers below. +int TextlineProjection::EvaluateBox(const TBOX& box, const DENORM* denorm, + bool debug) const { + return EvaluateBoxInternal(box, denorm, debug, nullptr, nullptr, nullptr, nullptr); +} + +// Internal version of EvaluateBox returns the unclipped gradients as well +// as the result of EvaluateBox. +// hgrad1 and hgrad2 are the gradients for the horizontal textline. 
+int TextlineProjection::EvaluateBoxInternal(const TBOX& box, + const DENORM* denorm, bool debug, + int* hgrad1, int* hgrad2, + int* vgrad1, int* vgrad2) const { + int top_gradient = BestMeanGradientInRow(denorm, box.left(), box.right(), + box.top(), true); + int bottom_gradient = -BestMeanGradientInRow(denorm, box.left(), box.right(), + box.bottom(), false); + int left_gradient = BestMeanGradientInColumn(denorm, box.left(), box.bottom(), + box.top(), true); + int right_gradient = -BestMeanGradientInColumn(denorm, box.right(), + box.bottom(), box.top(), + false); + int top_clipped = std::max(top_gradient, 0); + int bottom_clipped = std::max(bottom_gradient, 0); + int left_clipped = std::max(left_gradient, 0); + int right_clipped = std::max(right_gradient, 0); + if (debug) { + tprintf("Gradients: top = %d, bottom = %d, left= %d, right= %d for box:", + top_gradient, bottom_gradient, left_gradient, right_gradient); + box.print(); + } + int result = std::max(top_clipped, bottom_clipped) - + std::max(left_clipped, right_clipped); + if (hgrad1 != nullptr && hgrad2 != nullptr) { + *hgrad1 = top_gradient; + *hgrad2 = bottom_gradient; + } + if (vgrad1 != nullptr && vgrad2 != nullptr) { + *vgrad1 = left_gradient; + *vgrad2 = right_gradient; + } + return result; +} + +// Helper returns the mean gradient value for the horizontal row at the given +// y, (in the external coordinates) by subtracting the mean of the transformed +// row 2 pixels above from the mean of the transformed row 2 pixels below. +// This gives a positive value for a good top edge and negative for bottom. +// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge. 
+int TextlineProjection::BestMeanGradientInRow(const DENORM* denorm, + int16_t min_x, int16_t max_x, int16_t y, + bool best_is_max) const { + TPOINT start_pt(min_x, y); + TPOINT end_pt(max_x, y); + int upper = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt); + int lower = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt); + int best_gradient = lower - upper; + upper = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt); + lower = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt); + int gradient = lower - upper; + if ((gradient > best_gradient) == best_is_max) + best_gradient = gradient; + upper = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt); + lower = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt); + gradient = lower - upper; + if ((gradient > best_gradient) == best_is_max) + best_gradient = gradient; + return best_gradient; +} + +// Helper returns the mean gradient value for the vertical column at the +// given x, (in the external coordinates) by subtracting the mean of the +// transformed column 2 pixels left from the mean of the transformed column +// 2 pixels to the right. +// This gives a positive value for a good left edge and negative for right. +// Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge. 
+int TextlineProjection::BestMeanGradientInColumn(const DENORM* denorm, int16_t x, + int16_t min_y, int16_t max_y, + bool best_is_max) const { + TPOINT start_pt(x, min_y); + TPOINT end_pt(x, max_y); + int left = MeanPixelsInLineSegment(denorm, -2, start_pt, end_pt); + int right = MeanPixelsInLineSegment(denorm, 2, start_pt, end_pt); + int best_gradient = right - left; + left = MeanPixelsInLineSegment(denorm, -1, start_pt, end_pt); + right = MeanPixelsInLineSegment(denorm, 3, start_pt, end_pt); + int gradient = right - left; + if ((gradient > best_gradient) == best_is_max) + best_gradient = gradient; + left = MeanPixelsInLineSegment(denorm, -3, start_pt, end_pt); + right = MeanPixelsInLineSegment(denorm, 1, start_pt, end_pt); + gradient = right - left; + if ((gradient > best_gradient) == best_is_max) + best_gradient = gradient; + return best_gradient; +} + +// Helper returns the mean pixel value over the line between the start_pt and +// end_pt (inclusive), but shifted perpendicular to the line in the projection +// image by offset pixels. For simplicity, it is assumed that the vector is +// either nearly horizontal or nearly vertical. It works on skewed textlines! +// The end points are in external coordinates, and will be denormalized with +// the denorm if not nullptr before further conversion to pix coordinates. +// After all the conversions, the offset is added to the direction +// perpendicular to the line direction. The offset is thus in projection image +// coordinates, which allows the caller to get a guaranteed displacement +// between pixels used to calculate gradients. 
+int TextlineProjection::MeanPixelsInLineSegment(const DENORM* denorm, + int offset, + TPOINT start_pt, + TPOINT end_pt) const { + TransformToPixCoords(denorm, &start_pt); + TransformToPixCoords(denorm, &end_pt); + TruncateToImageBounds(&start_pt); + TruncateToImageBounds(&end_pt); + int wpl = pixGetWpl(pix_); + uint32_t* data = pixGetData(pix_); + int total = 0; + int count = 0; + int x_delta = end_pt.x - start_pt.x; + int y_delta = end_pt.y - start_pt.y; + if (abs(x_delta) >= abs(y_delta)) { + if (x_delta == 0) + return 0; + // Horizontal line. Add the offset vertically. + int x_step = x_delta > 0 ? 1 : -1; + // Correct offset for rotation, keeping it anti-clockwise of the delta. + offset *= x_step; + start_pt.y += offset; + end_pt.y += offset; + TruncateToImageBounds(&start_pt); + TruncateToImageBounds(&end_pt); + x_delta = end_pt.x - start_pt.x; + y_delta = end_pt.y - start_pt.y; + count = x_delta * x_step + 1; + for (int x = start_pt.x; x != end_pt.x; x += x_step) { + int y = start_pt.y + DivRounded(y_delta * (x - start_pt.x), x_delta); + total += GET_DATA_BYTE(data + wpl * y, x); + } + } else { + // Vertical line. Add the offset horizontally. + int y_step = y_delta > 0 ? 1 : -1; + // Correct offset for rotation, keeping it anti-clockwise of the delta. + // Pix holds the image with y=0 at the top, so the offset is negated. + offset *= -y_step; + start_pt.x += offset; + end_pt.x += offset; + TruncateToImageBounds(&start_pt); + TruncateToImageBounds(&end_pt); + x_delta = end_pt.x - start_pt.x; + y_delta = end_pt.y - start_pt.y; + count = y_delta * y_step + 1; + for (int y = start_pt.y; y != end_pt.y; y += y_step) { + int x = start_pt.x + DivRounded(x_delta * (y - start_pt.y), y_delta); + total += GET_DATA_BYTE(data + wpl * y, x); + } + } + return DivRounded(total, count); +} + +// Given an input pix, and a box, the sides of the box are shrunk inwards until +// they bound any black pixels found within the original box. 
+// The function converts between tesseract coords and the pix coords assuming +// that this pix is full resolution equal in size to the original image. +// Returns an empty box if there are no black pixels in the source box. +static TBOX BoundsWithinBox(Pix* pix, const TBOX& box) { + int im_height = pixGetHeight(pix); + Box* input_box = boxCreate(box.left(), im_height - box.top(), + box.width(), box.height()); + Box* output_box = nullptr; + pixClipBoxToForeground(pix, input_box, nullptr, &output_box); + TBOX result_box; + if (output_box != nullptr) { + l_int32 x, y, width, height; + boxGetGeometry(output_box, &x, &y, &width, &height); + result_box.set_left(x); + result_box.set_right(x + width); + result_box.set_top(im_height - y); + result_box.set_bottom(result_box.top() - height); + boxDestroy(&output_box); + } + boxDestroy(&input_box); + return result_box; +} + +// Splits the given box in half at x_middle or y_middle according to split_on_x +// and checks for nontext_map pixels in each half. Reduces the bbox so that it +// still includes the middle point, but does not touch any fg pixels in +// nontext_map. An empty box may be returned if there is no such box. 
+static void TruncateBoxToMissNonText(int x_middle, int y_middle, + bool split_on_x, Pix* nontext_map, + TBOX* bbox) { + TBOX box1(*bbox); + TBOX box2(*bbox); + TBOX im_box; + if (split_on_x) { + box1.set_right(x_middle); + im_box = BoundsWithinBox(nontext_map, box1); + if (!im_box.null_box()) box1.set_left(im_box.right()); + box2.set_left(x_middle); + im_box = BoundsWithinBox(nontext_map, box2); + if (!im_box.null_box()) box2.set_right(im_box.left()); + } else { + box1.set_bottom(y_middle); + im_box = BoundsWithinBox(nontext_map, box1); + if (!im_box.null_box()) box1.set_top(im_box.bottom()); + box2.set_top(y_middle); + im_box = BoundsWithinBox(nontext_map, box2); + if (!im_box.null_box()) box2.set_bottom(im_box.top()); + } + box1 += box2; + *bbox = box1; +} + + +// Helper function to add 1 to a rectangle in source image coords to the +// internal projection pix_. +void TextlineProjection::IncrementRectangle8Bit(const TBOX& box) { + int scaled_left = ImageXToProjectionX(box.left()); + int scaled_top = ImageYToProjectionY(box.top()); + int scaled_right = ImageXToProjectionX(box.right()); + int scaled_bottom = ImageYToProjectionY(box.bottom()); + int wpl = pixGetWpl(pix_); + uint32_t* data = pixGetData(pix_) + scaled_top * wpl; + for (int y = scaled_top; y <= scaled_bottom; ++y) { + for (int x = scaled_left; x <= scaled_right; ++x) { + int pixel = GET_DATA_BYTE(data, x); + if (pixel < 255) + SET_DATA_BYTE(data, x, pixel + 1); + } + data += wpl; + } +} + +// Inserts a list of blobs into the projection. +// Rotation is a multiple of 90 degrees to get from blob coords to +// nontext_map coords, nontext_map_box is the bounds of the nontext_map. +// Blobs are spread horizontally or vertically according to their internal +// flags, but the spreading is truncated by set pixels in the nontext_map +// and also by the horizontal rule line limits on the blobs. 
+void TextlineProjection::ProjectBlobs(BLOBNBOX_LIST* blobs, + const FCOORD& rotation, + const TBOX& nontext_map_box, + Pix* nontext_map) { + BLOBNBOX_IT blob_it(blobs); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + BLOBNBOX* blob = blob_it.data(); + TBOX bbox = blob->bounding_box(); + ICOORD middle((bbox.left() + bbox.right()) / 2, + (bbox.bottom() + bbox.top()) / 2); + bool spreading_horizontally = PadBlobBox(blob, &bbox); + // Rotate to match the nontext_map. + bbox.rotate(rotation); + middle.rotate(rotation); + if (rotation.x() == 0.0f) + spreading_horizontally = !spreading_horizontally; + // Clip to the image before applying the increments. + bbox &= nontext_map_box; // This is in-place box intersection. + // Check for image pixels before spreading. + TruncateBoxToMissNonText(middle.x(), middle.y(), spreading_horizontally, + nontext_map, &bbox); + if (bbox.area() > 0) { + IncrementRectangle8Bit(bbox); + } + } +} + +// Pads the bounding box of the given blob according to whether it is on +// a horizontal or vertical text line, taking into account tab-stops near +// the blob. Returns true if padding was in the horizontal direction. +bool TextlineProjection::PadBlobBox(BLOBNBOX* blob, TBOX* bbox) { + // Determine which direction to spread. + // If text is well spaced out, it can be useful to pad perpendicular to + // the textline direction, so as to ensure diacritics get absorbed + // correctly, but if the text is tightly spaced, this will destroy the + // blank space between textlines in the projection map, and that would + // be very bad. 
+ int pad_limit = scale_factor_ * kMinLineSpacingFactor; + int xpad = 0; + int ypad = 0; + bool padding_horizontally = false; + if (blob->UniquelyHorizontal()) { + xpad = bbox->height() * kOrientedPadFactor; + padding_horizontally = true; + // If the text appears to be very well spaced, pad the other direction by a + // single pixel in the projection profile space to help join diacritics to + // the textline. + if ((blob->neighbour(BND_ABOVE) == nullptr || + bbox->y_gap(blob->neighbour(BND_ABOVE)->bounding_box()) > pad_limit) && + (blob->neighbour(BND_BELOW) == nullptr || + bbox->y_gap(blob->neighbour(BND_BELOW)->bounding_box()) > pad_limit)) { + ypad = scale_factor_; + } + } else if (blob->UniquelyVertical()) { + ypad = bbox->width() * kOrientedPadFactor; + if ((blob->neighbour(BND_LEFT) == nullptr || + bbox->x_gap(blob->neighbour(BND_LEFT)->bounding_box()) > pad_limit) && + (blob->neighbour(BND_RIGHT) == nullptr || + bbox->x_gap(blob->neighbour(BND_RIGHT)->bounding_box()) > pad_limit)) { + xpad = scale_factor_; + } + } else { + if ((blob->neighbour(BND_ABOVE) != nullptr && + blob->neighbour(BND_ABOVE)->neighbour(BND_BELOW) == blob) || + (blob->neighbour(BND_BELOW) != nullptr && + blob->neighbour(BND_BELOW)->neighbour(BND_ABOVE) == blob)) { + ypad = bbox->width() * kDefaultPadFactor; + } + if ((blob->neighbour(BND_RIGHT) != nullptr && + blob->neighbour(BND_RIGHT)->neighbour(BND_LEFT) == blob) || + (blob->neighbour(BND_LEFT) != nullptr && + blob->neighbour(BND_LEFT)->neighbour(BND_RIGHT) == blob)) { + xpad = bbox->height() * kDefaultPadFactor; + padding_horizontally = true; + } + } + bbox->pad(xpad, ypad); + pad_limit = scale_factor_ * kMaxTabStopOverrun; + // Now shrink horizontally to avoid stepping more than pad_limit over a + // tab-stop. 
+ if (bbox->left() < blob->left_rule() - pad_limit) { + bbox->set_left(blob->left_rule() - pad_limit); + } + if (bbox->right() > blob->right_rule() + pad_limit) { + bbox->set_right(blob->right_rule() + pad_limit); + } + return padding_horizontally; +} + +// Helper denormalizes the TPOINT with the denorm if not nullptr, then +// converts to pix_ coordinates. +void TextlineProjection::TransformToPixCoords(const DENORM* denorm, + TPOINT* pt) const { + if (denorm != nullptr) { + // Denormalize the point. + denorm->DenormTransform(nullptr, *pt, pt); + } + pt->x = ImageXToProjectionX(pt->x); + pt->y = ImageYToProjectionY(pt->y); +} + +#if defined(_MSC_VER) && !defined(__clang__) +#pragma optimize("g", off) +#endif // _MSC_VER +// Helper truncates the TPOINT to be within the pix_. +void TextlineProjection::TruncateToImageBounds(TPOINT* pt) const { + pt->x = ClipToRange<int>(pt->x, 0, pixGetWidth(pix_) - 1); + pt->y = ClipToRange<int>(pt->y, 0, pixGetHeight(pix_) - 1); +} +#if defined(_MSC_VER) && !defined(__clang__) +#pragma optimize("", on) +#endif // _MSC_VER + +// Transform tesseract image coordinates to coordinates used in the projection. +int TextlineProjection::ImageXToProjectionX(int x) const { + x = ClipToRange((x - x_origin_) / scale_factor_, 0, pixGetWidth(pix_) - 1); + return x; +} +int TextlineProjection::ImageYToProjectionY(int y) const { + y = ClipToRange((y_origin_ - y) / scale_factor_, 0, pixGetHeight(pix_) - 1); + return y; +} + +} // namespace tesseract. diff --git a/tesseract/src/textord/textlineprojection.h b/tesseract/src/textord/textlineprojection.h new file mode 100644 index 00000000..c35ae350 --- /dev/null +++ b/tesseract/src/textord/textlineprojection.h @@ -0,0 +1,206 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// Author: rays@google.com (Ray Smith) +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_ +#define TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_ + +#include "blobgrid.h" // For BlobGrid + +struct Pix; + +namespace tesseract { + +class DENORM; +struct TPOINT; +class ColPartition; + +// Simple class to encapsulate the computation of an image representing +// local textline density, and function(s) to make use of it. +// The underlying principle is that if you smear connected components +// horizontally (vertically for components on a vertically written textline) +// and count the number of smeared components in an image, then the resulting +// image shows the density of the textlines at each image position. +class TESS_API TextlineProjection { + public: + // The down-scaling factor is computed to obtain a projection resolution + // of about 100 dpi, whatever the input. + explicit TextlineProjection(int resolution); + ~TextlineProjection(); + + // Build the projection profile given the input_block containing lists of + // blobs, a rotation to convert to image coords, + // and a full-resolution nontext_map, marking out areas to avoid. + // During construction, we have the following assumptions: + // The rotation is a multiple of 90 degrees, ie no deskew yet. + // The blobs have had their left and right rules set to also limit + // the range of projection. + void ConstructProjection(TO_BLOCK* input_block, + const FCOORD& rotation, Pix* nontext_map); + + // Display the blobs in the window colored according to textline quality. 
+ void PlotGradedBlobs(BLOBNBOX_LIST* blobs, ScrollView* win); + + // Moves blobs that look like they don't sit well on a textline from the + // input blobs list to the output small_blobs list. + // This gets them away from initial textline finding to stop diacritics + // from forming incorrect textlines. (Introduced mainly to fix Thai.) + void MoveNonTextlineBlobs(BLOBNBOX_LIST* blobs, + BLOBNBOX_LIST* small_blobs) const; + + // Create a window and display the projection in it. + void DisplayProjection() const; + + // Compute the distance of the box from the partition using curved projection + // space. As DistanceOfBoxFromBox, except that the direction is taken from + // the ColPartition and the median bounds of the ColPartition are used as + // the to_box. + int DistanceOfBoxFromPartition(const TBOX& box, const ColPartition& part, + const DENORM* denorm, bool debug) const; + + // Compute the distance from the from_box to the to_box using curved + // projection space. Separation that involves a decrease in projection + // density (moving from the from_box to the to_box) is weighted more heavily + // than constant density, and an increase is weighted less. + // If horizontal_textline is true, then curved space is used vertically, + // as for a diacritic on the edge of a textline. + // The projection uses original image coords, so denorm is used to get + // back to the image coords from box/part space. + int DistanceOfBoxFromBox(const TBOX& from_box, const TBOX& to_box, + bool horizontal_textline, + const DENORM* denorm, bool debug) const; + + // Compute the distance between (x, y1) and (x, y2) using the rule that + // a decrease in textline density is weighted more heavily than an increase. + // The coordinates are in source image space, ie processed by any denorm + // already, but not yet scaled by scale_factor_. + // Going from the outside of a textline to the inside should measure much + // less distance than going from the inside of a textline to the outside. 
+ int VerticalDistance(bool debug, int x, int y1, int y2) const; + + // Compute the distance between (x1, y) and (x2, y) using the rule that + // a decrease in textline density is weighted more heavily than an increase. + int HorizontalDistance(bool debug, int x1, int x2, int y) const; + + // Returns true if the blob appears to be outside of a horizontal textline. + // Such blobs are potentially diacritics (even if large in Thai) and should + // be kept away from initial textline finding. + bool BoxOutOfHTextline(const TBOX& box, const DENORM* denorm, + bool debug) const; + + // Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below, + // but uses the median top/bottom for horizontal and median left/right for + // vertical instead of the bounding box edges. + // Evaluates for both horizontal and vertical and returns the best result, + // with a positive value for horizontal and a negative value for vertical. + int EvaluateColPartition(const ColPartition& part, const DENORM* denorm, + bool debug) const; + + // Computes the mean projection gradients over the horizontal and vertical + // edges of the box: + // -h-h-h-h-h-h + // |------------| mean=htop -v|+v--------+v|-v + // |+h+h+h+h+h+h| -v|+v +v|-v + // | | -v|+v +v|-v + // | box | -v|+v box +v|-v + // | | -v|+v +v|-v + // |+h+h+h+h+h+h| -v|+v +v|-v + // |------------| mean=hbot -v|+v--------+v|-v + // -h-h-h-h-h-h + // mean=vleft mean=vright + // + // Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number + // for a horizontal textline, a negative number for a vertical textline, + // and near zero for undecided. Undecided is most likely non-text. + int EvaluateBox(const TBOX& box, const DENORM* denorm, bool debug) const; + + private: + // Internal version of EvaluateBox returns the unclipped gradients as well + // as the result of EvaluateBox. + // hgrad1 and hgrad2 are the gradients for the horizontal textline. 
+ int EvaluateBoxInternal(const TBOX& box, const DENORM* denorm, bool debug, + int* hgrad1, int* hgrad2, + int* vgrad1, int* vgrad2) const; + + // Helper returns the mean gradient value for the horizontal row at the given + // y, (in the external coordinates) by subtracting the mean of the transformed + // row 2 pixels above from the mean of the transformed row 2 pixels below. + // This gives a positive value for a good top edge and negative for bottom. + // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge. + int BestMeanGradientInRow(const DENORM* denorm, int16_t min_x, int16_t max_x, + int16_t y, bool best_is_max) const; + + // Helper returns the mean gradient value for the vertical column at the + // given x, (in the external coordinates) by subtracting the mean of the + // transformed column 2 pixels left from the mean of the transformed column + // 2 pixels to the right. + // This gives a positive value for a good left edge and negative for right. + // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge. + int BestMeanGradientInColumn(const DENORM* denorm, int16_t x, int16_t min_y, + int16_t max_y, bool best_is_max) const; + + // Helper returns the mean pixel value over the line between the start_pt and + // end_pt (inclusive), but shifted perpendicular to the line in the projection + // image by offset pixels. For simplicity, it is assumed that the vector is + // either nearly horizontal or nearly vertical. It works on skewed textlines! + // The end points are in external coordinates, and will be denormalized with + // the denorm if not nullptr before further conversion to pix coordinates. + // After all the conversions, the offset is added to the direction + // perpendicular to the line direction. The offset is thus in projection image + // coordinates, which allows the caller to get a guaranteed displacement + // between pixels used to calculate gradients. 
+ int MeanPixelsInLineSegment(const DENORM* denorm, int offset, + TPOINT start_pt, TPOINT end_pt) const; + + // Helper function to add 1 to a rectangle in source image coords to the + // internal projection pix_. + void IncrementRectangle8Bit(const TBOX& box); + // Inserts a list of blobs into the projection. + // Rotation is a multiple of 90 degrees to get from blob coords to + // nontext_map coords, image_box is the bounds of the nontext_map. + // Blobs are spread horizontally or vertically according to their internal + // flags, but the spreading is truncated by set pixels in the nontext_map + // and also by the horizontal rule line limits on the blobs. + void ProjectBlobs(BLOBNBOX_LIST* blobs, const FCOORD& rotation, + const TBOX& image_box, Pix* nontext_map); + // Pads the bounding box of the given blob according to whether it is on + // a horizontal or vertical text line, taking into account tab-stops near + // the blob. Returns true if padding was in the horizontal direction. + bool PadBlobBox(BLOBNBOX* blob, TBOX* bbox); + + // Helper denormalizes the TPOINT with the denorm if not nullptr, then + // converts to pix_ coordinates. + void TransformToPixCoords(const DENORM* denorm, TPOINT* pt) const; + + // Helper truncates the TPOINT to be within the pix_. + void TruncateToImageBounds(TPOINT* pt) const; + + // Transform tesseract coordinates to coordinates used in the pix. + int ImageXToProjectionX(int x) const; + int ImageYToProjectionY(int y) const; + + // The down-sampling scale factor used in building the image. + int scale_factor_; + // The blob coordinates of the top-left (origin of the pix_) in tesseract + // coordinates. Used to transform the bottom-up tesseract coordinates to + // the top-down coordinates of the pix. + int x_origin_; + int y_origin_; + // The image of horizontally smeared blob boxes summed to provide a + // textline density map. As with a horizontal projection, the map has + // dips in the gaps between textlines. 
+ Pix* pix_; +}; + +} // namespace tesseract. + +#endif // TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_ diff --git a/tesseract/src/textord/textord.cpp b/tesseract/src/textord/textord.cpp new file mode 100644 index 00000000..756ca78a --- /dev/null +++ b/tesseract/src/textord/textord.cpp @@ -0,0 +1,349 @@ +/////////////////////////////////////////////////////////////////////// +// File: textord.cpp +// Description: The top-level text line and word finding functionality. +// Author: Ray Smith +// Created: Fri Mar 13 14:43:01 PDT 2009 +// +// (C) Copyright 2009, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +// Include automatically generated configuration file if running autoconf. 
+#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "baselinedetect.h" +#include "drawtord.h" +#include "textord.h" +#include "makerow.h" +#include "pageres.h" +#include "tordmain.h" +#include "wordseg.h" + +namespace tesseract { + +Textord::Textord(CCStruct* ccstruct) + : ccstruct_(ccstruct), + use_cjk_fp_model_(false), + // makerow.cpp /////////////////////////////////////////// + BOOL_MEMBER(textord_single_height_mode, false, + "Script has no xheight, so use a single mode", + ccstruct_->params()), + // tospace.cpp /////////////////////////////////////////// + BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?", + ccstruct_->params()), + BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false, + "Constrain relative values of inter and intra-word gaps for " + "old_to_method.", + ccstruct_->params()), + BOOL_MEMBER(tosp_only_use_prop_rows, true, + "Block stats to use fixed pitch rows?", ccstruct_->params()), + BOOL_MEMBER(tosp_force_wordbreak_on_punct, false, + "Force word breaks on punct to break long lines in non-space " + "delimited langs", + ccstruct_->params()), + BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?", + ccstruct_->params()), + BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code", + ccstruct_->params()), + BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces", + ccstruct_->params()), + BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces", + ccstruct_->params()), + BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces", + ccstruct_->params()), + BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces", + ccstruct_->params()), + BOOL_MEMBER(tosp_recovery_isolated_row_stats, true, + "Use row alone when inadequate cert spaces", + ccstruct_->params()), + BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess", + ccstruct_->params()), + BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?", + 
ccstruct_->params()), + BOOL_MEMBER(tosp_fuzzy_limit_all, true, + "Don't restrict kn->sp fuzzy limit to tables", + ccstruct_->params()), + BOOL_MEMBER(tosp_stats_use_xht_gaps, true, + "Use within xht gap for wd breaks", ccstruct_->params()), + BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks", + ccstruct_->params()), + BOOL_MEMBER(tosp_only_use_xht_gaps, false, + "Only use within xht gap for wd breaks", ccstruct_->params()), + BOOL_MEMBER(tosp_rule_9_test_punct, false, + "Don't chng kn to space next to punct", ccstruct_->params()), + BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip", + ccstruct_->params()), + BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip", + ccstruct_->params()), + BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic", + ccstruct_->params()), + INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params()), + INT_MEMBER(tosp_enough_space_samples_for_median, 3, + "or should we use mean", ccstruct_->params()), + INT_MEMBER(tosp_redo_kern_limit, 10, + "No.samples reqd to reestimate for row", ccstruct_->params()), + INT_MEMBER(tosp_few_samples, 40, + "No.gaps reqd with 1 large gap to treat as a table", + ccstruct_->params()), + INT_MEMBER(tosp_short_row, 20, + "No.gaps reqd with few cert spaces to use certs", + ccstruct_->params()), + INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly", + ccstruct_->params()), + double_MEMBER(tosp_old_sp_kn_th_factor, 2.0, + "Factor for defining space threshold in terms of space and " + "kern sizes", + ccstruct_->params()), + double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?", + ccstruct_->params()), + double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?", + ccstruct_->params()), + double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow", + ccstruct_->params()), + double_MEMBER(tosp_narrow_aspect_ratio, 0.48, + "narrow if w/h less than this", ccstruct_->params()), + 
double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide", + ccstruct_->params()), + double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this", + ccstruct_->params()), + double_MEMBER(tosp_fuzzy_space_factor, 0.6, + "Fract of xheight for fuzz sp", ccstruct_->params()), + double_MEMBER(tosp_fuzzy_space_factor1, 0.5, + "Fract of xheight for fuzz sp", ccstruct_->params()), + double_MEMBER(tosp_fuzzy_space_factor2, 0.72, + "Fract of xheight for fuzz sp", ccstruct_->params()), + double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern", + ccstruct_->params()), + double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp", + ccstruct_->params()), + double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp", + ccstruct_->params()), + double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp", + ccstruct_->params()), + double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier", + ccstruct_->params()), + double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier", + ccstruct_->params()), + double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space", + ccstruct_->params()), + double_MEMBER(tosp_enough_small_gaps, 0.65, + "Fract of kerns reqd for isolated row stats", + ccstruct_->params()), + double_MEMBER(tosp_table_kn_sp_ratio, 2.25, + "Min difference of kn & sp in table", ccstruct_->params()), + double_MEMBER(tosp_table_xht_sp_ratio, 0.33, + "Expect spaces bigger than this", ccstruct_->params()), + double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0, + "Fuzzy if less than this", ccstruct_->params()), + double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg", + ccstruct_->params()), + double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg", + ccstruct_->params()), + double_MEMBER(tosp_min_sane_kn_sp, 1.5, + "Don't trust spaces less than this time kn", + ccstruct_->params()), + double_MEMBER(tosp_init_guess_kn_mult, 2.2, + "Thresh guess - mult kn by this", ccstruct_->params()), + 
double_MEMBER(tosp_init_guess_xht_mult, 0.28, + "Thresh guess - mult xht by this", ccstruct_->params()), + double_MEMBER(tosp_max_sane_kn_thresh, 5.0, + "Multiplier on kn to limit thresh", ccstruct_->params()), + double_MEMBER(tosp_flip_caution, 0.0, + "Don't autoflip kn to sp when large separation", + ccstruct_->params()), + double_MEMBER(tosp_large_kerning, 0.19, + "Limit use of xht gap with large kns", ccstruct_->params()), + double_MEMBER(tosp_dont_fool_with_small_kerns, -1, + "Limit use of xht gap with odd small kns", + ccstruct_->params()), + double_MEMBER(tosp_near_lh_edge, 0, + "Don't reduce box if the top left is non blank", + ccstruct_->params()), + double_MEMBER(tosp_silly_kn_sp_gap, 0.2, + "Don't let sp minus kn get too small", ccstruct_->params()), + double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75, + "How wide fuzzies need context", ccstruct_->params()), + // tordmain.cpp /////////////////////////////////////////// + BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs", + ccstruct_->params()), + BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs", + ccstruct_->params()), + BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs", + ccstruct_->params()), + INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise", + ccstruct_->params()), + INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level", + ccstruct_->params()), + double_MEMBER(textord_noise_area_ratio, 0.7, + "Fraction of bounding box for noise", ccstruct_->params()), + double_MEMBER(textord_initialx_ile, 0.75, + "Ile of sizes for xheight guess", ccstruct_->params()), + double_MEMBER(textord_initialasc_ile, 0.90, + "Ile of sizes for xheight guess", ccstruct_->params()), + INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima", + ccstruct_->params()), + double_MEMBER(textord_noise_sizelimit, 0.5, + "Fraction of x for big t count", ccstruct_->params()), + INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob", + 
ccstruct_->params()), + double_MEMBER(textord_noise_normratio, 2.0, + "Dot to norm ratio for deletion", ccstruct_->params()), + BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words", + ccstruct_->params()), + BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows", + ccstruct_->params()), + double_MEMBER(textord_noise_syfract, 0.2, + "xh fract height error for norm blobs", + ccstruct_->params()), + double_MEMBER(textord_noise_sxfract, 0.4, + "xh fract width error for norm blobs", ccstruct_->params()), + double_MEMBER(textord_noise_hfract, 1.0 / 64, + "Height fraction to discard outlines as speckle noise", + ccstruct_->params()), + INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row", + ccstruct_->params()), + double_MEMBER(textord_noise_rowratio, 6.0, + "Dot to norm ratio for deletion", ccstruct_->params()), + BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector", + ccstruct_->params()), + double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift", + ccstruct_->params()), + double_MEMBER(textord_blshift_xfraction, 9.99, + "Min size of baseline shift", ccstruct_->params()) {} + +// Make the textlines and words inside each block. +void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew, + int width, int height, Pix* binary_pix, + Pix* thresholds_pix, Pix* grey_pix, + bool use_box_bottoms, BLOBNBOX_LIST* diacritic_blobs, + BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) { + page_tr_.set_x(width); + page_tr_.set_y(height); + if (to_blocks->empty()) { + // AutoPageSeg was not used, so we need to find_components first. + find_components(binary_pix, blocks, to_blocks); + TO_BLOCK_IT it(to_blocks); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + TO_BLOCK* to_block = it.data(); + // Compute the edge offsets whether or not there is a grey_pix. + // We have by-passed auto page seg, so we have to run it here. + // By page segmentation mode there is no non-text to avoid running on. 
+ to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix); + } + } else if (!PSM_SPARSE(pageseg_mode)) { + // AutoPageSeg does not need to find_components as it did that already. + // Filter_blobs sets up the TO_BLOCKs the same as find_components does. + filter_blobs(page_tr_, to_blocks, true); + } + + ASSERT_HOST(!to_blocks->empty()); + if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) { + const FCOORD anticlockwise90(0.0f, 1.0f); + const FCOORD clockwise90(0.0f, -1.0f); + TO_BLOCK_IT it(to_blocks); + for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { + TO_BLOCK* to_block = it.data(); + BLOCK* block = to_block->block; + // Create a fake poly_block in block from its bounding box. + block->pdblk.set_poly_block(new POLY_BLOCK(block->pdblk.bounding_box(), + PT_VERTICAL_TEXT)); + // Rotate the to_block along with its contained block and blobnbox lists. + to_block->rotate(anticlockwise90); + // Set the block's rotation values to obey the convention followed in + // layout analysis for vertical text. + block->set_re_rotation(clockwise90); + block->set_classify_rotation(clockwise90); + } + } + + TO_BLOCK_IT to_block_it(to_blocks); + TO_BLOCK* to_block = to_block_it.data(); + // Make the rows in the block. + float gradient; + // Do it the old fashioned way. + if (PSM_LINE_FIND_ENABLED(pageseg_mode)) { + gradient = make_rows(page_tr_, to_blocks); + } else if (!PSM_SPARSE(pageseg_mode)) { + // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row. + gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE, + to_block, to_blocks); + } else { + gradient = 0.0f; + } + BaselineDetect baseline_detector(textord_baseline_debug, + reskew, to_blocks); + baseline_detector.ComputeStraightBaselines(use_box_bottoms); + baseline_detector.ComputeBaselineSplinesAndXheights( + page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr, + textord_show_final_rows, this); + // Now make the words in the lines. 
+ if (PSM_WORD_FIND_ENABLED(pageseg_mode)) { + // SINGLE_LINE uses the old word maker on the single line. + make_words(this, page_tr_, gradient, blocks, to_blocks); + } else { + // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a + // single word, and in SINGLE_CHAR mode, all the outlines + // go in a single blob. + TO_BLOCK* to_block = to_block_it.data(); + make_single_word(pageseg_mode == PSM_SINGLE_CHAR, + to_block->get_rows(), to_block->block->row_list()); + } + // Remove empties. + cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks); + TransferDiacriticsToBlockGroups(diacritic_blobs, blocks); + // Compute the margins for each row in the block, to be used later for + // paragraph detection. + BLOCK_IT b_it(blocks); + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + b_it.data()->compute_row_margins(); + } +#ifndef GRAPHICS_DISABLED + close_to_win(); +#endif +} + +// If we were supposed to return only a single textline, and there is more +// than one, clean up and leave only the best. +void Textord::CleanupSingleRowResult(PageSegMode pageseg_mode, + PAGE_RES* page_res) { + if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode)) + return; // No cleanup required. + PAGE_RES_IT it(page_res); + // Find the best row, being the greatest mean word conf. + float row_total_conf = 0.0f; + int row_word_count = 0; + ROW_RES* best_row = nullptr; + float best_conf = 0.0f; + for (it.restart_page(); it.word() != nullptr; it.forward()) { + WERD_RES* word = it.word(); + row_total_conf += word->best_choice->certainty(); + ++row_word_count; + if (it.next_row() != it.row()) { + row_total_conf /= row_word_count; + if (best_row == nullptr || best_conf < row_total_conf) { + best_row = it.row(); + best_conf = row_total_conf; + } + row_total_conf = 0.0f; + row_word_count = 0; + } + } + // Now eliminate any word not in the best row. 
+ for (it.restart_page(); it.word() != nullptr; it.forward()) { + if (it.row() != best_row) + it.DeleteCurrentWord(); + } +} + +} // namespace tesseract. diff --git a/tesseract/src/textord/textord.h b/tesseract/src/textord/textord.h new file mode 100644 index 00000000..b2ca7079 --- /dev/null +++ b/tesseract/src/textord/textord.h @@ -0,0 +1,403 @@ +/////////////////////////////////////////////////////////////////////// +// File: textord.h +// Description: The Textord class definition gathers text line and word +// finding functionality. +// Author: Ray Smith +// Created: Fri Mar 13 14:29:01 PDT 2009 +// +// (C) Copyright 2009, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_TEXTORD_TEXTORD_H_ +#define TESSERACT_TEXTORD_TEXTORD_H_ + +#include "ccstruct.h" +#include "bbgrid.h" +#include "blobbox.h" +#include "gap_map.h" + +#include <tesseract/publictypes.h> // For PageSegMode. + +namespace tesseract { + +class FCOORD; +class BLOCK_LIST; +class PAGE_RES; +class TO_BLOCK; +class TO_BLOCK_LIST; +class ScrollView; + +// A simple class that can be used by BBGrid to hold a word and an expanded +// bounding box that makes it easy to find words to put diacritics. 
+class WordWithBox { + public: + WordWithBox() : word_(nullptr) {} + explicit WordWithBox(WERD *word) + : word_(word), bounding_box_(word->bounding_box()) { + int height = bounding_box_.height(); + bounding_box_.pad(height, height); + } + + const TBOX &bounding_box() const { return bounding_box_; } + // Returns the bounding box of only the good blobs. + TBOX true_bounding_box() const { return word_->true_bounding_box(); } + C_BLOB_LIST *RejBlobs() const { return word_->rej_cblob_list(); } + const WERD *word() const { return word_; } + + private: + // Borrowed pointer to a real word somewhere that must outlive this class. + WERD *word_; + // Cached expanded bounding box of the word, padded all round by its height. + TBOX bounding_box_; +}; + +// Make it usable by BBGrid. +CLISTIZEH(WordWithBox) +using WordGrid = BBGrid<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>; +using WordSearch = GridSearch<WordWithBox, WordWithBox_CLIST, WordWithBox_C_IT>; + +class Textord { + public: + explicit Textord(CCStruct* ccstruct); + ~Textord() = default; + + // Make the textlines and words inside each block. + // binary_pix is mandatory and is the binarized input after line removal. + // grey_pix is optional, but if present must match the binary_pix in size, + // and must be a *real* grey image instead of binary_pix * 255. + // thresholds_pix is expected to be present iff grey_pix is present and + // can be an integer factor reduction of the grey_pix. It represents the + // thresholds that were used to create the binary_pix from the grey_pix. + // diacritic_blobs contain small confusing components that should be added + // to the appropriate word(s) in case they are really diacritics. 
+ void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, + int height, Pix *binary_pix, Pix *thresholds_pix, + Pix *grey_pix, bool use_box_bottoms, + BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, + TO_BLOCK_LIST *to_blocks); + + // If we were supposed to return only a single textline, and there is more + // than one, clean up and leave only the best. + void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES* page_res); + + bool use_cjk_fp_model() const { + return use_cjk_fp_model_; + } + void set_use_cjk_fp_model(bool flag) { + use_cjk_fp_model_ = flag; + } + + // tospace.cpp /////////////////////////////////////////// + void to_spacing( + ICOORD page_tr, //topright of page + TO_BLOCK_LIST *blocks //blocks on page + ); + ROW *make_prop_words(TO_ROW *row, // row to make + FCOORD rotation // for drawing + ); + ROW *make_blob_words(TO_ROW *row, // row to make + FCOORD rotation // for drawing + ); + // tordmain.cpp /////////////////////////////////////////// + void find_components(Pix* pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks); + void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST* blocks, bool testing_on); + + private: + // For underlying memory management and other utilities. + CCStruct* ccstruct_; + + // The size of the input image. + ICOORD page_tr_; + + bool use_cjk_fp_model_; + + // makerow.cpp /////////////////////////////////////////// + // Make the textlines inside each block. + void MakeRows(PageSegMode pageseg_mode, const FCOORD& skew, + int width, int height, TO_BLOCK_LIST* to_blocks); + // Make the textlines inside a single block. 
+ void MakeBlockRows(int min_spacing, int max_spacing, + const FCOORD& skew, TO_BLOCK* block, + ScrollView* win); + + public: + void compute_block_xheight(TO_BLOCK *block, float gradient); + void compute_row_xheight(TO_ROW *row, // row to do + const FCOORD& rotation, + float gradient, // global skew + int block_line_size); + void make_spline_rows(TO_BLOCK* block, // block to do + float gradient, // gradient to fit + bool testing_on); + private: + //// oldbasel.cpp //////////////////////////////////////// + void make_old_baselines(TO_BLOCK* block, // block to do + bool testing_on, // correct orientation + float gradient); + void correlate_lines(TO_BLOCK *block, float gradient); + void correlate_neighbours(TO_BLOCK *block, // block rows are in. + TO_ROW **rows, // rows of block. + int rowcount); // no of rows to do. + int correlate_with_stats(TO_ROW **rows, // rows of block. + int rowcount, // no of rows to do. + TO_BLOCK* block); + void find_textlines(TO_BLOCK *block, // block row is in + TO_ROW *row, // row to do + int degree, // required approximation + QSPLINE *spline); // starting spline + // tospace.cpp /////////////////////////////////////////// + //DEBUG USE ONLY + void block_spacing_stats(TO_BLOCK* block, + GAPMAP* gapmap, + bool& old_text_ord_proportional, + //resulting estimate + int16_t& block_space_gap_width, + //resulting estimate + int16_t& block_non_space_gap_width + ); + void row_spacing_stats(TO_ROW *row, + GAPMAP *gapmap, + int16_t block_idx, + int16_t row_idx, + //estimate for block + int16_t block_space_gap_width, + //estimate for block + int16_t block_non_space_gap_width + ); + void old_to_method(TO_ROW *row, + STATS *all_gap_stats, + STATS *space_gap_stats, + STATS *small_gap_stats, + int16_t block_space_gap_width, + //estimate for block + int16_t block_non_space_gap_width + ); + bool isolated_row_stats(TO_ROW* row, + GAPMAP* gapmap, + STATS* all_gap_stats, + bool suspected_table, + int16_t block_idx, + int16_t row_idx); + int16_t 
stats_count_under(STATS *stats, int16_t threshold); + void improve_row_threshold(TO_ROW *row, STATS *all_gap_stats); + bool make_a_word_break(TO_ROW* row, // row being made + TBOX blob_box, // for next_blob // how many blanks? + int16_t prev_gap, + TBOX prev_blob_box, + int16_t real_current_gap, + int16_t within_xht_current_gap, + TBOX next_blob_box, + int16_t next_gap, + uint8_t& blanks, + bool& fuzzy_sp, + bool& fuzzy_non, + bool& prev_gap_was_a_space, + bool& break_at_next_gap); + bool narrow_blob(TO_ROW* row, TBOX blob_box); + bool wide_blob(TO_ROW* row, TBOX blob_box); + bool suspected_punct_blob(TO_ROW* row, TBOX box); + void peek_at_next_gap(TO_ROW *row, + BLOBNBOX_IT box_it, + TBOX &next_blob_box, + int16_t &next_gap, + int16_t &next_within_xht_gap); + void mark_gap(TBOX blob, //blob following gap + int16_t rule, // heuristic id + int16_t prev_gap, + int16_t prev_blob_width, + int16_t current_gap, + int16_t next_blob_width, + int16_t next_gap); + float find_mean_blob_spacing(WERD *word); + bool ignore_big_gap(TO_ROW* row, + int32_t row_length, + GAPMAP* gapmap, + int16_t left, + int16_t right); + //get bounding box + TBOX reduced_box_next(TO_ROW *row, //current row + BLOBNBOX_IT *it //iterator to blobds + ); + TBOX reduced_box_for_blob(BLOBNBOX *blob, TO_ROW *row, int16_t *left_above_xht); + // tordmain.cpp /////////////////////////////////////////// + float filter_noise_blobs(BLOBNBOX_LIST *src_list, + BLOBNBOX_LIST *noise_list, + BLOBNBOX_LIST *small_list, + BLOBNBOX_LIST *large_list); + // Fixes the block so it obeys all the rules: + // Must have at least one ROW. + // Must have at least one WERD. + // WERDs contain a fake blob. + void cleanup_nontext_block(BLOCK* block); + void cleanup_blocks(bool clean_noise, BLOCK_LIST *blocks); + bool clean_noise_from_row(ROW* row); + void clean_noise_from_words(ROW *row); + // Remove outlines that are a tiny fraction in either width or height + // of the word height. 
+ void clean_small_noise_from_words(ROW *row); + // Groups blocks by rotation, then, for each group, makes a WordGrid and calls + // TransferDiacriticsToWords to copy the diacritic blobs to the most + // appropriate words in the group of blocks. Source blobs are not touched. + void TransferDiacriticsToBlockGroups(BLOBNBOX_LIST* diacritic_blobs, + BLOCK_LIST* blocks); + // Places a copy of blobs that are near a word (after applying rotation to the + // blob) in the most appropriate word, unless there is doubt, in which case a + // blob can end up in two words. Source blobs are not touched. + void TransferDiacriticsToWords(BLOBNBOX_LIST *diacritic_blobs, + const FCOORD &rotation, WordGrid *word_grid); + + public: + // makerow.cpp /////////////////////////////////////////// + BOOL_VAR_H(textord_single_height_mode, false, + "Script has no xheight, so use a single mode for horizontal text"); + // tospace.cpp /////////////////////////////////////////// + BOOL_VAR_H(tosp_old_to_method, false, "Space stats use prechopping?"); + BOOL_VAR_H(tosp_old_to_constrain_sp_kn, false, + "Constrain relative values of inter and intra-word gaps for " + "old_to_method."); + BOOL_VAR_H(tosp_only_use_prop_rows, true, + "Block stats to use fixed pitch rows?"); + BOOL_VAR_H(tosp_force_wordbreak_on_punct, false, + "Force word breaks on punct to break long lines in non-space " + "delimited langs"); + BOOL_VAR_H(tosp_use_pre_chopping, false, + "Space stats use prechopping?"); + BOOL_VAR_H(tosp_old_to_bug_fix, false, + "Fix suspected bug in old code"); + BOOL_VAR_H(tosp_block_use_cert_spaces, true, + "Only stat OBVIOUS spaces"); + BOOL_VAR_H(tosp_row_use_cert_spaces, true, + "Only stat OBVIOUS spaces"); + BOOL_VAR_H(tosp_narrow_blobs_not_cert, true, + "Only stat OBVIOUS spaces"); + BOOL_VAR_H(tosp_row_use_cert_spaces1, true, + "Only stat OBVIOUS spaces"); + BOOL_VAR_H(tosp_recovery_isolated_row_stats, true, + "Use row alone when inadequate cert spaces"); + 
BOOL_VAR_H(tosp_only_small_gaps_for_kern, false, "Better guess"); + BOOL_VAR_H(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?"); + BOOL_VAR_H(tosp_fuzzy_limit_all, true, + "Don't restrict kn->sp fuzzy limit to tables"); + BOOL_VAR_H(tosp_stats_use_xht_gaps, true, + "Use within xht gap for wd breaks"); + BOOL_VAR_H(tosp_use_xht_gaps, true, + "Use within xht gap for wd breaks"); + BOOL_VAR_H(tosp_only_use_xht_gaps, false, + "Only use within xht gap for wd breaks"); + BOOL_VAR_H(tosp_rule_9_test_punct, false, + "Don't chng kn to space next to punct"); + BOOL_VAR_H(tosp_flip_fuzz_kn_to_sp, true, "Default flip"); + BOOL_VAR_H(tosp_flip_fuzz_sp_to_kn, true, "Default flip"); + BOOL_VAR_H(tosp_improve_thresh, false, + "Enable improvement heuristic"); + INT_VAR_H(tosp_debug_level, 0, "Debug data"); + INT_VAR_H(tosp_enough_space_samples_for_median, 3, + "or should we use mean"); + INT_VAR_H(tosp_redo_kern_limit, 10, + "No.samples reqd to reestimate for row"); + INT_VAR_H(tosp_few_samples, 40, + "No.gaps reqd with 1 large gap to treat as a table"); + INT_VAR_H(tosp_short_row, 20, + "No.gaps reqd with few cert spaces to use certs"); + INT_VAR_H(tosp_sanity_method, 1, "How to avoid being silly"); + double_VAR_H(tosp_old_sp_kn_th_factor, 2.0, + "Factor for defining space threshold in terms of space and " + "kern sizes"); + double_VAR_H(tosp_threshold_bias1, 0, + "how far between kern and space?"); + double_VAR_H(tosp_threshold_bias2, 0, + "how far between kern and space?"); + double_VAR_H(tosp_narrow_fraction, 0.3, + "Fract of xheight for narrow"); + double_VAR_H(tosp_narrow_aspect_ratio, 0.48, + "narrow if w/h less than this"); + double_VAR_H(tosp_wide_fraction, 0.52, "Fract of xheight for wide"); + double_VAR_H(tosp_wide_aspect_ratio, 0.0, + "wide if w/h less than this"); + double_VAR_H(tosp_fuzzy_space_factor, 0.6, + "Fract of xheight for fuzz sp"); + double_VAR_H(tosp_fuzzy_space_factor1, 0.5, + "Fract of xheight for fuzz sp"); + 
double_VAR_H(tosp_fuzzy_space_factor2, 0.72, + "Fract of xheight for fuzz sp"); + double_VAR_H(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern"); + double_VAR_H(tosp_kern_gap_factor1, 2.0, + "gap ratio to flip kern->sp"); + double_VAR_H(tosp_kern_gap_factor2, 1.3, + "gap ratio to flip kern->sp"); + double_VAR_H(tosp_kern_gap_factor3, 2.5, + "gap ratio to flip kern->sp"); + double_VAR_H(tosp_ignore_big_gaps, -1, "xht multiplier"); + double_VAR_H(tosp_ignore_very_big_gaps, 3.5, "xht multiplier"); + double_VAR_H(tosp_rep_space, 1.6, "rep gap multiplier for space"); + double_VAR_H(tosp_enough_small_gaps, 0.65, + "Fract of kerns reqd for isolated row stats"); + double_VAR_H(tosp_table_kn_sp_ratio, 2.25, + "Min difference of kn & sp in table"); + double_VAR_H(tosp_table_xht_sp_ratio, 0.33, + "Expect spaces bigger than this"); + double_VAR_H(tosp_table_fuzzy_kn_sp_ratio, 3.0, + "Fuzzy if less than this"); + double_VAR_H(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg"); + double_VAR_H(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg"); + double_VAR_H(tosp_min_sane_kn_sp, 1.5, + "Don't trust spaces less than this time kn"); + double_VAR_H(tosp_init_guess_kn_mult, 2.2, + "Thresh guess - mult kn by this"); + double_VAR_H(tosp_init_guess_xht_mult, 0.28, + "Thresh guess - mult xht by this"); + double_VAR_H(tosp_max_sane_kn_thresh, 5.0, + "Multiplier on kn to limit thresh"); + double_VAR_H(tosp_flip_caution, 0.0, + "Don't autoflip kn to sp when large separation"); + double_VAR_H(tosp_large_kerning, 0.19, + "Limit use of xht gap with large kns"); + double_VAR_H(tosp_dont_fool_with_small_kerns, -1, + "Limit use of xht gap with odd small kns"); + double_VAR_H(tosp_near_lh_edge, 0, + "Don't reduce box if the top left is non blank"); + double_VAR_H(tosp_silly_kn_sp_gap, 0.2, + "Don't let sp minus kn get too small"); + double_VAR_H(tosp_pass_wide_fuzz_sp_to_context, 0.75, + "How wide fuzzies need context"); + // tordmain.cpp /////////////////////////////////////////// + 
BOOL_VAR_H(textord_no_rejects, false, "Don't remove noise blobs"); + BOOL_VAR_H(textord_show_blobs, false, "Display unsorted blobs"); + BOOL_VAR_H(textord_show_boxes, false, "Display boxes"); + INT_VAR_H(textord_max_noise_size, 7, "Pixel size of noise"); + INT_VAR_H(textord_baseline_debug, 0, "Baseline debug level"); + double_VAR_H(textord_noise_area_ratio, 0.7, + "Fraction of bounding box for noise"); + double_VAR_H(textord_initialx_ile, 0.75, "Ile of sizes for xheight guess"); + double_VAR_H(textord_initialasc_ile, 0.90, "Ile of sizes for xheight guess"); + INT_VAR_H(textord_noise_sizefraction, 10, "Fraction of size for maxima"); + double_VAR_H(textord_noise_sizelimit, 0.5, "Fraction of x for big t count"); + INT_VAR_H(textord_noise_translimit, 16, "Transitions for normal blob"); + double_VAR_H(textord_noise_normratio, 2.0, "Dot to norm ratio for deletion"); + BOOL_VAR_H(textord_noise_rejwords, true, "Reject noise-like words"); + BOOL_VAR_H(textord_noise_rejrows, true, "Reject noise-like rows"); + double_VAR_H(textord_noise_syfract, 0.2, "xh fract error for norm blobs"); + double_VAR_H(textord_noise_sxfract, 0.4, + "xh fract width error for norm blobs"); + double_VAR_H(textord_noise_hfract, 1.0/64, + "Height fraction to discard outlines as speckle noise"); + INT_VAR_H(textord_noise_sncount, 1, "super norm blobs to save row"); + double_VAR_H(textord_noise_rowratio, 6.0, "Dot to norm ratio for deletion"); + BOOL_VAR_H(textord_noise_debug, false, "Debug row garbage detector"); + double_VAR_H(textord_blshift_maxshift, 0.00, "Max baseline shift"); + double_VAR_H(textord_blshift_xfraction, 9.99, "Min size of baseline shift"); +}; + +} // namespace tesseract + +#endif // TESSERACT_TEXTORD_TEXTORD_H_ diff --git a/tesseract/src/textord/topitch.cpp b/tesseract/src/textord/topitch.cpp new file mode 100644 index 00000000..655f75bd --- /dev/null +++ b/tesseract/src/textord/topitch.cpp @@ -0,0 +1,1847 @@ +/********************************************************************** 
+ * File: topitch.cpp (Formerly to_pitch.c) + * Description: Code to determine fixed pitchness and the pitch if fixed. + * Author: Ray Smith + * + * (C) Copyright 1993, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + + // Include automatically generated configuration file if running autoconf. +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "topitch.h" + +#include "blobbox.h" +#include "statistc.h" +#include "drawtord.h" +#include "makerow.h" +#include "pitsync1.h" +#include "pithsync.h" +#include "tovars.h" +#include "wordseg.h" + +#include "helpers.h" + +#include <memory> + +namespace tesseract { + +static BOOL_VAR (textord_all_prop, false, "All doc is proportial text"); +BOOL_VAR (textord_debug_pitch_test, false, +"Debug on fixed pitch test"); +static BOOL_VAR (textord_disable_pitch_test, false, +"Turn off dp fixed pitch algorithm"); +BOOL_VAR (textord_fast_pitch_test, false, +"Do even faster pitch algorithm"); +BOOL_VAR (textord_debug_pitch_metric, false, +"Write full metric stuff"); +BOOL_VAR (textord_show_row_cuts, false, "Draw row-level cuts"); +BOOL_VAR (textord_show_page_cuts, false, "Draw page-level cuts"); +BOOL_VAR (textord_pitch_cheat, false, +"Use correct answer for fixed/prop"); +BOOL_VAR (textord_blockndoc_fixed, false, +"Attempt whole doc/block fixed pitch"); +double_VAR (textord_projection_scale, 0.200, "Ding rate for 
mid-cuts"); +double_VAR (textord_balance_factor, 1.0, +"Ding rate for unbalanced char cells"); + +#define BLOCK_STATS_CLUSTERS 10 +#define MAX_ALLOWED_PITCH 100 //max pixel pitch. + +// qsort function to sort 2 floats. +static int sort_floats(const void *arg1, const void *arg2) { + float diff = *reinterpret_cast<const float*>(arg1) - + *reinterpret_cast<const float*>(arg2); + if (diff > 0) { + return 1; + } else if (diff < 0) { + return -1; + } else { + return 0; + } +} + +/********************************************************************** + * compute_fixed_pitch + * + * Decide whether each row is fixed pitch individually. + * Correlate definite and uncertain results to obtain an individual + * result for each row in the TO_ROW class. + **********************************************************************/ + +void compute_fixed_pitch(ICOORD page_tr, // top right + TO_BLOCK_LIST* port_blocks, // input list + float gradient, // page skew + FCOORD rotation, // for drawing + bool testing_on) { // correct orientation + TO_BLOCK_IT block_it; //iterator + TO_BLOCK *block; //current block; + TO_ROW *row; //current row + int block_index; //block number + int row_index; //row number + +#ifndef GRAPHICS_DISABLED + if (textord_show_initial_words && testing_on) { + if (to_win == nullptr) + create_to_win(page_tr); + } +#endif + + block_it.set_to_list (port_blocks); + block_index = 1; + for (block_it.mark_cycle_pt (); !block_it.cycled_list (); + block_it.forward ()) { + block = block_it.data (); + compute_block_pitch(block, rotation, block_index, testing_on); + block_index++; + } + + if (!try_doc_fixed (page_tr, port_blocks, gradient)) { + block_index = 1; + for (block_it.mark_cycle_pt (); !block_it.cycled_list (); + block_it.forward ()) { + block = block_it.data (); + if (!try_block_fixed (block, block_index)) + try_rows_fixed(block, block_index, testing_on); + block_index++; + } + } + + block_index = 1; + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + 
block_it.forward()) { + block = block_it.data (); + POLY_BLOCK* pb = block->block->pdblk.poly_block(); + if (pb != nullptr && !pb->IsText()) continue; // Non-text doesn't exist! + // row iterator + TO_ROW_IT row_it(block->get_rows()); + row_index = 1; + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + fix_row_pitch(row, block, port_blocks, row_index, block_index); + row_index++; + } + block_index++; + } +#ifndef GRAPHICS_DISABLED + if (textord_show_initial_words && testing_on) { + ScrollView::Update(); + } +#endif +} + + +/********************************************************************** + * fix_row_pitch + * + * Get a pitch_decision for this row by voting among similar rows in the + * block, then similar rows over all the page, or any other rows at all. + **********************************************************************/ + +void fix_row_pitch(TO_ROW *bad_row, // row to fix + TO_BLOCK *bad_block, // block of bad_row + TO_BLOCK_LIST *blocks, // blocks to scan + int32_t row_target, // number of row + int32_t block_target) { // number of block + int16_t mid_cuts; + int block_votes; //votes in block + int like_votes; //votes over page + int other_votes; //votes of unlike blocks + int block_index; //number of block + int row_index; //number of row + int maxwidth; //max pitch + TO_BLOCK_IT block_it = blocks; //block iterator + TO_BLOCK *block; //current block + TO_ROW *row; //current row + float sp_sd; //space deviation + STATS block_stats; //pitches in block + STATS like_stats; //pitches in page + + block_votes = like_votes = other_votes = 0; + maxwidth = static_cast<int32_t>(ceil (bad_row->xheight * textord_words_maxspace)); + if (bad_row->pitch_decision != PITCH_DEF_FIXED + && bad_row->pitch_decision != PITCH_DEF_PROP) { + block_stats.set_range (0, maxwidth); + like_stats.set_range (0, maxwidth); + block_index = 1; + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) { + block = 
block_it.data(); + POLY_BLOCK* pb = block->block->pdblk.poly_block(); + if (pb != nullptr && !pb->IsText()) continue; // Non text doesn't exist! + row_index = 1; + TO_ROW_IT row_it(block->get_rows()); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); + row_it.forward ()) { + row = row_it.data (); + if ((bad_row->all_caps + && row->xheight + row->ascrise + < + (bad_row->xheight + bad_row->ascrise) * (1 + + textord_pitch_rowsimilarity) + && row->xheight + row->ascrise > + (bad_row->xheight + bad_row->ascrise) * (1 - + textord_pitch_rowsimilarity)) + || (!bad_row->all_caps + && row->xheight < + bad_row->xheight * (1 + textord_pitch_rowsimilarity) + && row->xheight > + bad_row->xheight * (1 - textord_pitch_rowsimilarity))) { + if (block_index == block_target) { + if (row->pitch_decision == PITCH_DEF_FIXED) { + block_votes += textord_words_veto_power; + block_stats.add (static_cast<int32_t>(row->fixed_pitch), + textord_words_veto_power); + } + else if (row->pitch_decision == PITCH_MAYBE_FIXED + || row->pitch_decision == PITCH_CORR_FIXED) { + block_votes++; + block_stats.add (static_cast<int32_t>(row->fixed_pitch), 1); + } + else if (row->pitch_decision == PITCH_DEF_PROP) + block_votes -= textord_words_veto_power; + else if (row->pitch_decision == PITCH_MAYBE_PROP + || row->pitch_decision == PITCH_CORR_PROP) + block_votes--; + } + else { + if (row->pitch_decision == PITCH_DEF_FIXED) { + like_votes += textord_words_veto_power; + like_stats.add (static_cast<int32_t>(row->fixed_pitch), + textord_words_veto_power); + } + else if (row->pitch_decision == PITCH_MAYBE_FIXED + || row->pitch_decision == PITCH_CORR_FIXED) { + like_votes++; + like_stats.add (static_cast<int32_t>(row->fixed_pitch), 1); + } + else if (row->pitch_decision == PITCH_DEF_PROP) + like_votes -= textord_words_veto_power; + else if (row->pitch_decision == PITCH_MAYBE_PROP + || row->pitch_decision == PITCH_CORR_PROP) + like_votes--; + } + } + else { + if (row->pitch_decision == PITCH_DEF_FIXED) + 
other_votes += textord_words_veto_power; + else if (row->pitch_decision == PITCH_MAYBE_FIXED + || row->pitch_decision == PITCH_CORR_FIXED) + other_votes++; + else if (row->pitch_decision == PITCH_DEF_PROP) + other_votes -= textord_words_veto_power; + else if (row->pitch_decision == PITCH_MAYBE_PROP + || row->pitch_decision == PITCH_CORR_PROP) + other_votes--; + } + row_index++; + } + block_index++; + } + if (block_votes > textord_words_veto_power) { + bad_row->fixed_pitch = block_stats.ile (0.5); + bad_row->pitch_decision = PITCH_CORR_FIXED; + } + else if (block_votes <= textord_words_veto_power && like_votes > 0) { + bad_row->fixed_pitch = like_stats.ile (0.5); + bad_row->pitch_decision = PITCH_CORR_FIXED; + } + else { + bad_row->pitch_decision = PITCH_CORR_PROP; + if (block_votes == 0 && like_votes == 0 && other_votes > 0 + && (textord_debug_pitch_test || textord_debug_pitch_metric)) + tprintf + ("Warning:row %d of block %d set prop with no like rows against trend\n", + row_target, block_target); + } + } + if (textord_debug_pitch_metric) { + tprintf(":b_votes=%d:l_votes=%d:o_votes=%d", + block_votes, like_votes, other_votes); + tprintf("x=%g:asc=%g\n", bad_row->xheight, bad_row->ascrise); + } + if (bad_row->pitch_decision == PITCH_CORR_FIXED) { + if (bad_row->fixed_pitch < textord_min_xheight) { + if (block_votes > 0) + bad_row->fixed_pitch = block_stats.ile (0.5); + else if (block_votes == 0 && like_votes > 0) + bad_row->fixed_pitch = like_stats.ile (0.5); + else { + tprintf + ("Warning:guessing pitch as xheight on row %d, block %d\n", + row_target, block_target); + bad_row->fixed_pitch = bad_row->xheight; + } + } + if (bad_row->fixed_pitch < textord_min_xheight) + bad_row->fixed_pitch = (float) textord_min_xheight; + bad_row->kern_size = bad_row->fixed_pitch / 4; + bad_row->min_space = static_cast<int32_t>(bad_row->fixed_pitch * 0.6); + bad_row->max_nonspace = static_cast<int32_t>(bad_row->fixed_pitch * 0.4); + bad_row->space_threshold = + (bad_row->min_space + 
bad_row->max_nonspace) / 2; + bad_row->space_size = bad_row->fixed_pitch; + if (bad_row->char_cells.empty() && !bad_row->blob_list()->empty()) { + tune_row_pitch (bad_row, &bad_row->projection, + bad_row->projection_left, bad_row->projection_right, + (bad_row->fixed_pitch + + bad_row->max_nonspace * 3) / 4, bad_row->fixed_pitch, + sp_sd, mid_cuts, &bad_row->char_cells, false); + } + } + else if (bad_row->pitch_decision == PITCH_CORR_PROP + || bad_row->pitch_decision == PITCH_DEF_PROP) { + bad_row->fixed_pitch = 0.0f; + bad_row->char_cells.clear (); + } +} + + +/********************************************************************** + * compute_block_pitch + * + * Decide whether each block is fixed pitch individually. + **********************************************************************/ + +void compute_block_pitch(TO_BLOCK* block, // input list + FCOORD rotation, // for drawing + int32_t block_index, // block number + bool testing_on) { // correct orientation + TBOX block_box; //bounding box + + block_box = block->block->pdblk.bounding_box (); + if (testing_on && textord_debug_pitch_test) { + tprintf ("Block %d at (%d,%d)->(%d,%d)\n", + block_index, + block_box.left (), block_box.bottom (), + block_box.right (), block_box.top ()); + } + block->min_space = static_cast<int32_t>(floor (block->xheight + * textord_words_default_minspace)); + block->max_nonspace = static_cast<int32_t>(ceil (block->xheight + * textord_words_default_nonspace)); + block->fixed_pitch = 0.0f; + block->space_size = static_cast<float>(block->min_space); + block->kern_size = static_cast<float>(block->max_nonspace); + block->pr_nonsp = block->xheight * words_default_prop_nonspace; + block->pr_space = block->pr_nonsp * textord_spacesize_ratioprop; + if (!block->get_rows ()->empty ()) { + ASSERT_HOST (block->xheight > 0); + find_repeated_chars(block, textord_show_initial_words && testing_on); +#ifndef GRAPHICS_DISABLED + if (textord_show_initial_words && testing_on) + //overlap_picture_ops(true); 
+ ScrollView::Update(); +#endif + compute_rows_pitch(block, + block_index, + textord_debug_pitch_test && testing_on); + } +} + + +/********************************************************************** + * compute_rows_pitch + * + * Decide whether each row is fixed pitch individually. + **********************************************************************/ + +bool compute_rows_pitch( //find line stats + TO_BLOCK* block, //block to do + int32_t block_index, //block number + bool testing_on //correct orientation +) { + int32_t maxwidth; //of spaces + TO_ROW *row; //current row + int32_t row_index; //row number. + float lower, upper; //cluster thresholds + TO_ROW_IT row_it = block->get_rows (); + + row_index = 1; + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + ASSERT_HOST (row->xheight > 0); + row->compute_vertical_projection (); + maxwidth = static_cast<int32_t>(ceil (row->xheight * textord_words_maxspace)); + if (row_pitch_stats (row, maxwidth, testing_on) + && find_row_pitch (row, maxwidth, + textord_dotmatrix_gap + 1, block, block_index, + row_index, testing_on)) { + if (row->fixed_pitch == 0) { + lower = row->pr_nonsp; + upper = row->pr_space; + row->space_size = upper; + row->kern_size = lower; + } + } + else { + row->fixed_pitch = 0.0f; //insufficient data + row->pitch_decision = PITCH_DUNNO; + } + row_index++; + } + return false; +} + + +/********************************************************************** + * try_doc_fixed + * + * Attempt to call the entire document fixed pitch. + **********************************************************************/ + +bool try_doc_fixed( //determine pitch + ICOORD page_tr, //top right + TO_BLOCK_LIST* port_blocks, //input list + float gradient //page skew +) { + int16_t master_x; //uniform shifts + int16_t pitch; //median pitch. 
+ int x; //profile coord + int prop_blocks; //correct counts + int fixed_blocks; + int total_row_count; //total in page + //iterator + TO_BLOCK_IT block_it = port_blocks; + TO_BLOCK *block; //current block; + TO_ROW *row; //current row + int16_t projection_left; //edges + int16_t projection_right; + int16_t row_left; //edges of row + int16_t row_right; + float master_y; //uniform shifts + float shift_factor; //page skew correction + float final_pitch; //output pitch + float row_y; //baseline + STATS projection; //entire page + STATS pitches (0, MAX_ALLOWED_PITCH); + //for median + float sp_sd; //space sd + int16_t mid_cuts; //no of cheap cuts + float pitch_sd; //sync rating + + if (block_it.empty () + // || block_it.data()==block_it.data_relative(1) + || !textord_blockndoc_fixed) + return false; + shift_factor = gradient / (gradient * gradient + 1); + // row iterator + TO_ROW_IT row_it(block_it.data ()->get_rows()); + master_x = row_it.data ()->projection_left; + master_y = row_it.data ()->baseline.y (master_x); + projection_left = INT16_MAX; + projection_right = -INT16_MAX; + prop_blocks = 0; + fixed_blocks = 0; + total_row_count = 0; + + for (block_it.mark_cycle_pt (); !block_it.cycled_list (); + block_it.forward ()) { + block = block_it.data (); + row_it.set_to_list (block->get_rows ()); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + total_row_count++; + if (row->fixed_pitch > 0) + pitches.add (static_cast<int32_t>(row->fixed_pitch), 1); + //find median + row_y = row->baseline.y (master_x); + row_left = + static_cast<int16_t>(row->projection_left - + shift_factor * (master_y - row_y)); + row_right = + static_cast<int16_t>(row->projection_right - + shift_factor * (master_y - row_y)); + if (row_left < projection_left) + projection_left = row_left; + if (row_right > projection_right) + projection_right = row_right; + } + } + if (pitches.get_total () == 0) + return false; + projection.set_range 
(projection_left, projection_right); + + for (block_it.mark_cycle_pt (); !block_it.cycled_list (); + block_it.forward ()) { + block = block_it.data (); + row_it.set_to_list (block->get_rows ()); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + row_y = row->baseline.y (master_x); + row_left = + static_cast<int16_t>(row->projection_left - + shift_factor * (master_y - row_y)); + for (x = row->projection_left; x < row->projection_right; + x++, row_left++) { + projection.add (row_left, row->projection.pile_count (x)); + } + } + } + + row_it.set_to_list (block_it.data ()->get_rows ()); + row = row_it.data (); +#ifndef GRAPHICS_DISABLED + if (textord_show_page_cuts && to_win != nullptr) + projection.plot (to_win, projection_left, + row->intercept (), 1.0f, -1.0f, ScrollView::CORAL); +#endif + final_pitch = pitches.ile (0.5); + pitch = static_cast<int16_t>(final_pitch); + pitch_sd = + tune_row_pitch (row, &projection, projection_left, projection_right, + pitch * 0.75, final_pitch, sp_sd, mid_cuts, + &row->char_cells, false); + + if (textord_debug_pitch_metric) + tprintf + ("try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n", + prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd, + pitch_sd / total_row_count, pitch_sd / pitch, + pitch_sd / total_row_count / pitch); + +#ifndef GRAPHICS_DISABLED + if (textord_show_page_cuts && to_win != nullptr) { + float row_shift; //shift for row + ICOORDELT_LIST *master_cells; //cells for page + master_cells = &row->char_cells; + for (block_it.mark_cycle_pt (); !block_it.cycled_list (); + block_it.forward ()) { + block = block_it.data (); + row_it.set_to_list (block->get_rows ()); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); + row_it.forward ()) { + row = row_it.data (); + row_y = row->baseline.y (master_x); + row_shift = shift_factor * (master_y - row_y); + plot_row_cells(to_win, ScrollView::GOLDENROD, row, 
row_shift, master_cells); + } + } + } +#endif + row->char_cells.clear (); + return false; +} + + +/********************************************************************** + * try_block_fixed + * + * Try to call the entire block fixed. + **********************************************************************/ + +bool try_block_fixed( //find line stats + TO_BLOCK* block, //block to do + int32_t block_index //block number +) { + return false; +} + + +/********************************************************************** + * try_rows_fixed + * + * Decide whether each row is fixed pitch individually. + **********************************************************************/ + +bool try_rows_fixed( //find line stats + TO_BLOCK* block, //block to do + int32_t block_index, //block number + bool testing_on //correct orientation +) { + TO_ROW *row; //current row + int32_t row_index; //row number. + int32_t def_fixed = 0; //counters + int32_t def_prop = 0; + int32_t maybe_fixed = 0; + int32_t maybe_prop = 0; + int32_t dunno = 0; + int32_t corr_fixed = 0; + int32_t corr_prop = 0; + float lower, upper; //cluster thresholds + TO_ROW_IT row_it = block->get_rows (); + + row_index = 1; + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + ASSERT_HOST (row->xheight > 0); + if (row->fixed_pitch > 0 && + fixed_pitch_row(row, block->block, block_index)) { + if (row->fixed_pitch == 0) { + lower = row->pr_nonsp; + upper = row->pr_space; + row->space_size = upper; + row->kern_size = lower; + } + } + row_index++; + } + count_block_votes(block, + def_fixed, + def_prop, + maybe_fixed, + maybe_prop, + corr_fixed, + corr_prop, + dunno); + if (testing_on + && (textord_debug_pitch_test + || textord_blocksall_prop || textord_blocksall_fixed)) { + tprintf ("Initially:"); + print_block_counts(block, block_index); + } + if (def_fixed > def_prop * textord_words_veto_power) + block->pitch_decision = PITCH_DEF_FIXED; + else if (def_prop > def_fixed * 
textord_words_veto_power) + block->pitch_decision = PITCH_DEF_PROP; + else if (def_fixed > 0 || def_prop > 0) + block->pitch_decision = PITCH_DUNNO; + else if (maybe_fixed > maybe_prop * textord_words_veto_power) + block->pitch_decision = PITCH_MAYBE_FIXED; + else if (maybe_prop > maybe_fixed * textord_words_veto_power) + block->pitch_decision = PITCH_MAYBE_PROP; + else + block->pitch_decision = PITCH_DUNNO; + return false; +} + + +/********************************************************************** + * print_block_counts + * + * Count up how many rows have what decision and print the results. + **********************************************************************/ + +void print_block_counts( //find line stats + TO_BLOCK *block, //block to do + int32_t block_index //block number + ) { + int32_t def_fixed = 0; //counters + int32_t def_prop = 0; + int32_t maybe_fixed = 0; + int32_t maybe_prop = 0; + int32_t dunno = 0; + int32_t corr_fixed = 0; + int32_t corr_prop = 0; + + count_block_votes(block, + def_fixed, + def_prop, + maybe_fixed, + maybe_prop, + corr_fixed, + corr_prop, + dunno); + tprintf ("Block %d has (%d,%d,%d)", + block_index, def_fixed, maybe_fixed, corr_fixed); + if (textord_blocksall_prop && (def_fixed || maybe_fixed || corr_fixed)) + tprintf (" (Wrongly)"); + tprintf (" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop); + if (textord_blocksall_fixed && (def_prop || maybe_prop || corr_prop)) + tprintf (" (Wrongly)"); + tprintf (" prop, %d dunno\n", dunno); +} + + +/********************************************************************** + * count_block_votes + * + * Count the number of rows in the block with each kind of pitch_decision. 
+ **********************************************************************/ + +void count_block_votes( //find line stats + TO_BLOCK *block, //block to do + int32_t &def_fixed, //add to counts + int32_t &def_prop, + int32_t &maybe_fixed, + int32_t &maybe_prop, + int32_t &corr_fixed, + int32_t &corr_prop, + int32_t &dunno) { + TO_ROW *row; //current row + TO_ROW_IT row_it = block->get_rows (); + + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + switch (row->pitch_decision) { + case PITCH_DUNNO: + dunno++; + break; + case PITCH_DEF_PROP: + def_prop++; + break; + case PITCH_MAYBE_PROP: + maybe_prop++; + break; + case PITCH_DEF_FIXED: + def_fixed++; + break; + case PITCH_MAYBE_FIXED: + maybe_fixed++; + break; + case PITCH_CORR_PROP: + corr_prop++; + break; + case PITCH_CORR_FIXED: + corr_fixed++; + break; + } + } +} + + +/********************************************************************** + * row_pitch_stats + * + * Decide whether each row is fixed pitch individually. 
+ **********************************************************************/ + +bool row_pitch_stats( //find line stats + TO_ROW* row, //current row + int32_t maxwidth, //of spaces + bool testing_on //correct orientation +) { + BLOBNBOX *blob; //current blob + int gap_index; //current gap + int32_t prev_x; //end of prev blob + int32_t cluster_count; //no of clusters + int32_t prev_count; //of clusters + int32_t smooth_factor; //for smoothing stats + TBOX blob_box; //bounding box + float lower, upper; //cluster thresholds + //gap sizes + float gaps[BLOCK_STATS_CLUSTERS]; + //blobs + BLOBNBOX_IT blob_it = row->blob_list (); + STATS gap_stats (0, maxwidth); + STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1]; + //clusters + + smooth_factor = + static_cast<int32_t>(row->xheight * textord_wordstats_smooth_factor + 1.5); + if (!blob_it.empty ()) { + prev_x = blob_it.data ()->bounding_box ().right (); + blob_it.forward (); + while (!blob_it.at_first ()) { + blob = blob_it.data (); + if (!blob->joined_to_prev ()) { + blob_box = blob->bounding_box (); + if (blob_box.left () - prev_x < maxwidth) + gap_stats.add (blob_box.left () - prev_x, 1); + prev_x = blob_box.right (); + } + blob_it.forward (); + } + } + if (gap_stats.get_total () == 0) { + return false; + } + cluster_count = 0; + lower = row->xheight * words_initial_lower; + upper = row->xheight * words_initial_upper; + gap_stats.smooth (smooth_factor); + do { + prev_count = cluster_count; + cluster_count = gap_stats.cluster (lower, upper, + textord_spacesize_ratioprop, + BLOCK_STATS_CLUSTERS, cluster_stats); + } + while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS); + if (cluster_count < 1) { + return false; + } + for (gap_index = 0; gap_index < cluster_count; gap_index++) + gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5); + //get medians + if (testing_on) { + tprintf ("cluster_count=%d:", cluster_count); + for (gap_index = 0; gap_index < cluster_count; gap_index++) + tprintf (" %g(%d)", 
gaps[gap_index], + cluster_stats[gap_index + 1].get_total ()); + tprintf ("\n"); + } + qsort (gaps, cluster_count, sizeof (float), sort_floats); + + //Try to find proportional non-space and space for row. + lower = row->xheight * words_default_prop_nonspace; + upper = row->xheight * textord_words_min_minspace; + for (gap_index = 0; gap_index < cluster_count + && gaps[gap_index] < lower; gap_index++); + if (gap_index == 0) { + if (testing_on) + tprintf ("No clusters below nonspace threshold!!\n"); + if (cluster_count > 1) { + row->pr_nonsp = gaps[0]; + row->pr_space = gaps[1]; + } + else { + row->pr_nonsp = lower; + row->pr_space = gaps[0]; + } + } + else { + row->pr_nonsp = gaps[gap_index - 1]; + while (gap_index < cluster_count && gaps[gap_index] < upper) + gap_index++; + if (gap_index == cluster_count) { + if (testing_on) + tprintf ("No clusters above nonspace threshold!!\n"); + row->pr_space = lower * textord_spacesize_ratioprop; + } + else + row->pr_space = gaps[gap_index]; + } + + //Now try to find the fixed pitch space and non-space. + upper = row->xheight * words_default_fixed_space; + for (gap_index = 0; gap_index < cluster_count + && gaps[gap_index] < upper; gap_index++); + if (gap_index == 0) { + if (testing_on) + tprintf ("No clusters below space threshold!!\n"); + row->fp_nonsp = upper; + row->fp_space = gaps[0]; + } + else { + row->fp_nonsp = gaps[gap_index - 1]; + if (gap_index == cluster_count) { + if (testing_on) + tprintf ("No clusters above space threshold!!\n"); + row->fp_space = row->xheight; + } + else + row->fp_space = gaps[gap_index]; + } + if (testing_on) { + tprintf + ("Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, fp_space=%g\n", + row->pr_nonsp, row->pr_space, row->fp_nonsp, row->fp_space); + } + return true; //computed some stats +} + + +/********************************************************************** + * find_row_pitch + * + * Check to see if this row could be fixed pitch using the given spacings. 
+ * Blobs with gaps smaller than the lower threshold are assumed to be one. + * The larger threshold is the word gap threshold. + **********************************************************************/ + +bool find_row_pitch( //find lines + TO_ROW* row, //row to do + int32_t maxwidth, //max permitted space + int32_t dm_gap, //ignorable gaps + TO_BLOCK* block, //block of row + int32_t block_index, //block_number + int32_t row_index, //number of row + bool testing_on //correct orientation +) { + bool used_dm_model; //looks like dot matrix + float min_space; //estimate threshold + float non_space; //gap size + float gap_iqr; //interquartile range + float pitch_iqr; + float dm_gap_iqr; //interquartile range + float dm_pitch_iqr; + float dm_pitch; //pitch with dm on + float pitch; //revised estimate + float initial_pitch; //guess at pitch + STATS gap_stats (0, maxwidth); + //centre-centre + STATS pitch_stats (0, maxwidth); + + row->fixed_pitch = 0.0f; + initial_pitch = row->fp_space; + if (initial_pitch > row->xheight * (1 + words_default_fixed_limit)) + initial_pitch = row->xheight;//keep pitch decent + non_space = row->fp_nonsp; + if (non_space > initial_pitch) + non_space = initial_pitch; + min_space = (initial_pitch + non_space) / 2; + + if (!count_pitch_stats (row, &gap_stats, &pitch_stats, + initial_pitch, min_space, true, false, dm_gap)) { + dm_gap_iqr = 0.0001f; + dm_pitch_iqr = maxwidth * 2.0f; + dm_pitch = initial_pitch; + } + else { + dm_gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25); + dm_pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25); + dm_pitch = pitch_stats.ile (0.5); + } + gap_stats.clear (); + pitch_stats.clear (); + if (!count_pitch_stats (row, &gap_stats, &pitch_stats, + initial_pitch, min_space, true, false, 0)) { + gap_iqr = 0.0001f; + pitch_iqr = maxwidth * 3.0f; + } + else { + gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25); + pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25); + if (testing_on) + tprintf + 
("First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n", + initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5)); + initial_pitch = pitch_stats.ile (0.5); + if (min_space > initial_pitch + && count_pitch_stats (row, &gap_stats, &pitch_stats, + initial_pitch, initial_pitch, true, false, 0)) { + min_space = initial_pitch; + gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25); + pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25); + if (testing_on) + tprintf + ("Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n", + initial_pitch, gap_iqr, pitch_iqr, pitch_stats.ile (0.5)); + initial_pitch = pitch_stats.ile (0.5); + } + } + if (textord_debug_pitch_metric) + tprintf("Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:", + block_index, row_index, 'X', + pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr, + pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ? 'D' : + (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ? 'S' : 'M')); + if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) { + row->pitch_decision = PITCH_DUNNO; + if (textord_debug_pitch_metric) + tprintf ("\n"); + return false; //insufficient data + } + if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) { + if (testing_on) + tprintf + ("Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n", + pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr); + gap_iqr = gap_stats.ile (0.75) - gap_stats.ile (0.25); + pitch_iqr = pitch_stats.ile (0.75) - pitch_stats.ile (0.25); + pitch = pitch_stats.ile (0.5); + used_dm_model = false; + } + else { + if (testing_on) + tprintf + ("Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n", + pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr); + gap_iqr = dm_gap_iqr; + pitch_iqr = dm_pitch_iqr; + pitch = dm_pitch; + used_dm_model = true; + } + if (textord_debug_pitch_metric) { + tprintf ("rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:", + pitch_iqr, gap_iqr, pitch); + tprintf 
("p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:", + pitch_iqr / gap_iqr, pitch_iqr / block->xheight, + pitch_iqr < gap_iqr * textord_fpiqr_ratio + && pitch_iqr < block->xheight * textord_max_pitch_iqr + && pitch < block->xheight * textord_words_default_maxspace + ? 'F' : 'P'); + } + if (pitch_iqr < gap_iqr * textord_fpiqr_ratio + && pitch_iqr < block->xheight * textord_max_pitch_iqr + && pitch < block->xheight * textord_words_default_maxspace) + row->pitch_decision = PITCH_MAYBE_FIXED; + else + row->pitch_decision = PITCH_MAYBE_PROP; + row->fixed_pitch = pitch; + row->kern_size = gap_stats.ile (0.5); + row->min_space = static_cast<int32_t>(row->fixed_pitch + non_space) / 2; + if (row->min_space > row->fixed_pitch) + row->min_space = static_cast<int32_t>(row->fixed_pitch); + row->max_nonspace = row->min_space; + row->space_size = row->fixed_pitch; + row->space_threshold = (row->max_nonspace + row->min_space) / 2; + row->used_dm_model = used_dm_model; + return true; +} + + +/********************************************************************** + * fixed_pitch_row + * + * Check to see if this row could be fixed pitch using the given spacings. + * Blobs with gaps smaller than the lower threshold are assumed to be one. + * The larger threshold is the word gap threshold. + **********************************************************************/ + +bool fixed_pitch_row(TO_ROW* row, // row to do + BLOCK* block, + int32_t block_index // block_number +) { + const char *res_string; // pitch result + int16_t mid_cuts; // no of cheap cuts + float non_space; // gap size + float pitch_sd; // error on pitch + float sp_sd = 0.0f; // space sd + + non_space = row->fp_nonsp; + if (non_space > row->fixed_pitch) + non_space = row->fixed_pitch; + POLY_BLOCK* pb = block != nullptr ? block->pdblk.poly_block() : nullptr; + if (textord_all_prop || (pb != nullptr && !pb->IsText())) { + // Set the decision to definitely proportional. 
+ pitch_sd = textord_words_def_prop * row->fixed_pitch; + row->pitch_decision = PITCH_DEF_PROP; + } else { + pitch_sd = tune_row_pitch (row, &row->projection, row->projection_left, + row->projection_right, + (row->fixed_pitch + non_space * 3) / 4, + row->fixed_pitch, sp_sd, mid_cuts, + &row->char_cells, + block_index == textord_debug_block); + if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch + && ((pitsync_linear_version & 3) < 3 + || ((pitsync_linear_version & 3) >= 3 && (row->used_dm_model + || sp_sd > 20 + || (pitch_sd == 0 && sp_sd > 10))))) { + if (pitch_sd < textord_words_def_fixed * row->fixed_pitch + && !row->all_caps + && ((pitsync_linear_version & 3) < 3 || sp_sd > 20)) + row->pitch_decision = PITCH_DEF_FIXED; + else + row->pitch_decision = PITCH_MAYBE_FIXED; + } + else if ((pitsync_linear_version & 3) < 3 + || sp_sd > 20 + || mid_cuts > 0 + || pitch_sd >= textord_words_pitchsd_threshold * row->fixed_pitch) { + if (pitch_sd < textord_words_def_prop * row->fixed_pitch) + row->pitch_decision = PITCH_MAYBE_PROP; + else + row->pitch_decision = PITCH_DEF_PROP; + } + else + row->pitch_decision = PITCH_DUNNO; + } + + if (textord_debug_pitch_metric) { + res_string = "??"; + switch (row->pitch_decision) { + case PITCH_DEF_PROP: + res_string = "DP"; + break; + case PITCH_MAYBE_PROP: + res_string = "MP"; + break; + case PITCH_DEF_FIXED: + res_string = "DF"; + break; + case PITCH_MAYBE_FIXED: + res_string = "MF"; + break; + default: + res_string = "??"; + } + tprintf (":sd/p=%g:occ=%g:init_res=%s\n", + pitch_sd / row->fixed_pitch, sp_sd, res_string); + } + return true; +} + + +/********************************************************************** + * count_pitch_stats + * + * Count up the gap and pitch stats on the block to see if it is fixed pitch. + * Blobs with gaps smaller than the lower threshold are assumed to be one. + * The larger threshold is the word gap threshold. + * The return value indicates whether there were any decent values to use. 
+ **********************************************************************/ + +bool count_pitch_stats( //find lines + TO_ROW* row, //row to do + STATS* gap_stats, //blob gaps + STATS* pitch_stats, //centre-centre stats + float initial_pitch, //guess at pitch + float min_space, //estimate space size + bool ignore_outsize, //discard big objects + bool split_outsize, //split big objects + int32_t dm_gap //ignorable gaps +) { + bool prev_valid; //not word broken + BLOBNBOX *blob; //current blob + //blobs + BLOBNBOX_IT blob_it = row->blob_list (); + int32_t prev_right; //end of prev blob + int32_t prev_centre; //centre of previous blob + int32_t x_centre; //centre of this blob + int32_t blob_width; //width of blob + int32_t width_units; //no of widths in blob + float width; //blob width + TBOX blob_box; //bounding box + TBOX joined_box; //of super blob + + gap_stats->clear (); + pitch_stats->clear (); + if (blob_it.empty ()) + return false; + prev_valid = false; + prev_centre = 0; + prev_right = 0; // stop compiler warning + joined_box = blob_it.data ()->bounding_box (); + do { + blob_it.forward (); + blob = blob_it.data (); + if (!blob->joined_to_prev ()) { + blob_box = blob->bounding_box (); + if ((blob_box.left () - joined_box.right () < dm_gap + && !blob_it.at_first ()) + || blob->cblob() == nullptr) + joined_box += blob_box; //merge blobs + else { + blob_width = joined_box.width (); + if (split_outsize) { + width_units = + static_cast<int32_t>(floor (static_cast<float>(blob_width) / initial_pitch + 0.5)); + if (width_units < 1) + width_units = 1; + width_units--; + } + else if (ignore_outsize) { + width = static_cast<float>(blob_width) / initial_pitch; + width_units = width < 1 + words_default_fixed_limit + && width > 1 - words_default_fixed_limit ? 
0 : -1; + } + else + width_units = 0; //everything in + x_centre = static_cast<int32_t>(joined_box.left () + + (blob_width - + width_units * initial_pitch) / 2); + if (prev_valid && width_units >= 0) { + // if (width_units>0) + // { + // tprintf("wu=%d, width=%d, xc=%d, adding %d\n", + // width_units,blob_width,x_centre,x_centre-prev_centre); + // } + gap_stats->add (joined_box.left () - prev_right, 1); + pitch_stats->add (x_centre - prev_centre, 1); + } + prev_centre = static_cast<int32_t>(x_centre + width_units * initial_pitch); + prev_right = joined_box.right (); + prev_valid = blob_box.left () - joined_box.right () < min_space; + prev_valid = prev_valid && width_units >= 0; + joined_box = blob_box; + } + } + } + while (!blob_it.at_first ()); + return gap_stats->get_total () >= 3; +} + + +/********************************************************************** + * tune_row_pitch + * + * Use a dp algorithm to fit the character cells and return the sd of + * the cell size over the row. + **********************************************************************/ + +float tune_row_pitch( //find fp cells + TO_ROW* row, //row to do + STATS* projection, //vertical projection + int16_t projection_left, //edge of projection + int16_t projection_right, //edge of projection + float space_size, //size of blank + float& initial_pitch, //guess at pitch + float& best_sp_sd, //space sd + int16_t& best_mid_cuts, //no of cheap cuts + ICOORDELT_LIST* best_cells, //row cells + bool testing_on //inidividual words +) { + int pitch_delta; //offset pitch + int16_t mid_cuts; //cheap cuts + float pitch_sd; //current sd + float best_sd; //best result + float best_pitch; //pitch for best result + float initial_sd; //starting error + float sp_sd; //space sd + ICOORDELT_LIST test_cells; //row cells + ICOORDELT_IT best_it; //start of best list + + if (textord_fast_pitch_test) + return tune_row_pitch2 (row, projection, projection_left, + projection_right, space_size, initial_pitch, + best_sp_sd, + 
//space sd + best_mid_cuts, best_cells, testing_on); + if (textord_disable_pitch_test) { + best_sp_sd = initial_pitch; + return initial_pitch; + } + initial_sd = + compute_pitch_sd(row, + projection, + projection_left, + projection_right, + space_size, + initial_pitch, + best_sp_sd, + best_mid_cuts, + best_cells, + testing_on); + best_sd = initial_sd; + best_pitch = initial_pitch; + if (testing_on) + tprintf ("tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd); + for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) { + pitch_sd = + compute_pitch_sd (row, projection, projection_left, projection_right, + space_size, initial_pitch + pitch_delta, sp_sd, + mid_cuts, &test_cells, testing_on); + if (testing_on) + tprintf ("testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta, + pitch_sd); + if (pitch_sd < best_sd) { + best_sd = pitch_sd; + best_mid_cuts = mid_cuts; + best_sp_sd = sp_sd; + best_pitch = initial_pitch + pitch_delta; + best_cells->clear (); + best_it.set_to_list (best_cells); + best_it.add_list_after (&test_cells); + } + else + test_cells.clear (); + if (pitch_sd > initial_sd) + break; //getting worse + } + for (pitch_delta = 1; pitch_delta <= textord_pitch_range; pitch_delta++) { + pitch_sd = + compute_pitch_sd (row, projection, projection_left, projection_right, + space_size, initial_pitch - pitch_delta, sp_sd, + mid_cuts, &test_cells, testing_on); + if (testing_on) + tprintf ("testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta, + pitch_sd); + if (pitch_sd < best_sd) { + best_sd = pitch_sd; + best_mid_cuts = mid_cuts; + best_sp_sd = sp_sd; + best_pitch = initial_pitch - pitch_delta; + best_cells->clear (); + best_it.set_to_list (best_cells); + best_it.add_list_after (&test_cells); + } + else + test_cells.clear (); + if (pitch_sd > initial_sd) + break; + } + initial_pitch = best_pitch; + + if (textord_debug_pitch_metric) + print_pitch_sd(row, + projection, + projection_left, + projection_right, + space_size, + 
best_pitch); + + return best_sd; +} + + +/********************************************************************** + * tune_row_pitch + * + * Use a dp algorithm to fit the character cells and return the sd of + * the cell size over the row. + **********************************************************************/ + +float tune_row_pitch2( //find fp cells + TO_ROW* row, //row to do + STATS* projection, //vertical projection + int16_t projection_left, //edge of projection + int16_t projection_right, //edge of projection + float space_size, //size of blank + float& initial_pitch, //guess at pitch + float& best_sp_sd, //space sd + int16_t& best_mid_cuts, //no of cheap cuts + ICOORDELT_LIST* best_cells, //row cells + bool testing_on //inidividual words +) { + int pitch_delta; //offset pitch + int16_t pixel; //pixel coord + int16_t best_pixel; //pixel coord + int16_t best_delta; //best pitch + int16_t best_pitch; //best pitch + int16_t start; //of good range + int16_t end; //of good range + int32_t best_count; //lowest sum + float best_sd; //best result + + best_sp_sd = initial_pitch; + + best_pitch = static_cast<int>(initial_pitch); + if (textord_disable_pitch_test || best_pitch <= textord_pitch_range) { + return initial_pitch; + } + std::unique_ptr<STATS[]> sum_proj(new STATS[textord_pitch_range * 2 + 1]); //summed projection + + for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; + pitch_delta++) + sum_proj[textord_pitch_range + pitch_delta].set_range (0, + best_pitch + + pitch_delta + 1); + for (pixel = projection_left; pixel <= projection_right; pixel++) { + for (pitch_delta = -textord_pitch_range; pitch_delta <= textord_pitch_range; + pitch_delta++) { + sum_proj[textord_pitch_range + pitch_delta].add( + (pixel - projection_left) % (best_pitch + pitch_delta), + projection->pile_count(pixel)); + } + } + best_count = sum_proj[textord_pitch_range].pile_count (0); + best_delta = 0; + best_pixel = 0; + for (pitch_delta = -textord_pitch_range; 
pitch_delta <= textord_pitch_range; + pitch_delta++) { + for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) { + if (sum_proj[textord_pitch_range + pitch_delta].pile_count (pixel) + < best_count) { + best_count = + sum_proj[textord_pitch_range + + pitch_delta].pile_count (pixel); + best_delta = pitch_delta; + best_pixel = pixel; + } + } + } + if (testing_on) + tprintf ("tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n", + initial_pitch, best_delta, best_count); + best_pitch += best_delta; + initial_pitch = best_pitch; + best_count++; + best_count += best_count; + for (start = best_pixel - 2; start > best_pixel - best_pitch + && sum_proj[textord_pitch_range + + best_delta].pile_count (start % best_pitch) <= best_count; + start--); + for (end = best_pixel + 2; + end < best_pixel + best_pitch + && sum_proj[textord_pitch_range + + best_delta].pile_count (end % best_pitch) <= best_count; + end++); + + best_sd = + compute_pitch_sd(row, + projection, + projection_left, + projection_right, + space_size, + initial_pitch, + best_sp_sd, + best_mid_cuts, + best_cells, + testing_on, + start, + end); + if (testing_on) + tprintf ("tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch, + best_sd); + + if (textord_debug_pitch_metric) + print_pitch_sd(row, + projection, + projection_left, + projection_right, + space_size, + initial_pitch); + + return best_sd; +} + + +/********************************************************************** + * compute_pitch_sd + * + * Use a dp algorithm to fit the character cells and return the sd of + * the cell size over the row. 
+ **********************************************************************/ + +float compute_pitch_sd( //find fp cells + TO_ROW* row, //row to do + STATS* projection, //vertical projection + int16_t projection_left, //edge + int16_t projection_right, //edge + float space_size, //size of blank + float initial_pitch, //guess at pitch + float& sp_sd, //space sd + int16_t& mid_cuts, //no of free cuts + ICOORDELT_LIST* row_cells, //list of chop pts + bool testing_on, //inidividual words + int16_t start, //start of good range + int16_t end //end of good range +) { + int16_t occupation; //no of cells in word. + //blobs + BLOBNBOX_IT blob_it = row->blob_list (); + BLOBNBOX_IT start_it; //start of word + BLOBNBOX_IT plot_it; //for plotting + int16_t blob_count; //no of blobs + TBOX blob_box; //bounding box + TBOX prev_box; //of super blob + int32_t prev_right; //of word sync + int scale_factor; //on scores for big words + int32_t sp_count; //spaces + FPSEGPT_LIST seg_list; //char cells + FPSEGPT_IT seg_it; //iterator + int16_t segpos; //position of segment + int16_t cellpos; //previous cell boundary + //iterator + ICOORDELT_IT cell_it = row_cells; + ICOORDELT *cell; //new cell + double sqsum; //sum of squares + double spsum; //of spaces + double sp_var; //space error + double word_sync; //result for word + int32_t total_count; //total blobs + + if ((pitsync_linear_version & 3) > 1) { + word_sync = compute_pitch_sd2 (row, projection, projection_left, + projection_right, initial_pitch, + occupation, mid_cuts, row_cells, + testing_on, start, end); + sp_sd = occupation; + return word_sync; + } + mid_cuts = 0; + cellpos = 0; + total_count = 0; + sqsum = 0; + sp_count = 0; + spsum = 0; + prev_right = -1; + if (blob_it.empty ()) + return space_size * 10; +#ifndef GRAPHICS_DISABLED + if (testing_on && to_win != nullptr) { + blob_box = blob_it.data ()->bounding_box (); + projection->plot (to_win, projection_left, + row->intercept (), 1.0f, -1.0f, ScrollView::CORAL); + } +#endif + 
start_it = blob_it; + blob_count = 0; + blob_box = box_next (&blob_it);//first blob + blob_it.mark_cycle_pt (); + do { + for (; blob_count > 0; blob_count--) + box_next(&start_it); + do { + prev_box = blob_box; + blob_count++; + blob_box = box_next (&blob_it); + } + while (!blob_it.cycled_list () + && blob_box.left () - prev_box.right () < space_size); + plot_it = start_it; + if (pitsync_linear_version & 3) + word_sync = + check_pitch_sync2 (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2, + projection, projection_left, projection_right, + row->xheight * textord_projection_scale, + occupation, &seg_list, start, end); + else + word_sync = + check_pitch_sync (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2, + projection, &seg_list); + if (testing_on) { + tprintf ("Word ending at (%d,%d), len=%d, sync rating=%g, ", + prev_box.right (), prev_box.top (), + seg_list.length () - 1, word_sync); + seg_it.set_to_list (&seg_list); + for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); + seg_it.forward ()) { + if (seg_it.data ()->faked) + tprintf ("(F)"); + tprintf ("%d, ", seg_it.data ()->position ()); + // tprintf("C=%g, s=%g, sq=%g\n", + // seg_it.data()->cost_function(), + // seg_it.data()->sum(), + // seg_it.data()->squares()); + } + tprintf ("\n"); + } +#ifndef GRAPHICS_DISABLED + if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr) + plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list); +#endif + seg_it.set_to_list (&seg_list); + if (prev_right >= 0) { + sp_var = seg_it.data ()->position () - prev_right; + sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch; + sp_var *= sp_var; + spsum += sp_var; + sp_count++; + } + for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { + segpos = seg_it.data ()->position (); + if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) { + //big gap + while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) { + cell = new ICOORDELT 
(cellpos + static_cast<int16_t>(initial_pitch), 0); + cell_it.add_after_then_move (cell); + cellpos += static_cast<int16_t>(initial_pitch); + } + //make new one + cell = new ICOORDELT (segpos, 0); + cell_it.add_after_then_move (cell); + cellpos = segpos; + } + else if (segpos > cellpos - initial_pitch / 2) { + cell = cell_it.data (); + //average positions + cell->set_x ((cellpos + segpos) / 2); + cellpos = cell->x (); + } + } + seg_it.move_to_last (); + prev_right = seg_it.data ()->position (); + if (textord_pitch_scalebigwords) { + scale_factor = (seg_list.length () - 2) / 2; + if (scale_factor < 1) + scale_factor = 1; + } + else + scale_factor = 1; + sqsum += word_sync * scale_factor; + total_count += (seg_list.length () - 1) * scale_factor; + seg_list.clear (); + } + while (!blob_it.cycled_list ()); + sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0; + return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10; +} + + +/********************************************************************** + * compute_pitch_sd2 + * + * Use a dp algorithm to fit the character cells and return the sd of + * the cell size over the row. 
+ **********************************************************************/ + +float compute_pitch_sd2( //find fp cells + TO_ROW* row, //row to do + STATS* projection, //vertical projection + int16_t projection_left, //edge + int16_t projection_right, //edge + float initial_pitch, //guess at pitch + int16_t& occupation, //no of occupied cells + int16_t& mid_cuts, //no of free cuts + ICOORDELT_LIST* row_cells, //list of chop pts + bool testing_on, //inidividual words + int16_t start, //start of good range + int16_t end //end of good range +) { + //blobs + BLOBNBOX_IT blob_it = row->blob_list (); + BLOBNBOX_IT plot_it; + int16_t blob_count; //no of blobs + TBOX blob_box; //bounding box + FPSEGPT_LIST seg_list; //char cells + FPSEGPT_IT seg_it; //iterator + int16_t segpos; //position of segment + //iterator + ICOORDELT_IT cell_it = row_cells; + ICOORDELT *cell; //new cell + double word_sync; //result for word + + mid_cuts = 0; + if (blob_it.empty ()) { + occupation = 0; + return initial_pitch * 10; + } +#ifndef GRAPHICS_DISABLED + if (testing_on && to_win != nullptr) { + projection->plot (to_win, projection_left, + row->intercept (), 1.0f, -1.0f, ScrollView::CORAL); + } +#endif + blob_count = 0; + blob_it.mark_cycle_pt (); + do { + //first blob + blob_box = box_next (&blob_it); + blob_count++; + } + while (!blob_it.cycled_list ()); + plot_it = blob_it; + word_sync = check_pitch_sync2 (&blob_it, blob_count, static_cast<int16_t>(initial_pitch), + 2, projection, projection_left, + projection_right, + row->xheight * textord_projection_scale, + occupation, &seg_list, start, end); + if (testing_on) { + tprintf ("Row ending at (%d,%d), len=%d, sync rating=%g, ", + blob_box.right (), blob_box.top (), + seg_list.length () - 1, word_sync); + seg_it.set_to_list (&seg_list); + for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { + if (seg_it.data ()->faked) + tprintf ("(F)"); + tprintf ("%d, ", seg_it.data ()->position ()); + // tprintf("C=%g, s=%g, sq=%g\n", 
+ // seg_it.data()->cost_function(), + // seg_it.data()->sum(), + // seg_it.data()->squares()); + } + tprintf ("\n"); + } +#ifndef GRAPHICS_DISABLED + if (textord_show_fixed_cuts && blob_count > 0 && to_win != nullptr) + plot_fp_cells2(to_win, ScrollView::GOLDENROD, row, &seg_list); +#endif + seg_it.set_to_list (&seg_list); + for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) { + segpos = seg_it.data ()->position (); + //make new one + cell = new ICOORDELT (segpos, 0); + cell_it.add_after_then_move (cell); + if (seg_it.at_last ()) + mid_cuts = seg_it.data ()->cheap_cuts (); + } + seg_list.clear (); + return occupation > 0 ? sqrt (word_sync / occupation) : initial_pitch * 10; +} + + +/********************************************************************** + * print_pitch_sd + * + * Use a dp algorithm to fit the character cells and return the sd of + * the cell size over the row. + **********************************************************************/ + +void print_pitch_sd( //find fp cells + TO_ROW *row, //row to do + STATS *projection, //vertical projection + int16_t projection_left, //edges //size of blank + int16_t projection_right, + float space_size, + float initial_pitch //guess at pitch + ) { + const char *res2; //pitch result + int16_t occupation; //used cells + float sp_sd; //space sd + //blobs + BLOBNBOX_IT blob_it = row->blob_list (); + BLOBNBOX_IT start_it; //start of word + BLOBNBOX_IT row_start; //start of row + int16_t blob_count; //no of blobs + int16_t total_blob_count; //total blobs in line + TBOX blob_box; //bounding box + TBOX prev_box; //of super blob + int32_t prev_right; //of word sync + int scale_factor; //on scores for big words + int32_t sp_count; //spaces + FPSEGPT_LIST seg_list; //char cells + FPSEGPT_IT seg_it; //iterator + double sqsum; //sum of squares + double spsum; //of spaces + double sp_var; //space error + double word_sync; //result for word + double total_count; //total cuts + + if (blob_it.empty ()) + 
return; + row_start = blob_it; + total_blob_count = 0; + + total_count = 0; + sqsum = 0; + sp_count = 0; + spsum = 0; + prev_right = -1; + blob_it = row_start; + start_it = blob_it; + blob_count = 0; + blob_box = box_next (&blob_it);//first blob + blob_it.mark_cycle_pt (); + do { + for (; blob_count > 0; blob_count--) + box_next(&start_it); + do { + prev_box = blob_box; + blob_count++; + blob_box = box_next (&blob_it); + } + while (!blob_it.cycled_list () + && blob_box.left () - prev_box.right () < space_size); + word_sync = + check_pitch_sync2 (&start_it, blob_count, static_cast<int16_t>(initial_pitch), 2, + projection, projection_left, projection_right, + row->xheight * textord_projection_scale, + occupation, &seg_list, 0, 0); + total_blob_count += blob_count; + seg_it.set_to_list (&seg_list); + if (prev_right >= 0) { + sp_var = seg_it.data ()->position () - prev_right; + sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch; + sp_var *= sp_var; + spsum += sp_var; + sp_count++; + } + seg_it.move_to_last (); + prev_right = seg_it.data ()->position (); + if (textord_pitch_scalebigwords) { + scale_factor = (seg_list.length () - 2) / 2; + if (scale_factor < 1) + scale_factor = 1; + } + else + scale_factor = 1; + sqsum += word_sync * scale_factor; + total_count += (seg_list.length () - 1) * scale_factor; + seg_list.clear (); + } + while (!blob_it.cycled_list ()); + sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0; + word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10; + tprintf ("new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:", + word_sync, word_sync / initial_pitch, sp_sd, + word_sync < textord_words_pitchsd_threshold * initial_pitch + ? 
'F' : 'P'); + + start_it = row_start; + blob_it = row_start; + word_sync = + check_pitch_sync2 (&blob_it, total_blob_count, static_cast<int16_t>(initial_pitch), 2, + projection, projection_left, projection_right, + row->xheight * textord_projection_scale, occupation, + &seg_list, 0, 0); + if (occupation > 1) + word_sync /= occupation; + word_sync = sqrt (word_sync); + +#ifndef GRAPHICS_DISABLED + if (textord_show_row_cuts && to_win != nullptr) + plot_fp_cells2(to_win, ScrollView::CORAL, row, &seg_list); +#endif + seg_list.clear (); + if (word_sync < textord_words_pitchsd_threshold * initial_pitch) { + if (word_sync < textord_words_def_fixed * initial_pitch + && !row->all_caps) + res2 = "DF"; + else + res2 = "MF"; + } + else + res2 = word_sync < textord_words_def_prop * initial_pitch ? "MP" : "DP"; + tprintf + ("row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n", + word_sync, word_sync / initial_pitch, + word_sync < textord_words_pitchsd_threshold * initial_pitch ? 'F' : 'P', + occupation, res2, initial_pitch, row->fixed_pitch, row->all_caps); +} + +/********************************************************************** + * find_repeated_chars + * + * Extract marked leader blobs and put them + * into words in advance of fixed pitch checking and word generation. + **********************************************************************/ +void find_repeated_chars(TO_BLOCK* block, // Block to search. + bool testing_on) { // Debug mode. + POLY_BLOCK* pb = block->block->pdblk.poly_block(); + if (pb != nullptr && !pb->IsText()) + return; // Don't find repeated chars in non-text blocks. 
+ + TO_ROW *row; + BLOBNBOX_IT box_it; + BLOBNBOX_IT search_it; // forward search + WERD *word; // new word + TBOX word_box; // for plotting + int blobcount, repeated_set; + + TO_ROW_IT row_it = block->get_rows(); + if (row_it.empty()) return; // empty block + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + row = row_it.data(); + box_it.set_to_list(row->blob_list()); + if (box_it.empty()) continue; // no blobs in this row + if (!row->rep_chars_marked()) { + mark_repeated_chars(row); + } + if (row->num_repeated_sets() == 0) continue; // nothing to do for this row + // new words + WERD_IT word_it(&row->rep_words); + do { + if (box_it.data()->repeated_set() != 0 && + !box_it.data()->joined_to_prev()) { + blobcount = 1; + repeated_set = box_it.data()->repeated_set(); + search_it = box_it; + search_it.forward(); + while (!search_it.at_first() && + search_it.data()->repeated_set() == repeated_set) { + blobcount++; + search_it.forward(); + } + // After the call to make_real_word() all the blobs from this + // repeated set will be removed from the blob list. box_it will be + // set to point to the blob after the end of the extracted sequence. + word = make_real_word(&box_it, blobcount, box_it.at_first(), 1); + if (!box_it.empty() && box_it.data()->joined_to_prev()) { + tprintf("Bad box joined to prev at"); + box_it.data()->bounding_box().print(); + tprintf("After repeated word:"); + word->bounding_box().print(); + } + ASSERT_HOST(box_it.empty() || !box_it.data()->joined_to_prev()); + word->set_flag(W_REP_CHAR, true); + word->set_flag(W_DONT_CHOP, true); + word_it.add_after_then_move(word); + } else { + box_it.forward(); + } + } while (!box_it.at_first()); + } +} + + +/********************************************************************** + * plot_fp_word + * + * Plot a block of words as if fixed pitch. 
+ **********************************************************************/ + +#ifndef GRAPHICS_DISABLED +void plot_fp_word( //draw block of words + TO_BLOCK *block, //block to draw + float pitch, //pitch to draw with + float nonspace //for space threshold + ) { + TO_ROW *row; //current row + TO_ROW_IT row_it = block->get_rows (); + + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + row->min_space = static_cast<int32_t>((pitch + nonspace) / 2); + row->max_nonspace = row->min_space; + row->space_threshold = row->min_space; + plot_word_decisions (to_win, static_cast<int16_t>(pitch), row); + } +} +#endif + +} // namespace tesseract diff --git a/tesseract/src/textord/topitch.h b/tesseract/src/textord/topitch.h new file mode 100644 index 00000000..39b239f2 --- /dev/null +++ b/tesseract/src/textord/topitch.h @@ -0,0 +1,191 @@ +/********************************************************************** + * File: topitch.h (Formerly to_pitch.h) + * Description: Code to determine fixed pitchness and the pitch if fixed. + * Author: Ray Smith + * + * (C) Copyright 1993, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#ifndef TOPITCH_H +#define TOPITCH_H + +#include "blobbox.h" + +namespace tesseract { + +class Tesseract; + +extern BOOL_VAR_H (textord_debug_pitch_test, false, +"Debug on fixed pitch test"); +extern BOOL_VAR_H (textord_debug_pitch_metric, false, +"Write full metric stuff"); +extern BOOL_VAR_H (textord_show_row_cuts, false, "Draw row-level cuts"); +extern BOOL_VAR_H (textord_show_page_cuts, false, "Draw page-level cuts"); +extern BOOL_VAR_H (textord_pitch_cheat, false, +"Use correct answer for fixed/prop"); +extern BOOL_VAR_H (textord_blockndoc_fixed, true, +"Attempt whole doc/block fixed pitch"); +extern BOOL_VAR_H (textord_fast_pitch_test, false, +"Do even faster pitch algorithm"); +extern double_VAR_H (textord_projection_scale, 0.125, +"Ding rate for mid-cuts"); +extern double_VAR_H (textord_balance_factor, 2.0, +"Ding rate for unbalanced char cells"); + +void compute_fixed_pitch(ICOORD page_tr, // top right + TO_BLOCK_LIST* port_blocks, // input list + float gradient, // page skew + FCOORD rotation, // for drawing + bool testing_on); // correct orientation +void fix_row_pitch( //get some value + TO_ROW *bad_row, //row to fix + TO_BLOCK *bad_block, //block of bad_row + TO_BLOCK_LIST *blocks, //blocks to scan + int32_t row_target, //number of row + int32_t block_target //number of block + ); +void compute_block_pitch(TO_BLOCK* block, // input list + FCOORD rotation, // for drawing + int32_t block_index, // block number + bool testing_on); // correct orientation +bool compute_rows_pitch( //find line stats + TO_BLOCK* block, //block to do + int32_t block_index, //block number + bool testing_on //correct orientation +); +bool try_doc_fixed( //determine pitch + ICOORD page_tr, //top right + TO_BLOCK_LIST* port_blocks, //input list + float gradient //page skew +); +bool try_block_fixed( //find line stats + TO_BLOCK* block, //block to do + int32_t block_index //block number +); +bool 
try_rows_fixed( //find line stats + TO_BLOCK* block, //block to do + int32_t block_index, //block number + bool testing_on //correct orientation +); +void print_block_counts( //find line stats + TO_BLOCK *block, //block to do + int32_t block_index //block number + ); +void count_block_votes( //find line stats + TO_BLOCK *block, //block to do + int32_t &def_fixed, //add to counts + int32_t &def_prop, + int32_t &maybe_fixed, + int32_t &maybe_prop, + int32_t &corr_fixed, + int32_t &corr_prop, + int32_t &dunno); +bool row_pitch_stats( //find line stats + TO_ROW* row, //current row + int32_t maxwidth, //of spaces + bool testing_on //correct orientation +); +bool find_row_pitch( //find lines + TO_ROW* row, //row to do + int32_t maxwidth, //max permitted space + int32_t dm_gap, //ignorable gaps + TO_BLOCK* block, //block of row + int32_t block_index, //block_number + int32_t row_index, //number of row + bool testing_on //correct orientation +); +bool fixed_pitch_row( //find lines + TO_ROW* row, //row to do + BLOCK* block, + int32_t block_index //block_number +); +bool count_pitch_stats( //find lines + TO_ROW* row, //row to do + STATS* gap_stats, //blob gaps + STATS* pitch_stats, //centre-centre stats + float initial_pitch, //guess at pitch + float min_space, //estimate space size + bool ignore_outsize, //discard big objects + bool split_outsize, //split big objects + int32_t dm_gap //ignorable gaps +); +float tune_row_pitch( //find fp cells + TO_ROW* row, //row to do + STATS* projection, //vertical projection + int16_t projection_left, //edge of projection + int16_t projection_right, //edge of projection + float space_size, //size of blank + float& initial_pitch, //guess at pitch + float& best_sp_sd, //space sd + int16_t& best_mid_cuts, //no of cheap cuts + ICOORDELT_LIST* best_cells, //row cells + bool testing_on //inidividual words +); +float tune_row_pitch2( //find fp cells + TO_ROW* row, //row to do + STATS* projection, //vertical projection + int16_t projection_left, 
//edge of projection + int16_t projection_right, //edge of projection + float space_size, //size of blank + float& initial_pitch, //guess at pitch + float& best_sp_sd, //space sd + int16_t& best_mid_cuts, //no of cheap cuts + ICOORDELT_LIST* best_cells, //row cells + bool testing_on //inidividual words +); +float compute_pitch_sd( //find fp cells + TO_ROW* row, //row to do + STATS* projection, //vertical projection + int16_t projection_left, //edge + int16_t projection_right, //edge + float space_size, //size of blank + float initial_pitch, //guess at pitch + float& sp_sd, //space sd + int16_t& mid_cuts, //no of free cuts + ICOORDELT_LIST* row_cells, //list of chop pts + bool testing_on, //inidividual words + int16_t start = 0, //start of good range + int16_t end = 0 //end of good range +); +float compute_pitch_sd2( //find fp cells + TO_ROW* row, //row to do + STATS* projection, //vertical projection + int16_t projection_left, //edge + int16_t projection_right, //edge + float initial_pitch, //guess at pitch + int16_t& occupation, //no of occupied cells + int16_t& mid_cuts, //no of free cuts + ICOORDELT_LIST* row_cells, //list of chop pts + bool testing_on, //inidividual words + int16_t start = 0, //start of good range + int16_t end = 0 //end of good range +); +void print_pitch_sd( //find fp cells + TO_ROW *row, //row to do + STATS *projection, //vertical projection + int16_t projection_left, //edges //size of blank + int16_t projection_right, + float space_size, + float initial_pitch //guess at pitch + ); +void find_repeated_chars(TO_BLOCK* block, // Block to search. + bool testing_on); // Debug mode. 
+void plot_fp_word( //draw block of words + TO_BLOCK *block, //block to draw + float pitch, //pitch to draw with + float nonspace //for space threshold + ); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/textord/tordmain.cpp b/tesseract/src/textord/tordmain.cpp new file mode 100644 index 00000000..7f91b1ff --- /dev/null +++ b/tesseract/src/textord/tordmain.cpp @@ -0,0 +1,994 @@ +/********************************************************************** + * File: tordmain.cpp (Formerly textordp.c) + * Description: C++ top level textord code. + * Author: Ray Smith + * + * (C) Copyright 1992, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#define _USE_MATH_DEFINES // for M_PI + +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "tordmain.h" + +#include "arrayaccess.h" // for GET_DATA_BYTE +#include "blobbox.h" // for BLOBNBOX_IT, BLOBNBOX, TO_BLOCK, TO_B... +#include "ccstruct.h" // for CCStruct, CCStruct::kXHeightFraction +#include "clst.h" // for CLISTIZE +#include "coutln.h" // for C_OUTLINE_IT, C_OUTLINE_LIST, C_OUTLINE +#include "drawtord.h" // for plot_box_list, to_win, create_to_win +#include "edgblob.h" // for extract_edges +#include "errcode.h" // for ASSERT_HOST, ... +#include "makerow.h" // for textord_test_x, textord_test_y, texto... 
+#include "ocrblock.h" // for BLOCK_IT, BLOCK, BLOCK_LIST (ptr only) +#include "ocrrow.h" // for ROW, ROW_IT, ROW_LIST, tweak_row_base... +#include "params.h" // for DoubleParam, BoolParam, IntParam +#include "pdblock.h" // for PDBLK +#include "points.h" // for FCOORD, ICOORD +#include "polyblk.h" // for POLY_BLOCK +#include "quadratc.h" // for QUAD_COEFFS +#include "quspline.h" // for QSPLINE, tweak_row_baseline +#include "rect.h" // for TBOX +#include "scrollview.h" // for ScrollView, ScrollView::WHITE +#include "statistc.h" // for STATS +#include "stepblob.h" // for C_BLOB_IT, C_BLOB, C_BLOB_LIST +#include "textord.h" // for Textord, WordWithBox, WordGrid, WordS... +#include "tprintf.h" // for tprintf +#include "werd.h" // for WERD_IT, WERD, WERD_LIST, W_DONT_CHOP + +#include "genericvector.h" // for PointerVector, GenericVector + +#include "allheaders.h" // for pixDestroy, pixGetHeight, boxCreate + +#include <cfloat> // for FLT_MAX +#include <cmath> // for ceil, floor, M_PI +#include <cstdint> // for INT16_MAX, uint32_t, int32_t, int16_t + +namespace tesseract { + +#define MAX_NEAREST_DIST 600 //for block skew stats + +CLISTIZE(WordWithBox) + +/********************************************************************** + * SetBlobStrokeWidth + * + * Set the horizontal and vertical stroke widths in the blob. + **********************************************************************/ +void SetBlobStrokeWidth(Pix* pix, BLOBNBOX* blob) { + // Cut the blob rectangle into a Pix. + int pix_height = pixGetHeight(pix); + const TBOX& box = blob->bounding_box(); + int width = box.width(); + int height = box.height(); + Box* blob_pix_box = boxCreate(box.left(), pix_height - box.top(), + width, height); + Pix* pix_blob = pixClipRectangle(pix, blob_pix_box, nullptr); + boxDestroy(&blob_pix_box); + Pix* dist_pix = pixDistanceFunction(pix_blob, 4, 8, L_BOUNDARY_BG); + pixDestroy(&pix_blob); + // Compute the stroke widths. 
+ uint32_t* data = pixGetData(dist_pix); + int wpl = pixGetWpl(dist_pix); + // Horizontal width of stroke. + STATS h_stats(0, width + 1); + for (int y = 0; y < height; ++y) { + uint32_t* pixels = data + y*wpl; + int prev_pixel = 0; + int pixel = GET_DATA_BYTE(pixels, 0); + for (int x = 1; x < width; ++x) { + int next_pixel = GET_DATA_BYTE(pixels, x); + // We are looking for a pixel that is equal to its vertical neighbours, + // yet greater than its left neighbour. + if (prev_pixel < pixel && + (y == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) && + (y == height - 1 || pixel == GET_DATA_BYTE(pixels + wpl, x - 1))) { + if (pixel > next_pixel) { + // Single local max, so an odd width. + h_stats.add(pixel * 2 - 1, 1); + } else if (pixel == next_pixel && x + 1 < width && + pixel > GET_DATA_BYTE(pixels, x + 1)) { + // Double local max, so an even width. + h_stats.add(pixel * 2, 1); + } + } + prev_pixel = pixel; + pixel = next_pixel; + } + } + // Vertical width of stroke. + STATS v_stats(0, height + 1); + for (int x = 0; x < width; ++x) { + int prev_pixel = 0; + int pixel = GET_DATA_BYTE(data, x); + for (int y = 1; y < height; ++y) { + uint32_t* pixels = data + y*wpl; + int next_pixel = GET_DATA_BYTE(pixels, x); + // We are looking for a pixel that is equal to its horizontal neighbours, + // yet greater than its upper neighbour. + if (prev_pixel < pixel && + (x == 0 || pixel == GET_DATA_BYTE(pixels - wpl, x - 1)) && + (x == width - 1 || pixel == GET_DATA_BYTE(pixels - wpl, x + 1))) { + if (pixel > next_pixel) { + // Single local max, so an odd width. + v_stats.add(pixel * 2 - 1, 1); + } else if (pixel == next_pixel && y + 1 < height && + pixel > GET_DATA_BYTE(pixels + wpl, x)) { + // Double local max, so an even width. 
+ v_stats.add(pixel * 2, 1); + } + } + prev_pixel = pixel; + pixel = next_pixel; + } + } + pixDestroy(&dist_pix); + // Store the horizontal and vertical width in the blob, keeping both + // widths if there is enough information, otherwise only the one with + // the most samples. + // If there are insufficient samples, store zero, rather than using + // 2*area/perimeter, as the numbers that gives do not match the numbers + // from the distance method. + if (h_stats.get_total() >= (width + height) / 4) { + blob->set_horz_stroke_width(h_stats.ile(0.5f)); + if (v_stats.get_total() >= (width + height) / 4) + blob->set_vert_stroke_width(v_stats.ile(0.5f)); + else + blob->set_vert_stroke_width(0.0f); + } else { + if (v_stats.get_total() >= (width + height) / 4 || + v_stats.get_total() > h_stats.get_total()) { + blob->set_horz_stroke_width(0.0f); + blob->set_vert_stroke_width(v_stats.ile(0.5f)); + } else { + blob->set_horz_stroke_width(h_stats.get_total() > 2 ? h_stats.ile(0.5f) + : 0.0f); + blob->set_vert_stroke_width(0.0f); + } + } +} + +/********************************************************************** + * assign_blobs_to_blocks2 + * + * Make a list of TO_BLOCKs for portrait and landscape orientation. 
+ **********************************************************************/ + +void assign_blobs_to_blocks2(Pix* pix, + BLOCK_LIST *blocks, // blocks to process + TO_BLOCK_LIST *port_blocks) { // output list + BLOCK *block; // current block + BLOBNBOX *newblob; // created blob + C_BLOB *blob; // current blob + BLOCK_IT block_it = blocks; + C_BLOB_IT blob_it; // iterator + BLOBNBOX_IT port_box_it; // iterator + // destination iterator + TO_BLOCK_IT port_block_it = port_blocks; + TO_BLOCK *port_block; // created block + + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + block = block_it.data(); + port_block = new TO_BLOCK(block); + + // Convert the good outlines to block->blob_list + port_box_it.set_to_list(&port_block->blobs); + blob_it.set_to_list(block->blob_list()); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.extract(); + newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX. + SetBlobStrokeWidth(pix, newblob); + port_box_it.add_after_then_move(newblob); + } + + // Put the rejected outlines in block->noise_blobs, which allows them to + // be reconsidered and sorted back into rows and recover outlines mistakenly + // rejected. + port_box_it.set_to_list(&port_block->noise_blobs); + blob_it.set_to_list(block->reject_blobs()); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + blob = blob_it.extract(); + newblob = new BLOBNBOX(blob); // Convert blob to BLOBNBOX. + SetBlobStrokeWidth(pix, newblob); + port_box_it.add_after_then_move(newblob); + } + + port_block_it.add_after_then_move(port_block); + } +} + +/********************************************************************** + * find_components + * + * Find the C_OUTLINEs of the connected components in each block, put them + * in C_BLOBs, and filter them by size, putting the different size + * grades on different lists in the matching TO_BLOCK in to_blocks. 
+ **********************************************************************/ + +void Textord::find_components(Pix* pix, BLOCK_LIST *blocks, + TO_BLOCK_LIST *to_blocks) { + int width = pixGetWidth(pix); + int height = pixGetHeight(pix); + if (width > INT16_MAX || height > INT16_MAX) { + tprintf("Input image too large! (%d, %d)\n", width, height); + return; // Can't handle it. + } + + BLOCK_IT block_it(blocks); // iterator + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) { + BLOCK* block = block_it.data(); + if (block->pdblk.poly_block() == nullptr || block->pdblk.poly_block()->IsText()) { + extract_edges(pix, block); + } + } + + assign_blobs_to_blocks2(pix, blocks, to_blocks); + ICOORD page_tr(width, height); + filter_blobs(page_tr, to_blocks, !textord_test_landscape); +} + +/********************************************************************** + * filter_blobs + * + * Sort the blobs into sizes in all the blocks for later work. + **********************************************************************/ + +void Textord::filter_blobs(ICOORD page_tr, // top right + TO_BLOCK_LIST* blocks, // output list + bool testing_on) { // for plotting + TO_BLOCK_IT block_it = blocks; // destination iterator + TO_BLOCK *block; // created block + + #ifndef GRAPHICS_DISABLED + if (to_win != nullptr) + to_win->Clear(); + #endif // !GRAPHICS_DISABLED + + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) { + block = block_it.data(); + block->line_size = filter_noise_blobs(&block->blobs, + &block->noise_blobs, + &block->small_blobs, + &block->large_blobs); + if (block->line_size == 0) block->line_size = 1; + block->line_spacing = block->line_size * + (tesseract::CCStruct::kDescenderFraction + + tesseract::CCStruct::kXHeightFraction + + 2 * tesseract::CCStruct::kAscenderFraction) / + tesseract::CCStruct::kXHeightFraction; + block->line_size *= textord_min_linesize; + block->max_blob_size = block->line_size * textord_excess_blobsize; + 
+ #ifndef GRAPHICS_DISABLED + if (textord_show_blobs && testing_on) { + if (to_win == nullptr) + create_to_win(page_tr); + block->plot_graded_blobs(to_win); + } + if (textord_show_boxes && testing_on) { + if (to_win == nullptr) + create_to_win(page_tr); + plot_box_list(to_win, &block->noise_blobs, ScrollView::WHITE); + plot_box_list(to_win, &block->small_blobs, ScrollView::WHITE); + plot_box_list(to_win, &block->large_blobs, ScrollView::WHITE); + plot_box_list(to_win, &block->blobs, ScrollView::WHITE); + } + #endif // !GRAPHICS_DISABLED + } +} + +/********************************************************************** + * filter_noise_blobs + * + * Move small blobs to a separate list. + **********************************************************************/ + +float Textord::filter_noise_blobs( + BLOBNBOX_LIST *src_list, // original list + BLOBNBOX_LIST *noise_list, // noise list + BLOBNBOX_LIST *small_list, // small blobs + BLOBNBOX_LIST *large_list) { // large blobs + int16_t height; //height of blob + int16_t width; //of blob + BLOBNBOX *blob; //current blob + float initial_x; //first guess + BLOBNBOX_IT src_it = src_list; //iterators + BLOBNBOX_IT noise_it = noise_list; + BLOBNBOX_IT small_it = small_list; + BLOBNBOX_IT large_it = large_list; + STATS size_stats (0, MAX_NEAREST_DIST); + //blob heights + float min_y; //size limits + float max_y; + float max_x; + float max_height; //of good blobs + + for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { + blob = src_it.data(); + if (blob->bounding_box().height() < textord_max_noise_size) + noise_it.add_after_then_move(src_it.extract()); + else if (blob->enclosed_area() >= blob->bounding_box().height() + * blob->bounding_box().width() * textord_noise_area_ratio) + small_it.add_after_then_move(src_it.extract()); + } + for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) { + size_stats.add(src_it.data()->bounding_box().height(), 1); + } + initial_x = 
size_stats.ile(textord_initialx_ile); + max_y = ceil(initial_x * + (tesseract::CCStruct::kDescenderFraction + + tesseract::CCStruct::kXHeightFraction + + 2 * tesseract::CCStruct::kAscenderFraction) / + tesseract::CCStruct::kXHeightFraction); + min_y = floor (initial_x / 2); + max_x = ceil (initial_x * textord_width_limit); + small_it.move_to_first (); + for (small_it.mark_cycle_pt (); !small_it.cycled_list (); + small_it.forward ()) { + height = small_it.data()->bounding_box().height(); + if (height > max_y) + large_it.add_after_then_move(small_it.extract ()); + else if (height >= min_y) + src_it.add_after_then_move(small_it.extract ()); + } + size_stats.clear (); + for (src_it.mark_cycle_pt (); !src_it.cycled_list (); src_it.forward ()) { + height = src_it.data ()->bounding_box ().height (); + width = src_it.data ()->bounding_box ().width (); + if (height < min_y) + small_it.add_after_then_move (src_it.extract ()); + else if (height > max_y || width > max_x) + large_it.add_after_then_move (src_it.extract ()); + else + size_stats.add (height, 1); + } + max_height = size_stats.ile (textord_initialasc_ile); + // tprintf("max_y=%g, min_y=%g, initial_x=%g, max_height=%g,", + // max_y,min_y,initial_x,max_height); + max_height *= tesseract::CCStruct::kXHeightCapRatio; + if (max_height > initial_x) + initial_x = max_height; + // tprintf(" ret=%g\n",initial_x); + return initial_x; +} + +// Fixes the block so it obeys all the rules: +// Must have at least one ROW. +// Must have at least one WERD. +// WERDs contain a fake blob. +void Textord::cleanup_nontext_block(BLOCK* block) { + // Non-text blocks must contain at least one row. 
+ ROW_IT row_it(block->row_list()); + if (row_it.empty()) { + const TBOX& box = block->pdblk.bounding_box(); + float height = box.height(); + int32_t xstarts[2] = {box.left(), box.right()}; + double coeffs[3] = {0.0, 0.0, static_cast<double>(box.bottom())}; + ROW* row = new ROW(1, xstarts, coeffs, height / 2.0f, height / 4.0f, + height / 4.0f, 0, 1); + row_it.add_after_then_move(row); + } + // Each row must contain at least one word. + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + ROW* row = row_it.data(); + WERD_IT w_it(row->word_list()); + if (w_it.empty()) { + // Make a fake blob to put in the word. + TBOX box = block->row_list()->singleton() ? block->pdblk.bounding_box() + : row->bounding_box(); + C_BLOB* blob = C_BLOB::FakeBlob(box); + C_BLOB_LIST blobs; + C_BLOB_IT blob_it(&blobs); + blob_it.add_after_then_move(blob); + WERD* word = new WERD(&blobs, 0, nullptr); + w_it.add_after_then_move(word); + } + // Each word must contain a fake blob. + for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { + WERD* word = w_it.data(); + // Just assert that this is true, as it would be useful to find + // out why it isn't. + ASSERT_HOST(!word->cblob_list()->empty()); + } + row->recalc_bounding_box(); + } +} + +/********************************************************************** + * cleanup_blocks + * + * Delete empty blocks, rows from the page. 
+ **********************************************************************/ + +void Textord::cleanup_blocks(bool clean_noise, BLOCK_LIST* blocks) { + BLOCK_IT block_it = blocks; //iterator + ROW_IT row_it; //row iterator + + int num_rows = 0; + int num_rows_all = 0; + int num_blocks = 0; + int num_blocks_all = 0; + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); + block_it.forward()) { + BLOCK* block = block_it.data(); + if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) { + cleanup_nontext_block(block); + continue; + } + num_rows = 0; + num_rows_all = 0; + if (clean_noise) { + row_it.set_to_list(block->row_list()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + ROW* row = row_it.data(); + ++num_rows_all; + clean_small_noise_from_words(row); + if ((textord_noise_rejrows && !row->word_list()->empty() && + clean_noise_from_row(row)) || + row->word_list()->empty()) { + delete row_it.extract(); // lose empty row. + } else { + if (textord_noise_rejwords) + clean_noise_from_words(row_it.data()); + if (textord_blshift_maxshift >= 0) + tweak_row_baseline(row, textord_blshift_maxshift, + textord_blshift_xfraction); + ++num_rows; + } + } + } + if (block->row_list()->empty()) { + delete block_it.extract(); // Lose empty text blocks. + } else { + ++num_blocks; + } + ++num_blocks_all; + if (textord_noise_debug) + tprintf("cleanup_blocks: # rows = %d / %d\n", num_rows, num_rows_all); + } + if (textord_noise_debug) + tprintf("cleanup_blocks: # blocks = %d / %d\n", num_blocks, num_blocks_all); +} + + +/********************************************************************** + * clean_noise_from_row + * + * Move blobs of words from rows of garbage into the reject blobs list. 
+ **********************************************************************/ + +bool Textord::clean_noise_from_row( //remove empties + ROW* row //row to clean +) { + bool testing_on; + TBOX blob_box; //bounding box + C_BLOB *blob; //current blob + C_OUTLINE *outline; //current outline + WERD *word; //current word + int32_t blob_size; //biggest size + int32_t trans_count = 0; //no of transitions + int32_t trans_threshold; //noise tolerance + int32_t dot_count; //small objects + int32_t norm_count; //normal objects + int32_t super_norm_count; //real char-like + //words of row + WERD_IT word_it = row->word_list (); + C_BLOB_IT blob_it; //blob iterator + C_OUTLINE_IT out_it; //outline iterator + + testing_on = textord_test_y > row->base_line (textord_test_x) + && textord_show_blobs + && textord_test_y < row->base_line (textord_test_x) + row->x_height (); + dot_count = 0; + norm_count = 0; + super_norm_count = 0; + for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { + word = word_it.data (); //current word + //blobs in word + blob_it.set_to_list (word->cblob_list ()); + for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); + blob_it.forward ()) { + blob = blob_it.data (); + if (!word->flag (W_DONT_CHOP)) { + //get outlines + out_it.set_to_list (blob->out_list ()); + for (out_it.mark_cycle_pt (); !out_it.cycled_list (); + out_it.forward ()) { + outline = out_it.data (); + blob_box = outline->bounding_box (); + blob_size = + blob_box.width () > + blob_box.height ()? blob_box.width () : blob_box. 
+ height(); + if (blob_size < textord_noise_sizelimit * row->x_height ()) + dot_count++; //count smal outlines + if (!outline->child ()->empty () + && blob_box.height () < + (1 + textord_noise_syfract) * row->x_height () + && blob_box.height () > + (1 - textord_noise_syfract) * row->x_height () + && blob_box.width () < + (1 + textord_noise_sxfract) * row->x_height () + && blob_box.width () > + (1 - textord_noise_sxfract) * row->x_height ()) + super_norm_count++; //count smal outlines + } + } + else + super_norm_count++; + blob_box = blob->bounding_box (); + blob_size = + blob_box.width () > + blob_box.height ()? blob_box.width () : blob_box.height (); + if (blob_size >= textord_noise_sizelimit * row->x_height () + && blob_size < row->x_height () * 2) { + trans_threshold = blob_size / textord_noise_sizefraction; + trans_count = blob->count_transitions (trans_threshold); + if (trans_count < textord_noise_translimit) + norm_count++; + } + else if (blob_box.height () > row->x_height () * 2 + && (!word_it.at_first () || !blob_it.at_first ())) + dot_count += 2; + if (testing_on) { + tprintf + ("Blob at (%d,%d) -> (%d,%d), ols=%d, tc=%d, bldiff=%g\n", + blob_box.left (), blob_box.bottom (), blob_box.right (), + blob_box.top (), blob->out_list ()->length (), trans_count, + blob_box.bottom () - row->base_line (blob_box.left ())); + } + } + } + if (textord_noise_debug) { + tprintf ("Row ending at (%d,%g):", + blob_box.right (), row->base_line (blob_box.right ())); + tprintf (" R=%g, dc=%d, nc=%d, %s\n", + norm_count > 0 ? static_cast<float>(dot_count) / norm_count : 9999, + dot_count, norm_count, + dot_count > norm_count * textord_noise_normratio + && dot_count > 2 ? 
"REJECTED" : "ACCEPTED"); + } + return super_norm_count < textord_noise_sncount + && dot_count > norm_count * textord_noise_rowratio && dot_count > 2; +} + +/********************************************************************** + * clean_noise_from_words + * + * Move blobs of words from rows of garbage into the reject blobs list. + **********************************************************************/ + +void Textord::clean_noise_from_words( //remove empties + ROW *row //row to clean + ) { + TBOX blob_box; //bounding box + C_BLOB *blob; //current blob + C_OUTLINE *outline; //current outline + WERD *word; //current word + int32_t blob_size; //biggest size + int32_t trans_count; //no of transitions + int32_t trans_threshold; //noise tolerance + int32_t dot_count; //small objects + int32_t norm_count; //normal objects + int32_t dud_words; //number discarded + int32_t ok_words; //number remaining + int32_t word_index; //current word + //words of row + WERD_IT word_it = row->word_list (); + C_BLOB_IT blob_it; //blob iterator + C_OUTLINE_IT out_it; //outline iterator + + ok_words = word_it.length (); + if (ok_words == 0 || textord_no_rejects) + return; + // was it chucked + std::vector<int8_t> word_dud(ok_words); + dud_words = 0; + ok_words = 0; + word_index = 0; + for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { + word = word_it.data (); //current word + dot_count = 0; + norm_count = 0; + //blobs in word + blob_it.set_to_list (word->cblob_list ()); + for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); + blob_it.forward ()) { + blob = blob_it.data (); + if (!word->flag (W_DONT_CHOP)) { + //get outlines + out_it.set_to_list (blob->out_list ()); + for (out_it.mark_cycle_pt (); !out_it.cycled_list (); + out_it.forward ()) { + outline = out_it.data (); + blob_box = outline->bounding_box (); + blob_size = + blob_box.width () > + blob_box.height ()? blob_box.width () : blob_box. 
+ height(); + if (blob_size < textord_noise_sizelimit * row->x_height ()) + dot_count++; //count smal outlines + if (!outline->child ()->empty () + && blob_box.height () < + (1 + textord_noise_syfract) * row->x_height () + && blob_box.height () > + (1 - textord_noise_syfract) * row->x_height () + && blob_box.width () < + (1 + textord_noise_sxfract) * row->x_height () + && blob_box.width () > + (1 - textord_noise_sxfract) * row->x_height ()) + norm_count++; //count smal outlines + } + } + else + norm_count++; + blob_box = blob->bounding_box (); + blob_size = + blob_box.width () > + blob_box.height ()? blob_box.width () : blob_box.height (); + if (blob_size >= textord_noise_sizelimit * row->x_height () + && blob_size < row->x_height () * 2) { + trans_threshold = blob_size / textord_noise_sizefraction; + trans_count = blob->count_transitions (trans_threshold); + if (trans_count < textord_noise_translimit) + norm_count++; + } + else if (blob_box.height () > row->x_height () * 2 + && (!word_it.at_first () || !blob_it.at_first ())) + dot_count += 2; + } + if (dot_count > 2 && !word->flag(W_REP_CHAR)) { + if (dot_count > norm_count * textord_noise_normratio * 2) + word_dud[word_index] = 2; + else if (dot_count > norm_count * textord_noise_normratio) + word_dud[word_index] = 1; + else + word_dud[word_index] = 0; + } else { + word_dud[word_index] = 0; + } + if (word_dud[word_index] == 2) + dud_words++; + else + ok_words++; + word_index++; + } + + word_index = 0; + for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { + if (word_dud[word_index] == 2 + || (word_dud[word_index] == 1 && dud_words > ok_words)) { + word = word_it.data(); // Current word. + // Previously we threw away the entire word. + // Now just aggressively throw all small blobs into the reject list, where + // the classifier can decide whether they are actually needed. 
+ word->CleanNoise(textord_noise_sizelimit * row->x_height()); + } + word_index++; + } +} + +// Remove outlines that are a tiny fraction in either width or height +// of the word height. +void Textord::clean_small_noise_from_words(ROW *row) { + WERD_IT word_it(row->word_list()); + for (word_it.mark_cycle_pt(); !word_it.cycled_list(); word_it.forward()) { + WERD* word = word_it.data(); + int min_size = static_cast<int>( + textord_noise_hfract * word->bounding_box().height() + 0.5); + C_BLOB_IT blob_it(word->cblob_list()); + for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { + C_BLOB* blob = blob_it.data(); + C_OUTLINE_IT out_it(blob->out_list()); + for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) { + C_OUTLINE* outline = out_it.data(); + outline->RemoveSmallRecursive(min_size, &out_it); + } + if (blob->out_list()->empty()) { + delete blob_it.extract(); + } + } + if (word->cblob_list()->empty()) { + if (!word_it.at_last()) { + // The next word is no longer a fuzzy non space if it was before, + // since the word before is about to be deleted. + WERD* next_word = word_it.data_relative(1); + if (next_word->flag(W_FUZZY_NON)) { + next_word->set_flag(W_FUZZY_NON, false); + } + } + delete word_it.extract(); + } + } +} + +// Local struct to hold a group of blocks. +struct BlockGroup { + BlockGroup() : rotation(1.0f, 0.0f), angle(0.0f), min_xheight(1.0f) {} + explicit BlockGroup(BLOCK* block) + : bounding_box(block->pdblk.bounding_box()), + rotation(block->re_rotation()), + angle(block->re_rotation().angle()), + min_xheight(block->x_height()) { + blocks.push_back(block); + } + // Union of block bounding boxes. + TBOX bounding_box; + // Common rotation of the blocks. + FCOORD rotation; + // Angle of rotation. + float angle; + // Min xheight of the blocks. + float min_xheight; + // Collection of borrowed pointers to the blocks in the group. 
+ GenericVector<BLOCK*> blocks; +}; + +// Groups blocks by rotation, then, for each group, makes a WordGrid and calls +// TransferDiacriticsToWords to copy the diacritic blobs to the most +// appropriate words in the group of blocks. Source blobs are not touched. +void Textord::TransferDiacriticsToBlockGroups(BLOBNBOX_LIST* diacritic_blobs, + BLOCK_LIST* blocks) { + // Angle difference larger than this is too much to consider equal. + // They should only be in multiples of M_PI/2 anyway. + const double kMaxAngleDiff = 0.01; // About 0.6 degrees. + PointerVector<BlockGroup> groups; + BLOCK_IT bk_it(blocks); + for (bk_it.mark_cycle_pt(); !bk_it.cycled_list(); bk_it.forward()) { + BLOCK* block = bk_it.data(); + if (block->pdblk.poly_block() != nullptr && !block->pdblk.poly_block()->IsText()) { + continue; + } + // Linear search of the groups to find a matching rotation. + float block_angle = block->re_rotation().angle(); + int best_g = 0; + float best_angle_diff = FLT_MAX; + for (int g = 0; g < groups.size(); ++g) { + double angle_diff = fabs(block_angle - groups[g]->angle); + if (angle_diff > M_PI) angle_diff = fabs(angle_diff - 2.0 * M_PI); + if (angle_diff < best_angle_diff) { + best_angle_diff = angle_diff; + best_g = g; + } + } + if (best_angle_diff > kMaxAngleDiff) { + groups.push_back(new BlockGroup(block)); + } else { + groups[best_g]->blocks.push_back(block); + groups[best_g]->bounding_box += block->pdblk.bounding_box(); + float x_height = block->x_height(); + if (x_height < groups[best_g]->min_xheight) + groups[best_g]->min_xheight = x_height; + } + } + // Now process each group of blocks. 
+ PointerVector<WordWithBox> word_ptrs; + for (int g = 0; g < groups.size(); ++g) { + const BlockGroup* group = groups[g]; + if (group->bounding_box.null_box()) continue; + WordGrid word_grid(group->min_xheight, group->bounding_box.botleft(), + group->bounding_box.topright()); + for (int b = 0; b < group->blocks.size(); ++b) { + ROW_IT row_it(group->blocks[b]->row_list()); + for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { + ROW* row = row_it.data(); + // Put the words of the row into the grid. + WERD_IT w_it(row->word_list()); + for (w_it.mark_cycle_pt(); !w_it.cycled_list(); w_it.forward()) { + WERD* word = w_it.data(); + auto* box_word = new WordWithBox(word); + word_grid.InsertBBox(true, true, box_word); + // Save the pointer where it will be auto-deleted. + word_ptrs.push_back(box_word); + } + } + } + FCOORD rotation = group->rotation; + // Make it a forward rotation that will transform blob coords to block. + rotation.set_y(-rotation.y()); + TransferDiacriticsToWords(diacritic_blobs, rotation, &word_grid); + } +} + +// Places a copy of blobs that are near a word (after applying rotation to the +// blob) in the most appropriate word, unless there is doubt, in which case a +// blob can end up in two words. Source blobs are not touched. +void Textord::TransferDiacriticsToWords(BLOBNBOX_LIST* diacritic_blobs, + const FCOORD& rotation, + WordGrid* word_grid) { + WordSearch ws(word_grid); + BLOBNBOX_IT b_it(diacritic_blobs); + // Apply rotation to each blob before finding the nearest words. The rotation + // allows us to only consider above/below placement and not left/right on + // vertical text, because all text is horizontal here. + for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) { + BLOBNBOX* blobnbox = b_it.data(); + TBOX blob_box = blobnbox->bounding_box(); + blob_box.rotate(rotation); + ws.StartRectSearch(blob_box); + // Above/below refer to word position relative to diacritic. 
Since some + // scripts eg Kannada/Telugu habitually put diacritics below words, and + // others eg Thai/Vietnamese/Latin put most diacritics above words, try + // for both if there isn't much in it. + WordWithBox* best_above_word = nullptr; + WordWithBox* best_below_word = nullptr; + int best_above_distance = 0; + int best_below_distance = 0; + for (WordWithBox* word = ws.NextRectSearch(); word != nullptr; + word = ws.NextRectSearch()) { + if (word->word()->flag(W_REP_CHAR)) continue; + TBOX word_box = word->true_bounding_box(); + int x_distance = blob_box.x_gap(word_box); + int y_distance = blob_box.y_gap(word_box); + if (x_distance > 0) { + // Arbitrarily divide x-distance by 2 if there is a major y overlap, + // and the word is to the left of the diacritic. If the + // diacritic is a dropped broken character between two words, this will + // help send all the pieces to a single word, instead of splitting them + // over the 2 words. + if (word_box.major_y_overlap(blob_box) && + blob_box.left() > word_box.right()) { + x_distance /= 2; + } + y_distance += x_distance; + } + if (word_box.y_middle() > blob_box.y_middle() && + (best_above_word == nullptr || y_distance < best_above_distance)) { + best_above_word = word; + best_above_distance = y_distance; + } + if (word_box.y_middle() <= blob_box.y_middle() && + (best_below_word == nullptr || y_distance < best_below_distance)) { + best_below_word = word; + best_below_distance = y_distance; + } + } + bool above_good = + best_above_word != nullptr && + (best_below_word == nullptr || + best_above_distance < best_below_distance + blob_box.height()); + bool below_good = + best_below_word != nullptr && best_below_word != best_above_word && + (best_above_word == nullptr || + best_below_distance < best_above_distance + blob_box.height()); + if (below_good) { + C_BLOB* copied_blob = C_BLOB::deep_copy(blobnbox->cblob()); + copied_blob->rotate(rotation); + // Put the blob into the word's reject blobs list. 
+ C_BLOB_IT blob_it(best_below_word->RejBlobs()); + blob_it.add_to_end(copied_blob); + } + if (above_good) { + C_BLOB* copied_blob = C_BLOB::deep_copy(blobnbox->cblob()); + copied_blob->rotate(rotation); + // Put the blob into the word's reject blobs list. + C_BLOB_IT blob_it(best_above_word->RejBlobs()); + blob_it.add_to_end(copied_blob); + } + } +} + +/********************************************************************** + * tweak_row_baseline + * + * Shift baseline to fit the blobs more accurately where they are + * close enough. + **********************************************************************/ + +void tweak_row_baseline(ROW *row, + double blshift_maxshift, + double blshift_xfraction) { + TBOX blob_box; //bounding box + C_BLOB *blob; //current blob + WERD *word; //current word + int32_t blob_count; //no of blobs + int32_t src_index; //source segment + int32_t dest_index; //destination segment + float ydiff; //baseline error + float x_centre; //centre of blob + //words of row + WERD_IT word_it = row->word_list (); + C_BLOB_IT blob_it; //blob iterator + + blob_count = 0; + for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { + word = word_it.data (); //current word + //get total blobs + blob_count += word->cblob_list ()->length (); + } + if (blob_count == 0) + return; + // spline segments + std::vector<int32_t> xstarts(blob_count + row->baseline.segments + 1); + // spline coeffs + std::vector<double> coeffs((blob_count + row->baseline.segments) * 3); + + src_index = 0; + dest_index = 0; + xstarts[0] = row->baseline.xcoords[0]; + for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) { + word = word_it.data (); //current word + //blobs in word + blob_it.set_to_list (word->cblob_list ()); + for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); + blob_it.forward ()) { + blob = blob_it.data (); + blob_box = blob->bounding_box (); + x_centre = (blob_box.left () + blob_box.right ()) / 2.0; + ydiff = 
blob_box.bottom () - row->base_line (x_centre); + if (ydiff < 0) + ydiff = -ydiff / row->x_height (); + else + ydiff = ydiff / row->x_height (); + if (ydiff < blshift_maxshift + && blob_box.height () / row->x_height () > blshift_xfraction) { + if (xstarts[dest_index] >= x_centre) + xstarts[dest_index] = blob_box.left (); + coeffs[dest_index * 3] = 0; + coeffs[dest_index * 3 + 1] = 0; + coeffs[dest_index * 3 + 2] = blob_box.bottom (); + //shift it + dest_index++; + xstarts[dest_index] = blob_box.right () + 1; + } + else { + if (xstarts[dest_index] <= x_centre) { + while (row->baseline.xcoords[src_index + 1] <= x_centre + && src_index < row->baseline.segments - 1) { + if (row->baseline.xcoords[src_index + 1] > + xstarts[dest_index]) { + coeffs[dest_index * 3] = + row->baseline.quadratics[src_index].a; + coeffs[dest_index * 3 + 1] = + row->baseline.quadratics[src_index].b; + coeffs[dest_index * 3 + 2] = + row->baseline.quadratics[src_index].c; + dest_index++; + xstarts[dest_index] = + row->baseline.xcoords[src_index + 1]; + } + src_index++; + } + coeffs[dest_index * 3] = + row->baseline.quadratics[src_index].a; + coeffs[dest_index * 3 + 1] = + row->baseline.quadratics[src_index].b; + coeffs[dest_index * 3 + 2] = + row->baseline.quadratics[src_index].c; + dest_index++; + xstarts[dest_index] = row->baseline.xcoords[src_index + 1]; + } + } + } + } + while (src_index < row->baseline.segments + && row->baseline.xcoords[src_index + 1] <= xstarts[dest_index]) + src_index++; + while (src_index < row->baseline.segments) { + coeffs[dest_index * 3] = row->baseline.quadratics[src_index].a; + coeffs[dest_index * 3 + 1] = row->baseline.quadratics[src_index].b; + coeffs[dest_index * 3 + 2] = row->baseline.quadratics[src_index].c; + dest_index++; + src_index++; + xstarts[dest_index] = row->baseline.xcoords[src_index]; + } + //turn to spline + row->baseline = QSPLINE(dest_index, &xstarts[0], &coeffs[0]); +} + +} // namespace tesseract diff --git a/tesseract/src/textord/tordmain.h 
b/tesseract/src/textord/tordmain.h new file mode 100644 index 00000000..2a6e31fa --- /dev/null +++ b/tesseract/src/textord/tordmain.h @@ -0,0 +1,45 @@ +/********************************************************************** + * File: tordmain.h (Formerly textordp.h) + * Description: C++ top level textord code. + * Author: Ray Smith + * + * (C) Copyright 1992, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef TORDMAIN_H +#define TORDMAIN_H + +#include "params.h" +#include "ocrblock.h" +#include "blobs.h" +#include "blobbox.h" + +#include <ctime> + +struct Pix; + +namespace tesseract { + +class Tesseract; + +void SetBlobStrokeWidth(Pix* pix, BLOBNBOX* blob); +void assign_blobs_to_blocks2(Pix* pix, BLOCK_LIST *blocks, + TO_BLOCK_LIST *port_blocks); + +void tweak_row_baseline(ROW *row, + double blshift_maxshift, + double blshift_xfraction); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/textord/tospace.cpp b/tesseract/src/textord/tospace.cpp new file mode 100644 index 00000000..6ab17a64 --- /dev/null +++ b/tesseract/src/textord/tospace.cpp @@ -0,0 +1,1894 @@ +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +/********************************************************************** + * tospace.cpp + * + * Compute fuzzy word spacing thresholds for each row. + * I.e. set : max_nonspace + * space_threshold + * min_space + * kern_size + * space_size + * for each row. + * ONLY FOR PROPORTIONAL BLOCKS - FIXED PITCH IS ASSUMED ALREADY DONE + * + * Note: functions in this file were originally not members of any + * class or enclosed by any namespace. Now they are all static members + * of the Textord class. + * + **********************************************************************/ + +#include "drawtord.h" +#include "statistc.h" +#include "textord.h" +#include "tovars.h" + +// Include automatically generated configuration file if running autoconf. 
+#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include <algorithm> +#include <memory> + +#define MAXSPACING 128 /*max expected spacing in pix */ + +namespace tesseract { +void Textord::to_spacing( + ICOORD page_tr, //topright of page + TO_BLOCK_LIST *blocks //blocks on page + ) { + TO_BLOCK_IT block_it; //iterator + TO_BLOCK *block; //current block; + TO_ROW *row; //current row + int block_index; //block number + int row_index; //row number + //estimated width of real spaces for whole block + int16_t block_space_gap_width; + //estimated width of non space gaps for whole block + int16_t block_non_space_gap_width; + bool old_text_ord_proportional;//old fixed/prop result + + block_it.set_to_list (blocks); + block_index = 1; + for (block_it.mark_cycle_pt (); !block_it.cycled_list (); + block_it.forward ()) { + block = block_it.data (); + std::unique_ptr<GAPMAP> gapmap(new GAPMAP (block)); //map of big vert gaps in blk + block_spacing_stats(block, + gapmap.get(), + old_text_ord_proportional, + block_space_gap_width, + block_non_space_gap_width); + // Make sure relative values of block-level space and non-space gap + // widths are reasonable. The ratio of 1:3 is also used in + // block_spacing_stats, to corrrect the block_space_gap_width + // Useful for arabic and hindi, when the non-space gap width is + // often over-estimated and should not be trusted. A similar ratio + // is found in block_spacing_stats. 
+ if (tosp_old_to_method && tosp_old_to_constrain_sp_kn && + static_cast<float>(block_space_gap_width) / block_non_space_gap_width < 3.0) { + block_non_space_gap_width = static_cast<int16_t>(floor (block_space_gap_width / 3.0)); + } + // row iterator + TO_ROW_IT row_it(block->get_rows()); + row_index = 1; + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + if ((row->pitch_decision == PITCH_DEF_PROP) || + (row->pitch_decision == PITCH_CORR_PROP)) { + if ((tosp_debug_level > 0) && !old_text_ord_proportional) + tprintf ("Block %d Row %d: Now Proportional\n", + block_index, row_index); + row_spacing_stats(row, + gapmap.get(), + block_index, + row_index, + block_space_gap_width, + block_non_space_gap_width); + } + else { + if ((tosp_debug_level > 0) && old_text_ord_proportional) + tprintf + ("Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n", + block_index, row_index, row->pitch_decision, + row->fixed_pitch); + } +#ifndef GRAPHICS_DISABLED + if (textord_show_initial_words) + plot_word_decisions (to_win, static_cast<int16_t>(row->fixed_pitch), row); +#endif + row_index++; + } + block_index++; + } +} + + +/************************************************************************* + * block_spacing_stats() + *************************************************************************/ + +void Textord::block_spacing_stats( + TO_BLOCK* block, + GAPMAP* gapmap, + bool& old_text_ord_proportional, + int16_t& block_space_gap_width, // resulting estimate + int16_t& block_non_space_gap_width // resulting estimate +) { + TO_ROW *row; // current row + BLOBNBOX_IT blob_it; // iterator + + STATS centre_to_centre_stats (0, MAXSPACING); + // DEBUG USE ONLY + STATS all_gap_stats (0, MAXSPACING); + STATS space_gap_stats (0, MAXSPACING); + int16_t minwidth = MAXSPACING; // narrowest blob + TBOX blob_box; + TBOX prev_blob_box; + int16_t centre_to_centre; + int16_t gap_width; + float real_space_threshold; + float iqr_centre_to_centre; 
// DEBUG USE ONLY + float iqr_all_gap_stats; // DEBUG USE ONLY + int32_t end_of_row; + int32_t row_length; + + // row iterator + TO_ROW_IT row_it(block->get_rows()); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + if (!row->blob_list ()->empty () && + (!tosp_only_use_prop_rows || + (row->pitch_decision == PITCH_DEF_PROP) || + (row->pitch_decision == PITCH_CORR_PROP))) { + blob_it.set_to_list (row->blob_list ()); + blob_it.mark_cycle_pt (); + end_of_row = blob_it.data_relative (-1)->bounding_box ().right (); + if (tosp_use_pre_chopping) + blob_box = box_next_pre_chopped (&blob_it); + else if (tosp_stats_use_xht_gaps) + blob_box = reduced_box_next (row, &blob_it); + else + blob_box = box_next (&blob_it); + row_length = end_of_row - blob_box.left (); + if (blob_box.width () < minwidth) + minwidth = blob_box.width (); + prev_blob_box = blob_box; + while (!blob_it.cycled_list ()) { + if (tosp_use_pre_chopping) + blob_box = box_next_pre_chopped (&blob_it); + else if (tosp_stats_use_xht_gaps) + blob_box = reduced_box_next (row, &blob_it); + else + blob_box = box_next (&blob_it); + if (blob_box.width () < minwidth) + minwidth = blob_box.width (); + int16_t left = prev_blob_box.right(); + int16_t right = blob_box.left(); + gap_width = right - left; + if (!ignore_big_gap(row, row_length, gapmap, left, right)) { + all_gap_stats.add (gap_width, 1); + + centre_to_centre = (right + blob_box.right () - + (prev_blob_box.left () + left)) / 2; + //DEBUG + centre_to_centre_stats.add (centre_to_centre, 1); + // DEBUG + } + prev_blob_box = blob_box; + } + } + } + + //Inadequate samples + if (all_gap_stats.get_total () <= 1) { + block_non_space_gap_width = minwidth; + block_space_gap_width = -1; //No est. space width + //DEBUG + old_text_ord_proportional = true; + } + else { + /* For debug only ..... 
*/ + iqr_centre_to_centre = centre_to_centre_stats.ile (0.75) - + centre_to_centre_stats.ile (0.25); + iqr_all_gap_stats = all_gap_stats.ile (0.75) - all_gap_stats.ile (0.25); + old_text_ord_proportional = + iqr_centre_to_centre * 2 > iqr_all_gap_stats; + /* .......For debug only */ + + /* + The median of the gaps is used as an estimate of the NON-SPACE gap width. + This RELIES on the assumption that there are more gaps WITHIN words than + BETWEEN words in a block + + Now try to estimate the width of a real space for all real spaces in the + block. Do this by using a crude threshold to ignore "narrow" gaps, then + find the median of the "wide" gaps and use this. + */ + block_non_space_gap_width = static_cast<int16_t>(floor (all_gap_stats.median ())); + // median gap + + row_it.set_to_list (block->get_rows ()); + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + if (!row->blob_list ()->empty () && + (!tosp_only_use_prop_rows || + (row->pitch_decision == PITCH_DEF_PROP) || + (row->pitch_decision == PITCH_CORR_PROP))) { + real_space_threshold = + std::max (tosp_init_guess_kn_mult * block_non_space_gap_width, + tosp_init_guess_xht_mult * row->xheight); + blob_it.set_to_list (row->blob_list ()); + blob_it.mark_cycle_pt (); + end_of_row = + blob_it.data_relative (-1)->bounding_box ().right (); + if (tosp_use_pre_chopping) + blob_box = box_next_pre_chopped (&blob_it); + else if (tosp_stats_use_xht_gaps) + blob_box = reduced_box_next (row, &blob_it); + else + blob_box = box_next (&blob_it); + row_length = blob_box.left () - end_of_row; + prev_blob_box = blob_box; + while (!blob_it.cycled_list ()) { + if (tosp_use_pre_chopping) + blob_box = box_next_pre_chopped (&blob_it); + else if (tosp_stats_use_xht_gaps) + blob_box = reduced_box_next (row, &blob_it); + else + blob_box = box_next (&blob_it); + int16_t left = prev_blob_box.right(); + int16_t right = blob_box.left(); + gap_width = right - left; + if ((gap_width > 
real_space_threshold) && + !ignore_big_gap(row, row_length, gapmap, left, right)) { + /* + If tosp_use_cert_spaces is enabled, the estimate of the space gap is + restricted to obvious spaces - those wider than half the xht or those + with wide blobs on both sides - i.e not things that are suspect 1's or + punctuation that is sometimes widely spaced. + */ + if (!tosp_block_use_cert_spaces || + (gap_width > + tosp_fuzzy_space_factor2 * row->xheight) + || + ((gap_width > + tosp_fuzzy_space_factor1 * row->xheight) + && (!tosp_narrow_blobs_not_cert + || (!narrow_blob (row, prev_blob_box) + && !narrow_blob (row, blob_box)))) + || (wide_blob (row, prev_blob_box) + && wide_blob (row, blob_box))) + space_gap_stats.add (gap_width, 1); + } + prev_blob_box = blob_box; + } + } + } + //Inadequate samples + if (space_gap_stats.get_total () <= 2) + block_space_gap_width = -1;//No est. space width + else + block_space_gap_width = + std::max(static_cast<int16_t>(floor(space_gap_stats.median())), + static_cast<int16_t>(3 * block_non_space_gap_width)); + } +} + + +/************************************************************************* + * row_spacing_stats() + * Set values for min_space, max_non_space based on row stats only + * If failure - return 0 values. 
+ *************************************************************************/ +void Textord::row_spacing_stats( + TO_ROW *row, + GAPMAP *gapmap, + int16_t block_idx, + int16_t row_idx, + int16_t block_space_gap_width, //estimate for block + int16_t block_non_space_gap_width //estimate for block + ) { + //iterator + BLOBNBOX_IT blob_it = row->blob_list (); + STATS all_gap_stats (0, MAXSPACING); + STATS cert_space_gap_stats (0, MAXSPACING); + STATS all_space_gap_stats (0, MAXSPACING); + STATS small_gap_stats (0, MAXSPACING); + TBOX blob_box; + TBOX prev_blob_box; + int16_t gap_width; + int16_t real_space_threshold = 0; + int16_t max = 0; + int16_t index; + int16_t large_gap_count = 0; + bool suspected_table; + int32_t max_max_nonspace; //upper bound + bool good_block_space_estimate = block_space_gap_width > 0; + int32_t end_of_row; + int32_t row_length = 0; + float sane_space; + int32_t sane_threshold; + + /* Collect first pass stats for row */ + + if (!good_block_space_estimate) + block_space_gap_width = int16_t (floor (row->xheight / 2)); + if (!row->blob_list ()->empty ()) { + if (tosp_threshold_bias1 > 0) + real_space_threshold = + block_non_space_gap_width + + int16_t (floor (0.5 + + tosp_threshold_bias1 * (block_space_gap_width - + block_non_space_gap_width))); + else + real_space_threshold = //Old TO method + (block_space_gap_width + block_non_space_gap_width) / 2; + blob_it.set_to_list (row->blob_list ()); + blob_it.mark_cycle_pt (); + end_of_row = blob_it.data_relative (-1)->bounding_box ().right (); + if (tosp_use_pre_chopping) + blob_box = box_next_pre_chopped (&blob_it); + else if (tosp_stats_use_xht_gaps) + blob_box = reduced_box_next (row, &blob_it); + else + blob_box = box_next (&blob_it); + row_length = end_of_row - blob_box.left (); + prev_blob_box = blob_box; + while (!blob_it.cycled_list ()) { + if (tosp_use_pre_chopping) + blob_box = box_next_pre_chopped (&blob_it); + else if (tosp_stats_use_xht_gaps) + blob_box = reduced_box_next (row, &blob_it); 
+ else + blob_box = box_next (&blob_it); + int16_t left = prev_blob_box.right(); + int16_t right = blob_box.left(); + gap_width = right - left; + if (ignore_big_gap(row, row_length, gapmap, left, right)) { + large_gap_count++; + } else { + if (gap_width >= real_space_threshold) { + if (!tosp_row_use_cert_spaces || + (gap_width > tosp_fuzzy_space_factor2 * row->xheight) || + ((gap_width > tosp_fuzzy_space_factor1 * row->xheight) + && (!tosp_narrow_blobs_not_cert + || (!narrow_blob (row, prev_blob_box) + && !narrow_blob (row, blob_box)))) + || (wide_blob (row, prev_blob_box) + && wide_blob (row, blob_box))) + cert_space_gap_stats.add (gap_width, 1); + all_space_gap_stats.add (gap_width, 1); + } + else + small_gap_stats.add (gap_width, 1); + all_gap_stats.add (gap_width, 1); + } + prev_blob_box = blob_box; + } + } + suspected_table = (large_gap_count > 1) || + ((large_gap_count > 0) && + (all_gap_stats.get_total () <= tosp_few_samples)); + + /* Now determine row kern size, space size and threshold */ + + if ((cert_space_gap_stats.get_total () >= + tosp_enough_space_samples_for_median) || + ((suspected_table || + all_gap_stats.get_total () <= tosp_short_row) && + cert_space_gap_stats.get_total () > 0)) { + old_to_method(row, + &all_gap_stats, + &cert_space_gap_stats, + &small_gap_stats, + block_space_gap_width, + block_non_space_gap_width); + } else { + if (!tosp_recovery_isolated_row_stats || + !isolated_row_stats (row, gapmap, &all_gap_stats, suspected_table, + block_idx, row_idx)) { + if (tosp_row_use_cert_spaces && (tosp_debug_level > 5)) + tprintf ("B:%d R:%d -- Inadequate certain spaces.\n", + block_idx, row_idx); + if (tosp_row_use_cert_spaces1 && good_block_space_estimate) { + //Use block default + row->space_size = block_space_gap_width; + if (all_gap_stats.get_total () > tosp_redo_kern_limit) + row->kern_size = all_gap_stats.median (); + else + row->kern_size = block_non_space_gap_width; + row->space_threshold = + int32_t (floor ((row->space_size + 
row->kern_size) / + tosp_old_sp_kn_th_factor)); + } + else + old_to_method(row, + &all_gap_stats, + &all_space_gap_stats, + &small_gap_stats, + block_space_gap_width, + block_non_space_gap_width); + } + } + + if (tosp_improve_thresh && !suspected_table) + improve_row_threshold(row, &all_gap_stats); + + /* Now lets try to be careful not to do anything silly with tables when we + are ignoring big gaps*/ + if (tosp_sanity_method == 0) { + if (suspected_table && + (row->space_size < tosp_table_kn_sp_ratio * row->kern_size)) { + if (tosp_debug_level > 5) + tprintf("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f.\n", block_idx, + row_idx, row->kern_size, row->space_threshold, row->space_size); + row->space_threshold = + static_cast<int32_t>(tosp_table_kn_sp_ratio * row->kern_size); + row->space_size = std::max(row->space_threshold + 1.0f, row->xheight); + } + } + else if (tosp_sanity_method == 1) { + sane_space = row->space_size; + /* NEVER let space size get too close to kern size */ + if ((row->space_size < tosp_min_sane_kn_sp * std::max(row->kern_size, 2.5f)) + || ((row->space_size - row->kern_size) < + (tosp_silly_kn_sp_gap * row->xheight))) { + if (good_block_space_estimate && + (block_space_gap_width >= tosp_min_sane_kn_sp * row->kern_size)) + sane_space = block_space_gap_width; + else + sane_space = + std::max(static_cast<float>(tosp_min_sane_kn_sp) * std::max(row->kern_size, 2.5f), + row->xheight / 2.0f); + if (tosp_debug_level > 5) + tprintf("B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n", + block_idx, row_idx, row->kern_size, row->space_threshold, + row->space_size, sane_space); + row->space_size = sane_space; + row->space_threshold = + int32_t (floor ((row->space_size + row->kern_size) / + tosp_old_sp_kn_th_factor)); + } + /* NEVER let threshold get VERY far away from kern */ + sane_threshold = int32_t (floor (tosp_max_sane_kn_thresh * + std::max(row->kern_size, 2.5f))); + if (row->space_threshold > sane_threshold) { + if (tosp_debug_level > 5) 
+ tprintf("B:%d R:%d -- DON'T BELIEVE THRESH %3.2f %d %3.2f->%d.\n", + block_idx, row_idx, row->kern_size, row->space_threshold, + row->space_size, sane_threshold); + row->space_threshold = sane_threshold; + if (row->space_size <= sane_threshold) + row->space_size = row->space_threshold + 1.0f; + } + /* Beware of tables - there may be NO spaces */ + if (suspected_table) { + sane_space = std::max(tosp_table_kn_sp_ratio * row->kern_size, + tosp_table_xht_sp_ratio * row->xheight); + sane_threshold = int32_t (floor ((sane_space + row->kern_size) / 2)); + + if ((row->space_size < sane_space) || + (row->space_threshold < sane_threshold)) { + if (tosp_debug_level > 5) + tprintf ("B:%d R:%d -- SUSPECT NO SPACES %3.2f %d %3.2f.\n", + block_idx, row_idx, + row->kern_size, + row->space_threshold, row->space_size); + //the minimum sane value + row->space_threshold = static_cast<int32_t>(sane_space); + row->space_size = std::max(row->space_threshold + 1.0f, row->xheight); + } + } + } + + /* Now lets try to put some error limits on the threshold */ + + if (tosp_old_to_method) { + /* Old textord made a space if gap >= threshold */ + //NO FUZZY SPACES YET + row->max_nonspace = row->space_threshold; + //NO FUZZY SPACES YET + row->min_space = row->space_threshold + 1; + } + else { + /* Any gap greater than 0.6 x-ht is bound to be a space (isn't it:-) */ + row->min_space = + std::min(int32_t (ceil (tosp_fuzzy_space_factor * row->xheight)), + int32_t (row->space_size)); + if (row->min_space <= row->space_threshold) + // Don't be silly + row->min_space = row->space_threshold + 1; + /* + Lets try to guess the max certain kern gap by looking at the cluster of + kerns for the row. The row is proportional so the kerns should cluster + tightly at the bottom of the distribution. We also expect most gaps to be + kerns. Find the maximum of the kern piles between 0 and twice the kern + estimate. 
Piles before the first one with less than 1/10 the maximum + number of samples can be taken as certain kerns. + + Of course, there are some cases where the kern peak and space peaks merge, + so we will put an UPPER limit on the max certain kern gap of some fraction + below the threshold. + */ + + max_max_nonspace = int32_t ((row->space_threshold + row->kern_size) / 2); + + //default + row->max_nonspace = max_max_nonspace; + for (index = 0; index <= max_max_nonspace; index++) { + if (all_gap_stats.pile_count (index) > max) + max = all_gap_stats.pile_count (index); + if ((index > row->kern_size) && + (all_gap_stats.pile_count (index) < 0.1 * max)) { + row->max_nonspace = index; + break; + } + } + } + + /* Yet another algorithm - simpler this time - just choose a fraction of the + threshold to space range */ + + if ((tosp_fuzzy_sp_fraction > 0) && + (row->space_size > row->space_threshold)) + row->min_space = std::max(row->min_space, + static_cast<int32_t>(ceil (row->space_threshold + + tosp_fuzzy_sp_fraction * + (row->space_size - + row->space_threshold)))); + + /* Ensure that ANY space less than some multiplier times the kern size is + fuzzy. In tables there is a risk of erroneously setting a small space size + when there are no real spaces. Sometimes tables have text squashed into + columns so that the kn->sp ratio is small anyway - this means that we can't + use this to force a wider separation - hence we rely on context to join any + dubious breaks. 
*/ + + if ((tosp_table_fuzzy_kn_sp_ratio > 0) && + (suspected_table || tosp_fuzzy_limit_all)) + row->min_space = std::max(row->min_space, + static_cast<int32_t>(ceil (tosp_table_fuzzy_kn_sp_ratio * + row->kern_size))); + + if ((tosp_fuzzy_kn_fraction > 0) && (row->kern_size < row->space_threshold)) { + row->max_nonspace = static_cast<int32_t>(floor (0.5 + row->kern_size + + tosp_fuzzy_kn_fraction * + (row->space_threshold - + row->kern_size))); + } + if (row->max_nonspace > row->space_threshold) { + // Don't be silly + row->max_nonspace = row->space_threshold; + } + + if (tosp_debug_level > 5) + tprintf + ("B:%d R:%d L:%d-- Kn:%d Sp:%d Thr:%d -- Kn:%3.2f (%d) Thr:%d (%d) Sp:%3.2f\n", + block_idx, row_idx, row_length, block_non_space_gap_width, + block_space_gap_width, real_space_threshold, row->kern_size, + row->max_nonspace, row->space_threshold, row->min_space, + row->space_size); + if (tosp_debug_level > 10) + tprintf("row->kern_size = %3.2f, row->space_size = %3.2f, " + "row->space_threshold = %d\n", + row->kern_size, row->space_size, row->space_threshold); +} + +void Textord::old_to_method( + TO_ROW *row, + STATS *all_gap_stats, + STATS *space_gap_stats, + STATS *small_gap_stats, + int16_t block_space_gap_width, //estimate for block + int16_t block_non_space_gap_width //estimate for block + ) { + /* First, estimate row space size */ + /* Old to condition was > 2 */ + if (space_gap_stats->get_total () >= tosp_enough_space_samples_for_median) { + //Adequate samples + /* Set space size to median of spaces BUT limits it if it seems wildly out */ + row->space_size = space_gap_stats->median (); + if (row->space_size > block_space_gap_width * 1.5) { + if (tosp_old_to_bug_fix) + row->space_size = block_space_gap_width * 1.5; + else + //BUG??? 
should be *1.5 + row->space_size = block_space_gap_width; + } + if (row->space_size < (block_non_space_gap_width * 2) + 1) + row->space_size = (block_non_space_gap_width * 2) + 1; + } + //Only 1 or 2 samples + else if (space_gap_stats->get_total () >= 1) { + //hence mean not median + row->space_size = space_gap_stats->mean (); + if (row->space_size > block_space_gap_width * 1.5) { + if (tosp_old_to_bug_fix) + row->space_size = block_space_gap_width * 1.5; + else + //BUG??? should be *1.5 + row->space_size = block_space_gap_width; + } + if (row->space_size < (block_non_space_gap_width * 3) + 1) + row->space_size = (block_non_space_gap_width * 3) + 1; + } + else { + //Use block default + row->space_size = block_space_gap_width; + } + + /* Next, estimate row kern size */ + if ((tosp_only_small_gaps_for_kern) && + (small_gap_stats->get_total () > tosp_redo_kern_limit)) + row->kern_size = small_gap_stats->median (); + else if (all_gap_stats->get_total () > tosp_redo_kern_limit) + row->kern_size = all_gap_stats->median (); + else //old TO -SAME FOR ALL ROWS + row->kern_size = block_non_space_gap_width; + + /* Finally, estimate row space threshold */ + if (tosp_threshold_bias2 > 0) { + row->space_threshold = + int32_t (floor (0.5 + row->kern_size + + tosp_threshold_bias2 * (row->space_size - + row->kern_size))); + } else { + /* + NOTE old text ord uses (space_size + kern_size + 1)/2 as the threshold + and holds this in a float. The use is with a >= test + NEW textord uses an integer threshold and a > test + It comes to the same thing. + (Though there is a difference in that old textor has integer space_size + and kern_size.) 
+ */ + row->space_threshold = + int32_t (floor ((row->space_size + row->kern_size) / 2)); + } + + // Apply the same logic and ratios as in row_spacing_stats to + // restrict relative values of the row's space_size, kern_size, and + // space_threshold + if (tosp_old_to_constrain_sp_kn && tosp_sanity_method == 1 && + ((row->space_size < + tosp_min_sane_kn_sp * std::max(row->kern_size, 2.5f)) || + ((row->space_size - row->kern_size) < + tosp_silly_kn_sp_gap * row->xheight))) { + if (row->kern_size > 2.5) + row->kern_size = row->space_size / tosp_min_sane_kn_sp; + row->space_threshold = int32_t (floor ((row->space_size + row->kern_size) / + tosp_old_sp_kn_th_factor)); + } +} + + +/************************************************************************* + * isolated_row_stats() + * Set values for min_space, max_non_space based on row stats only + *************************************************************************/ +bool Textord::isolated_row_stats(TO_ROW* row, + GAPMAP* gapmap, + STATS* all_gap_stats, + bool suspected_table, + int16_t block_idx, + int16_t row_idx) { + float kern_estimate; + float crude_threshold_estimate; + int16_t small_gaps_count; + int16_t total; + //iterator + BLOBNBOX_IT blob_it = row->blob_list (); + STATS cert_space_gap_stats (0, MAXSPACING); + STATS all_space_gap_stats (0, MAXSPACING); + STATS small_gap_stats (0, MAXSPACING); + TBOX blob_box; + TBOX prev_blob_box; + int16_t gap_width; + int32_t end_of_row; + int32_t row_length; + + kern_estimate = all_gap_stats->median (); + crude_threshold_estimate = std::max(tosp_init_guess_kn_mult * kern_estimate, + tosp_init_guess_xht_mult * row->xheight); + small_gaps_count = stats_count_under (all_gap_stats, + static_cast<int16_t>(ceil (crude_threshold_estimate))); + total = all_gap_stats->get_total (); + + if ((total <= tosp_redo_kern_limit) || + ((small_gaps_count / static_cast<float>(total)) < tosp_enough_small_gaps) || + (total - small_gaps_count < 1)) { + if (tosp_debug_level > 5) + 
tprintf("B:%d R:%d -- Can't do isolated row stats.\n", block_idx, + row_idx); + return false; + } + blob_it.set_to_list (row->blob_list ()); + blob_it.mark_cycle_pt (); + end_of_row = blob_it.data_relative (-1)->bounding_box ().right (); + if (tosp_use_pre_chopping) + blob_box = box_next_pre_chopped (&blob_it); + else if (tosp_stats_use_xht_gaps) + blob_box = reduced_box_next (row, &blob_it); + else + blob_box = box_next (&blob_it); + row_length = end_of_row - blob_box.left (); + prev_blob_box = blob_box; + while (!blob_it.cycled_list ()) { + if (tosp_use_pre_chopping) + blob_box = box_next_pre_chopped (&blob_it); + else if (tosp_stats_use_xht_gaps) + blob_box = reduced_box_next (row, &blob_it); + else + blob_box = box_next (&blob_it); + int16_t left = prev_blob_box.right(); + int16_t right = blob_box.left(); + gap_width = right - left; + if (!ignore_big_gap(row, row_length, gapmap, left, right) && + (gap_width > crude_threshold_estimate)) { + if ((gap_width > tosp_fuzzy_space_factor2 * row->xheight) || + ((gap_width > tosp_fuzzy_space_factor1 * row->xheight) && + (!tosp_narrow_blobs_not_cert || + (!narrow_blob (row, prev_blob_box) && + !narrow_blob (row, blob_box)))) || + (wide_blob (row, prev_blob_box) && wide_blob (row, blob_box))) + cert_space_gap_stats.add (gap_width, 1); + all_space_gap_stats.add (gap_width, 1); + } + if (gap_width < crude_threshold_estimate) + small_gap_stats.add (gap_width, 1); + + prev_blob_box = blob_box; + } + if (cert_space_gap_stats.get_total () >= + tosp_enough_space_samples_for_median) + //median + row->space_size = cert_space_gap_stats.median (); + else if (suspected_table && (cert_space_gap_stats.get_total () > 0)) + //to avoid spaced + row->space_size = cert_space_gap_stats.mean (); + // 1's in tables + else if (all_space_gap_stats.get_total () >= + tosp_enough_space_samples_for_median) + //median + row->space_size = all_space_gap_stats.median (); + else + row->space_size = all_space_gap_stats.mean (); + + if 
(tosp_only_small_gaps_for_kern) + row->kern_size = small_gap_stats.median (); + else + row->kern_size = all_gap_stats->median (); + row->space_threshold = + int32_t (floor ((row->space_size + row->kern_size) / 2)); + /* Sanity check */ + if ((row->kern_size >= row->space_threshold) || + (row->space_threshold >= row->space_size) || + (row->space_threshold <= 0)) { + if (tosp_debug_level > 5) + tprintf ("B:%d R:%d -- Isolated row stats SANITY FAILURE: %f %d %f\n", + block_idx, row_idx, + row->kern_size, row->space_threshold, row->space_size); + row->kern_size = 0.0f; + row->space_threshold = 0; + row->space_size = 0.0f; + return false; + } + + if (tosp_debug_level > 5) + tprintf ("B:%d R:%d -- Isolated row stats: %f %d %f\n", + block_idx, row_idx, + row->kern_size, row->space_threshold, row->space_size); + return true; +} + +int16_t Textord::stats_count_under(STATS *stats, int16_t threshold) { + int16_t index; + int16_t total = 0; + + for (index = 0; index < threshold; index++) + total += stats->pile_count (index); + return total; +} + + +/************************************************************************* + * improve_row_threshold() + * Try to recognise a "normal line" - + * > 25 gaps + * && space > 3 * kn && space > 10 + * (I.e. reasonably large space and kn:sp ratio) + * && > 3/4 # gaps < kn + (sp - kn)/3 + * (I.e. most gaps are well away from space estimate) + * && a gap of max(3, (sp - kn) / 3) empty histogram positions is found + * somewhere in the histogram between kn and sp + * THEN set the threshold and fuzzy limits to this gap - ie NO fuzzies + * NO!!!!! the bristol line has "11" with a gap of 12 between the 1's!!! + * try moving the default threshold to within this band but leave the + * fuzzy limit calculation as at present. 
+ *************************************************************************/ +void Textord::improve_row_threshold(TO_ROW *row, STATS *all_gap_stats) { + float sp = row->space_size; + float kn = row->kern_size; + int16_t reqd_zero_width = 0; + int16_t zero_width = 0; + int16_t zero_start = 0; + int16_t index = 0; + + if (tosp_debug_level > 10) + tprintf ("Improve row threshold 0"); + if ((all_gap_stats->get_total () <= 25) || + (sp <= 10) || + (sp <= 3 * kn) || + (stats_count_under (all_gap_stats, + static_cast<int16_t>(ceil (kn + (sp - kn) / 3 + 0.5))) < + (0.75 * all_gap_stats->get_total ()))) + return; + if (tosp_debug_level > 10) + tprintf (" 1"); + /* + Look for the first region of all 0's in the histogram which is wider than + max(3, (sp - kn) / 3) and starts between kn and sp. If found, and current + threshold is not within it, move the threshold so that is is just inside it. + */ + reqd_zero_width = static_cast<int16_t>(floor ((sp - kn) / 3 + 0.5)); + if (reqd_zero_width < 3) + reqd_zero_width = 3; + + for (index = int16_t (ceil (kn)); index < int16_t (floor (sp)); index++) { + if (all_gap_stats->pile_count (index) == 0) { + if (zero_width == 0) + zero_start = index; + zero_width++; + } + else { + if (zero_width >= reqd_zero_width) + break; + else { + zero_width = 0; + } + } + } + index--; + if (tosp_debug_level > 10) + tprintf (" reqd_z_width: %d found %d 0's, starting %d; thresh: %d/n", + reqd_zero_width, zero_width, zero_start, row->space_threshold); + if ((zero_width < reqd_zero_width) || + ((row->space_threshold >= zero_start) && + (row->space_threshold <= index))) + return; + if (tosp_debug_level > 10) + tprintf (" 2"); + if (row->space_threshold < zero_start) { + if (tosp_debug_level > 5) + tprintf + ("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n", + kn, sp, zero_start, index, row->space_threshold, zero_start); + row->space_threshold = zero_start; + } + if (row->space_threshold > index) { + if (tosp_debug_level > 5) + tprintf + 
("Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n", + kn, sp, zero_start, index, row->space_threshold, index); + row->space_threshold = index; + } +} + + +/********************************************************************** + * make_prop_words + * + * Convert a TO_ROW to a ROW. + **********************************************************************/ +ROW *Textord::make_prop_words( + TO_ROW *row, // row to make + FCOORD rotation // for drawing + ) { + bool bol; // start of line + /* prev_ values are for start of word being built. non prev_ values are for + the gap between the word being built and the next one. */ + bool prev_fuzzy_sp; // probably space + bool prev_fuzzy_non; // probably not + uint8_t prev_blanks; // in front of word + bool fuzzy_sp = false; // probably space + bool fuzzy_non = false; // probably not + uint8_t blanks = 0; // in front of word + bool prev_gap_was_a_space = false; + bool break_at_next_gap = false; + ROW *real_row; // output row + C_OUTLINE_IT cout_it; + C_BLOB_LIST cblobs; + C_BLOB_IT cblob_it = &cblobs; + WERD_LIST words; + WERD *word; // new word + int32_t next_rep_char_word_right = INT32_MAX; + float repetition_spacing; // gap between repetitions + int32_t xstarts[2]; // row ends + int32_t prev_x; // end of prev blob + BLOBNBOX *bblob; // current blob + TBOX blob_box; // bounding box + BLOBNBOX_IT box_it; // iterator + TBOX prev_blob_box; + TBOX next_blob_box; + int16_t prev_gap = INT16_MAX; + int16_t current_gap = INT16_MAX; + int16_t next_gap = INT16_MAX; + int16_t prev_within_xht_gap = INT16_MAX; + int16_t current_within_xht_gap = INT16_MAX; + int16_t next_within_xht_gap = INT16_MAX; + int16_t word_count = 0; + + // repeated char words + WERD_IT rep_char_it(&(row->rep_words)); + if (!rep_char_it.empty ()) { + next_rep_char_word_right = + rep_char_it.data ()->bounding_box ().right (); + } + + prev_x = -INT16_MAX; + cblob_it.set_to_list (&cblobs); + box_it.set_to_list (row->blob_list ()); + // new words + WERD_IT 
word_it(&words); + bol = true; + prev_blanks = 0; + prev_fuzzy_sp = false; + prev_fuzzy_non = false; + if (!box_it.empty ()) { + xstarts[0] = box_it.data ()->bounding_box ().left (); + if (xstarts[0] > next_rep_char_word_right) { + /* We need to insert a repeated char word at the start of the row */ + word = rep_char_it.extract (); + word_it.add_after_then_move (word); + /* Set spaces before repeated char word */ + word->set_flag (W_BOL, true); + bol = false; + word->set_blanks (0); + //NO uncertainty + word->set_flag (W_FUZZY_SP, false); + word->set_flag (W_FUZZY_NON, false); + xstarts[0] = word->bounding_box ().left (); + /* Set spaces after repeated char word (and leave current word set) */ + repetition_spacing = find_mean_blob_spacing (word); + current_gap = box_it.data ()->bounding_box ().left () - + next_rep_char_word_right; + current_within_xht_gap = current_gap; + if (current_gap > tosp_rep_space * repetition_spacing) { + prev_blanks = static_cast<uint8_t>(floor (current_gap / row->space_size)); + if (prev_blanks < 1) + prev_blanks = 1; + } + else + prev_blanks = 0; + if (tosp_debug_level > 5) + tprintf ("Repch wd at BOL(%d, %d). 
rep spacing %5.2f; Rgap:%d ", + box_it.data ()->bounding_box ().left (), + box_it.data ()->bounding_box ().bottom (), + repetition_spacing, current_gap); + prev_fuzzy_sp = false; + prev_fuzzy_non = false; + if (rep_char_it.empty ()) { + next_rep_char_word_right = INT32_MAX; + } + else { + rep_char_it.forward (); + next_rep_char_word_right = + rep_char_it.data ()->bounding_box ().right (); + } + } + + peek_at_next_gap(row, + box_it, + next_blob_box, + next_gap, + next_within_xht_gap); + do { + bblob = box_it.data (); + blob_box = bblob->bounding_box (); + if (bblob->joined_to_prev ()) { + if (bblob->cblob () != nullptr) { + cout_it.set_to_list (cblob_it.data ()->out_list ()); + cout_it.move_to_last (); + cout_it.add_list_after (bblob->cblob ()->out_list ()); + delete bblob->cblob (); + } + } else { + if (bblob->cblob() != nullptr) + cblob_it.add_after_then_move (bblob->cblob ()); + prev_x = blob_box.right (); + } + box_it.forward (); //next one + bblob = box_it.data (); + blob_box = bblob->bounding_box (); + + if (!bblob->joined_to_prev() && bblob->cblob() != nullptr) { + /* Real Blob - not multiple outlines or pre-chopped */ + prev_gap = current_gap; + prev_within_xht_gap = current_within_xht_gap; + prev_blob_box = next_blob_box; + current_gap = next_gap; + current_within_xht_gap = next_within_xht_gap; + peek_at_next_gap(row, + box_it, + next_blob_box, + next_gap, + next_within_xht_gap); + + int16_t prev_gap_arg = prev_gap; + int16_t next_gap_arg = next_gap; + if (tosp_only_use_xht_gaps) { + prev_gap_arg = prev_within_xht_gap; + next_gap_arg = next_within_xht_gap; + } + // Decide if a word-break should be inserted + if (blob_box.left () > next_rep_char_word_right || + make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box, + current_gap, current_within_xht_gap, + next_blob_box, next_gap_arg, + blanks, fuzzy_sp, fuzzy_non, + prev_gap_was_a_space, + break_at_next_gap) || + box_it.at_first()) { + /* Form a new word out of the blobs collected */ + word = new WERD 
(&cblobs, prev_blanks, nullptr); + word_count++; + word_it.add_after_then_move (word); + if (bol) { + word->set_flag (W_BOL, true); + bol = false; + } + if (prev_fuzzy_sp) + //probably space + word->set_flag (W_FUZZY_SP, true); + else if (prev_fuzzy_non) + word->set_flag (W_FUZZY_NON, true); + //probably not + + if (blob_box.left () > next_rep_char_word_right) { + /* We need to insert a repeated char word */ + word = rep_char_it.extract (); + word_it.add_after_then_move (word); + + /* Set spaces before repeated char word */ + repetition_spacing = find_mean_blob_spacing (word); + current_gap = word->bounding_box ().left () - prev_x; + current_within_xht_gap = current_gap; + if (current_gap > tosp_rep_space * repetition_spacing) { + blanks = + static_cast<uint8_t>(floor (current_gap / row->space_size)); + if (blanks < 1) + blanks = 1; + } + else + blanks = 0; + if (tosp_debug_level > 5) + tprintf + ("Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);", + word->bounding_box ().left (), + word->bounding_box ().bottom (), + repetition_spacing, current_gap, blanks); + word->set_blanks (blanks); + //NO uncertainty + word->set_flag (W_FUZZY_SP, false); + word->set_flag (W_FUZZY_NON, false); + + /* Set spaces after repeated char word (and leave current word set) */ + current_gap = + blob_box.left () - next_rep_char_word_right; + if (current_gap > tosp_rep_space * repetition_spacing) { + blanks = static_cast<uint8_t>(current_gap / row->space_size); + if (blanks < 1) + blanks = 1; + } + else + blanks = 0; + if (tosp_debug_level > 5) + tprintf (" Rgap:%d (%d blanks)\n", + current_gap, blanks); + fuzzy_sp = false; + fuzzy_non = false; + + if (rep_char_it.empty ()) { + next_rep_char_word_right = INT32_MAX; + } + else { + rep_char_it.forward (); + next_rep_char_word_right = + rep_char_it.data ()->bounding_box ().right (); + } + } + + if (box_it.at_first () && rep_char_it.empty ()) { + //at end of line + word->set_flag (W_EOL, true); + xstarts[1] = prev_x; + } + else { + 
prev_blanks = blanks; + prev_fuzzy_sp = fuzzy_sp; + prev_fuzzy_non = fuzzy_non; + } + } + } + } + while (!box_it.at_first ()); //until back at start + + /* Insert any further repeated char words */ + while (!rep_char_it.empty ()) { + word = rep_char_it.extract (); + word_it.add_after_then_move (word); + + /* Set spaces before repeated char word */ + repetition_spacing = find_mean_blob_spacing (word); + current_gap = word->bounding_box ().left () - prev_x; + if (current_gap > tosp_rep_space * repetition_spacing) { + blanks = static_cast<uint8_t>(floor (current_gap / row->space_size)); + if (blanks < 1) + blanks = 1; + } + else + blanks = 0; + if (tosp_debug_level > 5) + tprintf( + "Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n", + word->bounding_box().left(), word->bounding_box().bottom(), + repetition_spacing, current_gap, blanks); + word->set_blanks (blanks); + //NO uncertainty + word->set_flag (W_FUZZY_SP, false); + word->set_flag (W_FUZZY_NON, false); + prev_x = word->bounding_box ().right (); + if (rep_char_it.empty ()) { + //at end of line + word->set_flag (W_EOL, true); + xstarts[1] = prev_x; + } + else { + rep_char_it.forward (); + } + } + real_row = new ROW (row, + static_cast<int16_t>(row->kern_size), static_cast<int16_t>(row->space_size)); + word_it.set_to_list (real_row->word_list ()); + //put words in row + word_it.add_list_after (&words); + real_row->recalc_bounding_box (); + + if (tosp_debug_level > 4) { + tprintf ("Row: Made %d words in row ((%d,%d)(%d,%d))\n", + word_count, + real_row->bounding_box ().left (), + real_row->bounding_box ().bottom (), + real_row->bounding_box ().right (), + real_row->bounding_box ().top ()); + } + return real_row; + } + return nullptr; +} + +/********************************************************************** + * make_blob_words + * + * Converts words into blobs so that each blob is a single character. + * Used for chopper test. 
+ **********************************************************************/ +ROW *Textord::make_blob_words( + TO_ROW *row, // row to make + FCOORD rotation // for drawing + ) { + bool bol; // start of line + ROW *real_row; // output row + C_OUTLINE_IT cout_it; + C_BLOB_LIST cblobs; + C_BLOB_IT cblob_it = &cblobs; + WERD_LIST words; + WERD *word; // new word + BLOBNBOX *bblob; // current blob + TBOX blob_box; // bounding box + BLOBNBOX_IT box_it; // iterator + int16_t word_count = 0; + + cblob_it.set_to_list(&cblobs); + box_it.set_to_list(row->blob_list()); + // new words + WERD_IT word_it(&words); + bol = true; + if (!box_it.empty()) { + + do { + bblob = box_it.data(); + blob_box = bblob->bounding_box(); + if (bblob->joined_to_prev()) { + if (bblob->cblob() != nullptr) { + cout_it.set_to_list(cblob_it.data()->out_list()); + cout_it.move_to_last(); + cout_it.add_list_after(bblob->cblob()->out_list()); + delete bblob->cblob(); + } + } else { + if (bblob->cblob() != nullptr) + cblob_it.add_after_then_move(bblob->cblob()); + } + box_it.forward(); // next one + bblob = box_it.data(); + blob_box = bblob->bounding_box(); + + if (!bblob->joined_to_prev() && !cblobs.empty()) { + word = new WERD(&cblobs, 1, nullptr); + word_count++; + word_it.add_after_then_move(word); + if (bol) { + word->set_flag(W_BOL, true); + bol = false; + } + if (box_it.at_first()) { // at end of line + word->set_flag(W_EOL, true); + } + } + } + while (!box_it.at_first()); // until back at start + /* Setup the row with created words. 
*/ + real_row = new ROW(row, static_cast<int16_t>(row->kern_size), static_cast<int16_t>(row->space_size)); + word_it.set_to_list(real_row->word_list()); + //put words in row + word_it.add_list_after(&words); + real_row->recalc_bounding_box(); + if (tosp_debug_level > 4) { + tprintf ("Row:Made %d words in row ((%d,%d)(%d,%d))\n", + word_count, + real_row->bounding_box().left(), + real_row->bounding_box().bottom(), + real_row->bounding_box().right(), + real_row->bounding_box().top()); + } + return real_row; + } + return nullptr; +} + +bool Textord::make_a_word_break( + TO_ROW* row, // row being made + TBOX blob_box, // for next_blob // how many blanks? + int16_t prev_gap, + TBOX prev_blob_box, + int16_t real_current_gap, + int16_t within_xht_current_gap, + TBOX next_blob_box, + int16_t next_gap, + uint8_t& blanks, + bool& fuzzy_sp, + bool& fuzzy_non, + bool& prev_gap_was_a_space, + bool& break_at_next_gap) { + bool space; + int16_t current_gap; + float fuzzy_sp_to_kn_limit; + + if (break_at_next_gap) { + break_at_next_gap = false; + return true; + } + /* Inhibit using the reduced gap if + The kerning is large - chars are not kerned and reducing "f"s can cause + erroneous blanks + OR The real gap is less than 0 + OR The real gap is less than the kerning estimate + */ + if ((row->kern_size > tosp_large_kerning * row->xheight) || + ((tosp_dont_fool_with_small_kerns >= 0) && + (real_current_gap < tosp_dont_fool_with_small_kerns * row->kern_size))) + //Ignore the difference + within_xht_current_gap = real_current_gap; + + if (tosp_use_xht_gaps && tosp_only_use_xht_gaps) + current_gap = within_xht_current_gap; + else + current_gap = real_current_gap; + + if (tosp_old_to_method) { + //Boring old method + space = current_gap > row->max_nonspace; + if (space && (current_gap < INT16_MAX)) { + if (current_gap < row->min_space) { + if (current_gap > row->space_threshold) { + blanks = 1; + fuzzy_sp = true; + fuzzy_non = false; + } + else { + blanks = 0; + fuzzy_sp = false; + 
fuzzy_non = true; + } + } + else { + blanks = static_cast<uint8_t>(current_gap / row->space_size); + if (blanks < 1) + blanks = 1; + fuzzy_sp = false; + fuzzy_non = false; + } + } + return space; + } + else { + /* New exciting heuristic method */ + if (prev_blob_box.null_box ()) // Beginning of row + prev_gap_was_a_space = true; + + //Default as old TO + space = current_gap > row->space_threshold; + + /* Set defaults for the word break in case we find one. Currently there are + no fuzzy spaces. Depending on the reliability of the different heuristics + we may need to set PARTICULAR spaces to fuzzy or not. The values will ONLY + be used if the function returns true - ie the word is to be broken. + */ + int num_blanks = current_gap; + if (row->space_size > 1.0f) + num_blanks = IntCastRounded(current_gap / row->space_size); + blanks = static_cast<uint8_t>(ClipToRange<int>(num_blanks, 1, UINT8_MAX)); + fuzzy_sp = false; + fuzzy_non = false; + /* + If xht measure causes gap to flip one of the 3 thresholds act accordingly - + despite any other heuristics - the MINIMUM action is to pass a fuzzy kern to + context. 
+ */ + if (tosp_use_xht_gaps && + (real_current_gap <= row->max_nonspace) && + (within_xht_current_gap > row->max_nonspace)) { + space = true; + fuzzy_non = true; +#ifndef GRAPHICS_DISABLED + mark_gap (blob_box, 20, + prev_gap, prev_blob_box.width (), + current_gap, next_blob_box.width (), next_gap); +#endif + } + else if (tosp_use_xht_gaps && + (real_current_gap <= row->space_threshold) && + (within_xht_current_gap > row->space_threshold)) { + space = true; + if (tosp_flip_fuzz_kn_to_sp) + fuzzy_sp = true; + else + fuzzy_non = true; +#ifndef GRAPHICS_DISABLED + mark_gap (blob_box, 21, + prev_gap, prev_blob_box.width (), + current_gap, next_blob_box.width (), next_gap); +#endif + } + else if (tosp_use_xht_gaps && + (real_current_gap < row->min_space) && + (within_xht_current_gap >= row->min_space)) { + space = true; +#ifndef GRAPHICS_DISABLED + mark_gap (blob_box, 22, + prev_gap, prev_blob_box.width (), + current_gap, next_blob_box.width (), next_gap); +#endif + } + else if (tosp_force_wordbreak_on_punct && + !suspected_punct_blob(row, prev_blob_box) && + suspected_punct_blob(row, blob_box)) { + break_at_next_gap = true; + } + /* Now continue with normal heuristics */ + else if ((current_gap < row->min_space) && + (current_gap > row->space_threshold)) { + /* Heuristics to turn dubious spaces to kerns */ + if (tosp_pass_wide_fuzz_sp_to_context > 0) + fuzzy_sp_to_kn_limit = row->kern_size + + tosp_pass_wide_fuzz_sp_to_context * + (row->space_size - row->kern_size); + else + fuzzy_sp_to_kn_limit = 99999.0f; + + /* If current gap is significantly smaller than the previous space the other + side of a narrow blob then this gap is a kern. 
*/ + if ((prev_blob_box.width () > 0) && + narrow_blob (row, prev_blob_box) && + prev_gap_was_a_space && + (current_gap <= tosp_gap_factor * prev_gap)) { + if ((tosp_all_flips_fuzzy) || + (current_gap > fuzzy_sp_to_kn_limit)) { + if (tosp_flip_fuzz_sp_to_kn) + fuzzy_non = true; + else + fuzzy_sp = true; + } + else + space = false; +#ifndef GRAPHICS_DISABLED + mark_gap (blob_box, 1, + prev_gap, prev_blob_box.width (), + current_gap, next_blob_box.width (), next_gap); +#endif + } + /* If current gap not much bigger than the previous kern the other side of a + narrow blob then this gap is a kern as well */ + else if ((prev_blob_box.width () > 0) && + narrow_blob (row, prev_blob_box) && + !prev_gap_was_a_space && + (current_gap * tosp_gap_factor <= prev_gap)) { + if ((tosp_all_flips_fuzzy) || + (current_gap > fuzzy_sp_to_kn_limit)) { + if (tosp_flip_fuzz_sp_to_kn) + fuzzy_non = true; + else + fuzzy_sp = true; + } + else + space = false; +#ifndef GRAPHICS_DISABLED + mark_gap (blob_box, 2, + prev_gap, prev_blob_box.width (), + current_gap, next_blob_box.width (), next_gap); +#endif + } + else if ((next_blob_box.width () > 0) && + narrow_blob (row, next_blob_box) && + (next_gap > row->space_threshold) && + (current_gap <= tosp_gap_factor * next_gap)) { + if ((tosp_all_flips_fuzzy) || + (current_gap > fuzzy_sp_to_kn_limit)) { + if (tosp_flip_fuzz_sp_to_kn) + fuzzy_non = true; + else + fuzzy_sp = true; + } + else + space = false; +#ifndef GRAPHICS_DISABLED + mark_gap (blob_box, 3, + prev_gap, prev_blob_box.width (), + current_gap, next_blob_box.width (), next_gap); +#endif + } + else if ((next_blob_box.width () > 0) && + narrow_blob (row, next_blob_box) && + (next_gap <= row->space_threshold) && + (current_gap * tosp_gap_factor <= next_gap)) { + if ((tosp_all_flips_fuzzy) || + (current_gap > fuzzy_sp_to_kn_limit)) { + if (tosp_flip_fuzz_sp_to_kn) + fuzzy_non = true; + else + fuzzy_sp = true; + } + else + space = false; +#ifndef GRAPHICS_DISABLED + mark_gap (blob_box, 4, + 
prev_gap, prev_blob_box.width (), + current_gap, next_blob_box.width (), next_gap); +#endif + } + else if ((((next_blob_box.width () > 0) && + narrow_blob (row, next_blob_box)) || + ((prev_blob_box.width () > 0) && + narrow_blob (row, prev_blob_box)))) { + fuzzy_sp = true; +#ifndef GRAPHICS_DISABLED + mark_gap (blob_box, 6, + prev_gap, prev_blob_box.width (), + current_gap, next_blob_box.width (), next_gap); +#endif + } + } + else if ((current_gap > row->max_nonspace) && + (current_gap <= row->space_threshold)) { + + /* Heuristics to turn dubious kerns to spaces */ + /* TRIED THIS BUT IT MADE THINGS WORSE + if (prev_gap == INT16_MAX) + prev_gap = 0; // start of row + if (next_gap == INT16_MAX) + next_gap = 0; // end of row + */ + if ((prev_blob_box.width () > 0) && + (next_blob_box.width () > 0) && + (current_gap >= + tosp_kern_gap_factor1 * std::max(prev_gap, next_gap)) && + wide_blob (row, prev_blob_box) && + wide_blob (row, next_blob_box)) { + + space = true; + /* + tosp_flip_caution is an attempt to stop the default changing in cases + where there is a large difference between the kern and space estimates. + See problem in 'chiefs' where "have" gets split in the quotation. + */ + if ((tosp_flip_fuzz_kn_to_sp) && + ((tosp_flip_caution <= 0) || + (tosp_flip_caution * row->kern_size > row->space_size))) + fuzzy_sp = true; + else + fuzzy_non = true; +#ifndef GRAPHICS_DISABLED + mark_gap (blob_box, 7, + prev_gap, prev_blob_box.width (), + current_gap, next_blob_box.width (), next_gap); +#endif + } else if (prev_blob_box.width() > 0 && + next_blob_box.width() > 0 && + current_gap > 5 && // Rule 9 handles small gap, big ratio. 
+ current_gap >= + tosp_kern_gap_factor2 * std::max(prev_gap, next_gap) && + !(narrow_blob(row, prev_blob_box) || + suspected_punct_blob(row, prev_blob_box)) && + !(narrow_blob(row, next_blob_box) || + suspected_punct_blob(row, next_blob_box))) { + space = true; + fuzzy_non = true; +#ifndef GRAPHICS_DISABLED + mark_gap (blob_box, 8, + prev_gap, prev_blob_box.width (), + current_gap, next_blob_box.width (), next_gap); +#endif + } + else if ((tosp_kern_gap_factor3 > 0) && + (prev_blob_box.width () > 0) && + (next_blob_box.width () > 0) && + (current_gap >= tosp_kern_gap_factor3 * std::max(prev_gap, next_gap)) && + (!tosp_rule_9_test_punct || + (!suspected_punct_blob (row, prev_blob_box) && + !suspected_punct_blob (row, next_blob_box)))) { + space = true; + fuzzy_non = true; +#ifndef GRAPHICS_DISABLED + mark_gap (blob_box, 9, + prev_gap, prev_blob_box.width (), + current_gap, next_blob_box.width (), next_gap); +#endif + } + } + if (tosp_debug_level > 10) + tprintf("word break = %d current_gap = %d, prev_gap = %d, " + "next_gap = %d\n", space ? 
1 : 0, current_gap, + prev_gap, next_gap); + prev_gap_was_a_space = space && !(fuzzy_non); + return space; + } +} + +bool Textord::narrow_blob(TO_ROW* row, TBOX blob_box) { + bool result; + result = ((blob_box.width () <= tosp_narrow_fraction * row->xheight) || + ((static_cast<float>(blob_box.width ()) / blob_box.height ()) <= + tosp_narrow_aspect_ratio)); + return result; +} + +bool Textord::wide_blob(TO_ROW* row, TBOX blob_box) { + bool result; + if (tosp_wide_fraction > 0) { + if (tosp_wide_aspect_ratio > 0) + result = ((blob_box.width () >= tosp_wide_fraction * row->xheight) && + ((static_cast<float>(blob_box.width ()) / blob_box.height ()) > + tosp_wide_aspect_ratio)); + else + result = (blob_box.width () >= tosp_wide_fraction * row->xheight); + } + else + result = !narrow_blob (row, blob_box); + return result; +} + +bool Textord::suspected_punct_blob(TO_ROW* row, TBOX box) { + bool result; + float baseline; + float blob_x_centre; + /* Find baseline of centre of blob */ + blob_x_centre = (box.right () + box.left ()) / 2.0; + baseline = row->baseline.y (blob_x_centre); + + result = (box.height () <= 0.66 * row->xheight) || + (box.top () < baseline + row->xheight / 2.0) || + (box.bottom () > baseline + row->xheight / 2.0); + return result; +} + + +void Textord::peek_at_next_gap(TO_ROW *row, + BLOBNBOX_IT box_it, + TBOX &next_blob_box, + int16_t &next_gap, + int16_t &next_within_xht_gap) { + TBOX next_reduced_blob_box; + TBOX bit_beyond; + BLOBNBOX_IT reduced_box_it = box_it; + + next_blob_box = box_next (&box_it); + next_reduced_blob_box = reduced_box_next (row, &reduced_box_it); + if (box_it.at_first ()) { + next_gap = INT16_MAX; + next_within_xht_gap = INT16_MAX; + } + else { + bit_beyond = box_it.data ()->bounding_box (); + next_gap = bit_beyond.left () - next_blob_box.right (); + bit_beyond = reduced_box_next (row, &reduced_box_it); + next_within_xht_gap = + bit_beyond.left () - next_reduced_blob_box.right (); + } +} + + +#ifndef GRAPHICS_DISABLED +void 
Textord::mark_gap( + TBOX blob, // blob following gap + int16_t rule, // heuristic id + int16_t prev_gap, + int16_t prev_blob_width, + int16_t current_gap, + int16_t next_blob_width, + int16_t next_gap) { + ScrollView::Color col; //of ellipse marking flipped gap + + switch (rule) { + case 1: + col = ScrollView::RED; + break; + case 2: + col = ScrollView::CYAN; + break; + case 3: + col = ScrollView::GREEN; + break; + case 4: + col = ScrollView::BLACK; + break; + case 5: + col = ScrollView::MAGENTA; + break; + case 6: + col = ScrollView::BLUE; + break; + + case 7: + col = ScrollView::WHITE; + break; + case 8: + col = ScrollView::YELLOW; + break; + case 9: + col = ScrollView::BLACK; + break; + + case 20: + col = ScrollView::CYAN; + break; + case 21: + col = ScrollView::GREEN; + break; + case 22: + col = ScrollView::MAGENTA; + break; + default: + col = ScrollView::BLACK; + } + if (textord_show_initial_words) { + to_win->Pen(col); + /* if (rule < 20) + //interior_style(to_win, INT_SOLID, false); + else + //interior_style(to_win, INT_HOLLOW, true);*/ + //x radius + to_win->Ellipse (current_gap / 2.0f, + blob.height () / 2.0f, //y radius + //x centre + blob.left () - current_gap / 2.0f, + //y centre + blob.bottom () + blob.height () / 2.0f); + } + if (tosp_debug_level > 5) + tprintf(" (%d,%d) Sp<->Kn Rule %d %d %d %d %d %d\n", + blob.left() - current_gap / 2, blob.bottom(), rule, prev_gap, + prev_blob_width, current_gap, next_blob_width, next_gap); +} +#endif + +float Textord::find_mean_blob_spacing(WERD *word) { + C_BLOB_IT cblob_it; + TBOX blob_box; + int32_t gap_sum = 0; + int16_t gap_count = 0; + int16_t prev_right; + + cblob_it.set_to_list (word->cblob_list ()); + if (!cblob_it.empty ()) { + cblob_it.mark_cycle_pt (); + prev_right = cblob_it.data ()->bounding_box ().right (); + //first blob + cblob_it.forward (); + for (; !cblob_it.cycled_list (); cblob_it.forward ()) { + blob_box = cblob_it.data ()->bounding_box (); + gap_sum += blob_box.left () - prev_right; + 
gap_count++; + prev_right = blob_box.right (); + } + } + if (gap_count > 0) + return (gap_sum / static_cast<float>(gap_count)); + else + return 0.0f; +} + + +bool Textord::ignore_big_gap(TO_ROW* row, + int32_t row_length, + GAPMAP* gapmap, + int16_t left, + int16_t right) { + int16_t gap = right - left + 1; + + if (tosp_ignore_big_gaps > 999) return false; // Don't ignore + if (tosp_ignore_big_gaps > 0) + return (gap > tosp_ignore_big_gaps * row->xheight); + if (gap > tosp_ignore_very_big_gaps * row->xheight) + return true; + if (tosp_ignore_big_gaps == 0) { + if ((gap > 2.1 * row->xheight) && (row_length > 20 * row->xheight)) + return true; + if ((gap > 1.75 * row->xheight) && + ((row_length > 35 * row->xheight) || + gapmap->table_gap (left, right))) + return true; + } + else { + /* ONLY time gaps < 3.0 * xht are ignored is when they are part of a table */ + if ((gap > gapmap_big_gaps * row->xheight) && + gapmap->table_gap (left, right)) + return true; + } + return false; +} + +/********************************************************************** + * reduced_box_next + * + * Compute the bounding box of this blob with merging of x overlaps + * but no pre-chopping. + * Then move the iterator on to the start of the next blob. + * DON'T reduce the box for small things - eg punctuation. 
+ **********************************************************************/ +TBOX Textord::reduced_box_next( + TO_ROW *row, // current row + BLOBNBOX_IT *it // iterator to blobds + ) { + BLOBNBOX *blob; //current blob + BLOBNBOX *head_blob; //place to store box + TBOX full_box; //full blob boundg box + TBOX reduced_box; //box of significant part + int16_t left_above_xht; //ABOVE xht left limit + int16_t new_left_above_xht; //ABOVE xht left limit + + blob = it->data (); + if (blob->red_box_set ()) { + reduced_box = blob->reduced_box (); + do { + it->forward(); + blob = it->data(); + } + while (blob->cblob() == nullptr || blob->joined_to_prev()); + return reduced_box; + } + head_blob = blob; + full_box = blob->bounding_box (); + reduced_box = reduced_box_for_blob (blob, row, &left_above_xht); + do { + it->forward (); + blob = it->data (); + if (blob->cblob() == nullptr) + //was pre-chopped + full_box += blob->bounding_box (); + else if (blob->joined_to_prev ()) { + reduced_box += + reduced_box_for_blob(blob, row, &new_left_above_xht); + left_above_xht = std::min(left_above_xht, new_left_above_xht); + } + } + //until next real blob + while (blob->cblob() == nullptr || blob->joined_to_prev()); + + if ((reduced_box.width () > 0) && + ((reduced_box.left () + tosp_near_lh_edge * reduced_box.width ()) + < left_above_xht) && (reduced_box.height () > 0.7 * row->xheight)) { +#ifndef GRAPHICS_DISABLED + if (textord_show_initial_words) + reduced_box.plot (to_win, ScrollView::YELLOW, ScrollView::YELLOW); +#endif + } + else + reduced_box = full_box; + head_blob->set_reduced_box (reduced_box); + return reduced_box; +} + + +/************************************************************************* + * reduced_box_for_blob() + * Find box for blob which is the same height and y position as the whole blob, + * but whose left limit is the left most position of the blob ABOVE the + * baseline and whose right limit is the right most position of the blob BELOW + * the xheight. 
+ * + * + * !!!!!!! WONT WORK WITH LARGE UPPER CASE CHARS - T F V W - look at examples on + * "home". Perhaps we need something which say if the width ABOVE the + * xht alone includes the whole of the reduced width, then use the full + * blob box - Might still fail on italic F + * + * Alternatively we could be a little less severe and only reduce the + * left and right edges by half the difference between the full box and + * the reduced box. + * + * NOTE that we need to rotate all the coordinates as + * find_blob_limits finds the y min and max within a specified x band + *************************************************************************/ +TBOX Textord::reduced_box_for_blob( + BLOBNBOX *blob, + TO_ROW *row, + int16_t *left_above_xht) { + float baseline; + float blob_x_centre; + float left_limit; + float right_limit; + float junk; + TBOX blob_box; + + /* Find baseline of centre of blob */ + + blob_box = blob->bounding_box (); + blob_x_centre = (blob_box.left () + blob_box.right ()) / 2.0; + baseline = row->baseline.y (blob_x_centre); + + /* + Find LH limit of blob ABOVE the xht. This is so that we can detect certain + caps ht chars which should NOT have their box reduced: T, Y, V, W etc + */ + left_limit = static_cast<float>(INT32_MAX); + junk = static_cast<float>(-INT32_MAX); + find_cblob_hlimits(blob->cblob(), (baseline + 1.1 * row->xheight), + static_cast<float>(INT16_MAX), left_limit, junk); + if (left_limit > junk) + *left_above_xht = INT16_MAX; //No area above xht + else + *left_above_xht = static_cast<int16_t>(floor (left_limit)); + /* + Find reduced LH limit of blob - the left extent of the region ABOVE the + baseline. 
+ */ + left_limit = static_cast<float>(INT32_MAX); + junk = static_cast<float>(-INT32_MAX); + find_cblob_hlimits(blob->cblob(), baseline, static_cast<float>(INT16_MAX), + left_limit, junk); + + if (left_limit > junk) + return TBOX (); //no area within xht so return empty box + /* + Find reduced RH limit of blob - the right extent of the region BELOW the xht. + */ + junk = static_cast<float>(INT32_MAX); + right_limit = static_cast<float>(-INT32_MAX); + find_cblob_hlimits(blob->cblob(), static_cast<float>(-INT16_MAX), + (baseline + row->xheight), junk, right_limit); + if (junk > right_limit) + return TBOX (); //no area within xht so return empty box + + return TBOX (ICOORD (static_cast<int16_t>(floor (left_limit)), blob_box.bottom ()), + ICOORD (static_cast<int16_t>(ceil (right_limit)), blob_box.top ())); +} +} // namespace tesseract diff --git a/tesseract/src/textord/tovars.cpp b/tesseract/src/textord/tovars.cpp new file mode 100644 index 00000000..22e838f0 --- /dev/null +++ b/tesseract/src/textord/tovars.cpp @@ -0,0 +1,85 @@ +/********************************************************************** + * File: tovars.cpp (Formerly to_vars.c) + * Description: Variables used by textord. + * Author: Ray Smith + * Created: Tue Aug 24 16:55:02 BST 1993 + * + * (C) Copyright 1993, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#include "tovars.h" +#include "params.h" + +namespace tesseract { + +BOOL_VAR (textord_show_initial_words, false, "Display separate words"); +BOOL_VAR (textord_show_new_words, false, "Display separate words"); +BOOL_VAR (textord_show_fixed_words, false, +"Display forced fixed pitch words"); +BOOL_VAR (textord_blocksall_fixed, false, "Moan about prop blocks"); +BOOL_VAR (textord_blocksall_prop, false, +"Moan about fixed pitch blocks"); +BOOL_VAR (textord_blocksall_testing, false, "Dump stats when moaning"); +BOOL_VAR (textord_test_mode, false, "Do current test"); +INT_VAR (textord_dotmatrix_gap, 3, +"Max pixel gap for broken pixed pitch"); +INT_VAR (textord_debug_block, 0, "Block to do debug on"); +INT_VAR (textord_pitch_range, 2, "Max range test on pitch"); +double_VAR (textord_wordstats_smooth_factor, 0.05, +"Smoothing gap stats"); +double_VAR (textord_width_smooth_factor, 0.10, +"Smoothing width stats"); +double_VAR (textord_words_width_ile, 0.4, +"Ile of blob widths for space est"); +double_VAR (textord_words_maxspace, 4.0, "Multiple of xheight"); +double_VAR (textord_words_default_maxspace, 3.5, +"Max believable third space"); +double_VAR (textord_words_default_minspace, 0.6, +"Fraction of xheight"); +double_VAR (textord_words_min_minspace, 0.3, "Fraction of xheight"); +double_VAR (textord_words_default_nonspace, 0.2, +"Fraction of xheight"); +double_VAR(textord_words_initial_lower, 0.25, + "Max initial cluster size"); +double_VAR (textord_words_initial_upper, 0.15, +"Min initial cluster spacing"); +double_VAR (textord_words_minlarge, 0.75, +"Fraction of valid gaps needed"); +double_VAR (textord_words_pitchsd_threshold, 0.040, +"Pitch sync threshold"); +double_VAR (textord_words_def_fixed, 0.016, +"Threshold for definite fixed"); +double_VAR (textord_words_def_prop, 0.090, +"Threshold for definite prop"); +INT_VAR (textord_words_veto_power, 5, +"Rows required to outvote a veto"); 
+double_VAR (textord_pitch_rowsimilarity, 0.08, +"Fraction of xheight for sameness"); +BOOL_VAR (textord_pitch_scalebigwords, false, +"Scale scores on big words"); +double_VAR(words_initial_lower, 0.5, "Max initial cluster size"); +double_VAR (words_initial_upper, 0.15, "Min initial cluster spacing"); +double_VAR (words_default_prop_nonspace, 0.25, "Fraction of xheight"); +double_VAR (words_default_fixed_space, 0.75, "Fraction of xheight"); +double_VAR (words_default_fixed_limit, 0.6, "Allowed size variance"); +double_VAR (textord_words_definite_spread, 0.30, +"Non-fuzzy spacing region"); +double_VAR (textord_spacesize_ratiofp, 2.8, +"Min ratio space/nonspace"); +double_VAR (textord_spacesize_ratioprop, 2.0, +"Min ratio space/nonspace"); +double_VAR (textord_fpiqr_ratio, 1.5, "Pitch IQR/Gap IQR threshold"); +double_VAR (textord_max_pitch_iqr, 0.20, "Xh fraction noise in pitch"); +double_VAR (textord_fp_min_width, 0.5, "Min width of decent blobs"); + +} // namespace tesseract diff --git a/tesseract/src/textord/tovars.h b/tesseract/src/textord/tovars.h new file mode 100644 index 00000000..79d297a4 --- /dev/null +++ b/tesseract/src/textord/tovars.h @@ -0,0 +1,94 @@ +/********************************************************************** + * File: tovars.h (Formerly to_vars.h) + * Description: Variables used by textord. + * Author: Ray Smith + * Created: Tue Aug 24 16:55:02 BST 1993 + * + * (C) Copyright 1993, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef TOVARS_H +#define TOVARS_H + +#include "params.h" + +namespace tesseract { + +extern BOOL_VAR_H (textord_show_initial_words, false, +"Display separate words"); +extern BOOL_VAR_H (textord_show_new_words, false, "Display separate words"); +extern BOOL_VAR_H (textord_show_fixed_words, false, +"Display forced fixed pitch words"); +extern BOOL_VAR_H (textord_blocksall_fixed, false, "Moan about prop blocks"); +extern BOOL_VAR_H (textord_blocksall_prop, false, +"Moan about fixed pitch blocks"); +extern BOOL_VAR_H (textord_blocksall_testing, false, +"Dump stats when moaning"); +extern BOOL_VAR_H (textord_test_mode, false, "Do current test"); +extern INT_VAR_H (textord_dotmatrix_gap, 3, +"Max pixel gap for broken pixed pitch"); +extern INT_VAR_H (textord_debug_block, 0, "Block to do debug on"); +extern INT_VAR_H (textord_pitch_range, 2, "Max range test on pitch"); +extern double_VAR_H (textord_wordstats_smooth_factor, 0.05, +"Smoothing gap stats"); +extern double_VAR_H (textord_width_smooth_factor, 0.10, +"Smoothing width stats"); +extern double_VAR_H (textord_words_width_ile, 0.4, +"Ile of blob widths for space est"); +extern double_VAR_H (textord_words_maxspace, 4.0, "Multiple of xheight"); +extern double_VAR_H (textord_words_default_maxspace, 3.5, +"Max believable third space"); +extern double_VAR_H (textord_words_default_minspace, 0.6, +"Fraction of xheight"); +extern double_VAR_H (textord_words_min_minspace, 0.3, "Fraction of xheight"); +extern double_VAR_H (textord_words_default_nonspace, 0.2, +"Fraction of xheight"); +extern double_VAR_H(textord_words_initial_lower, 0.25, + "Max initial cluster size"); +extern double_VAR_H (textord_words_initial_upper, 0.15, +"Min initial cluster spacing"); +extern double_VAR_H (textord_words_minlarge, 0.75, +"Fraction of valid gaps 
needed"); +extern double_VAR_H (textord_words_pitchsd_threshold, 0.025, +"Pitch sync threshold"); +extern double_VAR_H (textord_words_def_fixed, 0.01, +"Threshold for definite fixed"); +extern double_VAR_H (textord_words_def_prop, 0.06, +"Threshold for definite prop"); +extern INT_VAR_H (textord_words_veto_power, 5, +"Rows required to outvote a veto"); +extern double_VAR_H (textord_pitch_rowsimilarity, 0.08, +"Fraction of xheight for sameness"); +extern BOOL_VAR_H (textord_pitch_scalebigwords, false, +"Scale scores on big words"); +extern double_VAR_H(words_initial_lower, 0.5, "Max initial cluster size"); +extern double_VAR_H (words_initial_upper, 0.15, +"Min initial cluster spacing"); +extern double_VAR_H (words_default_prop_nonspace, 0.25, +"Fraction of xheight"); +extern double_VAR_H (words_default_fixed_space, 0.75, "Fraction of xheight"); +extern double_VAR_H (words_default_fixed_limit, 0.6, "Allowed size variance"); +extern double_VAR_H (textord_words_definite_spread, 0.30, +"Non-fuzzy spacing region"); +extern double_VAR_H (textord_spacesize_ratiofp, 2.8, +"Min ratio space/nonspace"); +extern double_VAR_H (textord_spacesize_ratioprop, 2.0, +"Min ratio space/nonspace"); +extern double_VAR_H (textord_fpiqr_ratio, 1.5, "Pitch IQR/Gap IQR threshold"); +extern double_VAR_H (textord_max_pitch_iqr, 0.20, +"Xh fraction noise in pitch"); +extern double_VAR_H (textord_fp_min_width, 0.5, "Min width of decent blobs"); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/textord/underlin.cpp b/tesseract/src/textord/underlin.cpp new file mode 100644 index 00000000..6a732f27 --- /dev/null +++ b/tesseract/src/textord/underlin.cpp @@ -0,0 +1,278 @@ +/********************************************************************** + * File: underlin.cpp (Formerly undrline.c) + * Description: Code to chop blobs apart from underlines. + * Author: Ray Smith + * + * (C) Copyright 1994, Hewlett-Packard Ltd. 
+ ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#include "underlin.h" + +namespace tesseract { + +double_VAR (textord_underline_offset, 0.1, "Fraction of x to ignore"); +BOOL_VAR (textord_restore_underlines, true, "Chop underlines & put back"); + +/********************************************************************** + * restore_underlined_blobs + * + * Find underlined blobs and put them back in the row. + **********************************************************************/ + +void restore_underlined_blobs( //get chop points + TO_BLOCK *block //block to do + ) { + int16_t chop_coord; //chop boundary + TBOX blob_box; //of underline + BLOBNBOX *u_line; //underline bit + TO_ROW *row; //best row for blob + ICOORDELT_LIST chop_cells; //blobs to cut out + //real underlines + BLOBNBOX_LIST residual_underlines; + C_OUTLINE_LIST left_coutlines; + C_OUTLINE_LIST right_coutlines; + ICOORDELT_IT cell_it = &chop_cells; + //under lines + BLOBNBOX_IT under_it = &block->underlines; + BLOBNBOX_IT ru_it = &residual_underlines; + + if (block->get_rows()->empty()) + return; // Don't crash if there are no rows. + for (under_it.mark_cycle_pt (); !under_it.cycled_list (); + under_it.forward ()) { + u_line = under_it.extract (); + blob_box = u_line->bounding_box (); + row = most_overlapping_row (block->get_rows (), u_line); + if (row == nullptr) + return; // Don't crash if there is no row. 
+ find_underlined_blobs (u_line, &row->baseline, row->xheight, + row->xheight * textord_underline_offset, + &chop_cells); + cell_it.set_to_list (&chop_cells); + for (cell_it.mark_cycle_pt (); !cell_it.cycled_list (); + cell_it.forward ()) { + chop_coord = cell_it.data ()->x (); + if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) { + split_to_blob (u_line, chop_coord, + textord_fp_chop_error + 0.5, + &left_coutlines, + &right_coutlines); + if (!left_coutlines.empty()) { + ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines))); + } + chop_coord = cell_it.data ()->y (); + split_to_blob(nullptr, chop_coord, textord_fp_chop_error + 0.5, + &left_coutlines, &right_coutlines); + if (!left_coutlines.empty()) { + row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines))); + } + u_line = nullptr; //no more blobs to add + } + delete cell_it.extract(); + } + if (!right_coutlines.empty ()) { + split_to_blob(nullptr, blob_box.right(), textord_fp_chop_error + 0.5, + &left_coutlines, &right_coutlines); + if (!left_coutlines.empty()) + ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines))); + } + if (u_line != nullptr) { + delete u_line->cblob(); + delete u_line; + } + } + if (!ru_it.empty()) { + ru_it.move_to_first(); + for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) { + under_it.add_after_then_move(ru_it.extract()); + } + } +} + + +/********************************************************************** + * most_overlapping_row + * + * Return the row which most overlaps the blob. 
+ **********************************************************************/ + +TO_ROW *most_overlapping_row( //find best row + TO_ROW_LIST *rows, //list of rows + BLOBNBOX *blob //blob to place + ) { + int16_t x = (blob->bounding_box ().left () + + blob->bounding_box ().right ()) / 2; + TO_ROW_IT row_it = rows; //row iterator + TO_ROW *row; //current row + TO_ROW *best_row; //output row + float overlap; //of blob & row + float bestover; //best overlap + + best_row = nullptr; + bestover = static_cast<float>(-INT32_MAX); + if (row_it.empty ()) + return nullptr; + row = row_it.data (); + row_it.mark_cycle_pt (); + while (row->baseline.y (x) + row->descdrop > blob->bounding_box ().top () + && !row_it.cycled_list ()) { + best_row = row; + bestover = + blob->bounding_box ().top () - row->baseline.y (x) + row->descdrop; + row_it.forward (); + row = row_it.data (); + } + while (row->baseline.y (x) + row->xheight + row->ascrise + >= blob->bounding_box ().bottom () && !row_it.cycled_list ()) { + overlap = row->baseline.y (x) + row->xheight + row->ascrise; + if (blob->bounding_box ().top () < overlap) + overlap = blob->bounding_box ().top (); + if (blob->bounding_box ().bottom () > + row->baseline.y (x) + row->descdrop) + overlap -= blob->bounding_box ().bottom (); + else + overlap -= row->baseline.y (x) + row->descdrop; + if (overlap > bestover) { + bestover = overlap; + best_row = row; + } + row_it.forward (); + row = row_it.data (); + } + if (bestover < 0 + && row->baseline.y (x) + row->xheight + row->ascrise + - blob->bounding_box ().bottom () > bestover) + best_row = row; + return best_row; +} + + +/********************************************************************** + * find_underlined_blobs + * + * Find the start and end coords of blobs in the underline. 
+ **********************************************************************/ + +void find_underlined_blobs( //get chop points + BLOBNBOX *u_line, //underlined unit + QSPLINE *baseline, //actual baseline + float xheight, //height of line + float baseline_offset, //amount to shrinke it + ICOORDELT_LIST *chop_cells //places to chop + ) { + int16_t x, y; //sides of blob + ICOORD blob_chop; //sides of blob + TBOX blob_box = u_line->bounding_box (); + //cell iterator + ICOORDELT_IT cell_it = chop_cells; + STATS upper_proj (blob_box.left (), blob_box.right () + 1); + STATS middle_proj (blob_box.left (), blob_box.right () + 1); + STATS lower_proj (blob_box.left (), blob_box.right () + 1); + C_OUTLINE_IT out_it; //outlines of blob + + ASSERT_HOST (u_line->cblob () != nullptr); + + out_it.set_to_list (u_line->cblob ()->out_list ()); + for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { + vertical_cunderline_projection (out_it.data (), + baseline, xheight, baseline_offset, + &lower_proj, &middle_proj, &upper_proj); + } + + for (x = blob_box.left (); x < blob_box.right (); x++) { + if (middle_proj.pile_count (x) > 0) { + for (y = x + 1; + y < blob_box.right () && middle_proj.pile_count (y) > 0; y++); + blob_chop = ICOORD (x, y); + cell_it.add_after_then_move (new ICOORDELT (blob_chop)); + x = y; + } + } +} + + +/********************************************************************** + * vertical_cunderline_projection + * + * Compute the vertical projection of a outline from its outlines + * and add to the given STATS. 
+ **********************************************************************/ + +void vertical_cunderline_projection( //project outlines + C_OUTLINE *outline, //outline to project + QSPLINE *baseline, //actual baseline + float xheight, //height of line + float baseline_offset, //amount to shrinke it + STATS *lower_proj, //below baseline + STATS *middle_proj, //centre region + STATS *upper_proj //top region + ) { + ICOORD pos; //current point + ICOORD step; //edge step + int16_t lower_y, upper_y; //region limits + int32_t length; //of outline + int16_t stepindex; //current step + C_OUTLINE_IT out_it = outline->child (); + + pos = outline->start_pos (); + length = outline->pathlength (); + for (stepindex = 0; stepindex < length; stepindex++) { + step = outline->step (stepindex); + if (step.x () > 0) { + lower_y = + static_cast<int16_t>(floor (baseline->y (pos.x ()) + baseline_offset + 0.5)); + upper_y = + static_cast<int16_t>(floor (baseline->y (pos.x ()) + baseline_offset + + xheight + 0.5)); + if (pos.y () >= lower_y) { + lower_proj->add (pos.x (), -lower_y); + if (pos.y () >= upper_y) { + middle_proj->add (pos.x (), lower_y - upper_y); + upper_proj->add (pos.x (), upper_y - pos.y ()); + } + else + middle_proj->add (pos.x (), lower_y - pos.y ()); + } + else + lower_proj->add (pos.x (), -pos.y ()); + } + else if (step.x () < 0) { + lower_y = + static_cast<int16_t>(floor (baseline->y (pos.x () - 1) + baseline_offset + + 0.5)); + upper_y = + static_cast<int16_t>(floor (baseline->y (pos.x () - 1) + baseline_offset + + xheight + 0.5)); + if (pos.y () >= lower_y) { + lower_proj->add (pos.x () - 1, lower_y); + if (pos.y () >= upper_y) { + middle_proj->add (pos.x () - 1, upper_y - lower_y); + upper_proj->add (pos.x () - 1, pos.y () - upper_y); + } + else + middle_proj->add (pos.x () - 1, pos.y () - lower_y); + } + else + lower_proj->add (pos.x () - 1, pos.y ()); + } + pos += step; + } + + for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) { + 
vertical_cunderline_projection (out_it.data (), + baseline, xheight, baseline_offset, + lower_proj, middle_proj, upper_proj); + } +} + +} // namespace tesseract diff --git a/tesseract/src/textord/underlin.h b/tesseract/src/textord/underlin.h new file mode 100644 index 00000000..09be1b40 --- /dev/null +++ b/tesseract/src/textord/underlin.h @@ -0,0 +1,56 @@ +/********************************************************************** + * File: underlin.h (Formerly undrline.h) + * Description: Code to chop blobs apart from underlines. + * Author: Ray Smith + * + * (C) Copyright 1994, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. 
+ * + **********************************************************************/ + +#ifndef UNDERLIN_H +#define UNDERLIN_H + +#include "fpchop.h" + +namespace tesseract { + +extern double_VAR_H (textord_underline_offset, 0.1, +"Fraction of x to ignore"); +extern BOOL_VAR_H (textord_restore_underlines, false, +"Chop underlines & put back"); +void restore_underlined_blobs( //get chop points + TO_BLOCK *block //block to do + ); +TO_ROW *most_overlapping_row( //find best row + TO_ROW_LIST *rows, //list of rows + BLOBNBOX *blob //blob to place + ); +void find_underlined_blobs( //get chop points + BLOBNBOX *u_line, //underlined unit + QSPLINE *baseline, //actual baseline + float xheight, //height of line + float baseline_offset, //amount to shrinke it + ICOORDELT_LIST *chop_cells //places to chop + ); +void vertical_cunderline_projection( //project outlines + C_OUTLINE *outline, //outline to project + QSPLINE *baseline, //actual baseline + float xheight, //height of line + float baseline_offset, //amount to shrinke it + STATS *lower_proj, //below baseline + STATS *middle_proj, //centre region + STATS *upper_proj //top region + ); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/textord/wordseg.cpp b/tesseract/src/textord/wordseg.cpp new file mode 100644 index 00000000..d8b5516e --- /dev/null +++ b/tesseract/src/textord/wordseg.cpp @@ -0,0 +1,625 @@ +/********************************************************************** + * File: wordseg.cpp (Formerly wspace.c) + * Description: Code to segment the blobs into words. + * Author: Ray Smith + * + * (C) Copyright 1992, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. 
+ ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + + // Include automatically generated configuration file if running autoconf. +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "wordseg.h" + +#include "blobbox.h" +#include "statistc.h" +#include "drawtord.h" +#include "makerow.h" +#include "pitsync1.h" +#include "tovars.h" +#include "topitch.h" +#include "cjkpitch.h" +#include "textord.h" +#include "fpchop.h" + +namespace tesseract { + +BOOL_VAR(textord_fp_chopping, true, "Do fixed pitch chopping"); +BOOL_VAR(textord_force_make_prop_words, false, + "Force proportional word segmentation on all rows"); +BOOL_VAR(textord_chopper_test, false, + "Chopper is being tested."); + +#define BLOCK_STATS_CLUSTERS 10 + + +/** + * @name make_single_word + * + * For each row, arrange the blobs into one word. There is no fixed + * pitch detection. + */ + +void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows) { + TO_ROW_IT to_row_it(rows); + ROW_IT row_it(real_rows); + for (to_row_it.mark_cycle_pt(); !to_row_it.cycled_list(); + to_row_it.forward()) { + TO_ROW* row = to_row_it.data(); + // The blobs have to come out of the BLOBNBOX into the C_BLOB_LIST ready + // to create the word. 
+ C_BLOB_LIST cblobs; + C_BLOB_IT cblob_it(&cblobs); + BLOBNBOX_IT box_it(row->blob_list()); + for (;!box_it.empty(); box_it.forward()) { + BLOBNBOX* bblob= box_it.extract(); + if (bblob->joined_to_prev() || (one_blob && !cblob_it.empty())) { + if (bblob->cblob() != nullptr) { + C_OUTLINE_IT cout_it(cblob_it.data()->out_list()); + cout_it.move_to_last(); + cout_it.add_list_after(bblob->cblob()->out_list()); + delete bblob->cblob(); + } + } else { + if (bblob->cblob() != nullptr) + cblob_it.add_after_then_move(bblob->cblob()); + } + delete bblob; + } + // Convert the TO_ROW to a ROW. + ROW* real_row = new ROW(row, static_cast<int16_t>(row->kern_size), + static_cast<int16_t>(row->space_size)); + WERD_IT word_it(real_row->word_list()); + WERD* word = new WERD(&cblobs, 0, nullptr); + word->set_flag(W_BOL, true); + word->set_flag(W_EOL, true); + word->set_flag(W_DONT_CHOP, one_blob); + word_it.add_after_then_move(word); + row_it.add_after_then_move(real_row); + } +} + +/** + * make_words + * + * Arrange the blobs into words. + */ +void make_words(tesseract::Textord *textord, + ICOORD page_tr, // top right + float gradient, // page skew + BLOCK_LIST *blocks, // block list + TO_BLOCK_LIST *port_blocks) { // output list + TO_BLOCK_IT block_it; // iterator + TO_BLOCK *block; // current block + + if (textord->use_cjk_fp_model()) { + compute_fixed_pitch_cjk(page_tr, port_blocks); + } else { + compute_fixed_pitch(page_tr, port_blocks, gradient, FCOORD(0.0f, -1.0f), + !bool(textord_test_landscape)); + } + textord->to_spacing(page_tr, port_blocks); + block_it.set_to_list(port_blocks); + for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { + block = block_it.data(); + make_real_words(textord, block, FCOORD(1.0f, 0.0f)); + } +} + + +/** + * @name set_row_spaces + * + * Set the min_space and max_nonspace members of the row so that + * the blobs can be arranged into words. 
+ */ + +void set_row_spaces( //find space sizes + TO_BLOCK* block, //block to do + FCOORD rotation, //for drawing + bool testing_on //correct orientation +) { + TO_ROW *row; //current row + TO_ROW_IT row_it = block->get_rows (); + + if (row_it.empty ()) + return; //empty block + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + if (row->fixed_pitch == 0) { + row->min_space = + static_cast<int32_t>(ceil (row->pr_space - + (row->pr_space - + row->pr_nonsp) * textord_words_definite_spread)); + row->max_nonspace = + static_cast<int32_t>(floor (row->pr_nonsp + + (row->pr_space - + row->pr_nonsp) * textord_words_definite_spread)); + if (testing_on && textord_show_initial_words) { + tprintf ("Assigning defaults %d non, %d space to row at %g\n", + row->max_nonspace, row->min_space, row->intercept ()); + } + row->space_threshold = (row->max_nonspace + row->min_space) / 2; + row->space_size = row->pr_space; + row->kern_size = row->pr_nonsp; + } +#ifndef GRAPHICS_DISABLED + if (textord_show_initial_words && testing_on) { + plot_word_decisions (to_win, static_cast<int16_t>(row->fixed_pitch), row); + } +#endif + } +} + + +/** + * @name row_words + * + * Compute the max nonspace and min space for the row. 
+ */ + +int32_t row_words( //compute space size + TO_BLOCK* block, //block it came from + TO_ROW* row, //row to operate on + int32_t maxwidth, //max expected space size + FCOORD rotation, //for drawing + bool testing_on //for debug +) { + bool testing_row; //contains testpt + bool prev_valid; //if decent size + int32_t prev_x; //end of prev blob + int32_t cluster_count; //no of clusters + int32_t gap_index; //which cluster + int32_t smooth_factor; //for smoothing stats + BLOBNBOX *blob; //current blob + float lower, upper; //clustering parameters + float gaps[3]; //gap clusers + ICOORD testpt; + TBOX blob_box; //bounding box + //iterator + BLOBNBOX_IT blob_it = row->blob_list (); + STATS gap_stats (0, maxwidth); + STATS cluster_stats[4]; //clusters + + testpt = ICOORD (textord_test_x, textord_test_y); + smooth_factor = + static_cast<int32_t>(block->xheight * textord_wordstats_smooth_factor + 1.5); + // if (testing_on) + // tprintf("Row smooth factor=%d\n",smooth_factor); + prev_valid = false; + prev_x = -INT32_MAX; + testing_row = false; + for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { + blob = blob_it.data (); + blob_box = blob->bounding_box (); + if (blob_box.contains (testpt)) + testing_row = true; + gap_stats.add (blob_box.width (), 1); + } + gap_stats.clear (); + for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { + blob = blob_it.data (); + if (!blob->joined_to_prev ()) { + blob_box = blob->bounding_box (); + if (prev_valid && blob_box.left () - prev_x < maxwidth) { + gap_stats.add (blob_box.left () - prev_x, 1); + } + prev_valid = true; + prev_x = blob_box.right (); + } + } + if (gap_stats.get_total () == 0) { + row->min_space = 0; //no evidence + row->max_nonspace = 0; + return 0; + } + gap_stats.smooth (smooth_factor); + lower = row->xheight * textord_words_initial_lower; + upper = row->xheight * textord_words_initial_upper; + cluster_count = gap_stats.cluster (lower, upper, + 
textord_spacesize_ratioprop, 3, + cluster_stats); + while (cluster_count < 2 && ceil (lower) < floor (upper)) { + //shrink gap + upper = (upper * 3 + lower) / 4; + lower = (lower * 3 + upper) / 4; + cluster_count = gap_stats.cluster (lower, upper, + textord_spacesize_ratioprop, 3, + cluster_stats); + } + if (cluster_count < 2) { + row->min_space = 0; //no evidence + row->max_nonspace = 0; + return 0; + } + for (gap_index = 0; gap_index < cluster_count; gap_index++) + gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5); + //get medians + if (cluster_count > 2) { + if (testing_on && textord_show_initial_words) { + tprintf ("Row at %g has 3 sizes of gap:%g,%g,%g\n", + row->intercept (), + cluster_stats[1].ile (0.5), + cluster_stats[2].ile (0.5), cluster_stats[3].ile (0.5)); + } + lower = gaps[0]; + if (gaps[1] > lower) { + upper = gaps[1]; //prefer most frequent + if (upper < block->xheight * textord_words_min_minspace + && gaps[2] > gaps[1]) { + upper = gaps[2]; + } + } + else if (gaps[2] > lower + && gaps[2] >= block->xheight * textord_words_min_minspace) + upper = gaps[2]; + else if (lower >= block->xheight * textord_words_min_minspace) { + upper = lower; //not nice + lower = gaps[1]; + if (testing_on && textord_show_initial_words) { + tprintf ("Had to switch most common from lower to upper!!\n"); + gap_stats.print(); + } + } + else { + row->min_space = 0; //no evidence + row->max_nonspace = 0; + return 0; + } + } + else { + if (gaps[1] < gaps[0]) { + if (testing_on && textord_show_initial_words) { + tprintf ("Had to switch most common from lower to upper!!\n"); + gap_stats.print(); + } + lower = gaps[1]; + upper = gaps[0]; + } + else { + upper = gaps[1]; + lower = gaps[0]; + } + } + if (upper < block->xheight * textord_words_min_minspace) { + row->min_space = 0; //no evidence + row->max_nonspace = 0; + return 0; + } + if (upper * 3 < block->min_space * 2 + block->max_nonspace + || lower * 3 > block->min_space * 2 + block->max_nonspace) { + if (testing_on && 
textord_show_initial_words) { + tprintf ("Disagreement between block and row at %g!!\n", + row->intercept ()); + tprintf ("Lower=%g, upper=%g, Stats:\n", lower, upper); + gap_stats.print(); + } + } + row->min_space = + static_cast<int32_t>(ceil (upper - (upper - lower) * textord_words_definite_spread)); + row->max_nonspace = + static_cast<int32_t>(floor (lower + (upper - lower) * textord_words_definite_spread)); + row->space_threshold = (row->max_nonspace + row->min_space) / 2; + row->space_size = upper; + row->kern_size = lower; + if (testing_on && textord_show_initial_words) { + if (testing_row) { + tprintf ("GAP STATS\n"); + gap_stats.print(); + tprintf ("SPACE stats\n"); + cluster_stats[2].print_summary(); + tprintf ("NONSPACE stats\n"); + cluster_stats[1].print_summary(); + } + tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n", + row->intercept (), row->min_space, upper, + row->max_nonspace, lower); + } + return cluster_stats[2].get_total (); +} + + +/** + * @name row_words2 + * + * Compute the max nonspace and min space for the row. 
+ */ + +int32_t row_words2( //compute space size + TO_BLOCK* block, //block it came from + TO_ROW* row, //row to operate on + int32_t maxwidth, //max expected space size + FCOORD rotation, //for drawing + bool testing_on //for debug +) { + bool prev_valid; //if decent size + bool this_valid; //current blob big enough + int32_t prev_x; //end of prev blob + int32_t min_width; //min interesting width + int32_t valid_count; //good gaps + int32_t total_count; //total gaps + int32_t cluster_count; //no of clusters + int32_t prev_count; //previous cluster_count + int32_t gap_index; //which cluster + int32_t smooth_factor; //for smoothing stats + BLOBNBOX *blob; //current blob + float lower, upper; //clustering parameters + ICOORD testpt; + TBOX blob_box; //bounding box + //iterator + BLOBNBOX_IT blob_it = row->blob_list (); + STATS gap_stats (0, maxwidth); + //gap sizes + float gaps[BLOCK_STATS_CLUSTERS]; + STATS cluster_stats[BLOCK_STATS_CLUSTERS + 1]; + //clusters + + testpt = ICOORD (textord_test_x, textord_test_y); + smooth_factor = + static_cast<int32_t>(block->xheight * textord_wordstats_smooth_factor + 1.5); + // if (testing_on) + // tprintf("Row smooth factor=%d\n",smooth_factor); + prev_valid = false; + prev_x = -INT16_MAX; + const bool testing_row = false; + //min blob size + min_width = static_cast<int32_t>(block->pr_space); + total_count = 0; + for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) { + blob = blob_it.data (); + if (!blob->joined_to_prev ()) { + blob_box = blob->bounding_box (); + this_valid = blob_box.width () >= min_width; + if (this_valid && prev_valid + && blob_box.left () - prev_x < maxwidth) { + gap_stats.add (blob_box.left () - prev_x, 1); + } + total_count++; //count possibles + prev_x = blob_box.right (); + prev_valid = this_valid; + } + } + valid_count = gap_stats.get_total (); + if (valid_count < total_count * textord_words_minlarge) { + gap_stats.clear (); + prev_x = -INT16_MAX; + for (blob_it.mark_cycle_pt (); 
!blob_it.cycled_list (); + blob_it.forward ()) { + blob = blob_it.data (); + if (!blob->joined_to_prev ()) { + blob_box = blob->bounding_box (); + if (blob_box.left () - prev_x < maxwidth) { + gap_stats.add (blob_box.left () - prev_x, 1); + } + prev_x = blob_box.right (); + } + } + } + if (gap_stats.get_total () == 0) { + row->min_space = 0; //no evidence + row->max_nonspace = 0; + return 0; + } + + cluster_count = 0; + lower = block->xheight * words_initial_lower; + upper = block->xheight * words_initial_upper; + gap_stats.smooth (smooth_factor); + do { + prev_count = cluster_count; + cluster_count = gap_stats.cluster (lower, upper, + textord_spacesize_ratioprop, + BLOCK_STATS_CLUSTERS, cluster_stats); + } + while (cluster_count > prev_count && cluster_count < BLOCK_STATS_CLUSTERS); + if (cluster_count < 1) { + row->min_space = 0; + row->max_nonspace = 0; + return 0; + } + for (gap_index = 0; gap_index < cluster_count; gap_index++) + gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5); + //get medians + if (testing_on) { + tprintf ("cluster_count=%d:", cluster_count); + for (gap_index = 0; gap_index < cluster_count; gap_index++) + tprintf (" %g(%d)", gaps[gap_index], + cluster_stats[gap_index + 1].get_total ()); + tprintf ("\n"); + } + + //Try to find proportional non-space and space for row. 
+ for (gap_index = 0; gap_index < cluster_count + && gaps[gap_index] > block->max_nonspace; gap_index++); + if (gap_index < cluster_count) + lower = gaps[gap_index]; //most frequent below + else { + if (testing_on) + tprintf ("No cluster below block threshold!, using default=%g\n", + block->pr_nonsp); + lower = block->pr_nonsp; + } + for (gap_index = 0; gap_index < cluster_count + && gaps[gap_index] <= block->max_nonspace; gap_index++); + if (gap_index < cluster_count) + upper = gaps[gap_index]; //most frequent above + else { + if (testing_on) + tprintf ("No cluster above block threshold!, using default=%g\n", + block->pr_space); + upper = block->pr_space; + } + row->min_space = + static_cast<int32_t>(ceil (upper - (upper - lower) * textord_words_definite_spread)); + row->max_nonspace = + static_cast<int32_t>(floor (lower + (upper - lower) * textord_words_definite_spread)); + row->space_threshold = (row->max_nonspace + row->min_space) / 2; + row->space_size = upper; + row->kern_size = lower; + if (testing_on) { + if (testing_row) { + tprintf ("GAP STATS\n"); + gap_stats.print(); + tprintf ("SPACE stats\n"); + cluster_stats[2].print_summary(); + tprintf ("NONSPACE stats\n"); + cluster_stats[1].print_summary(); + } + tprintf ("Row at %g has minspace=%d(%g), max_non=%d(%g)\n", + row->intercept (), row->min_space, upper, + row->max_nonspace, lower); + } + return 1; +} + + +/** + * @name make_real_words + * + * Convert a TO_BLOCK to a BLOCK. 
+ */ + +void make_real_words( + tesseract::Textord *textord, + TO_BLOCK *block, //block to do + FCOORD rotation //for drawing + ) { + TO_ROW *row; //current row + TO_ROW_IT row_it = block->get_rows (); + ROW *real_row = nullptr; //output row + ROW_IT real_row_it = block->block->row_list (); + + if (row_it.empty ()) + return; //empty block + for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) { + row = row_it.data (); + if (row->blob_list ()->empty () && !row->rep_words.empty ()) { + real_row = make_rep_words (row, block); + } else if (!row->blob_list()->empty()) { + // In a fixed pitch document, some lines may be detected as fixed pitch + // while others don't, and will go through different path. + // For non-space delimited language like CJK, fixed pitch chop always + // leave the entire line as one word. We can force consistent chopping + // with force_make_prop_words flag. + POLY_BLOCK* pb = block->block->pdblk.poly_block(); + if (textord_chopper_test) { + real_row = textord->make_blob_words (row, rotation); + } else if (textord_force_make_prop_words || + (pb != nullptr && !pb->IsText()) || + row->pitch_decision == PITCH_DEF_PROP || + row->pitch_decision == PITCH_CORR_PROP) { + real_row = textord->make_prop_words (row, rotation); + } else if (row->pitch_decision == PITCH_DEF_FIXED || + row->pitch_decision == PITCH_CORR_FIXED) { + real_row = fixed_pitch_words (row, rotation); + } else { + ASSERT_HOST(false); + } + } + if (real_row != nullptr) { + //put row in block + real_row_it.add_after_then_move (real_row); + } + } + block->block->set_stats (block->fixed_pitch == 0, static_cast<int16_t>(block->kern_size), + static_cast<int16_t>(block->space_size), + static_cast<int16_t>(block->fixed_pitch)); + block->block->check_pitch (); +} + + +/** + * @name make_rep_words + * + * Fabricate a real row from only the repeated blob words. + * Get the xheight from the block as it may be more meaningful. 
+ */ + +ROW *make_rep_words( //make a row + TO_ROW *row, //row to convert + TO_BLOCK *block //block it lives in + ) { + ROW *real_row; //output row + TBOX word_box; //bounding box + //iterator + WERD_IT word_it = &row->rep_words; + + if (word_it.empty ()) + return nullptr; + word_box = word_it.data ()->bounding_box (); + for (word_it.mark_cycle_pt (); !word_it.cycled_list (); word_it.forward ()) + word_box += word_it.data ()->bounding_box (); + row->xheight = block->xheight; + real_row = new ROW(row, + static_cast<int16_t>(block->kern_size), static_cast<int16_t>(block->space_size)); + word_it.set_to_list (real_row->word_list ()); + //put words in row + word_it.add_list_after (&row->rep_words); + real_row->recalc_bounding_box (); + return real_row; +} + + +/** + * @name make_real_word + * + * Construct a WERD from a given number of adjacent entries in a + * list of BLOBNBOXs. + */ + +WERD *make_real_word(BLOBNBOX_IT *box_it, //iterator + int32_t blobcount, //no of blobs to use + bool bol, //start of line + uint8_t blanks //no of blanks + ) { + C_OUTLINE_IT cout_it; + C_BLOB_LIST cblobs; + C_BLOB_IT cblob_it = &cblobs; + WERD *word; // new word + BLOBNBOX *bblob; // current blob + int32_t blobindex; // in row + + for (blobindex = 0; blobindex < blobcount; blobindex++) { + bblob = box_it->extract(); + if (bblob->joined_to_prev()) { + if (bblob->cblob() != nullptr) { + cout_it.set_to_list(cblob_it.data()->out_list()); + cout_it.move_to_last(); + cout_it.add_list_after(bblob->cblob()->out_list()); + delete bblob->cblob(); + } + } + else { + if (bblob->cblob() != nullptr) + cblob_it.add_after_then_move(bblob->cblob()); + } + delete bblob; + box_it->forward(); // next one + } + + if (blanks < 1) + blanks = 1; + + word = new WERD(&cblobs, blanks, nullptr); + + if (bol) + word->set_flag(W_BOL, true); + if (box_it->at_first()) + word->set_flag(W_EOL, true); // at end of line + + return word; +} + +} // namespace tesseract diff --git a/tesseract/src/textord/wordseg.h 
b/tesseract/src/textord/wordseg.h new file mode 100644 index 00000000..88e9cfdc --- /dev/null +++ b/tesseract/src/textord/wordseg.h @@ -0,0 +1,78 @@ +/********************************************************************** + * File: wordseg.h (Formerly wspace.h) + * Description: Code to segment the blobs into words. + * Author: Ray Smith + * + * (C) Copyright 1992, Hewlett-Packard Ltd. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + **********************************************************************/ + +#ifndef WORDSEG_H +#define WORDSEG_H + +#include "params.h" +#include "blobbox.h" +#include "textord.h" + +namespace tesseract { +class Tesseract; + +extern BOOL_VAR_H (textord_fp_chopping, true, "Do fixed pitch chopping"); +extern BOOL_VAR_H(textord_force_make_prop_words, false, + "Force proportional word segmentation on all rows"); +extern BOOL_VAR_H (textord_chopper_test, false, + "Chopper is being tested."); + +void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST* real_rows); +void make_words(tesseract::Textord *textord, + ICOORD page_tr, // top right + float gradient, // page skew + BLOCK_LIST *blocks, // block list + TO_BLOCK_LIST *port_blocks); // output list +void set_row_spaces( //find space sizes + TO_BLOCK* block, //block to do + FCOORD rotation, //for drawing + bool testing_on //correct orientation +); +int32_t row_words( //compute space size + TO_BLOCK* block, //block it came from + TO_ROW* row, //row to operate on + int32_t 
maxwidth, //max expected space size + FCOORD rotation, //for drawing + bool testing_on //for debug +); +int32_t row_words2( //compute space size + TO_BLOCK* block, //block it came from + TO_ROW* row, //row to operate on + int32_t maxwidth, //max expected space size + FCOORD rotation, //for drawing + bool testing_on //for debug +); +void make_real_words( + tesseract::Textord *textord, + TO_BLOCK *block, //block to do + FCOORD rotation //for drawing + ); +ROW *make_rep_words( //make a row + TO_ROW *row, //row to convert + TO_BLOCK *block //block it lives in + ); +WERD *make_real_word( //make a WERD + BLOBNBOX_IT* box_it, //iterator + int32_t blobcount, //no of blobs to use + bool bol, //start of line + uint8_t blanks //no of blanks +); + +} // namespace tesseract + +#endif diff --git a/tesseract/src/textord/workingpartset.cpp b/tesseract/src/textord/workingpartset.cpp new file mode 100644 index 00000000..97ce70ae --- /dev/null +++ b/tesseract/src/textord/workingpartset.cpp @@ -0,0 +1,144 @@ +/////////////////////////////////////////////////////////////////////// +// File: workingpartset.cpp +// Description: Class to hold a working set of partitions of the page +// during construction of text/image regions. +// Author: Ray Smith +// Created: Tue Ocr 28 17:21:01 PDT 2008 +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +/////////////////////////////////////////////////////////////////////// + +#include "workingpartset.h" +#include "colpartition.h" + +namespace tesseract { + +ELISTIZE(WorkingPartSet) + +// Add the partition to this WorkingPartSet. Unrelated partitions are +// stored in the order in which they are received, but if the partition +// has a SingletonPartner, make sure that it stays with its partner. +void WorkingPartSet::AddPartition(ColPartition* part) { + ColPartition* partner = part->SingletonPartner(true); + if (partner != nullptr) { + ASSERT_HOST(partner->SingletonPartner(false) == part); + } + if (latest_part_ == nullptr || partner == nullptr) { + // This partition goes at the end of the list + part_it_.move_to_last(); + } else if (latest_part_->SingletonPartner(false) != part) { + // Reposition the iterator to the correct partner, or at the end. + for (part_it_.move_to_first(); !part_it_.at_last() && + part_it_.data() != partner; + part_it_.forward()); + } + part_it_.add_after_then_move(part); + latest_part_ = part; +} + +// Make blocks out of any partitions in this WorkingPartSet, and append +// them to the end of the blocks list. bleft, tright and resolution give +// the bounds and resolution of the source image, so that blocks can be +// made to fit in the bounds. +// All ColPartitions go in the used_parts list, as they need to be kept +// around, but are no longer needed. +void WorkingPartSet::ExtractCompletedBlocks(const ICOORD& bleft, + const ICOORD& tright, + int resolution, + ColPartition_LIST* used_parts, + BLOCK_LIST* blocks, + TO_BLOCK_LIST* to_blocks) { + MakeBlocks(bleft, tright, resolution, used_parts); + BLOCK_IT block_it(blocks); + block_it.move_to_last(); + block_it.add_list_after(&completed_blocks_); + TO_BLOCK_IT to_block_it(to_blocks); + to_block_it.move_to_last(); + to_block_it.add_list_after(&to_blocks_); +} + +// Insert the given blocks at the front of the completed_blocks_ list so +// they can be kept in the correct reading order. 
+void WorkingPartSet::InsertCompletedBlocks(BLOCK_LIST* blocks, + TO_BLOCK_LIST* to_blocks) { + BLOCK_IT block_it(&completed_blocks_); + block_it.add_list_before(blocks); + TO_BLOCK_IT to_block_it(&to_blocks_); + to_block_it.add_list_before(to_blocks); +} + +// Make a block using lines parallel to the given vector that fit between +// the min and max coordinates specified by the ColPartitions. +// Construct a block from the given list of partitions. +void WorkingPartSet::MakeBlocks(const ICOORD& bleft, const ICOORD& tright, + int resolution, ColPartition_LIST* used_parts) { + part_it_.move_to_first(); + while (!part_it_.empty()) { + // Gather a list of ColPartitions in block_parts that will be split + // by linespacing into smaller blocks. + ColPartition_LIST block_parts; + ColPartition_IT block_it(&block_parts); + ColPartition* next_part = nullptr; + bool text_block = false; + do { + ColPartition* part = part_it_.extract(); + if (part->blob_type() == BRT_UNKNOWN || + (part->IsTextType() && part->type() != PT_TABLE)) + text_block = true; + part->set_working_set(nullptr); + part_it_.forward(); + block_it.add_after_then_move(part); + next_part = part->SingletonPartner(false); + if (part_it_.empty() || next_part != part_it_.data()) { + // Sequences of partitions can get split by titles. + next_part = nullptr; + } + // Merge adjacent blocks that are of the same type and let the + // linespacing determine the real boundaries. + if (next_part == nullptr && !part_it_.empty()) { + ColPartition* next_block_part = part_it_.data(); + const TBOX& part_box = part->bounding_box(); + const TBOX& next_box = next_block_part->bounding_box(); + + // In addition to the same type, the next box must not be above the + // current box, nor (if image) too far below. 
+ PolyBlockType type = part->type(), next_type = next_block_part->type(); + if (ColPartition::TypesSimilar(type, next_type) && + !part->IsLineType() && !next_block_part->IsLineType() && + next_box.bottom() <= part_box.top() && + (text_block || part_box.bottom() <= next_box.top())) + next_part = next_block_part; + } + } while (!part_it_.empty() && next_part != nullptr); + if (!text_block) { + TO_BLOCK* to_block = ColPartition::MakeBlock(bleft, tright, + &block_parts, used_parts); + if (to_block != nullptr) { + TO_BLOCK_IT to_block_it(&to_blocks_); + to_block_it.add_to_end(to_block); + BLOCK_IT block_it(&completed_blocks_); + block_it.add_to_end(to_block->block); + } + } else { + // Further sub-divide text blocks where linespacing changes. + ColPartition::LineSpacingBlocks(bleft, tright, resolution, &block_parts, + used_parts, + &completed_blocks_, &to_blocks_); + } + } + part_it_.set_to_list(&part_set_); + latest_part_ = nullptr; + ASSERT_HOST(completed_blocks_.length() == to_blocks_.length()); +} + +} // namespace tesseract. diff --git a/tesseract/src/textord/workingpartset.h b/tesseract/src/textord/workingpartset.h new file mode 100644 index 00000000..6fb342aa --- /dev/null +++ b/tesseract/src/textord/workingpartset.h @@ -0,0 +1,88 @@ +/////////////////////////////////////////////////////////////////////// +// File: workingpartset.h +// Description: Class to hold a working set of partitions of the page +// during construction of text/image regions. +// Author: Ray Smith +// Created: Tue Ocr 28 17:21:01 PDT 2008 +// +// (C) Copyright 2008, Google Inc. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +/////////////////////////////////////////////////////////////////////// + +#ifndef TESSERACT_TEXTORD_WORKINGPARSET_H_ +#define TESSERACT_TEXTORD_WORKINGPARSET_H_ + +#include "blobbox.h" // For TO_BLOCK_LIST and BLOCK_LIST. +#include "colpartition.h" // For ColPartition_LIST. + +namespace tesseract { + +// WorkingPartSet holds a working set of ColPartitions during transformation +// from the grid-based storage to regions in logical reading order, and is +// therefore only used during construction of the regions. +class WorkingPartSet : public ELIST_LINK { + public: + explicit WorkingPartSet(ColPartition* column) + : column_(column), latest_part_(nullptr), part_it_(&part_set_) { + } + + // Simple accessors. + ColPartition* column() const { + return column_; + } + void set_column(ColPartition* col) { + column_ = col; + } + + // Add the partition to this WorkingPartSet. Partitions are generally + // stored in the order in which they are received, but if the partition + // has a SingletonPartner, make sure that it stays with its partner. + void AddPartition(ColPartition* part); + + // Make blocks out of any partitions in this WorkingPartSet, and append + // them to the end of the blocks list. bleft, tright and resolution give + // the bounds and resolution of the source image, so that blocks can be + // made to fit in the bounds. + // All ColPartitions go in the used_parts list, as they need to be kept + // around, but are no longer needed. 
+ void ExtractCompletedBlocks(const ICOORD& bleft, const ICOORD& tright, + int resolution, ColPartition_LIST* used_parts, + BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); + + // Insert the given blocks at the front of the completed_blocks_ list so + // they can be kept in the correct reading order. + void InsertCompletedBlocks(BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks); + + private: + // Convert the part_set_ into blocks, starting a new block at a break + // in partnerships, or a change in linespacing (for text). + void MakeBlocks(const ICOORD& bleft, const ICOORD& tright, int resolution, + ColPartition_LIST* used_parts); + + // The column that this working set applies to. Used by the caller. + ColPartition* column_; + // The most recently added partition. + ColPartition* latest_part_; + // All the partitions in the block that is currently under construction. + ColPartition_LIST part_set_; + // Iteratorn on part_set_ pointing to the most recent addition. + ColPartition_IT part_it_; + // The blocks that have been made so far and belong before the current block. + BLOCK_LIST completed_blocks_; + TO_BLOCK_LIST to_blocks_; +}; + +ELISTIZEH(WorkingPartSet) + +} // namespace tesseract. + +#endif // TESSERACT_TEXTORD_WORKINGPARSET_H_ |