diff options
Diffstat (limited to 'tesseract/src/ccstruct/seam.h')
-rw-r--r-- | tesseract/src/ccstruct/seam.h | 204 |
1 files changed, 204 insertions, 0 deletions
diff --git a/tesseract/src/ccstruct/seam.h b/tesseract/src/ccstruct/seam.h new file mode 100644 index 00000000..0871bd28 --- /dev/null +++ b/tesseract/src/ccstruct/seam.h @@ -0,0 +1,204 @@ +/****************************************************************************** + * + * File: seam.h + * Author: Mark Seaman, SW Productivity + * + * (c) Copyright 1987, Hewlett-Packard Company. + ** Licensed under the Apache License, Version 2.0 (the "License"); + ** you may not use this file except in compliance with the License. + ** You may obtain a copy of the License at + ** http://www.apache.org/licenses/LICENSE-2.0 + ** Unless required by applicable law or agreed to in writing, software + ** distributed under the License is distributed on an "AS IS" BASIS, + ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + ** See the License for the specific language governing permissions and + ** limitations under the License. + * + *****************************************************************************/ +#ifndef SEAM_H +#define SEAM_H + +// Include automatically generated configuration file if running autoconf. +#ifdef HAVE_CONFIG_H +#include "config_auto.h" +#endif + +#include "blobs.h" +#include "split.h" + +namespace tesseract { + +using PRIORITY = float; /* PRIORITY */ + +class SEAM { + public: + // A seam with no splits + SEAM(float priority, const TPOINT& location) + : priority_(priority), + location_(location), + widthp_(0), + widthn_(0), + num_splits_(0) {} + // A seam with a single split point. + SEAM(float priority, const TPOINT& location, const SPLIT& split) + : priority_(priority), + location_(location), + widthp_(0), + widthn_(0), + num_splits_(1) { + splits_[0] = split; + } + // Default copy constructor, operator= and destructor are OK! + + // Accessors. + float priority() const { return priority_; } + void set_priority(float priority) { priority_ = priority; } + bool HasAnySplits() const { return num_splits_ > 0; } + + // Returns the bounding box of all the points in the seam. + TBOX bounding_box() const; + + // Returns true if other can be combined into *this. + bool CombineableWith(const SEAM& other, int max_x_dist, + float max_total_priority) const { + int dist = location_.x - other.location_.x; + return -max_x_dist < dist && dist < max_x_dist && + num_splits_ + other.num_splits_<= kMaxNumSplits && + priority_+ other.priority_ < max_total_priority && + !OverlappingSplits(other) && !SharesPosition(other); + } + + // Combines other into *this. Only works if CombinableWith returned true. + void CombineWith(const SEAM& other) { + priority_ += other.priority_; + location_ += other.location_; + location_ /= 2; + + for (uint8_t s = 0; s < other.num_splits_ && num_splits_ < kMaxNumSplits; ++s) + splits_[num_splits_++] = other.splits_[s]; + } + + // Returns true if the given blob contains all splits of *this SEAM. + bool ContainedByBlob(const TBLOB& blob) const { + for (int s = 0; s < num_splits_; ++s) { + if (!splits_[s].ContainedByBlob(blob)) return false; + } + return true; + } + + // Returns true if the given EDGEPT is used by this SEAM, checking only + // the EDGEPT pointer, not the coordinates. + bool UsesPoint(const EDGEPT* point) const { + for (int s = 0; s < num_splits_; ++s) { + if (splits_[s].UsesPoint(point)) return true; + } + return false; + } + // Returns true if *this and other share any common point, by coordinates. + bool SharesPosition(const SEAM& other) const { + for (int s = 0; s < num_splits_; ++s) { + for (int t = 0; t < other.num_splits_; ++t) + if (splits_[s].SharesPosition(other.splits_[t])) return true; + } + return false; + } + // Returns true if *this and other have any vertically overlapping splits. + bool OverlappingSplits(const SEAM& other) const { + for (int s = 0; s < num_splits_; ++s) { + TBOX split1_box = splits_[s].bounding_box(); + for (int t = 0; t < other.num_splits_; ++t) { + TBOX split2_box = other.splits_[t].bounding_box(); + if (split1_box.y_overlap(split2_box)) return true; + } + } + return false; + } + + // Marks the edgepts used by the seam so the segments made by the cut + // never get split further by another seam in the future. + void Finalize() { + for (int s = 0; s < num_splits_; ++s) { + splits_[s].point1->MarkChop(); + splits_[s].point2->MarkChop(); + } + } + + // Returns true if the splits in *this SEAM appear OK in the sense that they + // do not cross any outlines and do not chop off any ridiculously small + // pieces. + bool IsHealthy(const TBLOB& blob, int min_points, int min_area) const; + + // Computes the widthp_/widthn_ range for all existing SEAMs and for *this + // seam, which is about to be inserted at insert_index. Returns false if + // any of the computations fails, as this indicates an invalid chop. + // widthn_/widthp_ are only changed if modify is true. + bool PrepareToInsertSeam(const GenericVector<SEAM*>& seams, + const GenericVector<TBLOB*>& blobs, int insert_index, + bool modify); + // Computes the widthp_/widthn_ range. Returns false if not all the splits + // are accounted for. widthn_/widthp_ are only changed if modify is true. + bool FindBlobWidth(const GenericVector<TBLOB*>& blobs, int index, + bool modify); + + // Splits this blob into two blobs by applying the splits included in + // *this SEAM + void ApplySeam(bool italic_blob, TBLOB* blob, TBLOB* other_blob) const; + // Undoes ApplySeam by removing the seam between these two blobs. + // Produces one blob as a result, and deletes other_blob. + void UndoSeam(TBLOB* blob, TBLOB* other_blob) const; + + // Prints everything in *this SEAM. + void Print(const char* label) const; + // Prints a collection of SEAMs. + static void PrintSeams(const char* label, const GenericVector<SEAM*>& seams); +#ifndef GRAPHICS_DISABLED + // Draws the seam in the given window. + void Mark(ScrollView* window) const; +#endif + + // Break up the blobs in this chain so that they are all independent. + // This operation should undo the affect of join_pieces. + static void BreakPieces(const GenericVector<SEAM*>& seams, + const GenericVector<TBLOB*>& blobs, int first, + int last); + // Join a group of base level pieces into a single blob that can then + // be classified. + static void JoinPieces(const GenericVector<SEAM*>& seams, + const GenericVector<TBLOB*>& blobs, int first, + int last); + + // Hides the seam so the outlines appear not to be cut by it. + void Hide() const; + // Undoes hide, so the outlines are cut by the seam. + void Reveal() const; + + // Computes and returns, but does not set, the full priority of *this SEAM. + // The arguments here are config parameters defined in Wordrec. Add chop_ + // to the beginning of the name. + float FullPriority(int xmin, int xmax, double overlap_knob, + int centered_maxwidth, double center_knob, + double width_change_knob) const; + + private: + // Maximum number of splits that a SEAM can hold. + static const uint8_t kMaxNumSplits = 3; + // Priority of this split. Lower is better. + float priority_; + // Position of the middle of the seam. + TPOINT location_; + // A range such that all splits in *this SEAM are contained within blobs in + // the range [index - widthn_,index + widthp_] where index is the index of + // this SEAM in the seams vector. + int8_t widthp_; + int8_t widthn_; + // Number of splits_ that are used. + uint8_t num_splits_; + // Set of pairs of points that are the ends of each split in the SEAM. + SPLIT splits_[kMaxNumSplits]; +}; + +void start_seam_list(TWERD* word, GenericVector<SEAM*>* seam_array); + +} // namespace tesseract + +#endif |