summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'leptonica/prog/comparepages.c')
-rw-r--r--leptonica/prog/comparepages.c116
1 files changed, 116 insertions, 0 deletions
diff --git a/leptonica/prog/comparepages.c b/leptonica/prog/comparepages.c
new file mode 100644
index 00000000..45437356
--- /dev/null
+++ b/leptonica/prog/comparepages.c
@@ -0,0 +1,116 @@
+/*====================================================================*
+ - Copyright (C) 2001 Leptonica. All rights reserved.
+ -
+ - Redistribution and use in source and binary forms, with or without
+ - modification, are permitted provided that the following conditions
+ - are met:
+ - 1. Redistributions of source code must retain the above copyright
+ - notice, this list of conditions and the following disclaimer.
+ - 2. Redistributions in binary form must reproduce the above
+ - copyright notice, this list of conditions and the following
+ - disclaimer in the documentation and/or other materials
+ - provided with the distribution.
+ -
+ - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
+ - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *====================================================================*/
+
+/*
+ * comparepages.c
+ *
+ * This compares text pages using the location of word bounding boxes.
+ * The goal is to get a fast and robust determination for whether
+ * two pages are the same.
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config_auto.h>
+#endif /* HAVE_CONFIG_H */
+
+#include "allheaders.h"
+
+int main(int argc,  /* not referenced in the body */
+ char **argv)  /* not referenced in the body */
+{  /* Builds a word-box pattern for one page image and compares it with three rearranged copies; always returns 0 */
+l_int32 w, h, n, same;  /* w, h: dimensions of pixb1; n: count taken from naa1; same: comparison output */
+BOXA *boxa1, *boxa2;  /* first output of pixGetWordBoxesInTextlines() for pixb1 / pixb2 */
+NUMA *nai1, *nai2;  /* second output of pixGetWordBoxesInTextlines() for pixb1 / pixb2 */
+NUMAA *naa1, *naa2;  /* results of boxaExtractSortedPattern(); the two inputs to the comparison */
+PIX *pixs, *pixt, *pixb1, *pixb2;  /* input image, box rendering, converted input, rearranged copy */
+
+ setLeptDebugOK(1);
+ lept_mkdir("lept/comp");  /* the files written below use paths under /tmp/lept/comp/ */
+
+ pixs = pixRead("lucasta.047.jpg");  /* NOTE(review): result is used below without a NULL check */
+ pixb1 = pixConvertTo1(pixs, 128);  /* presumably a 1 bpp conversion at threshold 128 -- confirm */
+ pixGetWordBoxesInTextlines(pixb1, 10, 10, 500, 50, &boxa1, &nai1);  /* fills boxa1 and nai1 */
+ pixt = pixDrawBoxaRandom(pixs, boxa1, 2);  /* new image built from pixs and boxa1, for inspection */
+ pixDisplay(pixt, 100, 100);
+ pixWrite("/tmp/lept/comp/pixt.png", pixt, IFF_PNG);
+ naa1 = boxaExtractSortedPattern(boxa1, nai1);  /* reference pattern, reused by all three comparisons */
+ numaaWrite("/tmp/lept/comp/naa1.naa", naa1);
+ n = numaaGetCount(naa1);  /* reported below as the number of textlines */
+ lept_stderr("Number of textlines = %d\n", n);
+ pixDisplay(pixb1, 300, 0);
+
+ /* Case 1: Translate -- copy all of pixb1 into an empty template at offset (148, 133) */
+ pixb2 = pixCreateTemplate(pixb1);
+ pixGetDimensions(pixb1, &w, &h, NULL);
+ pixRasterop(pixb2, 148, 133, w, h, PIX_SRC, pixb1, 0, 0);  /* presumably chosen to sit just under the 150, 150 passed below -- confirm */
+ pixDisplay(pixb2, 600, 0);
+ pixGetWordBoxesInTextlines(pixb2, 10, 10, 500, 50, &boxa2, &nai2);  /* same parameters as for pixb1 */
+ naa2 = boxaExtractSortedPattern(boxa2, nai2);
+ numaaCompareImagesByBoxes(naa1, naa2, 5, 10, 150, 150, 20, 20, &same, 1);  /* result is stored in 'same' */
+ lept_stderr("Translation. same?: %d\n\n", same);
+ boxaDestroy(&boxa2);  /* release the per-case objects so the names can be reused */
+ numaDestroy(&nai2);
+ pixDestroy(&pixb2);
+ numaaDestroy(&naa2);
+
+ /* Case 2: Aligned part is below h/3 -- bottom third copied to the top; rows from h/3 down stay in place */
+ pixb2 = pixCreateTemplate(pixb1);
+ pixGetDimensions(pixb1, &w, &h, NULL);
+ pixRasterop(pixb2, 0, 0, w, h / 3, PIX_SRC, pixb1, 0, 2 * h / 3);  /* source rows from 2h/3 go to dest rows from 0 */
+ pixRasterop(pixb2, 0, h / 3, w, 2 * h / 3, PIX_SRC, pixb1, 0, h / 3);  /* source and dest both start at row h/3 */
+ pixDisplay(pixb2, 900, 0);
+ pixGetWordBoxesInTextlines(pixb2, 10, 10, 500, 50, &boxa2, &nai2);
+ naa2 = boxaExtractSortedPattern(boxa2, nai2);
+ numaaCompareImagesByBoxes(naa1, naa2, 5, 10, 150, 150, 20, 20, &same, 1);  /* same arguments as case 1 */
+ lept_stderr("Aligned part below h/3. same?: %d\n\n", same);
+ boxaDestroy(&boxa2);
+ numaDestroy(&nai2);
+ pixDestroy(&pixb2);
+ numaaDestroy(&naa2);
+
+ /* Case 3: Top and bottom switched; no aligned parts -- every copied row lands at a different height */
+ pixb2 = pixCreateTemplate(pixb1);
+ pixGetDimensions(pixb1, &w, &h, NULL);
+ pixRasterop(pixb2, 0, 0, w, h / 3, PIX_SRC, pixb1, 0, 2 * h / 3);  /* source rows from 2h/3 go to dest rows from 0 */
+ pixRasterop(pixb2, 0, h / 3, w, 2 * h / 3, PIX_SRC, pixb1, 0, 0);  /* source rows from 0 go to dest rows from h/3 */
+ pixDisplay(pixb2, 1200, 0);
+ pixGetWordBoxesInTextlines(pixb2, 10, 10, 500, 50, &boxa2, &nai2);
+ naa2 = boxaExtractSortedPattern(boxa2, nai2);
+ numaaCompareImagesByBoxes(naa1, naa2, 5, 10, 150, 150, 20, 20, &same, 1);  /* same arguments as case 1 */
+ lept_stderr("Top/Bot switched; no alignment. Same?: %d\n", same);
+ boxaDestroy(&boxa2);
+ numaDestroy(&nai2);
+ pixDestroy(&pixb2);
+ numaaDestroy(&naa2);
+
+ boxaDestroy(&boxa1);  /* release the reference-page objects created before case 1 */
+ numaDestroy(&nai1);
+ pixDestroy(&pixs);
+ pixDestroy(&pixb1);
+ pixDestroy(&pixt);
+ numaaDestroy(&naa1);
+ return 0;
+}