diff options
Diffstat (limited to 'leptonica/prog/jbwords.c')
-rw-r--r-- | leptonica/prog/jbwords.c | 135 |
1 files changed, 135 insertions, 0 deletions
diff --git a/leptonica/prog/jbwords.c b/leptonica/prog/jbwords.c new file mode 100644 index 00000000..582cb415 --- /dev/null +++ b/leptonica/prog/jbwords.c @@ -0,0 +1,135 @@ +/*====================================================================* + - Copyright (C) 2001 Leptonica. All rights reserved. + - + - Redistribution and use in source and binary forms, with or without + - modification, are permitted provided that the following conditions + - are met: + - 1. Redistributions of source code must retain the above copyright + - notice, this list of conditions and the following disclaimer. + - 2. Redistributions in binary form must reproduce the above + - copyright notice, this list of conditions and the following + - disclaimer in the documentation and/or other materials + - provided with the distribution. + - + - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY + - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + *====================================================================*/ + +/* + * jbwords.c + * + * jbwords dirin thresh weight rootname [firstpage npages] + * + * dirin: directory of input pages + * reduction: 1 (full res) or 2 (half-res) + * thresh: 0.80 is a reasonable compromise between accuracy + * and number of classes, for characters + * weight: 0.6 seems to work reasonably with thresh = 0.8. + * rootname: used for naming the two output files (templates + * and c.c. data) + * firstpage: <optional> 0-based; default is 0 + * npages: <optional> use 0 for all pages; default is 0 + * + */ + +#ifdef HAVE_CONFIG_H +#include <config_auto.h> +#endif /* HAVE_CONFIG_H */ + +#include "allheaders.h" + + /* Eliminate very large "words" */ +static const l_int32 MAX_WORD_WIDTH = 500; +static const l_int32 MAX_WORD_HEIGHT = 200; + +#define BUF_SIZE 512 + + /* select additional debug output */ +#define RENDER_PAGES 1 +#define RENDER_DEBUG 1 + + +int main(int argc, + char **argv) +{ +char filename[BUF_SIZE]; +char *dirin, *rootname; +l_int32 reduction, i, firstpage, npages; +l_float32 thresh, weight; +JBDATA *data; +JBCLASSER *classer; +NUMA *natl; +PIX *pix; +PIXA *pixa, *pixadb; +static char mainName[] = "jbwords"; + + if (argc != 6 && argc != 8) + return ERROR_INT(" Syntax: jbwords dirin reduction thresh " + "weight rootname [firstpage, npages]", mainName, 1); + dirin = argv[1]; + reduction = atoi(argv[2]); + thresh = atof(argv[3]); + weight = atof(argv[4]); + rootname = argv[5]; + if (argc == 6) { + firstpage = 0; + npages = 0; + } else { + firstpage = atoi(argv[6]); + npages = atoi(argv[7]); + } + setLeptDebugOK(1); + + classer = jbWordsInTextlines(dirin, reduction, MAX_WORD_WIDTH, + MAX_WORD_HEIGHT, thresh, weight, + &natl, firstpage, npages); + + /* Save and write out the result */ + data = jbDataSave(classer); + jbDataWrite(rootname, data); + +#if RENDER_PAGES + /* Render the pages from the classifier data, and write to file. + * Use debugflag == FALSE to omit outlines of each component. */ + pixa = jbDataRender(data, FALSE); + npages = pixaGetCount(pixa); + for (i = 0; i < npages; i++) { + pix = pixaGetPix(pixa, i, L_CLONE); + snprintf(filename, BUF_SIZE, "%s.%05d", rootname, i); + lept_stderr("filename: %s\n", filename); + pixWrite(filename, pix, IFF_PNG); + pixDestroy(&pix); + } + pixaDestroy(&pixa); +#endif /* RENDER_PAGES */ + +#if RENDER_DEBUG + /* Use debugflag == TRUE to see outlines of each component. */ + pixadb = jbDataRender(data, TRUE); + /* Write the debug pages out */ + npages = pixaGetCount(pixadb); + for (i = 0; i < npages; i++) { + pix = pixaGetPix(pixadb, i, L_CLONE); + snprintf(filename, BUF_SIZE, "%s.db.%05d", rootname, i); + lept_stderr("filename: %s\n", filename); + pixWrite(filename, pix, IFF_PNG); + pixDestroy(&pix); + } + pixaDestroy(&pixadb); +#endif /* RENDER_DEBUG */ + + jbClasserDestroy(&classer); + jbDataDestroy(&data); + numaDestroy(&natl); + return 0; +} + |