summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'leptonica/src/bytearray.c')
-rw-r--r--leptonica/src/bytearray.c653
1 files changed, 653 insertions, 0 deletions
diff --git a/leptonica/src/bytearray.c b/leptonica/src/bytearray.c
new file mode 100644
index 00000000..ccc8dce4
--- /dev/null
+++ b/leptonica/src/bytearray.c
@@ -0,0 +1,653 @@
+/*====================================================================*
+ - Copyright (C) 2001 Leptonica. All rights reserved.
+ -
+ - Redistribution and use in source and binary forms, with or without
+ - modification, are permitted provided that the following conditions
+ - are met:
+ - 1. Redistributions of source code must retain the above copyright
+ - notice, this list of conditions and the following disclaimer.
+ - 2. Redistributions in binary form must reproduce the above
+ - copyright notice, this list of conditions and the following
+ - disclaimer in the documentation and/or other materials
+ - provided with the distribution.
+ -
+ - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
+ - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *====================================================================*/
+
+/*!
+ * \file bytearray.c
+ * <pre>
+ *
+ * Functions for handling byte arrays, in analogy with C++ 'strings'
+ *
+ * Creation, copy, clone, destruction
+ * L_BYTEA *l_byteaCreate()
+ * L_BYTEA *l_byteaInitFromMem()
+ * L_BYTEA *l_byteaInitFromFile()
+ * L_BYTEA *l_byteaInitFromStream()
+ * L_BYTEA *l_byteaCopy()
+ * void l_byteaDestroy()
+ *
+ * Accessors
+ * size_t l_byteaGetSize()
+ * l_uint8 *l_byteaGetData()
+ * l_uint8 *l_byteaCopyData()
+ *
+ * Appending
+ * l_int32 l_byteaAppendData()
+ * l_int32 l_byteaAppendString()
+ * static l_int32 l_byteaExtendArrayToSize()
+ *
+ * Join/Split
+ * l_int32 l_byteaJoin()
+ * l_int32 l_byteaSplit()
+ *
+ * Search
+ * l_int32 l_byteaFindEachSequence()
+ *
+ * Output to file
+ * l_int32 l_byteaWrite()
+ * l_int32 l_byteaWriteStream()
+ *
+ * The internal data array is always null-terminated, for ease of use
+ * in the event that it is an ascii string without null bytes.
+ * </pre>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config_auto.h>
+#endif /* HAVE_CONFIG_H */
+
+#include <string.h>
+#include "allheaders.h"
+
+ /* Bounds on array size */
+static const l_uint32 MaxArraySize = 1000000000; /* 10^9 bytes */
+static const l_int32 InitialArraySize = 200; /*!< n'importe quoi */
+
+ /* Static function */
+static l_int32 l_byteaExtendArrayToSize(L_BYTEA *ba, size_t size);
+
+/*---------------------------------------------------------------------*
+ * Creation, copy, clone, destruction *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief l_byteaCreate()
+ *
+ * \param[in] nbytes determines initial size of data array
+ * \return l_bytea, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ * (1) The allocated array is n + 1 bytes. This allows room
+ * for null termination.
+ * </pre>
+ */
+L_BYTEA *
+l_byteaCreate(size_t nbytes)
+{
+L_BYTEA *ba;
+
+ PROCNAME("l_byteaCreate");
+
+ if (nbytes <= 0 || nbytes > MaxArraySize)
+ nbytes = InitialArraySize;
+ ba = (L_BYTEA *)LEPT_CALLOC(1, sizeof(L_BYTEA));
+ ba->data = (l_uint8 *)LEPT_CALLOC(nbytes + 1, sizeof(l_uint8));
+ if (!ba->data) {
+ l_byteaDestroy(&ba);
+ return (L_BYTEA *)ERROR_PTR("ba array not made", procName, NULL);
+ }
+ ba->nalloc = nbytes + 1;
+ ba->refcount = 1;
+ return ba;
+}
+
+
+/*!
+ * \brief l_byteaInitFromMem()
+ *
+ * \param[in] data to be copied to the array
+ * \param[in] size amount of data
+ * \return l_bytea, or NULL on error
+ */
+L_BYTEA *
+l_byteaInitFromMem(const l_uint8 *data,
+ size_t size)
+{
+L_BYTEA *ba;
+
+ PROCNAME("l_byteaInitFromMem");
+
+ if (!data)
+ return (L_BYTEA *)ERROR_PTR("data not defined", procName, NULL);
+ if (size <= 0)
+ return (L_BYTEA *)ERROR_PTR("no bytes to initialize", procName, NULL);
+ if (size > MaxArraySize)
+ return (L_BYTEA *)ERROR_PTR("size is too big", procName, NULL);
+
+ if ((ba = l_byteaCreate(size)) == NULL)
+ return (L_BYTEA *)ERROR_PTR("ba not made", procName, NULL);
+ memcpy(ba->data, data, size);
+ ba->size = size;
+ return ba;
+}
+
+
+/*!
+ * \brief l_byteaInitFromFile()
+ *
+ * \param[in] fname
+ * \return l_bytea, or NULL on error
+ */
+L_BYTEA *
+l_byteaInitFromFile(const char *fname)
+{
+FILE *fp;
+L_BYTEA *ba;
+
+ PROCNAME("l_byteaInitFromFile");
+
+ if (!fname)
+ return (L_BYTEA *)ERROR_PTR("fname not defined", procName, NULL);
+
+ if ((fp = fopenReadStream(fname)) == NULL)
+ return (L_BYTEA *)ERROR_PTR("file stream not opened", procName, NULL);
+ ba = l_byteaInitFromStream(fp);
+ fclose(fp);
+ if (!ba)
+ return (L_BYTEA *)ERROR_PTR("ba not made", procName, NULL);
+ return ba;
+}
+
+
+/*!
+ * \brief l_byteaInitFromStream()
+ *
+ * \param[in] fp file stream
+ * \return l_bytea, or NULL on error
+ */
+L_BYTEA *
+l_byteaInitFromStream(FILE *fp)
+{
+l_uint8 *data;
+size_t nbytes;
+L_BYTEA *ba;
+
+ PROCNAME("l_byteaInitFromStream");
+
+ if (!fp)
+ return (L_BYTEA *)ERROR_PTR("stream not defined", procName, NULL);
+
+ if ((data = l_binaryReadStream(fp, &nbytes)) == NULL)
+ return (L_BYTEA *)ERROR_PTR("data not read", procName, NULL);
+ if ((ba = l_byteaCreate(nbytes)) == NULL) {
+ LEPT_FREE(data);
+ return (L_BYTEA *)ERROR_PTR("ba not made", procName, NULL);
+ }
+ memcpy(ba->data, data, nbytes);
+ ba->size = nbytes;
+ LEPT_FREE(data);
+ return ba;
+}
+
+
+/*!
+ * \brief l_byteaCopy()
+ *
+ * \param[in] bas source lba
+ * \param[in] copyflag L_COPY, L_CLONE
+ * \return clone or copy of bas, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ * (1) If cloning, up the refcount and return a ptr to %bas.
+ * </pre>
+ */
+L_BYTEA *
+l_byteaCopy(L_BYTEA *bas,
+ l_int32 copyflag)
+{
+ PROCNAME("l_byteaCopy");
+
+ if (!bas)
+ return (L_BYTEA *)ERROR_PTR("bas not defined", procName, NULL);
+
+ if (copyflag == L_CLONE) {
+ bas->refcount++;
+ return bas;
+ }
+
+ return l_byteaInitFromMem(bas->data, bas->size);
+}
+
+
+/*!
+ * \brief l_byteaDestroy()
+ *
+ * \param[in,out] pba will be set to null before returning
+ * \return void
+ *
+ * <pre>
+ * Notes:
+ * (1) Decrements the ref count and, if 0, destroys the lba.
+ * (2) Always nulls the input ptr.
+ * (3) If the data has been previously removed, the lba will
+ * have been nulled, so this will do nothing.
+ * </pre>
+ */
+void
+l_byteaDestroy(L_BYTEA **pba)
+{
+L_BYTEA *ba;
+
+ PROCNAME("l_byteaDestroy");
+
+ if (pba == NULL) {
+ L_WARNING("ptr address is null!\n", procName);
+ return;
+ }
+
+ if ((ba = *pba) == NULL)
+ return;
+
+ /* Decrement the ref count. If it is 0, destroy the lba. */
+ ba->refcount--;
+ if (ba->refcount <= 0) {
+ if (ba->data) LEPT_FREE(ba->data);
+ LEPT_FREE(ba);
+ }
+ *pba = NULL;
+}
+
+
+/*---------------------------------------------------------------------*
+ * Accessors *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief l_byteaGetSize()
+ *
+ * \param[in] ba
+ * \return size of stored byte array, or 0 on error
+ */
+size_t
+l_byteaGetSize(L_BYTEA *ba)
+{
+ PROCNAME("l_byteaGetSize");
+
+ if (!ba)
+ return ERROR_INT("ba not defined", procName, 0);
+ return ba->size;
+}
+
+
+/*!
+ * \brief l_byteaGetData()
+ *
+ * \param[in] ba
+ * \param[out] psize size of data in lba
+ * \return ptr to existing data array, or NULL on error
+ *
+ * <pre>
+ * Notes:
+ * (1) The returned ptr is owned by %ba. Do not free it!
+ * </pre>
+ */
+l_uint8 *
+l_byteaGetData(L_BYTEA *ba,
+ size_t *psize)
+{
+ PROCNAME("l_byteaGetData");
+
+ if (!ba)
+ return (l_uint8 *)ERROR_PTR("ba not defined", procName, NULL);
+ if (!psize)
+ return (l_uint8 *)ERROR_PTR("&size not defined", procName, NULL);
+
+ *psize = ba->size;
+ return ba->data;
+}
+
+
+/*!
+ * \brief l_byteaCopyData()
+ *
+ * \param[in] ba
+ * \param[out] psize size of data in lba
+ * \return copy of data in use in the data array, or NULL on error.
+ *
+ * <pre>
+ * Notes:
+ * (1) The returned data is owned by the caller. The input %ba
+ * still owns the original data array.
+ * </pre>
+ */
+l_uint8 *
+l_byteaCopyData(L_BYTEA *ba,
+ size_t *psize)
+{
+l_uint8 *data;
+
+ PROCNAME("l_byteaCopyData");
+
+ if (!psize)
+ return (l_uint8 *)ERROR_PTR("&size not defined", procName, NULL);
+ *psize = 0;
+ if (!ba)
+ return (l_uint8 *)ERROR_PTR("ba not defined", procName, NULL);
+
+ data = l_byteaGetData(ba, psize);
+ return l_binaryCopy(data, *psize);
+}
+
+
+/*---------------------------------------------------------------------*
+ * Appending *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief l_byteaAppendData()
+ *
+ * \param[in] ba
+ * \param[in] newdata byte array to be appended
+ * \param[in] newbytes size of data array
+ * \return 0 if OK, 1 on error
+ */
+l_ok
+l_byteaAppendData(L_BYTEA *ba,
+ const l_uint8 *newdata,
+ size_t newbytes)
+{
+size_t size, nalloc, reqsize;
+
+ PROCNAME("l_byteaAppendData");
+
+ if (!ba)
+ return ERROR_INT("ba not defined", procName, 1);
+ if (!newdata)
+ return ERROR_INT("newdata not defined", procName, 1);
+
+ size = l_byteaGetSize(ba);
+ reqsize = size + newbytes + 1;
+ nalloc = ba->nalloc;
+ if (nalloc < reqsize) {
+ if (l_byteaExtendArrayToSize(ba, 2 * reqsize))
+ return ERROR_INT("extension failed", procName, 1);
+ }
+
+ memcpy(ba->data + size, newdata, newbytes);
+ ba->size += newbytes;
+ return 0;
+}
+
+
+/*!
+ * \brief l_byteaAppendString()
+ *
+ * \param[in] ba
+ * \param[in] str null-terminated string to be appended
+ * \return 0 if OK, 1 on error
+ */
+l_ok
+l_byteaAppendString(L_BYTEA *ba,
+ const char *str)
+{
+size_t size, len, nalloc, reqsize;
+
+ PROCNAME("l_byteaAppendString");
+
+ if (!ba)
+ return ERROR_INT("ba not defined", procName, 1);
+ if (!str)
+ return ERROR_INT("str not defined", procName, 1);
+
+ size = l_byteaGetSize(ba);
+ len = strlen(str);
+ reqsize = size + len + 1;
+ nalloc = ba->nalloc;
+ if (nalloc < reqsize) {
+ if (l_byteaExtendArrayToSize(ba, 2 * reqsize))
+ return ERROR_INT("extension failed", procName, 1);
+ }
+
+ memcpy(ba->data + size, str, len);
+ ba->size += len;
+ return 0;
+}
+
+
+/*!
+ * \brief l_byteaExtendArrayToSize()
+ *
+ * \param[in] ba
+ * \param[in] size new size of lba data array
+ * \return 0 if OK; 1 on error
+ *
+ * <pre>
+ * Notes:
+ * (1) If necessary, reallocs the byte array to %size.
+ * (2) The max buffer size is 1 GB.
+ * </pre>
+ */
+static l_int32
+l_byteaExtendArrayToSize(L_BYTEA *ba,
+ size_t size)
+{
+ PROCNAME("l_byteaExtendArrayToSize");
+
+ if (!ba)
+ return ERROR_INT("ba not defined", procName, 1);
+ if (ba->nalloc > MaxArraySize) /* belt & suspenders */
+ return ERROR_INT("ba has too many ptrs", procName, 1);
+ if (size > MaxArraySize)
+ return ERROR_INT("size > 1 GB; too large", procName, 1);
+ if (size <= ba->nalloc) {
+ L_INFO("size too small; no extension\n", procName);
+ return 0;
+ }
+
+ if ((ba->data =
+ (l_uint8 *)reallocNew((void **)&ba->data, ba->nalloc, size)) == NULL)
+ return ERROR_INT("new array not returned", procName, 1);
+ ba->nalloc = size;
+ return 0;
+}
+
+
+/*---------------------------------------------------------------------*
+ * String join/split *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief l_byteaJoin()
+ *
+ * \param[in] ba1
+ * \param[in,out] pba2 data array is added to the one in ba1;
+ * then ba2 is destroyed and its pointer is nulled.
+ * \return 0 if OK, 1 on error
+ *
+ * <pre>
+ * Notes:
+ * (1) It is a no-op, not an error, for %ba2 to be null.
+ * </pre>
+ */
+l_ok
+l_byteaJoin(L_BYTEA *ba1,
+ L_BYTEA **pba2)
+{
+l_uint8 *data2;
+size_t nbytes2;
+L_BYTEA *ba2;
+
+ PROCNAME("l_byteaJoin");
+
+ if (!ba1)
+ return ERROR_INT("ba1 not defined", procName, 1);
+ if (!pba2)
+ return ERROR_INT("&ba2 not defined", procName, 1);
+ if ((ba2 = *pba2) == NULL) return 0;
+
+ data2 = l_byteaGetData(ba2, &nbytes2);
+ l_byteaAppendData(ba1, data2, nbytes2);
+
+ l_byteaDestroy(pba2);
+ return 0;
+}
+
+
+/*!
+ * \brief l_byteaSplit()
+ *
+ * \param[in] ba1 lba to split; array bytes nulled beyond the split loc
+ * \param[in] splitloc location in ba1 to split; ba2 begins there
+ * \param[out] pba2 with data starting at splitloc
+ * \return 0 if OK, 1 on error
+ */
+l_ok
+l_byteaSplit(L_BYTEA *ba1,
+ size_t splitloc,
+ L_BYTEA **pba2)
+{
+l_uint8 *data1;
+size_t nbytes1, nbytes2;
+
+ PROCNAME("l_byteaSplit");
+
+ if (!pba2)
+ return ERROR_INT("&ba2 not defined", procName, 1);
+ *pba2 = NULL;
+ if (!ba1)
+ return ERROR_INT("ba1 not defined", procName, 1);
+
+ data1 = l_byteaGetData(ba1, &nbytes1);
+ if (splitloc >= nbytes1)
+ return ERROR_INT("splitloc invalid", procName, 1);
+ nbytes2 = nbytes1 - splitloc;
+
+ /* Make the new lba */
+ *pba2 = l_byteaInitFromMem(data1 + splitloc, nbytes2);
+
+ /* Null the removed bytes in the input lba */
+ memset(data1 + splitloc, 0, nbytes2);
+ ba1->size = splitloc;
+ return 0;
+}
+
+
+/*---------------------------------------------------------------------*
+ * Search *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief l_byteaFindEachSequence()
+ *
+ * \param[in] ba
+ * \param[in] sequence subarray of bytes to find in data
+ * \param[in] seqlen length of sequence, in bytes
+ * \param[out] pda byte positions of each occurrence of %sequence
+ * \return 0 if OK, 1 on error
+ */
+l_ok
+l_byteaFindEachSequence(L_BYTEA *ba,
+ const l_uint8 *sequence,
+ size_t seqlen,
+ L_DNA **pda)
+{
+l_uint8 *data;
+size_t size;
+
+ PROCNAME("l_byteaFindEachSequence");
+
+ if (!pda)
+ return ERROR_INT("&da not defined", procName, 1);
+ *pda = NULL;
+ if (!ba)
+ return ERROR_INT("ba not defined", procName, 1);
+ if (!sequence)
+ return ERROR_INT("sequence not defined", procName, 1);
+
+ data = l_byteaGetData(ba, &size);
+ *pda = arrayFindEachSequence(data, size, sequence, seqlen);
+ return 0;
+}
+
+
+/*---------------------------------------------------------------------*
+ * Output to file *
+ *---------------------------------------------------------------------*/
+/*!
+ * \brief l_byteaWrite()
+ *
+ * \param[in] fname output file
+ * \param[in] ba
+ * \param[in] startloc first byte to output
+ * \param[in] nbytes number of bytes to write; use 0 to write to
+ * the end of the data array
+ * \return 0 if OK, 1 on error
+ */
+l_ok
+l_byteaWrite(const char *fname,
+ L_BYTEA *ba,
+ size_t startloc,
+ size_t nbytes)
+{
+l_int32 ret;
+FILE *fp;
+
+ PROCNAME("l_byteaWrite");
+
+ if (!fname)
+ return ERROR_INT("fname not defined", procName, 1);
+ if (!ba)
+ return ERROR_INT("ba not defined", procName, 1);
+
+ if ((fp = fopenWriteStream(fname, "wb")) == NULL)
+ return ERROR_INT("stream not opened", procName, 1);
+ ret = l_byteaWriteStream(fp, ba, startloc, nbytes);
+ fclose(fp);
+ return ret;
+}
+
+
+/*!
+ * \brief l_byteaWriteStream()
+ *
+ * \param[in] fp file stream opened for binary write
+ * \param[in] ba
+ * \param[in] startloc first byte to output
+ * \param[in] nbytes number of bytes to write; use 0 to write to
+ * the end of the data array
+ * \return 0 if OK, 1 on error
+ */
+l_ok
+l_byteaWriteStream(FILE *fp,
+ L_BYTEA *ba,
+ size_t startloc,
+ size_t nbytes)
+{
+l_uint8 *data;
+size_t size, maxbytes;
+
+ PROCNAME("l_byteaWriteStream");
+
+ if (!fp)
+ return ERROR_INT("stream not defined", procName, 1);
+ if (!ba)
+ return ERROR_INT("ba not defined", procName, 1);
+
+ data = l_byteaGetData(ba, &size);
+ if (startloc >= size)
+ return ERROR_INT("invalid startloc", procName, 1);
+ maxbytes = size - startloc;
+ nbytes = (nbytes == 0) ? maxbytes : L_MIN(nbytes, maxbytes);
+
+ fwrite(data + startloc, 1, nbytes, fp);
+ return 0;
+}