summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'extract/src')
-rw-r--r--extract/src/alloc.c120
-rw-r--r--extract/src/astring.c41
-rw-r--r--extract/src/astring.h23
-rw-r--r--extract/src/buffer-test.c306
-rw-r--r--extract/src/buffer.c477
-rw-r--r--extract/src/compat_stdint.h25
-rw-r--r--extract/src/compat_strtoll.h9
-rw-r--r--extract/src/compat_va_copy.h8
-rw-r--r--extract/src/document.h150
-rw-r--r--extract/src/docx.c1097
-rw-r--r--extract/src/docx.h84
-rw-r--r--extract/src/docx_template.c910
-rw-r--r--extract/src/docx_template.h17
-rwxr-xr-xextract/src/docx_template_build.py210
-rw-r--r--extract/src/extract-exe.c244
-rw-r--r--extract/src/extract.c1226
-rw-r--r--extract/src/join.c951
-rw-r--r--extract/src/mem.c51
-rw-r--r--extract/src/mem.h14
-rw-r--r--extract/src/memento.c3574
-rw-r--r--extract/src/memento.h343
-rwxr-xr-xextract/src/memento.py83
-rw-r--r--extract/src/misc-test.c86
-rw-r--r--extract/src/outf.c42
-rw-r--r--extract/src/outf.h32
-rw-r--r--extract/src/template.docxbin0 -> 14108 bytes
-rw-r--r--extract/src/xml.c505
-rw-r--r--extract/src/xml.h123
-rw-r--r--extract/src/zip-test.c224
-rw-r--r--extract/src/zip.c307
-rw-r--r--extract/src/zip.h64
31 files changed, 11346 insertions, 0 deletions
diff --git a/extract/src/alloc.c b/extract/src/alloc.c
new file mode 100644
index 00000000..dee2f99a
--- /dev/null
+++ b/extract/src/alloc.c
@@ -0,0 +1,120 @@
+#include "../include/extract_alloc.h"
+#include "memento.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+struct extract_alloc_t /* Allocator instance: a caller-supplied realloc-style callback plus bookkeeping. */
+{
+ extract_realloc_fn_t realloc_fn; /* Callback used by this file to allocate, resize and free. */
+ void* realloc_state; /* Passed unchanged as the first argument of every realloc_fn call. */
+ size_t exp_min_alloc_size; /* If non-zero, round_up() grows request sizes by doubling from this value. */
+ extract_alloc_stats_t stats; /* Counters incremented by extract_malloc/extract_realloc/extract_free. */
+};
+
+int extract_alloc_create(extract_realloc_fn_t realloc_fn, void* realloc_state, extract_alloc_t** palloc)
+/* Allocates a zero-initialised extract_alloc_t using <realloc_fn> and stores it
+in *palloc. Returns 0 on success. If the allocation fails, *palloc is NULL,
+errno is set to ENOMEM and we return -1. */
+{
+    extract_alloc_t* created;
+
+    assert(realloc_fn);
+    assert(palloc);
+    created = realloc_fn(realloc_state, NULL /*ptr*/, sizeof(*created));
+    *palloc = created;
+    if (created == NULL) {
+        errno = ENOMEM;
+        return -1;
+    }
+    memset(created, 0, sizeof(*created));
+    created->realloc_fn = realloc_fn;
+    created->realloc_state = realloc_state;
+    created->exp_min_alloc_size = 0;
+    return 0;
+}
+
+void extract_alloc_destroy(extract_alloc_t** palloc)
+/* Releases *palloc using its own realloc_fn (called with newsize=0) and sets
+*palloc to NULL. Does nothing if *palloc is already NULL. */
+{
+    extract_alloc_t* doomed = *palloc;
+    if (doomed != NULL) {
+        doomed->realloc_fn(doomed->realloc_state, doomed, 0 /*newsize*/);
+        *palloc = NULL;
+    }
+}
+
+extract_alloc_stats_t* extract_alloc_stats(extract_alloc_t* alloc)
+/* Returns pointer to the statistics counters embedded in <alloc>. */
+{
+    extract_alloc_stats_t* counters = &alloc->stats;
+    return counters;
+}
+
+static size_t round_up(extract_alloc_t* alloc, size_t n)
+/* Returns the allocation size to use for a request of <n> bytes.
+
+If <alloc> is NULL or alloc->exp_min_alloc_size is zero, returns <n>
+unchanged. Otherwise returns 0 for n==0, else the smallest value of
+exp_min_alloc_size * 2^k (k >= 0) that is >= n.
+
+If doubling would overflow size_t we fall back to returning <n>; this avoids
+looping forever in builds where assert() is compiled out. */
+{
+    size_t ret;
+    if (!alloc || !alloc->exp_min_alloc_size) return n;
+    if (n == 0) return 0;
+    ret = alloc->exp_min_alloc_size;
+    while (ret < n) {
+        size_t doubled = ret * 2;
+        if (doubled <= ret) {
+            /* Unsigned wrap-around - we cannot round up any further. */
+            return n;
+        }
+        ret = doubled;
+    }
+    return ret;
+}
+
+int (extract_malloc)(extract_alloc_t* alloc, void** pptr, size_t size)
+/* Allocates round_up(alloc, size) bytes, using alloc->realloc_fn if <alloc> is
+non-NULL, otherwise malloc(). *pptr is always set to the result. Returns 0 on
+success, or -1 if the allocator returned NULL for a non-zero size (in which
+case errno is set to ENOMEM when <alloc> is non-NULL). */
+{
+    void* block;
+    size = round_up(alloc, size);
+    if (alloc) {
+        block = alloc->realloc_fn(alloc->realloc_state, NULL, size);
+    }
+    else {
+        block = malloc(size);
+    }
+    *pptr = block;
+    if (block == NULL && size != 0) {
+        if (alloc) errno = ENOMEM;
+        return -1;
+    }
+    if (alloc) alloc->stats.num_malloc += 1;
+    return 0;
+}
+
+int (extract_realloc)(extract_alloc_t* alloc, void** pptr, size_t newsize)
+/* Resizes *pptr to exactly <newsize> bytes (no rounding), using
+alloc->realloc_fn if <alloc> is non-NULL, otherwise realloc(). On success
+updates *pptr and returns 0. On failure leaves *pptr unchanged and returns -1
+(setting errno to ENOMEM when <alloc> is non-NULL). */
+{
+    void* resized;
+    if (alloc) {
+        resized = alloc->realloc_fn(alloc->realloc_state, *pptr, newsize);
+    }
+    else {
+        resized = realloc(*pptr, newsize);
+    }
+    if (resized == NULL && newsize != 0) {
+        if (alloc) errno = ENOMEM;
+        return -1;
+    }
+    *pptr = resized;
+    if (alloc) alloc->stats.num_realloc += 1;
+    return 0;
+}
+
+int (extract_realloc2)(extract_alloc_t* alloc, void** pptr, size_t oldsize, size_t newsize)
+/* Like extract_realloc(), but rounds both sizes with round_up() and skips the
+reallocation entirely if the rounded sizes are equal.
+
+<oldsize> is ignored if *pptr is NULL - this lets callers not worry about edge
+cases, e.g. with strlen+1. */
+{
+    size_t old_rounded = (*pptr) ? round_up(alloc, oldsize) : 0;
+    size_t new_rounded = round_up(alloc, newsize);
+    if (new_rounded != old_rounded) {
+        return (extract_realloc)(alloc, pptr, new_rounded);
+    }
+    return 0;
+}
+
+void (extract_free)(extract_alloc_t* alloc, void** pptr)
+/* Frees *pptr - via alloc->realloc_fn with newsize=0 if <alloc> is non-NULL,
+otherwise via free() - and sets *pptr to NULL. */
+{
+    if (alloc) {
+        (void) alloc->realloc_fn(alloc->realloc_state, *pptr, 0);
+        *pptr = NULL;
+        alloc->stats.num_free += 1;
+    }
+    else {
+        free(*pptr);
+        *pptr = NULL;
+    }
+}
+
+void extract_alloc_exp_min(extract_alloc_t* alloc, size_t size)
+/* Sets the starting size that round_up() doubles from when rounding up
+allocation sizes. Zero disables rounding. */
+{
+    size_t* setting = &alloc->exp_min_alloc_size;
+    *setting = size;
+}
diff --git a/extract/src/astring.c b/extract/src/astring.c
new file mode 100644
index 00000000..1d273c9e
--- /dev/null
+++ b/extract/src/astring.c
@@ -0,0 +1,41 @@
+#include "../include/extract_alloc.h"
+
+#include "astring.h"
+#include "memento.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+
+void extract_astring_init(extract_astring_t* string)
+/* Puts <string> into the empty state. Does not free anything. */
+{
+    string->chars_num = 0;
+    string->chars = NULL;
+}
+
+void extract_astring_free(extract_alloc_t* alloc, extract_astring_t* string)
+/* Frees the character data owned by <string> and leaves it in the empty
+state. */
+{
+    extract_free(alloc, &string->chars);
+    string->chars = NULL;
+    string->chars_num = 0;
+}
+
+
+int extract_astring_catl(extract_alloc_t* alloc, extract_astring_t* string, const char* s, size_t s_len)
+/* Appends the <s_len> bytes at <s> to <string>, followed by a terminating
+zero. Returns 0 on success or -1 if the reallocation fails, in which case
+<string> is unchanged. */
+{
+    size_t old_len = string->chars_num;
+    size_t new_len = old_len + s_len;
+    if (extract_realloc2(alloc, &string->chars, old_len + 1, new_len + 1)) {
+        return -1;
+    }
+    memcpy(string->chars + old_len, s, s_len);
+    string->chars[new_len] = 0;
+    string->chars_num = new_len;
+    return 0;
+}
+
+int extract_astring_catc(extract_alloc_t* alloc, extract_astring_t* string, char c)
+/* Appends the single character <c>. Returns as extract_astring_catl(). */
+{
+    int e = extract_astring_catl(alloc, string, &c, 1);
+    return e;
+}
+
+int extract_astring_cat(extract_alloc_t* alloc, extract_astring_t* string, const char* s)
+/* Appends the zero-terminated string <s>. Returns as
+extract_astring_catl(). */
+{
+    size_t s_len = strlen(s);
+    return extract_astring_catl(alloc, string, s, s_len);
+}
+
diff --git a/extract/src/astring.h b/extract/src/astring.h
new file mode 100644
index 00000000..947e6587
--- /dev/null
+++ b/extract/src/astring.h
@@ -0,0 +1,23 @@
+#ifndef ARTIFEX_EXTRACT_AUTOSTRING_XML
+#define ARTIFEX_EXTRACT_AUTOSTRING_XML
+
+/* Only for internal use by extract code. */
+
+/* A simple string struct that reallocs as required. */
+typedef struct
+{
+ char* chars; /* NULL or zero-terminated. */
+ size_t chars_num; /* Length of string pointed to by .chars, not counting the terminating zero. */
+} extract_astring_t;
+
+void extract_astring_init(extract_astring_t* string); /* Sets <string> to the empty state: .chars=NULL, .chars_num=0. */
+
+void extract_astring_free(extract_alloc_t* alloc, extract_astring_t* string); /* Frees .chars using <alloc> and resets <string> to the empty state. */
+
+int extract_astring_catl(extract_alloc_t* alloc, extract_astring_t* string, const char* s, size_t s_len); /* Appends <s_len> bytes from <s> plus a terminating zero. Returns 0, or -1 if reallocation fails. */
+
+int extract_astring_catc(extract_alloc_t* alloc, extract_astring_t* string, char c); /* Appends single char <c>; returns as extract_astring_catl(). */
+
+int extract_astring_cat(extract_alloc_t* alloc, extract_astring_t* string, const char* s); /* Appends zero-terminated <s>; returns as extract_astring_catl(). */
+
+#endif
diff --git a/extract/src/buffer-test.c b/extract/src/buffer-test.c
new file mode 100644
index 00000000..6701fbab
--- /dev/null
+++ b/extract/src/buffer-test.c
@@ -0,0 +1,306 @@
+#include "../include/extract_buffer.h"
+#include "../include/extract_alloc.h"
+
+#include "mem.h"
+#include "memento.h"
+#include "outf.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <stdlib.h>
+
+
+static int rand_int(int max)
+/* Returns pseudo-random int in the range 0..max-1, derived from rand(). */
+{
+    const double scale = RAND_MAX+1.0;
+    return (int) (rand() / scale * max);
+}
+
+
+/* Support for an extract_buffer_t that reads from / writes to a fixed block of
+memory, with a fn_cache() that returns a randomly-sized cache each time it is
+called, and read/write functions that do random short reads and writes. */
+
+typedef struct
+{
+ extract_alloc_t* alloc; /* Allocator used for data[]; may be NULL. */
+ char* data; /* Backing memory that is read from / written to. */
+ size_t bytes; /* Size of data[]. */
+ size_t pos; /* Current position in data[]. */
+ char cache[137]; /* Storage handed out by the fn_cache callbacks. */
+ int num_calls_cache; /* Number of calls to the cache callback. */
+ int num_calls_read; /* Number of calls to s_read(). */
+ int num_calls_write; /* Number of calls to s_write(). */
+} mem_t;
+
+static int s_read(void* handle, void* destination, size_t bytes, size_t* o_actual)
+/* Read callback that does a randomised short read: copies a random number of
+bytes, limited by <bytes>, by 91 and by the data remaining, from the mem_t's
+data[] into <destination>. Sets *o_actual to the number copied (zero only at
+end of data) and always returns 0. */
+{
+    mem_t* mem = handle;
+    size_t remaining;
+    size_t count = 91;
+    assert(bytes > 0);
+    mem->num_calls_read += 1;
+    assert(mem->pos <= mem->bytes);
+    remaining = mem->bytes - mem->pos;
+    if (bytes < count) count = bytes;
+    if (remaining < count) count = remaining;
+    if (count != 0) count = rand_int((int) count-1) + 1;
+    memcpy(destination, mem->data + mem->pos, count);
+    mem->pos += count;
+    *o_actual = count;
+    return 0;
+}
+
+static int s_read_cache(void* handle, void** o_cache, size_t* o_numbytes)
+/* Cache callback for reading: copies a randomly-sized chunk of the remaining
+data (at most sizeof(cache) bytes) into the mem_t's cache[] and returns it in
+*o_cache / *o_numbytes. A zero-length cache is returned at end of data. Always
+returns 0. */
+{
+    mem_t* mem = handle;
+    int count = (int) (mem->bytes - mem->pos);
+    mem->num_calls_cache += 1;
+    if (count > (int) sizeof(mem->cache)) count = sizeof(mem->cache);
+    if (count != 0) count = rand_int(count - 1) + 1;
+    memcpy(mem->cache, mem->data + mem->pos, count);
+    mem->pos += count;
+    *o_cache = mem->cache;
+    *o_numbytes = count;
+    return 0;
+}
+
+static void s_read_buffer_close(void* handle)
+/* Close callback for the read buffer: frees the mem_t's data[]. */
+{
+    mem_t* mem = handle;
+    extract_free(mem->alloc, &mem->data);
+}
+
+static void s_create_read_buffer(extract_alloc_t* alloc, int bytes, mem_t* r, extract_buffer_t** o_buffer)
+/* Fills *r with <bytes> bytes of data from rand() and creates an
+extract_buffer_t in *o_buffer that reads from it using s_read() (randomised
+short reads) and s_read_cache() (randomly-sized caches). Aborts if the data
+cannot be allocated. */
+{
+    int status;
+    int idx = 0;
+    if (extract_malloc(alloc, &r->data, bytes)) abort();
+    while (idx < bytes) {
+        r->data[idx] = (char) rand();
+        idx += 1;
+    }
+    r->alloc = alloc;
+    r->bytes = bytes;
+    r->pos = 0;
+    r->num_calls_cache = 0;
+    r->num_calls_read = 0;
+    r->num_calls_write = 0;
+    status = extract_buffer_open(alloc, r, s_read, NULL /*write*/, s_read_cache, s_read_buffer_close, o_buffer);
+    assert(!status);
+}
+
+static void test_read(void) /* Reads a 12345-byte randomised read-buffer back
+in randomly-sized requests and checks the bytes received match the original
+data; failures are reported via assert()/abort(). */
+{
+ /* Create read buffer with randomised content. */
+ int len = 12345;
+ mem_t r;
+ char* out_buffer;
+ int out_pos;
+ int its;
+ int e;
+ extract_buffer_t* buffer;
+ s_create_read_buffer(NULL /*alloc*/, len, &r, &buffer);
+
+ /* Repeatedly read from read-buffer until we get EOF, and check we read the
+ original content. */
+ if (extract_malloc(r.alloc, &out_buffer, len)) abort();
+ out_pos = 0;
+ for (its=0;; ++its) {
+ size_t actual;
+ int n = rand_int(120)+1; /* Request size, 1..120. */
+ int e = extract_buffer_read(buffer, out_buffer + out_pos, n, &actual); /* Shadows outer <e>. */
+ out_pos += (int) actual;
+ assert(out_pos == (int) extract_buffer_pos(buffer));
+ if (e == 1) break; /* +1 means EOF (short read). */
+ assert(!e);
+ assert(!memcmp(out_buffer, r.data, out_pos));
+ }
+ assert(out_pos == len);
+ assert(!memcmp(out_buffer, r.data, len));
+ outf("its=%i num_calls_read=%i num_calls_write=%i num_calls_cache=%i",
+ its, r.num_calls_read, r.num_calls_write, r.num_calls_cache);
+ extract_free(r.alloc, &out_buffer);
+ out_buffer = NULL;
+ e = extract_buffer_close(&buffer); /* Also frees r.data via s_read_buffer_close(). */
+ assert(!e);
+
+ outf("Read test passed.\n");
+}
+
+
+static int s_write(void* handle, const void* source, size_t bytes, size_t* o_actual)
+/* Write callback that does a randomised short write: copies a random number
+of bytes, limited by <bytes>, by 61 and by the space remaining, from <source>
+into the mem_t's data[]. Sets *o_actual to the number copied and always
+returns 0. Asserts that at least one byte can be written. */
+{
+    mem_t* mem = handle;
+    int space = (int) (mem->bytes - mem->pos);
+    int count = 61;
+    mem->num_calls_write += 1;
+    if (count > (int) bytes) count = (int) bytes;
+    if (count > space) count = space;
+    assert(count);
+    count = rand_int(count-1) + 1;
+    memcpy(mem->data + mem->pos, source, count);
+    /* data[] was allocated with one extra byte for this terminator. */
+    mem->data[mem->bytes] = 0;
+    mem->pos += count;
+    *o_actual = count;
+    return 0;
+}
+
+static int s_write_cache(void* handle, void** o_cache, size_t* o_numbytes)
+/* Cache callback for writing: returns the mem_t's cache[] with a randomised
+size, limited by sizeof(cache) and by the space remaining in data[]. A
+zero-length cache is returned at EOF. Always returns 0. */
+{
+    mem_t* mem = handle;
+    int count;
+    mem->num_calls_cache += 1;
+    assert(mem->bytes >= mem->pos);
+    count = (int) (mem->bytes - mem->pos);
+    if (count > (int) sizeof(mem->cache)) count = sizeof(mem->cache);
+    if (count != 0) count = rand_int(count - 1) + 1;
+    *o_cache = mem->cache;
+    *o_numbytes = count;
+    return 0;
+}
+
+static void s_write_buffer_close(void* handle)
+/* Close callback for the write buffer: logs and frees the mem_t's data[]. */
+{
+    mem_t* target = handle;
+    outf("*** freeing mem->data=%p", target->data);
+    extract_free(target->alloc, &target->data);
+}
+
+static void s_create_write_buffer(extract_alloc_t* alloc, size_t bytes, mem_t* r, extract_buffer_t** o_buffer)
+/* Allocates bytes+1 bytes for r->data, zeroes the first <bytes> of them, and
+creates an extract_buffer_t in *o_buffer that writes into it using s_write()
+(randomised short writes) and s_write_cache() (randomly-sized caches). Aborts
+if the data cannot be allocated. */
+{
+    int status;
+    if (extract_malloc(alloc, &r->data, bytes+1)) abort();
+    extract_bzero(r->data, bytes);
+    r->alloc = alloc;
+    r->bytes = bytes;
+    r->pos = 0;
+    r->num_calls_cache = 0;
+    r->num_calls_read = 0;
+    r->num_calls_write = 0;
+    status = extract_buffer_open(r->alloc, r, NULL /*read*/, s_write, s_write_cache, s_write_buffer_close, o_buffer);
+    assert(!status);
+}
+
+
+static void test_write(void) /* Writes 12345 random lower-case letters to a
+write-buffer in randomly-sized requests and checks the buffer's backing memory
+ends up with the same content; failures are reported via assert()/abort(). */
+{
+ /* Create write buffer. */
+ size_t len = 12345;
+ mem_t r;
+ extract_buffer_t* buffer;
+ char* out_buffer;
+ unsigned i;
+ size_t out_pos = 0;
+ int its;
+ int e;
+
+ s_create_write_buffer(NULL /*alloc*/, len, &r, &buffer);
+
+ /* Write to write-buffer, and check it contains the original content. */
+ if (extract_malloc(r.alloc, &out_buffer, len)) abort();
+ for (i=0; i<len; ++i) {
+ out_buffer[i] = (char) ('a' + rand_int(26));
+ }
+ for (its=0;; ++its) {
+ size_t actual;
+ size_t n = rand_int(12)+1; /* Request size, 1..12. */
+ int e = extract_buffer_write(buffer, out_buffer+out_pos, n, &actual); /* Shadows outer <e>. */
+ out_pos += actual;
+ assert(out_pos == extract_buffer_pos(buffer));
+ if (e == 1) break; /* +1 means EOF (short write). */
+ assert(!e);
+ }
+ assert(out_pos == len);
+ assert(!memcmp(out_buffer, r.data, len));
+ extract_free(r.alloc, &out_buffer);
+ outf("its=%i num_calls_read=%i num_calls_write=%i num_calls_cache=%i",
+ its, r.num_calls_read, r.num_calls_write, r.num_calls_cache);
+ e = extract_buffer_close(&buffer); /* Also frees r.data via s_write_buffer_close(). */
+ assert(!e);
+ outf("Write test passed.\n");
+}
+
+static void test_file(void)
+/* Tests file-backed buffers using the file "test/generated/buffer-file":
+writes 3 bytes, reads them back checking the expected short read and EOF
+results, then checks that writing to a read-only file buffer fails with
+EINVAL. Calls abort() on any failure.
+
+Byte counts are initialised to zero before each call, so that the diagnostics
+never print an indeterminate value if a call fails before storing a count. */
+{
+    /* Check we can write 3 bytes to file. */
+    extract_buffer_t* file_buffer;
+    if (extract_buffer_open_file(NULL /*alloc*/, "test/generated/buffer-file", 1 /*writable*/, &file_buffer)) abort();
+
+    {
+        size_t n = 0;
+        int e;
+        errno = 0;
+        e = extract_buffer_write(file_buffer, "foo", 3, &n);
+        if (e == 0 && n == 3) {}
+        else {
+            outf("extract_buffer_write() returned e=%i errno=%i n=%zi", e, errno, n);
+            abort();
+        }
+    }
+    if (extract_buffer_close(&file_buffer)) abort();
+
+    /* Check we get back expected short reads and EOF when reading from 3-byte
+    file created above. */
+    if (extract_buffer_open_file(NULL /*alloc*/, "test/generated/buffer-file", 0 /*writable*/, &file_buffer)) abort();
+
+    {
+        size_t n = 0;
+        char buffer[10];
+        int e;
+        errno = 0;
+        e = extract_buffer_read(file_buffer, buffer, 2, &n);
+        if (e == 0 && n == 2) {}
+        else {
+            outf("extract_buffer_read() returned e=%i errno=%i n=%zi", e, errno, n);
+            abort();
+        }
+        n = 0;
+        e = extract_buffer_read(file_buffer, buffer, 3, &n);
+        if (e == 1 && n == 1) {}
+        else {
+            outf("extract_buffer_read() returned e=%i errno=%i n=%zi", e, errno, n);
+            abort();
+        }
+        n = 0;
+        e = extract_buffer_read(file_buffer, buffer, 3, &n);
+        if (e == 1 && n == 0) {}
+        else {
+            outf("extract_buffer_read() returned e=%i errno=%i n=%zi", e, errno, n);
+            abort();
+        }
+    }
+    if (extract_buffer_close(&file_buffer)) abort();
+
+    /* Check writing to read-only file buffer fails. */
+    {
+        int e;
+        char text[] = "hello world";
+        size_t actual = 0;
+        if (extract_buffer_open_file(NULL /*alloc*/, "test/generated/buffer-file", 0 /*writable*/, &file_buffer)) {
+            abort();
+        }
+
+        /* Clear errno so that the EINVAL check below can only be satisfied by
+        this call, not by a stale value. */
+        errno = 0;
+        e = extract_buffer_write(file_buffer, text, sizeof(text)-1, &actual);
+        outf("extract_buffer_write() on read buffer returned e=%i actual=%zi", e, actual);
+        if (e != -1 || errno != EINVAL) abort();
+        if (extract_buffer_close(&file_buffer)) abort();
+    }
+
+    outf("file buffer tests passed.\n");
+}
+
+int main(void)
+/* Runs the read, write and file buffer tests in turn with verbose output
+enabled. Returns 0 if every test returned. */
+{
+    outf_verbose_set(1);
+
+    test_read();
+    test_write();
+    test_file();
+
+    return 0;
+}
diff --git a/extract/src/buffer.c b/extract/src/buffer.c
new file mode 100644
index 00000000..3fd35bfd
--- /dev/null
+++ b/extract/src/buffer.c
@@ -0,0 +1,477 @@
+#include "../include/extract_buffer.h"
+#include "../include/extract_alloc.h"
+
+#include "memento.h"
+#include "outf.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+struct extract_buffer_t
+{
+ /* First member must be extract_buffer_cache_t - required by inline
+ implementations of extract_buffer_read() and extract_buffer_write(). */
+ extract_buffer_cache_t cache;
+ extract_alloc_t* alloc; /* Allocator used to allocate and free this struct. */
+ void* handle; /* Passed as first argument to all fn_*() callbacks. */
+ extract_buffer_fn_read fn_read; /* May be NULL. */
+ extract_buffer_fn_write fn_write; /* May be NULL; if so, writes fail with EINVAL. */
+ extract_buffer_fn_cache fn_cache; /* May be NULL; if so, fn_read/fn_write are used directly. */
+ extract_buffer_fn_close fn_close; /* May be NULL; otherwise called by extract_buffer_close(). */
+ size_t pos; /* Does not include bytes currently read/written to cache. */
+};
+
+
+extract_alloc_t* extract_buffer_alloc(extract_buffer_t* buffer)
+/* Returns the allocator that <buffer> was created with. */
+{
+    extract_alloc_t* owner = buffer->alloc;
+    return owner;
+}
+
+
+int extract_buffer_open(
+        extract_alloc_t* alloc,
+        void* handle,
+        extract_buffer_fn_read fn_read,
+        extract_buffer_fn_write fn_write,
+        extract_buffer_fn_cache fn_cache,
+        extract_buffer_fn_close fn_close,
+        extract_buffer_t** o_buffer
+        )
+/* Creates a buffer that uses the supplied callbacks, with an empty cache and
+position zero. On success stores the new buffer in *o_buffer and returns 0. On
+allocation failure returns -1 and does not touch *o_buffer. */
+{
+    extract_buffer_t* buffer;
+    if (extract_malloc(alloc, &buffer, sizeof(*buffer))) {
+        /* Same cleanup as for any other failure. */
+        extract_free(alloc, &buffer);
+        return -1;
+    }
+
+    buffer->cache.cache = NULL;
+    buffer->cache.numbytes = 0;
+    buffer->cache.pos = 0;
+    buffer->pos = 0;
+    buffer->alloc = alloc;
+    buffer->handle = handle;
+    buffer->fn_read = fn_read;
+    buffer->fn_write = fn_write;
+    buffer->fn_cache = fn_cache;
+    buffer->fn_close = fn_close;
+
+    *o_buffer = buffer;
+    return 0;
+}
+
+
+size_t extract_buffer_pos(extract_buffer_t* buffer)
+/* Returns the current position: buffer->pos plus, if there is a cache, the
+position within the cache. */
+{
+    size_t cached = (buffer->cache.cache) ? buffer->cache.pos : 0;
+    return buffer->pos + cached;
+}
+
+
+static int s_cache_flush(extract_buffer_t* buffer, size_t* o_actual)
+/* Sends the first buffer->cache.pos bytes of the cache to fn_write() using a
+loop to cope with short writes. Always sets *o_actual to the number of bytes
+successfully sent, and advances buffer->pos by the same amount.
+
+Returns 0 with buffer->cache.{cache,numbytes,pos} all set to zero if
+everything was sent.
+
+Returns 0 with *o_actual less than the original buffer->cache.pos if
+fn_write() indicated EOF by accepting zero bytes; the cache fields are left
+unchanged in this case.
+
+Returns -1 if fn_write() failed; the cache fields are left unchanged. */
+{
+ int e = -1;
+ size_t p = 0;
+ assert(buffer->cache.pos <= buffer->cache.numbytes);
+ for(;;) {
+ size_t actual;
+ if (p == buffer->cache.pos) break;
+ if (buffer->fn_write(
+ buffer->handle,
+ (char*) buffer->cache.cache + p,
+ buffer->cache.pos - p,
+ &actual
+ )) goto end;
+ buffer->pos += actual;
+ p += actual;
+ if (actual == 0) {
+ /* EOF while flushing cache. <p> is the number of cache bytes that
+ we know have been successfully handled by buffer->fn_write(); we
+ report it via *o_actual and return zero so the caller can detect
+ the short flush. */
+ outf("*** buffer->fn_write() EOF\n");
+ e = 0;
+ goto end;
+ }
+ }
+ outfx("cache flush, buffer->pos=%i p=%i buffer->cache.pos=%i\n",
+ buffer->pos, p, buffer->cache.pos);
+ assert(p == buffer->cache.pos);
+ buffer->cache.cache = NULL;
+ buffer->cache.numbytes = 0;
+ buffer->cache.pos = 0;
+ e = 0;
+ end:
+
+ *o_actual = p;
+ return e;
+}
+
+int extract_buffer_close(extract_buffer_t** p_buffer)
+/* Flushes any cached written data, calls the buffer's fn_close callback (if
+any), frees the buffer and sets *p_buffer to NULL.
+
+Returns 0 on success (including when *p_buffer is already NULL), +1 if
+fn_write() indicated EOF before all cached data was flushed, or -1 with errno
+set if fn_write() failed.
+
+fn_close is always called, even if the flush fails; otherwise the underlying
+handle (e.g. a FILE*) would be leaked because the buffer itself is always
+freed. */
+{
+    extract_buffer_t* buffer = *p_buffer;
+    int e = 0;
+    int saved_errno = 0;
+
+    if (!buffer) {
+        return 0;
+    }
+
+    if (buffer->cache.cache && buffer->fn_write) {
+        /* Flush cache. */
+        size_t cache_bytes = buffer->cache.pos;
+        size_t actual;
+        if (s_cache_flush(buffer, &actual)) {
+            e = -1;
+            saved_errno = errno;
+        }
+        else if (actual != cache_bytes) {
+            e = +1;
+        }
+    }
+    if (buffer->fn_close) buffer->fn_close(buffer->handle);
+    extract_free(buffer->alloc, &buffer);
+    *p_buffer = NULL;
+    /* Make sure cleanup has not hidden the error from the failed flush. */
+    if (e == -1) errno = saved_errno;
+    return e;
+}
+
+static int s_simple_cache(void* handle, void** o_cache, size_t* o_numbytes)
+/* Cache callback for buffers created by extract_buffer_open_simple(). There
+is never any more data, so we always return an empty cache to indicate EOF. */
+{
+    (void) handle;
+    *o_numbytes = 0;
+    *o_cache = NULL;
+    return 0;
+}
+
+int extract_buffer_open_simple(
+        extract_alloc_t* alloc,
+        const void* data,
+        size_t numbytes,
+        void* handle,
+        extract_buffer_fn_close fn_close,
+        extract_buffer_t** o_buffer
+        )
+/* Creates a buffer whose cache is the fixed memory region data..+numbytes.
+There are no read/write callbacks, and the cache callback always reports EOF,
+so all reads/writes operate directly on <data>. <handle> is only passed to
+<fn_close> (which may be NULL).
+
+On success stores the new buffer in *o_buffer and returns 0. Returns -1 if
+allocation fails. */
+{
+    extract_buffer_t* buffer;
+    if (extract_malloc(alloc, &buffer, sizeof(*buffer))) return -1;
+
+    /* We need cast away the const here. data[] will be written-to if caller
+    uses us as a write buffer. */
+    buffer->alloc = alloc;
+    buffer->cache.cache = (void*) data;
+    buffer->cache.numbytes = numbytes;
+    buffer->cache.pos = 0;
+    buffer->handle = handle;
+    buffer->fn_read = NULL;
+    buffer->fn_write = NULL;
+    buffer->fn_cache = s_simple_cache;
+    buffer->fn_close = fn_close;
+    /* extract_malloc() does not zero memory, so we must set this explicitly;
+    extract_buffer_pos() reads it. */
+    buffer->pos = 0;
+    *o_buffer = buffer;
+    return 0;
+}
+
+
+/* Implementation of extract_buffer_file*. */
+
+static int s_file_read(void* handle, void* data, size_t numbytes, size_t* o_actual)
+/* Read callback for file buffers; <handle> is a FILE*. Sets *o_actual to the
+number of bytes read by fread(). Returns -1 with errno=EIO if nothing was read
+and the stream has its error indicator set, otherwise 0. */
+{
+    FILE* file = handle;
+    size_t got = fread(data, 1, numbytes, file);
+    outfx("file=%p numbytes=%i => n=%zi", file, numbytes, got);
+    assert(o_actual); /* We are called by other extract_buffer fns, not by user code. */
+    *o_actual = got;
+    if (got == 0 && ferror(file)) {
+        errno = EIO;
+        return -1;
+    }
+    return 0;
+}
+
+static int s_file_write(void* handle, const void* data, size_t numbytes, size_t* o_actual)
+/* Write callback for file buffers; <handle> is a FILE*. Sets *o_actual to the
+number of bytes written by fwrite(). Returns -1 with errno=EIO if nothing was
+written and the stream has its error indicator set, otherwise 0. */
+{
+    FILE* file = handle;
+    size_t put = fwrite(data, 1 /*size*/, numbytes /*nmemb*/, file);
+    outfx("file=%p numbytes=%i => n=%zi", file, numbytes, put);
+    assert(o_actual); /* We are called by other extract_buffer fns, not by user code. */
+    *o_actual = put;
+    if (put == 0 && ferror(file)) {
+        errno = EIO;
+        return -1;
+    }
+    return 0;
+}
+
+static void s_file_close(void* handle)
+/* Close callback for file buffers: closes the FILE* if it is not NULL. */
+{
+    FILE* file = handle;
+    if (file) {
+        fclose(file);
+    }
+}
+
+int extract_buffer_open_file(extract_alloc_t* alloc, const char* path, int writable, extract_buffer_t** o_buffer)
+/* Opens <path> with fopen() - mode "wb" if <writable> is non-zero, else "rb" -
+and creates a buffer that writes to or reads from it; no cache callback is
+used. The buffer's close callback closes the file.
+
+Returns 0 with the new buffer in *o_buffer, or -1 with *o_buffer set to NULL
+if the file could not be opened or the buffer could not be created. */
+{
+    FILE* file = fopen(path, (writable) ? "wb" : "rb");
+    if (!file) {
+        outf("failed to open '%s': %s", path, strerror(errno));
+        *o_buffer = NULL;
+        return -1;
+    }
+
+    if (extract_buffer_open(
+            alloc,
+            file /*handle*/,
+            writable ? NULL : s_file_read,
+            writable ? s_file_write : NULL,
+            NULL /*fn_cache*/,
+            s_file_close,
+            o_buffer
+            )) {
+        fclose(file);
+        *o_buffer = NULL;
+        return -1;
+    }
+    return 0;
+}
+
+
+/* Support for read/write. */
+
+int extract_buffer_read_internal(
+        extract_buffer_t* buffer,
+        void* destination,
+        size_t numbytes,
+        size_t* o_actual
+        )
+/* Called by extract_buffer_read() if not enough space in buffer->cache.
+
+Reads up to <numbytes> bytes into <destination>. If <o_actual> is not NULL,
+*o_actual is set to the number of bytes read. Returns 0 if all bytes were
+read, +1 if EOF was reached first, or -1 on error.
+
+If the buffer has neither a read callback nor a cache callback (e.g. a file
+buffer opened for writing) and the cache is empty, we fail with errno=EINVAL,
+mirroring extract_buffer_write_internal() on a read-only buffer, instead of
+calling a NULL function pointer. */
+{
+    int e = -1;
+    size_t pos = 0; /* Number of bytes read so far. */
+
+    /* In each iteration we either read from cache, or use buffer->fn_read()
+    directly or repopulate the cache. */
+    for(;;) {
+        size_t n;
+        if (pos == numbytes) break;
+        n = buffer->cache.numbytes - buffer->cache.pos;
+        if (n) {
+            /* There is data in cache. */
+            if (n > numbytes - pos) n = numbytes - pos;
+            memcpy((char*) destination + pos, (char*) buffer->cache.cache + buffer->cache.pos, n);
+            pos += n;
+            buffer->cache.pos += n;
+        }
+        else {
+            /* No data in cache. */
+            int use_read = 0;
+            if (buffer->fn_read) {
+                if (!buffer->fn_cache) {
+                    use_read = 1;
+                }
+                else if (buffer->cache.numbytes && numbytes - pos > buffer->cache.numbytes / 2) {
+                    /* This read is large compared to previously-returned
+                    cache size, so let's ignore buffer->fn_cache and use
+                    buffer->fn_read() directly instead. */
+                    use_read = 1;
+                }
+            }
+            if (use_read) {
+                /* Use buffer->fn_read() directly, carrying on looping in case
+                of short read. */
+                size_t actual;
+                outfx("using buffer->fn_read() directly for numbytes-pos=%i\n", numbytes-pos);
+                if (buffer->fn_read(buffer->handle, (char*) destination + pos, numbytes - pos, &actual)) goto end;
+                if (actual == 0) break; /* EOF. */
+                pos += actual;
+                buffer->pos += actual;
+            }
+            else {
+                /* Repopulate cache. */
+                if (!buffer->fn_cache) {
+                    /* Buffer is not readable. */
+                    errno = EINVAL;
+                    goto end;
+                }
+                outfx("using buffer->fn_cache() for buffer->cache.numbytes=%i\n", buffer->cache.numbytes);
+                if (buffer->fn_cache(buffer->handle, &buffer->cache.cache, &buffer->cache.numbytes)) goto end;
+                buffer->pos += buffer->cache.pos;
+                buffer->cache.pos = 0;
+                if (buffer->cache.numbytes == 0) break; /* EOF. */
+            }
+        }
+    }
+    e = 0;
+
+    end:
+    if (o_actual) *o_actual = pos;
+    if (e == 0 && pos != numbytes) return +1; /* EOF. */
+    return e;
+}
+
+
+int extract_buffer_write_internal(
+ extract_buffer_t* buffer,
+ const void* source,
+ size_t numbytes,
+ size_t* o_actual
+ ) /* Writes up to <numbytes> bytes from <source>, via the cache and/or
+buffer->fn_write(). If <o_actual> is not NULL, *o_actual is set to the number
+of bytes handled, except when the buffer has no fn_write, in which case it is
+left untouched. Returns 0 if all bytes were written, +1 on EOF / short write,
+or -1 on error (errno=EINVAL if the buffer has no fn_write). */
+{
+ int e = -1;
+ size_t pos = 0; /* Number of bytes written so far. */
+
+ if (!buffer->fn_write) {
+ errno = EINVAL;
+ return -1;
+ }
+
+ /* In each iteration we either write to cache, or use buffer->fn_write()
+ directly or flush the cache. */
+ for(;;) {
+ size_t n;
+ outfx("numbytes=%i pos=%i. buffer->cache.numbytes=%i buffer->cache.pos=%i\n",
+ numbytes, pos, buffer->cache.numbytes, buffer->cache.pos);
+ if (pos == numbytes) break;
+ n = buffer->cache.numbytes - buffer->cache.pos;
+ if (n) {
+ /* There is space in cache for writing. */
+ if (n > numbytes - pos) n = numbytes - pos;
+ outfx("writing to cache: numbytes=%i n=%i\n", numbytes, n);
+ memcpy((char*) buffer->cache.cache + buffer->cache.pos, (char*) source + pos, n);
+ pos += n;
+ buffer->cache.pos += n;
+ }
+ else {
+ /* No space left in cache. */
+ int use_write = 0;
+ outfx("cache empty. pos=%i. buffer->cache.numbytes=%i buffer->cache.pos=%i\n",
+ pos, buffer->cache.numbytes, buffer->cache.pos);
+ {
+ /* Flush the cache. */
+ size_t actual;
+ int ee;
+ size_t b = buffer->cache.numbytes; /* Equals buffer->cache.pos here, because n was zero. */
+ ptrdiff_t delta;
+ ee = s_cache_flush(buffer, &actual);
+ assert(actual <= b);
+ delta = actual - b; /* Zero if fully flushed, otherwise negative. */
+ pos += delta;
+ buffer->pos += delta;
+ if (delta) {
+ /* We have only partially flushed the cache. This is
+ not recoverable. <pos> will be the number of bytes in
+ source..+numbytes that have been successfully flushed, and
+ could be negative if we failed to flush earlier data.
+ NOTE(review): <pos> is a size_t, so a 'negative' value wraps
+ to a very large number. Also, this path returns +1 even if
+ s_cache_flush() reported an error (ee != 0) - confirm this
+ is intended. */
+ outf("failed to flush. actual=%i delta=%i\n", actual, delta);
+ e = 0;
+ goto end;
+ }
+ if (ee) goto end;
+ }
+
+ if (!buffer->fn_cache) {
+ use_write = 1;
+ }
+ else if (buffer->cache.numbytes && numbytes - pos > buffer->cache.numbytes / 2) {
+ /* This write is large compared to previously-returned cache
+ size, so let's ignore the cache and call buffer->fn_write()
+ directly instead.
+ NOTE(review): a complete flush above sets
+ buffer->cache.numbytes to zero, so this branch looks
+ unreachable - confirm. */
+ use_write = 1;
+ }
+ if (use_write) {
+ /* Use buffer->fn_write() directly, carrying on looping in case
+ of short write. */
+ size_t actual;
+ if (buffer->fn_write(buffer->handle, (char*) source + pos, numbytes - pos, &actual)) goto end;
+ if (actual == 0) break; /* EOF. */
+ outfx("direct write numbytes-pos=%i actual=%i buffer->pos=%i => %i\n",
+ numbytes-pos, actual, buffer->pos, buffer->pos + actual);
+ pos += actual;
+ buffer->pos += actual;
+ }
+ else {
+ /* Repopulate cache. */
+ outfx("repopulating cache buffer->pos=%i", buffer->pos);
+ if (buffer->fn_cache(buffer->handle, &buffer->cache.cache, &buffer->cache.numbytes)) goto end;
+ buffer->cache.pos = 0;
+ if (buffer->cache.numbytes == 0) break; /* EOF. */
+ }
+ }
+ }
+ e = 0;
+
+ end:
+ if (o_actual) *o_actual = pos;
+ if (e == 0 && pos != numbytes) e = +1; /* EOF. */
+ return e;
+}
+
+
+static int expanding_memory_buffer_write(void* handle, const void* source, size_t numbytes, size_t* o_actual)
+/* Write callback for expanding memory buffers.
+
+We realloc our memory region as required. For efficiency, we also hand out
+any currently-unused part of our memory region as an extract_buffer cache. So
+we can be called either to flush that cache (in which case <source> points
+into our own region and there is nothing to copy) or to accept data from
+somewhere else (in which case we grow our region and copy the data in).
+
+Always accepts all <numbytes> bytes. Returns 0, or -1 if reallocation fails. */
+{
+    extract_buffer_expanding_t* ebe = handle;
+    char* src = (char*) source;
+    int is_internal = (src >= ebe->data && src < ebe->data + ebe->alloc_size);
+
+    if (is_internal) {
+        /* The data is already in place, directly after the existing data. */
+        assert((size_t) (src - ebe->data) == ebe->data_size);
+        assert((size_t) (src - ebe->data + numbytes) <= ebe->alloc_size);
+    }
+    else {
+        /* Data is external, so copy into our buffer. We will have already been
+        called to flush the cache. */
+        size_t needed = ebe->data_size + numbytes;
+        if (extract_realloc2(ebe->buffer->alloc, &ebe->data, ebe->alloc_size, needed)) return -1;
+        ebe->alloc_size = needed;
+        memcpy(ebe->data + ebe->data_size, source, numbytes);
+    }
+    ebe->data_size += numbytes;
+    *o_actual = numbytes;
+    return 0;
+}
+
+static int expanding_memory_buffer_cache(void* handle, void** o_cache, size_t* o_numbytes)
+/* Cache callback for expanding memory buffers: resizes our memory region to
+leave 4096 bytes after the current data, and returns those bytes as the cache.
+Returns 0, or -1 if reallocation fails. */
+{
+    extract_buffer_expanding_t* ebe = handle;
+    size_t delta = 4096;
+    size_t wanted = ebe->data_size + delta;
+    if (extract_realloc2(ebe->buffer->alloc, &ebe->data, ebe->alloc_size, wanted)) {
+        return -1;
+    }
+    ebe->alloc_size = wanted;
+    *o_numbytes = delta;
+    *o_cache = ebe->data + ebe->data_size;
+    return 0;
+}
+
+int extract_buffer_expanding_create(extract_alloc_t* alloc, extract_buffer_expanding_t* ebe)
+/* Initialises *ebe as an empty expanding memory buffer and creates its
+write-only extract_buffer_t in ebe->buffer. No close callback is installed, so
+ebe->data is not freed when the buffer is closed. Returns 0 on success, else
+-1. */
+{
+    int e;
+    ebe->data = NULL;
+    ebe->data_size = 0;
+    ebe->alloc_size = 0;
+    e = extract_buffer_open(
+            alloc,
+            ebe,
+            NULL /*fn_read*/,
+            expanding_memory_buffer_write,
+            expanding_memory_buffer_cache,
+            NULL /*fn_close*/,
+            &ebe->buffer
+            );
+    return (e) ? -1 : 0;
+}
diff --git a/extract/src/compat_stdint.h b/extract/src/compat_stdint.h
new file mode 100644
index 00000000..174c72ae
--- /dev/null
+++ b/extract/src/compat_stdint.h
@@ -0,0 +1,25 @@
+#ifndef ARTIFEX_EXTRACT_COMPAT_STDINT_H
+#define ARTIFEX_EXTRACT_COMPAT_STDINT_H
+
+/* Fake what we need from stdint.h on MSVS. All other compilers, and newer
+MSVC versions, simply get the real <stdint.h>. */
+
+#if defined(_MSC_VER) && (_MSC_VER < 1700) /* MSVC older than VS2012 */
+ typedef signed char int8_t;
+ typedef short int int16_t;
+ typedef int int32_t;
+ typedef __int64 int64_t;
+ typedef unsigned char uint8_t;
+ typedef unsigned short int uint16_t;
+ typedef unsigned int uint32_t;
+ typedef unsigned __int64 uint64_t;
+ #ifndef INT64_MAX
+ #define INT64_MAX 9223372036854775807i64
+ #endif
+ #ifndef SIZE_MAX
+ #define SIZE_MAX ((size_t) -1) /* All bits set, i.e. the largest size_t value. */
+ #endif
+#else
+ #include <stdint.h>
+#endif
+
+#endif
diff --git a/extract/src/compat_strtoll.h b/extract/src/compat_strtoll.h
new file mode 100644
index 00000000..76ed3530
--- /dev/null
+++ b/extract/src/compat_strtoll.h
@@ -0,0 +1,9 @@
+#ifndef ARTIFEX_EXTRACT_COMPAT_STRTOLL_H
+#define ARTIFEX_EXTRACT_COMPAT_STRTOLL_H
+
+/* Provide strtoll() and strtoull() on old versions of MSVC, using the MSVC
+equivalents. strtoull() must use the unsigned _strtoui64(); the signed
+_strtoi64() would clamp values above the maximum signed 64-bit value. */
+
+#if defined(_MSC_VER) && (_MSC_VER < 1800) /* MSVC older than VS2013 */
+    #define strtoll(text, end, base) ((long long) _strtoi64((text), (end), (base)))
+    #define strtoull(text, end, base) ((unsigned long long) _strtoui64((text), (end), (base)))
+#endif
+
+#endif
diff --git a/extract/src/compat_va_copy.h b/extract/src/compat_va_copy.h
new file mode 100644
index 00000000..9b9ae8dc
--- /dev/null
+++ b/extract/src/compat_va_copy.h
@@ -0,0 +1,8 @@
+#ifndef ARTIFEX_EXTRACT_COMPAT_VA_COPY_H
+#define ARTIFEX_EXTRACT_COMPAT_VA_COPY_H
+
+#if defined(_MSC_VER) && (_MSC_VER < 1800) /* MSVC older than VS2013 */
+ #define va_copy(dst, src) ((dst) = (src)) /* Plain assignment. NOTE(review): only valid where va_list is an assignable type - presumably true for these MSVC versions; confirm. */
+#endif
+
+#endif
diff --git a/extract/src/document.h b/extract/src/document.h
new file mode 100644
index 00000000..7a1470e4
--- /dev/null
+++ b/extract/src/document.h
@@ -0,0 +1,150 @@
+#ifndef ARTIFEX_EXTRACT_DOCUMENT_H
+#define ARTIFEX_EXTRACT_DOCUMENT_H
+
+static const double pi = 3.141592653589793;
+
+typedef struct
+{
+ double x;
+ double y;
+} point_t; /* A 2D point. */
+
+typedef struct
+{
+ double a;
+ double b;
+ double c;
+ double d;
+ double e;
+ double f;
+} matrix_t; /* Six-element matrix. NOTE(review): presumably a 2D affine transform with (e,f) as the translation - confirm. */
+
+double matrix_expansion(matrix_t m); /* Used to get a span's font size from its trm. */
+
+int matrix_cmp4(const matrix_t* lhs, const matrix_t* rhs)
+;
+/* Returns zero if first four members of *lhs and *rhs are equal, otherwise
++/-1. */
+
+typedef struct
+{
+ /* (x,y) before transformation by ctm and trm. */
+ double pre_x;
+ double pre_y;
+
+ /* (x,y) after transformation by ctm and trm. */
+ double x;
+ double y;
+
+ unsigned ucs; /* Character code. NOTE(review): presumably a Unicode code point - confirm. */
+ double adv; /* NOTE(review): presumably the character's advance - confirm units. */
+} char_t;
+/* A single char in a span.
+*/
+
+typedef struct
+{
+ matrix_t ctm;
+ matrix_t trm;
+ char* font_name;
+
+ /* font size is matrix_expansion(trm). */
+
+ struct {
+ unsigned font_bold : 1;
+ unsigned font_italic : 1;
+ unsigned wmode : 1;
+ };
+
+ char_t* chars; /* Array of .chars_num items. */
+ int chars_num;
+} span_t;
+/* List of chars that have same font and are usually adjacent. */
+
+char_t* span_char_last(span_t* span);
+/* Returns last character in span. */
+
+int span_append_c(extract_alloc_t* alloc, span_t* span, int c);
+/* Appends new char_t to a span_t with .ucs=c and all other
+fields zeroed. */
+
+const char* span_string(extract_alloc_t* alloc, span_t* span);
+/* Returns static string containing info about span_t. */
+
+typedef struct
+{
+ span_t** spans; /* Array of .spans_num pointers. */
+ int spans_num;
+} line_t;
+/* List of spans that are aligned on same line. */
+
+span_t* line_span_first(line_t* line);
+/* Returns first span in a line. */
+
+span_t* line_span_last(line_t* line);
+/* Returns last span in a line. */
+
+typedef struct
+{
+ line_t** lines; /* Array of .lines_num pointers. */
+ int lines_num;
+} paragraph_t;
+/* List of lines that are aligned and adjacent to each other so as to form a
+paragraph. */
+
+typedef struct
+{
+ char* type; /* jpg, png etc. */
+ char* name; /* Name of image file within docx. */
+ char* id; /* ID of image within docx. */
+ char* data; /* Image data, .data_size bytes. */
+ size_t data_size;
+
+ extract_image_data_free data_free; /* NOTE(review): presumably called with .data_free_handle to release .data - confirm. */
+ void* data_free_handle;
+
+} image_t;
+/* Information about an image. <type> is as passed to extract_add_image();
+<name> and <id> are created to be unique identifiers for use in generated docx
+file. */
+
+typedef struct
+{
+ span_t** spans; /* Array of .spans_num pointers. */
+ int spans_num;
+
+ image_t* images; /* Array of .images_num items. */
+ int images_num;
+
+ line_t** lines;
+ int lines_num;
+ /* These refer to items in .spans. Initially empty, then set by
+ extract_join(). */
+
+ paragraph_t** paragraphs;
+ int paragraphs_num;
+ /* These refer to items in .lines. Initially empty, then set
+ by extract_join(). */
+
+} page_t;
+/* A page. Contains different representations of the list of spans. */
+
+typedef struct
+{
+ page_t** pages; /* Array of .pages_num pointers. */
+ int pages_num;
+} document_t;
+/* A list of pages. */
+
+
+typedef struct
+{
+ image_t* images; /* Array of .images_num items. */
+ int images_num;
+ char** imagetypes; /* Array of .imagetypes_num strings. */
+ int imagetypes_num;
+} images_t; /* A list of images plus a list of image type names. */
+
+int extract_document_join(extract_alloc_t* alloc, document_t* document);
+
+#endif
diff --git a/extract/src/docx.c b/extract/src/docx.c
new file mode 100644
index 00000000..238e81d4
--- /dev/null
+++ b/extract/src/docx.c
@@ -0,0 +1,1097 @@
+/* These extract_docx_*() functions generate docx content and docx zip archive
+data.
+
+Caller must call things in a sensible order to create valid content -
+e.g. don't call extract_docx_paragraph_start() twice without an intervening
+call to extract_docx_paragraph_finish(). */
+
+#include "../include/extract.h"
+
+#include "docx_template.h"
+
+#include "astring.h"
+#include "document.h"
+#include "docx.h"
+#include "mem.h"
+#include "memento.h"
+#include "outf.h"
+#include "zip.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <sys/stat.h>
+
+
+static int extract_docx_paragraph_start(extract_alloc_t* alloc, extract_astring_t* content)
+/* Appends two newlines and the opening <w:p> tag of a paragraph to <content>.
+Returns whatever extract_astring_cat() returns. */
+{
+    static const char paragraph_open[] = "\n\n<w:p>";
+    return extract_astring_cat(alloc, content, paragraph_open);
+}
+
+static int extract_docx_paragraph_finish(extract_alloc_t* alloc, extract_astring_t* content)
+/* Appends a newline and the closing </w:p> tag of a paragraph to <content>.
+Returns whatever extract_astring_cat() returns. */
+{
+    static const char paragraph_close[] = "\n</w:p>";
+    return extract_astring_cat(alloc, content, paragraph_close);
+}
+
+static int extract_docx_run_start(
+        extract_alloc_t* alloc,
+        extract_astring_t* content,
+        const char* font_name,
+        double font_size,
+        int bold,
+        int italic
+        )
+/* Starts a new run: appends <w:r>, its <w:rPr> run properties (font name,
+optional bold/italic, sizes) and the opening <w:t> tag to <content>. Caller
+must ensure that extract_docx_run_finish() was called to terminate any
+previous run.
+
+Returns 0 on success. Stops at the first append that fails and returns its
+result, so nothing further is appended after a failure. */
+{
+    char font_size_text[32];
+    int e = 0;
+
+    if (!e) e = extract_astring_cat(alloc, content, "\n<w:r><w:rPr><w:rFonts w:ascii=\"");
+    if (!e) e = extract_astring_cat(alloc, content, font_name);
+    if (!e) e = extract_astring_cat(alloc, content, "\" w:hAnsi=\"");
+    if (!e) e = extract_astring_cat(alloc, content, font_name);
+    if (!e) e = extract_astring_cat(alloc, content, "\"/>");
+    if (!e && bold) e = extract_astring_cat(alloc, content, "<w:b/>");
+    if (!e && italic) e = extract_astring_cat(alloc, content, "<w:i/>");
+
+    /* <w:sz> is written as twice the font size, formatted with "%f". */
+    if (!e) e = extract_astring_cat(alloc, content, "<w:sz w:val=\"");
+    if (!e) {
+        snprintf(font_size_text, sizeof(font_size_text), "%f", font_size * 2);
+        e = extract_astring_cat(alloc, content, font_size_text);
+    }
+    if (!e) e = extract_astring_cat(alloc, content, "\"/>");
+
+    /* <w:szCs> is written as 1.5 times the font size. */
+    if (!e) e = extract_astring_cat(alloc, content, "<w:szCs w:val=\"");
+    if (!e) {
+        snprintf(font_size_text, sizeof(font_size_text), "%f", font_size * 1.5);
+        e = extract_astring_cat(alloc, content, font_size_text);
+    }
+    if (!e) e = extract_astring_cat(alloc, content, "\"/>");
+
+    if (!e) e = extract_astring_cat(alloc, content, "</w:rPr><w:t xml:space=\"preserve\">");
+    return e;
+}
+
+static int extract_docx_run_finish(extract_alloc_t* alloc, extract_astring_t* content)
+/* Appends the closing </w:t></w:r> tags of a run to <content>. Returns
+whatever extract_astring_cat() returns. */
+{
+    static const char run_close[] = "</w:t></w:r>";
+    return extract_astring_cat(alloc, content, run_close);
+}
+
+static int extract_docx_char_append_string(extract_alloc_t* alloc, extract_astring_t* content, const char* text)
+/* Appends zero-terminated string <text> to <content>. Returns whatever
+extract_astring_cat() returns. */
+{
+    int e = extract_astring_cat(alloc, content, text);
+    return e;
+}
+
+static int extract_docx_char_append_stringf(extract_alloc_t* alloc, extract_astring_t* content, const char* format, ...)
+/* Like extract_docx_char_append_string() but takes printf-style format and
+args. The text is formatted into a temporary buffer with extract_vasprintf(),
+appended to <content>, and the buffer is then freed. Returns the negative
+result of extract_vasprintf() if formatting fails, otherwise the result of
+extract_astring_cat(). */
+{
+    char* formatted = NULL;
+    va_list args;
+    int rc;
+
+    va_start(args, format);
+    rc = extract_vasprintf(alloc, &formatted, format, args);
+    va_end(args);
+
+    if (rc >= 0) {
+        rc = extract_astring_cat(alloc, content, formatted);
+        extract_free(alloc, &formatted);
+    }
+    return rc;
+}
+
+static int extract_docx_char_append_char(extract_alloc_t* alloc, extract_astring_t* content, char c)
+/* Appends single character <c> to <content>. Returns whatever
+extract_astring_catc() returns. */
+{
+    int e = extract_astring_catc(alloc, content, c);
+    return e;
+}
+
+static int extract_docx_paragraph_empty(extract_alloc_t* alloc, extract_astring_t* content)
+/* Append an empty paragraph to *content: a paragraph holding a single run
+with no text. Returns 0 on success, -1 as soon as any step fails.
+
+It seems like our choice of font size here doesn't make any difference to the
+amount of vertical space, unless we include a non-space character (e.g.
+"&#160;", a non-break space, which we currently do not append). Presumably
+something to do with the styles in the template document. */
+{
+    if (extract_docx_paragraph_start(alloc, content)
+            || extract_docx_run_start(
+                    alloc,
+                    content,
+                    "OpenSans",
+                    10 /*font_size*/,
+                    0 /*font_bold*/,
+                    0 /*font_italic*/
+                    )
+            || extract_docx_run_finish(alloc, content)
+            || extract_docx_paragraph_finish(alloc, content)
+            ) {
+        return -1;
+    }
+    return 0;
+}
+
+
+/* Removes the last <len> chars of <content> and re-terminates the string.
+Asserts that <content> holds at least <len> chars. Always returns 0. */
+static int docx_char_truncate(extract_astring_t* content, int len)
+{
+    assert((size_t) len <= content->chars_num);
+    content->chars_num = content->chars_num - len;
+    content->chars[content->chars_num] = 0;
+    return 0;
+}
+
+static int extract_docx_char_truncate_if(extract_astring_t* content, char c)
+/* Removes the last char of <content> if there is one and it is <c>; otherwise
+leaves <content> unchanged. Always returns 0. */
+{
+    if (!content->chars_num) {
+        return 0;
+    }
+    if (content->chars[content->chars_num-1] != c) {
+        return 0;
+    }
+    docx_char_truncate(content, 1);
+    return 0;
+}
+
+
+static double matrices_to_font_size(matrix_t* ctm, matrix_t* trm)
+/* Returns the product of matrix_expansion() of <trm> and of <ctm>, rounded to
+the nearest 0.01 (by adding 0.5 to the value in hundredths and truncating to
+int). */
+{
+    double font_size = matrix_expansion(*trm)
+            * matrix_expansion(*ctm);
+    int hundredths = (int) (font_size * 100.0f + 0.5f);
+    return (double) hundredths / 100.0f;
+}
+
+typedef struct
+{
+ const char* font_name; /* Font of the currently open run; NULL if no run is open. */
+ double font_size; /* Font size of the currently open run. */
+ int font_bold; /* Non-zero if the currently open run is bold. */
+ int font_italic; /* Non-zero if the currently open run is italic. */
+ matrix_t* ctm_prev; /* ctm of the most recently written span; NULL if none yet. */
+} content_state_t;
+/* Used to keep track of font information when writing paragraphs of docx
+content, e.g. so we know whether a font has changed so need to start a new docx
+run. */
+
+
+static int extract_document_to_docx_content_paragraph(
+        extract_alloc_t* alloc,
+        content_state_t* state,
+        paragraph_t* paragraph,
+        extract_astring_t* content
+        )
+/* Append docx xml for <paragraph> to <content>. Updates *state if we change
+font; a new run is started whenever the font name, bold/italic flags or font
+size of a span differ from *state. On success any open run is closed and
+state->font_name is reset to NULL.
+
+Returns 0 on success, -1 as soon as any append fails (every append is checked,
+including those for escaped characters). */
+{
+    int e = -1;
+    int l;
+    if (extract_docx_paragraph_start(alloc, content)) goto end;
+
+    for (l=0; l<paragraph->lines_num; ++l) {
+        line_t* line = paragraph->lines[l];
+        int s;
+        for (s=0; s<line->spans_num; ++s) {
+            int si;
+            span_t* span = line->spans[s];
+            double font_size_new;
+            state->ctm_prev = &span->ctm;
+            font_size_new = matrices_to_font_size(&span->ctm, &span->trm);
+            if (!state->font_name
+                    || strcmp(span->font_name, state->font_name)
+                    || span->font_bold != state->font_bold
+                    || span->font_italic != state->font_italic
+                    || font_size_new != state->font_size
+                    ) {
+                /* Font changed (or no run is open yet): close any open run
+                and start a new one with the span's font. */
+                if (state->font_name) {
+                    if (extract_docx_run_finish(alloc, content)) goto end;
+                }
+                state->font_name = span->font_name;
+                state->font_bold = span->font_bold;
+                state->font_italic = span->font_italic;
+                state->font_size = font_size_new;
+                if (extract_docx_run_start(
+                        alloc,
+                        content,
+                        state->font_name,
+                        state->font_size,
+                        state->font_bold,
+                        state->font_italic
+                        )) goto end;
+            }
+
+            for (si=0; si<span->chars_num; ++si) {
+                int c = span->chars[si].ucs;
+                const char* replacement = NULL;
+
+                switch (c) {
+                    /* Escape XML special characters. */
+                    case '<': replacement = "&lt;"; break;
+                    case '>': replacement = "&gt;"; break;
+                    case '&': replacement = "&amp;"; break;
+                    case '"': replacement = "&quot;"; break;
+                    case '\'': replacement = "&apos;"; break;
+
+                    /* Expand ligatures. */
+                    case 0xFB00: replacement = "ff"; break;
+                    case 0xFB01: replacement = "fi"; break;
+                    case 0xFB02: replacement = "fl"; break;
+                    case 0xFB03: replacement = "ffi"; break;
+                    case 0xFB04: replacement = "ffl"; break;
+
+                    default: break;
+                }
+
+                if (replacement) {
+                    if (extract_docx_char_append_string(alloc, content, replacement)) goto end;
+                }
+                /* Output ASCII verbatim. */
+                else if (c >= 32 && c <= 127) {
+                    if (extract_docx_char_append_char(alloc, content, (char) c)) goto end;
+                }
+                /* Escape all other characters as a hex character reference. */
+                else {
+                    char buffer[32];
+                    snprintf(buffer, sizeof(buffer), "&#x%x;", c);
+                    if (extract_docx_char_append_string(alloc, content, buffer)) goto end;
+                }
+            }
+            /* Remove any trailing '-' at end of line. NOTE(review): this runs
+            after every span, not only after the last span of a line. */
+            if (extract_docx_char_truncate_if(content, '-')) goto end;
+        }
+    }
+    if (state->font_name) {
+        if (extract_docx_run_finish(alloc, content)) goto end;
+        state->font_name = NULL;
+    }
+    if (extract_docx_paragraph_finish(alloc, content)) goto end;
+
+    e = 0;
+
+    end:
+    return e;
+}
+
+static int extract_document_append_image(
+        extract_alloc_t* alloc,
+        extract_astring_t* content,
+        image_t* image
+        )
+/* Write reference to image into docx content: a paragraph containing an inline
+drawing whose <a:blip r:embed="..."> refers to image->id. The extent and all
+other attributes are fixed values.
+
+Returns 0 on success, -1 if any append fails; nothing further is appended
+after a failure. */
+{
+    /* Everything before the <a:blip> element. */
+    static const char before_blip[] =
+            "\n"
+            " <w:p>\n"
+            " <w:r>\n"
+            " <w:rPr>\n"
+            " <w:noProof/>\n"
+            " </w:rPr>\n"
+            " <w:drawing>\n"
+            " <wp:inline distT=\"0\" distB=\"0\" distL=\"0\" distR=\"0\" wp14:anchorId=\"7057A832\" wp14:editId=\"466EB3FB\">\n"
+            " <wp:extent cx=\"2933700\" cy=\"2200275\"/>\n"
+            " <wp:effectExtent l=\"0\" t=\"0\" r=\"0\" b=\"9525\"/>\n"
+            " <wp:docPr id=\"1\" name=\"Picture 1\"/>\n"
+            " <wp:cNvGraphicFramePr>\n"
+            " <a:graphicFrameLocks xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\" noChangeAspect=\"1\"/>\n"
+            " </wp:cNvGraphicFramePr>\n"
+            " <a:graphic xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\">\n"
+            " <a:graphicData uri=\"http://schemas.openxmlformats.org/drawingml/2006/picture\">\n"
+            " <pic:pic xmlns:pic=\"http://schemas.openxmlformats.org/drawingml/2006/picture\">\n"
+            " <pic:nvPicPr>\n"
+            " <pic:cNvPr id=\"1\" name=\"Picture 1\"/>\n"
+            " <pic:cNvPicPr>\n"
+            " <a:picLocks noChangeAspect=\"1\" noChangeArrowheads=\"1\"/>\n"
+            " </pic:cNvPicPr>\n"
+            " </pic:nvPicPr>\n"
+            " <pic:blipFill>\n"
+            ;
+    /* Everything after the opening <a:blip> tag. */
+    static const char after_blip[] =
+            " <a:extLst>\n"
+            " <a:ext uri=\"{28A0092B-C50C-407E-A947-70E740481C1C}\">\n"
+            " <a14:useLocalDpi xmlns:a14=\"http://schemas.microsoft.com/office/drawing/2010/main\" val=\"0\"/>\n"
+            " </a:ext>\n"
+            " </a:extLst>\n"
+            " </a:blip>\n"
+            " <a:stretch>\n"
+            " <a:fillRect/>\n"
+            " </a:stretch>\n"
+            " </pic:blipFill>\n"
+            " <pic:spPr bwMode=\"auto\">\n"
+            " <a:xfrm>\n"
+            " <a:off x=\"0\" y=\"0\"/>\n"
+            " <a:ext cx=\"2933700\" cy=\"2200275\"/>\n"
+            " </a:xfrm>\n"
+            " <a:prstGeom prst=\"rect\">\n"
+            " <a:avLst/>\n"
+            " </a:prstGeom>\n"
+            " <a:noFill/>\n"
+            " <a:ln>\n"
+            " <a:noFill/>\n"
+            " </a:ln>\n"
+            " </pic:spPr>\n"
+            " </pic:pic>\n"
+            " </a:graphicData>\n"
+            " </a:graphic>\n"
+            " </wp:inline>\n"
+            " </w:drawing>\n"
+            " </w:r>\n"
+            " </w:p>\n"
+            "\n"
+            ;
+
+    if (extract_docx_char_append_string(alloc, content, before_blip)) return -1;
+    if (extract_docx_char_append_stringf(alloc, content, " <a:blip r:embed=\"%s\">\n", image->id)) return -1;
+    if (extract_docx_char_append_string(alloc, content, after_blip)) return -1;
+    return 0;
+}
+
+
+static int extract_document_output_rotated_paragraphs(
+        extract_alloc_t* alloc,
+        page_t* page,
+        int paragraph_begin,
+        int paragraph_end,
+        int rot,
+        int x,
+        int y,
+        int w,
+        int h,
+        int text_box_id,
+        extract_astring_t* content,
+        content_state_t* state
+        )
+/* Writes paragraphs paragraph_begin..paragraph_end-1 of <page> to <content>
+inside a rotated text box.
+
+rot is written as the rot attribute of <a:xfrm>; x and y as the <wp:posOffset>
+values relative to the page; w and h as the <wp:extent>; text_box_id is used
+for the id and name of the box. The paragraphs are written twice: once inside
+<mc:Choice> and once inside the <mc:Fallback> copy.
+
+Returns 0 on success, -1 if any append fails. */
+{
+    int e = -1;
+    int p;
+    outf("x,y=%ik,%ik = %i,%i", x/1000, y/1000, x, y);
+    if (extract_docx_char_append_string(alloc, content,
+            "\n"
+            "\n"
+            "<w:p>\n"
+            " <w:r>\n"
+            " <mc:AlternateContent>\n"
+            " <mc:Choice Requires=\"wps\">\n"
+            " <w:drawing>\n"
+            " <wp:anchor distT=\"0\" distB=\"0\" distL=\"0\" distR=\"0\" simplePos=\"0\" relativeHeight=\"0\" behindDoc=\"0\" locked=\"0\" layoutInCell=\"1\" allowOverlap=\"1\" wp14:anchorId=\"53A210D1\" wp14:editId=\"2B7E8016\">\n"
+            " <wp:simplePos x=\"0\" y=\"0\"/>\n"
+            " <wp:positionH relativeFrom=\"page\">\n"
+            )) goto end;
+    if (extract_docx_char_append_stringf(alloc, content, " <wp:posOffset>%i</wp:posOffset>\n", x)) goto end;
+    if (extract_docx_char_append_string(alloc, content,
+            " </wp:positionH>\n"
+            " <wp:positionV relativeFrom=\"page\">\n"
+            )) goto end;
+    if (extract_docx_char_append_stringf(alloc, content, " <wp:posOffset>%i</wp:posOffset>\n", y)) goto end;
+    if (extract_docx_char_append_string(alloc, content, " </wp:positionV>\n")) goto end;
+    if (extract_docx_char_append_stringf(alloc, content, " <wp:extent cx=\"%i\" cy=\"%i\"/>\n", w, h)) goto end;
+    if (extract_docx_char_append_string(alloc, content,
+            " <wp:effectExtent l=\"381000\" t=\"723900\" r=\"371475\" b=\"723900\"/>\n"
+            " <wp:wrapNone/>\n"
+            )) goto end;
+    if (extract_docx_char_append_stringf(alloc, content, " <wp:docPr id=\"%i\" name=\"Text Box %i\"/>\n", text_box_id, text_box_id)) goto end;
+    if (extract_docx_char_append_string(alloc, content,
+            " <wp:cNvGraphicFramePr/>\n"
+            " <a:graphic xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\">\n"
+            " <a:graphicData uri=\"http://schemas.microsoft.com/office/word/2010/wordprocessingShape\">\n"
+            " <wps:wsp>\n"
+            " <wps:cNvSpPr txBox=\"1\"/>\n"
+            " <wps:spPr>\n"
+            )) goto end;
+    if (extract_docx_char_append_stringf(alloc, content, " <a:xfrm rot=\"%i\">\n", rot)) goto end;
+    /* Note that the <a:ext> size inside <a:xfrm> is a fixed value, not <w>
+    and <h>. */
+    if (extract_docx_char_append_string(alloc, content,
+            " <a:off x=\"0\" y=\"0\"/>\n"
+            " <a:ext cx=\"3228975\" cy=\"2286000\"/>\n"
+            " </a:xfrm>\n"
+            " <a:prstGeom prst=\"rect\">\n"
+            " <a:avLst/>\n"
+            " </a:prstGeom>\n"
+            )) goto end;
+
+    /* Give box a solid background. */
+    if (0) {
+        if (extract_docx_char_append_string(alloc, content,
+                " <a:solidFill>\n"
+                " <a:schemeClr val=\"lt1\"/>\n"
+                " </a:solidFill>\n"
+                )) goto end;
+    }
+
+    /* Draw line around box. */
+    if (0) {
+        if (extract_docx_char_append_string(alloc, content,
+                " <a:ln w=\"175\">\n"
+                " <a:solidFill>\n"
+                " <a:prstClr val=\"black\"/>\n"
+                " </a:solidFill>\n"
+                " </a:ln>\n"
+                )) goto end;
+    }
+
+    if (extract_docx_char_append_string(alloc, content,
+            " </wps:spPr>\n"
+            " <wps:txbx>\n"
+            " <w:txbxContent>"
+            )) goto end;
+
+    /* Output paragraphs paragraph_begin..paragraph_end-1. */
+    for (p=paragraph_begin; p<paragraph_end; ++p) {
+        paragraph_t* paragraph = page->paragraphs[p];
+        if (extract_document_to_docx_content_paragraph(alloc, state, paragraph, content)) goto end;
+    }
+
+    if (extract_docx_char_append_string(alloc, content,
+            "\n"
+            " </w:txbxContent>\n"
+            " </wps:txbx>\n"
+            " <wps:bodyPr rot=\"0\" spcFirstLastPara=\"0\" vertOverflow=\"overflow\" horzOverflow=\"overflow\" vert=\"horz\" wrap=\"square\" lIns=\"91440\" tIns=\"45720\" rIns=\"91440\" bIns=\"45720\" numCol=\"1\" spcCol=\"0\" rtlCol=\"0\" fromWordArt=\"0\" anchor=\"t\" anchorCtr=\"0\" forceAA=\"0\" compatLnSpc=\"1\">\n"
+            " <a:prstTxWarp prst=\"textNoShape\">\n"
+            " <a:avLst/>\n"
+            " </a:prstTxWarp>\n"
+            " <a:noAutofit/>\n"
+            " </wps:bodyPr>\n"
+            " </wps:wsp>\n"
+            " </a:graphicData>\n"
+            " </a:graphic>\n"
+            " </wp:anchor>\n"
+            " </w:drawing>\n"
+            " </mc:Choice>\n"
+            )) goto end;
+
+    /* This fallback is copied from a real Word document. Not sure
+    whether it works - both Libreoffice and Word use the above
+    choice. */
+    if (extract_docx_char_append_string(alloc, content,
+            " <mc:Fallback>\n"
+            " <w:pict>\n"
+            " <v:shapetype w14:anchorId=\"53A210D1\" id=\"_x0000_t202\" coordsize=\"21600,21600\" o:spt=\"202\" path=\"m,l,21600r21600,l21600,xe\">\n"
+            " <v:stroke joinstyle=\"miter\"/>\n"
+            " <v:path gradientshapeok=\"t\" o:connecttype=\"rect\"/>\n"
+            " </v:shapetype>\n"
+            )) goto end;
+    if (extract_docx_char_append_stringf(alloc, content, " <v:shape id=\"Text Box %i\" o:spid=\"_x0000_s1026\" type=\"#_x0000_t202\" style=\"position:absolute;margin-left:71.25pt;margin-top:48.75pt;width:254.25pt;height:180pt;rotation:-2241476fd;z-index:251659264;visibility:visible;mso-wrap-style:square;mso-wrap-distance-left:9pt;mso-wrap-distance-top:0;mso-wrap-distance-right:9pt;mso-wrap-distance-bottom:0;mso-position-horizontal:absolute;mso-position-horizontal-relative:text;mso-position-vertical:absolute;mso-position-vertical-relative:text;v-text-anchor:top\" o:gfxdata=\"UEsDBBQABgAIAAAAIQC2gziS/gAAAOEBAAATAAAAW0NvbnRlbnRfVHlwZXNdLnhtbJSRQU7DMBBF&#10;90jcwfIWJU67QAgl6YK0S0CoHGBkTxKLZGx5TGhvj5O2G0SRWNoz/78nu9wcxkFMGNg6quQqL6RA&#10;0s5Y6ir5vt9lD1JwBDIwOMJKHpHlpr69KfdHjyxSmriSfYz+USnWPY7AufNIadK6MEJMx9ApD/oD&#10;OlTrorhX2lFEilmcO2RdNtjC5xDF9pCuTyYBB5bi6bQ4syoJ3g9WQ0ymaiLzg5KdCXlKLjvcW893&#10;SUOqXwnz5DrgnHtJTxOsQfEKIT7DmDSUCaxw7Rqn8787ZsmRM9e2VmPeBN4uqYvTtW7jvijg9N/y&#10;JsXecLq0q+WD6m8AAAD//wMAUEsDBBQABgAIAAAAIQA4/SH/1gAAAJQBAAALAAAAX3JlbHMvLnJl&#10;bHOkkMFqwzAMhu+DvYPRfXGawxijTi+j0GvpHsDYimMaW0Yy2fr2M4PBMnrbUb/Q94l/f/hMi1qR&#10;JVI2sOt6UJgd+ZiDgffL8ekFlFSbvV0oo4EbChzGx4f9GRdb25HMsYhqlCwG5lrLq9biZkxWOiqY&#10;22YiTra2kYMu1l1tQD30/bPm3wwYN0x18gb45AdQl1tp5j/sFB2T0FQ7R0nTNEV3j6o9feQzro1i&#10;OWA14Fm+Q8a1a8+Bvu/d/dMb2JY5uiPbhG/ktn4cqGU/er3pcvwCAAD//wMAUEsDBBQABgAIAAAA&#10;IQDQg5pQVgIAALEEAAAOAAAAZHJzL2Uyb0RvYy54bWysVE1v2zAMvQ/YfxB0X+2k+WiDOEXWosOA&#10;oi3QDj0rstwYk0VNUmJ3v35PipMl3U7DLgJFPj+Rj6TnV12j2VY5X5Mp+OAs50wZSWVtXgv+7fn2&#10;0wVnPghTCk1GFfxNeX61+Phh3tqZGtKadKkcA4nxs9YWfB2CnWWZl2vVCH9GVhkEK3KNCLi616x0&#10;ogV7o7Nhnk+yllxpHUnlPbw3uyBfJP6qUjI8VJVXgemCI7eQTpfOVTyzxVzMXp2w61r2aYh/yKIR&#10;tcGjB6obEQTbuPoPqqaWjjxV4UxSk1FV1VKlGlDNIH9XzdNaWJVqgTjeHmTy/49W3m8fHatL9I4z&#10;Ixq06Fl1gX2mjg2iOq31M4CeLGChgzsie7+HMxbdVa5hjiDu4HI8ml5MpkkLVMcAh+xvB6kjt4Tz&#10;fDi8uJyOOZOIwZ7keWpGtmOLrNb58EVRw6JRcIdeJlqxvfMBGQC6h0S4J12Xt7XW6RLnR11rx7YC&#10;ndch5YwvTlDasLbgk/NxnohPYpH68P1KC/k9Vn3KgJs2cEaNdlpEK3SrrhdoReUbdEvSQAZv5W0N&#10;3jvhw6NwGDQ4sTzhAUelCclQb3G2Jvfzb/6IR/8R5azF4Bbc/9gIpzjTXw0m43IwGsVJT5fReDrE&#10;xR1HVscRs2muCQqh+8gumREf9N6sHDUv2LFlfBUhYSTeLnjYm9dht07YUamWywTCbFsR7syTlZF6&#10;383n7kU42/czYBTuaT/iYvaurTts/NLQchOoqlPPo8A7VXvdsRepLf0Ox8U7vifU7z/N4hcAAAD/&#10;/wMAUEsDBBQABgAIAAAAIQBh17L63wAAAAoBAAAPAAAAZHJzL2Rvd25yZXYueG1sTI9BT4NAEIXv&#10;Jv6HzZh4s0ubgpayNIboSW3Syg9Y2BGI7CyyS0v99Y4nPU3ezMub72W72fbihKPvHClYLiIQSLUz&#10;HTUKyvfnuwcQPmgyuneECi7oYZdfX2U6Ne5MBzwdQyM4hHyqFbQhDKmUvm7Rar9wAxLfPtxodWA5&#10;NtKM+szhtperKEqk1R3xh1YPWLRYfx4nq8APVfz9VQxPb+WUNC+vZbGPDhelbm/mxy2IgHP4M8Mv&#10;PqNDzkyVm8h40bNer2K2Ktjc82RDEi+5XKVgHfNG5pn8XyH/AQAA//8DAFBLAQItABQABgAIAAAA&#10;IQC2gziS/gAAAOEBAAATAAAAAAAAAAAAAAAAAAAAAABbQ29udGVudF9UeXBlc10ueG1sUEsBAi0A&#10;FAAGAAgAAAAhADj9If/WAAAAlAEAAAsAAAAAAAAAAAAAAAAALwEAAF9yZWxzLy5yZWxzUEsBAi0A&#10;FAAGAAgAAAAhANCDmlBWAgAAsQQAAA4AAAAAAAAAAAAAAAAALgIAAGRycy9lMm9Eb2MueG1sUEsB&#10;Ai0AFAAGAAgAAAAhAGHXsvrfAAAACgEAAA8AAAAAAAAAAAAAAAAAsAQAAGRycy9kb3ducmV2Lnht&#10;bFBLBQYAAAAABAAEAPMAAAC8BQAAAAA=&#10;\" fillcolor=\"white [3201]\" strokeweight=\".5pt\">\n", text_box_id)) goto end;
+    if (extract_docx_char_append_string(alloc, content,
+            " <v:textbox>\n"
+            " <w:txbxContent>"
+            )) goto end;
+
+    /* Output the same paragraphs again for the fallback. */
+    for (p=paragraph_begin; p<paragraph_end; ++p) {
+        paragraph_t* paragraph = page->paragraphs[p];
+        if (extract_document_to_docx_content_paragraph(alloc, state, paragraph, content)) goto end;
+    }
+
+    if (extract_docx_char_append_string(alloc, content,
+            "\n"
+            "\n"
+            " </w:txbxContent>\n"
+            " </v:textbox>\n"
+            " </v:shape>\n"
+            " </w:pict>\n"
+            " </mc:Fallback>\n"
+            " </mc:AlternateContent>\n"
+            " </w:r>\n"
+            "</w:p>"
+            )) goto end;
+    e = 0;
+    end:
+    return e;
+}
+
+
+int extract_document_to_docx_content(
+        extract_alloc_t* alloc,
+        document_t* document,
+        int spacing,
+        int rotation,
+        int images,
+        extract_astring_t* content
+        )
+/* Appends docx xml for all pages of <document> to <content>.
+
+spacing: if non-zero, an empty paragraph is written before each paragraph,
+plus another one if the paragraph's ctm differs (according to matrix_cmp4())
+from the ctm of the previously written span.
+rotation: if non-zero, each run of consecutive paragraphs with the same
+non-zero rotation is written into a single rotated text box.
+images: if non-zero, each page's images are written after its paragraphs.
+
+A paragraph with no lines, or whose first line has no spans, is treated as
+unrotated.
+
+Returns 0 on success, -1 if any step fails. */
+{
+    int ret = -1;
+    int text_box_id = 0;
+    int page_i;
+
+    /* Write paragraphs into <content>. */
+    for (page_i=0; page_i<document->pages_num; ++page_i) {
+        page_t* page = document->pages[page_i];
+        int p;
+        content_state_t state;
+        state.font_name = NULL;
+        state.font_size = 0;
+        state.font_bold = 0;
+        state.font_italic = 0;
+        state.ctm_prev = NULL;
+
+        for (p=0; p<page->paragraphs_num; ++p) {
+            paragraph_t* paragraph = page->paragraphs[p];
+            const matrix_t* ctm = NULL;
+            double rotate = 0;
+
+            /* Only look at the first span's ctm if there is a first span. */
+            if (paragraph->lines_num && paragraph->lines[0]->spans_num) {
+                ctm = &paragraph->lines[0]->spans[0]->ctm;
+                rotate = atan2(ctm->b, ctm->a);
+            }
+
+            if (spacing
+                    && state.ctm_prev
+                    && ctm
+                    && matrix_cmp4(
+                            state.ctm_prev,
+                            &paragraph->lines[0]->spans[0]->ctm
+                            )
+                    ) {
+                /* Extra vertical space between paragraphs that were at
+                different angles in the original document. */
+                if (extract_docx_paragraph_empty(alloc, content)) goto end;
+            }
+
+            if (spacing) {
+                /* Extra vertical space between paragraphs. */
+                if (extract_docx_paragraph_empty(alloc, content)) goto end;
+            }
+
+            if (rotation && rotate != 0) {
+
+                /* Find extent of paragraphs with this same rotation. extent
+                will contain max width and max height of paragraphs, in units
+                before application of ctm, i.e. before rotation. */
+                point_t extent = {0, 0};
+                int p0 = p;
+                int p1;
+
+                outf("rotate=%.2frad=%.1fdeg ctm: ef=(%f %f) abcd=(%f %f %f %f)",
+                        rotate, rotate * 180 / pi,
+                        ctm->e,
+                        ctm->f,
+                        ctm->a,
+                        ctm->b,
+                        ctm->c,
+                        ctm->d
+                        );
+
+                {
+                    /* We assume that first span is at origin of text
+                    block. This assumes left-to-right text. */
+                    double rotate0 = rotate;
+                    const matrix_t* ctm0 = ctm;
+                    point_t origin = {
+                            paragraph->lines[0]->spans[0]->chars[0].x,
+                            paragraph->lines[0]->spans[0]->chars[0].y
+                            };
+                    matrix_t ctm_inverse = {1, 0, 0, 1, 0, 0};
+                    double ctm_det = ctm->a*ctm->d - ctm->b*ctm->c;
+                    if (ctm_det != 0) {
+                        ctm_inverse.a = +ctm->d / ctm_det;
+                        ctm_inverse.b = -ctm->b / ctm_det;
+                        ctm_inverse.c = -ctm->c / ctm_det;
+                        ctm_inverse.d = +ctm->a / ctm_det;
+                    }
+                    else {
+                        outf("cannot invert ctm=(%f %f %f %f)",
+                                ctm->a, ctm->b, ctm->c, ctm->d);
+                    }
+
+                    for (p=p0; p<page->paragraphs_num; ++p) {
+                        paragraph = page->paragraphs[p];
+                        /* A paragraph without a first span has no rotation,
+                        so it ends the group. */
+                        if (!paragraph->lines_num || !paragraph->lines[0]->spans_num) {
+                            break;
+                        }
+                        ctm = &paragraph->lines[0]->spans[0]->ctm;
+                        rotate = atan2(ctm->b, ctm->a);
+                        if (rotate != rotate0) {
+                            break;
+                        }
+
+                        /* Update <extent>. */
+                        {
+                            int l;
+                            for (l=0; l<paragraph->lines_num; ++l) {
+                                line_t* line = paragraph->lines[l];
+                                span_t* span = line_span_last(line);
+                                char_t* char_ = span_char_last(span);
+                                double adv = char_->adv * matrix_expansion(span->trm);
+                                double x = char_->x + adv * cos(rotate);
+                                double y = char_->y + adv * sin(rotate);
+
+                                double dx = x - origin.x;
+                                double dy = y - origin.y;
+
+                                /* Position relative to origin and before box rotation. */
+                                double xx = ctm_inverse.a * dx + ctm_inverse.b * dy;
+                                double yy = ctm_inverse.c * dx + ctm_inverse.d * dy;
+                                yy = -yy;
+                                if (xx > extent.x) extent.x = xx;
+                                if (yy > extent.y) extent.y = yy;
+                                if (0) outf("rotate=%f p=%i: origin=(%f %f) xy=(%f %f) dxy=(%f %f) xxyy=(%f %f) span: %s",
+                                        rotate, p, origin.x, origin.y, x, y, dx, dy, xx, yy, span_string(alloc, span));
+                            }
+                        }
+                    }
+                    p1 = p;
+                    rotate = rotate0;
+                    ctm = ctm0;
+                    outf("rotate=%f p0=%i p1=%i. extent is: (%f %f)",
+                            rotate, p0, p1, extent.x, extent.y);
+                }
+
+                /* Paragraphs p0..p1-1 have same rotation. We output them into
+                a single rotated text box. */
+
+                /* We need unique id for text box. */
+                text_box_id += 1;
+
+                {
+                    /* Angles are in units of 1/60,000 degree. */
+                    int rot = (int) (rotate * 180 / pi * 60000);
+
+                    /* <wp:anchor distT=\.. etc are in EMU - 1/360,000 of a cm.
+                    relativeHeight is z-ordering. (wp:positionV:wp:posOffset,
+                    wp:positionV:wp:posOffset) is position of origin of box in
+                    EMU.
+
+                    The box rotates about its centre but we want to rotate
+                    about the origin (top-left). So we correct the position of
+                    box by subtracting the vector that the top-left moves when
+                    rotated by angle <rotate> about the middle. */
+                    double point_to_emu = 12700; /* https://en.wikipedia.org/wiki/Office_Open_XML_file_formats#DrawingML */
+                    int x = (int) (ctm->e * point_to_emu);
+                    int y = (int) (ctm->f * point_to_emu);
+                    int w = (int) (extent.x * point_to_emu);
+                    int h = (int) (extent.y * point_to_emu);
+                    int dx;
+                    int dy;
+
+                    if (0) outf("rotate: %f rad, %f deg. rot=%i", rotate, rotate*180/pi, rot);
+
+                    h *= 2;
+                    /* We can't predict how much space Word will actually
+                    require for the rotated text, so make the box have the
+                    original width but allow text to take extra vertical
+                    space. There doesn't seem to be a way to make the text box
+                    auto-grow to contain the text. */
+
+                    dx = (int) ((1-cos(rotate)) * w / 2.0 + sin(rotate) * h / 2.0);
+                    dy = (int) ((cos(rotate)-1) * h / 2.0 + sin(rotate) * w / 2.0);
+                    outf("ctm->e,f=%f,%f rotate=%f => x,y=%ik %ik dx,dy=%ik %ik",
+                            ctm->e,
+                            ctm->f,
+                            rotate * 180/pi,
+                            x/1000,
+                            y/1000,
+                            dx/1000,
+                            dy/1000
+                            );
+                    x -= dx;
+                    y -= -dy;
+
+                    if (extract_document_output_rotated_paragraphs(alloc, page, p0, p1, rot, x, y, w, h, text_box_id, content, &state)) goto end;
+                }
+                /* Continue after the group; the loop's ++p moves on to p1. */
+                p = p1 - 1;
+            }
+            else {
+                if (extract_document_to_docx_content_paragraph(alloc, &state, paragraph, content)) goto end;
+            }
+
+        }
+
+        if (images) {
+            int i;
+            for (i=0; i<page->images_num; ++i) {
+                if (extract_document_append_image(alloc, content, &page->images[i])) goto end;
+            }
+        }
+    }
+    ret = 0;
+
+    end:
+
+    return ret;
+}
+
+
+
+static int systemf(extract_alloc_t* alloc, const char* format, ...)
+/* Like system() but the command is built from printf-style format and args.
+Returns the negative result of extract_vasprintf() if formatting fails,
+otherwise the result of system(); if that is +ve, errno is also set to EIO. */
+{
+    char* command;
+    va_list args;
+    int rc;
+
+    va_start(args, format);
+    rc = extract_vasprintf(alloc, &command, format, args);
+    va_end(args);
+    if (rc < 0) {
+        return rc;
+    }
+
+    outf("running: %s", command);
+    rc = system(command);
+    extract_free(alloc, &command);
+    if (rc > 0) {
+        errno = EIO;
+    }
+    return rc;
+}
+
+static int read_all(extract_alloc_t* alloc, FILE* in, char** o_out)
+/* Reads from <in> until eof into buffer *o_out, growing it with
+extract_realloc2() by a fixed-size chunk before each read, and zero-terminates
+it. The incoming value of *o_out is passed to the first extract_realloc2()
+call with an old size of zero.
+
+Returns 0 on success. On allocation failure or read error, frees *o_out and
+returns -1; errno is set to EIO for a read error. */
+{
+    const size_t chunk = 128;
+    size_t used = 0;
+
+    while (!extract_realloc2(alloc, o_out, used, used + chunk + 1)) {
+        used += fread(*o_out + used, 1 /*size*/, chunk /*nmemb*/, in);
+        if (feof(in)) {
+            (*o_out)[used] = 0;
+            return 0;
+        }
+        if (ferror(in)) {
+            /* It's weird that fread() and ferror() don't set errno. */
+            errno = EIO;
+            extract_free(alloc, o_out);
+            return -1;
+        }
+    }
+
+    /* Could not grow the buffer. */
+    extract_free(alloc, o_out);
+    return -1;
+}
+
+static int read_all_path(extract_alloc_t* alloc, const char* path, char** o_text)
+/* Reads entire file <path> (opened in binary mode) into zero-terminated
+buffer *o_text, using read_all().
+
+*o_text must be NULL or a buffer owned by <alloc> on entry, because on failure
+it is passed to extract_free().
+
+Returns 0 on success, -1 if the file cannot be opened or read. */
+{
+    int e = -1;
+    FILE* f = NULL;
+    f = fopen(path, "rb");
+    if (!f) goto end;
+    if (read_all(alloc, f, o_text)) goto end;
+    e = 0;
+    end:
+    if (f) fclose(f);
+    /* Free the caller's buffer; passing &o_text here would instead try to
+    free the caller's pointer variable. */
+    if (e) extract_free(alloc, o_text);
+    return e;
+}
+
+static int write_all(const void* data, size_t data_size, const char* path)
+/* Writes <data_size> bytes at <data> to file <path>, replacing any existing
+content. The file is opened in binary mode so the bytes are written unchanged.
+An empty file is created if <data_size> is zero.
+
+Returns 0 on success, -1 if the file cannot be opened, written or closed. */
+{
+    int e = -1;
+    FILE* f = fopen(path, "wb");
+    if (!f) goto end;
+    /* fwrite() returns 0 when asked to write nothing, so skip it for empty
+    data rather than treating that as a failure. */
+    if (data_size && fwrite(data, 1 /*size*/, data_size /*nmemb*/, f) != data_size) goto end;
+    e = 0;
+    end:
+    /* Buffered data is flushed by fclose(), so its failure is a write failure. */
+    if (f && fclose(f)) e = -1;
+    return e;
+}
+
+static int extract_docx_content_insert(
+        extract_alloc_t* alloc,
+        const char* original,
+        const char* mid_begin_name,
+        const char* mid_end_name,
+        extract_astring_t* contentss,
+        int contentss_num,
+        char** o_out
+        )
+/* Creates a string consisting of <original> up to and including the first
+<mid_begin_name>, then all strings in <contentss>, then <original> from the
+following <mid_end_name> onwards, and sets *o_out to this new string. Any text
+that <original> had between the two markers is not copied.
+
+Returns 0 on success. On failure returns -1 and sets *o_out to NULL; errno is
+set to ESRCH if either marker cannot be found. */
+{
+    int e = -1;
+    int i;
+    const char* insert_at;
+    const char* resume_at;
+    extract_astring_t joined;
+    extract_astring_init(&joined);
+
+    /* Locate the end of the begin-marker. */
+    insert_at = strstr(original, mid_begin_name);
+    if (insert_at) {
+        insert_at += strlen(mid_begin_name);
+    }
+    else {
+        outf("error: could not find '%s' in docx content",
+                mid_begin_name);
+        errno = ESRCH;
+        goto end;
+    }
+
+    /* Locate the start of the end-marker that follows it. */
+    resume_at = strstr(insert_at, mid_end_name);
+    if (!resume_at) {
+        outf("error: could not find '%s' in docx content",
+                mid_end_name);
+        errno = ESRCH;
+        goto end;
+    }
+
+    if (extract_astring_catl(alloc, &joined, original, insert_at - original)) goto end;
+    for (i=0; i<contentss_num; ++i) {
+        if (extract_astring_catl(alloc, &joined, contentss[i].chars, contentss[i].chars_num)) goto end;
+    }
+    if (extract_astring_cat(alloc, &joined, resume_at)) goto end;
+
+    /* Hand the buffer over to the caller. */
+    *o_out = joined.chars;
+    joined.chars = NULL;
+    e = 0;
+
+    end:
+    if (e) {
+        extract_astring_free(alloc, &joined);
+        *o_out = NULL;
+    }
+    return e;
+}
+
+static int s_find_mid(const char* text, const char* begin, const char* end, const char** o_begin, const char** o_end)
+/* Sets *o_begin to just past the first occurrence of <begin> in <text>, and
+*o_end to the start of the first occurrence of <end> at or after *o_begin.
+
+Returns 0 if both were found, otherwise -1 with errno=ESRCH. */
+{
+    const char* found = strstr(text, begin);
+    *o_begin = found;
+    if (found) {
+        found += strlen(begin);
+        *o_begin = found;
+        *o_end = strstr(found, end);
+        if (*o_end) {
+            return 0;
+        }
+    }
+    errno = ESRCH;
+    return -1;
+}
+
+
+int extract_docx_content_item(
+        extract_alloc_t* alloc,
+        extract_astring_t* contentss,
+        int contentss_num,
+        images_t* images,
+        const char* name,
+        const char* text,
+        char** text2
+        )
+/* Works out the replacement text, if any, for docx archive member <name>
+whose template content is <text>.
+
+[Content_Types].xml: a <Default .../> entry is added for each image type.
+word/_rels/document.xml.rels: a <Relationship .../> entry is added for each
+image, just before </Relationships>.
+word/document.xml: <contentss> replaces what is inside <w:body>.
+
+For any other <name>, *text2 is left as NULL. Returns 0 on success, otherwise
+-1 with *text2 released. */
+{
+    int e = -1;
+    extract_astring_t temp;
+    extract_astring_init(&temp);
+    *text2 = NULL;
+
+    if (!strcmp(name, "[Content_Types].xml")) {
+        /* Declare every image type that we are going to use. */
+        const char* types_begin;
+        const char* types_end;
+        const char* tag_close;
+        int t;
+        extract_astring_free(alloc, &temp);
+        outf("text: %s", text);
+        if (s_find_mid(text, "<Types ", "</Types>", &types_begin, &types_end)) goto end;
+
+        /* New entries go directly after the first '>' following "<Types ". */
+        tag_close = strchr(types_begin, '>');
+        assert(tag_close);
+        tag_close += 1;
+
+        if (extract_astring_catl(alloc, &temp, text, tag_close - text)) goto end;
+        outf("images->imagetypes_num=%i", images->imagetypes_num);
+        for (t = 0; t < images->imagetypes_num; ++t) {
+            const char* type = images->imagetypes[t];
+            if (extract_astring_cat(alloc, &temp, "<Default Extension=\"")) goto end;
+            if (extract_astring_cat(alloc, &temp, type)) goto end;
+            if (extract_astring_cat(alloc, &temp, "\" ContentType=\"image/")) goto end;
+            if (extract_astring_cat(alloc, &temp, type)) goto end;
+            if (extract_astring_cat(alloc, &temp, "\"/>")) goto end;
+        }
+        if (extract_astring_cat(alloc, &temp, tag_close)) goto end;
+        /* Ownership of the buffer moves to the caller. */
+        *text2 = temp.chars;
+        extract_astring_init(&temp);
+    }
+    else if (!strcmp(name, "word/_rels/document.xml.rels")) {
+        /* Map image ids to image names within the docx archive. */
+        const char* rels_begin;
+        const char* rels_end;
+        int m;
+        extract_astring_free(alloc, &temp);
+        if (s_find_mid(text, "<Relationships", "</Relationships>", &rels_begin, &rels_end)) goto end;
+        if (extract_astring_catl(alloc, &temp, text, rels_end - text)) goto end;
+        outf("images.images_num=%i", images->images_num);
+        for (m = 0; m < images->images_num; ++m) {
+            image_t* img = &images->images[m];
+            if (extract_astring_cat(alloc, &temp, "<Relationship Id=\"")) goto end;
+            if (extract_astring_cat(alloc, &temp, img->id)) goto end;
+            if (extract_astring_cat(alloc, &temp, "\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/image\" Target=\"media/")) goto end;
+            if (extract_astring_cat(alloc, &temp, img->name)) goto end;
+            if (extract_astring_cat(alloc, &temp, "\"/>")) goto end;
+        }
+        if (extract_astring_cat(alloc, &temp, rels_end)) goto end;
+        /* Ownership of the buffer moves to the caller. */
+        *text2 = temp.chars;
+        extract_astring_init(&temp);
+    }
+    else if (!strcmp(name, "word/document.xml")) {
+        /* Insert paragraphs content. */
+        if (extract_docx_content_insert(
+                alloc,
+                text,
+                "<w:body>",
+                "</w:body>",
+                contentss,
+                contentss_num,
+                text2
+                )) goto end;
+    }
+    /* Otherwise: nothing to change, *text2 is already NULL. */
+
+    e = 0;
+    end:
+    if (e) {
+        /* We might have set <text2> to new content. */
+        extract_free(alloc, text2);
+        /* We might have used <temp> as a temporary buffer. */
+        extract_astring_free(alloc, &temp);
+    }
+    extract_astring_init(&temp);
+    return e;
+}
+
+
+
+static int check_path_shell_safe(const char* path)
+/* Returns 0 if <path> is acceptable for use inside our shell commands.
+Returns -1 with errno=EINVAL if <path> contains "..", a single quote, a
+double quote or a space. */
+{
+    int has_dotdot = (strstr(path, "..") != NULL);
+    int has_quote_or_space = (strpbrk(path, "'\" ") != NULL);
+    if (!has_dotdot && !has_quote_or_space) {
+        return 0;
+    }
+    errno = EINVAL;
+    return -1;
+}
+
+static int remove_directory(extract_alloc_t* alloc, const char* path)
+/* Recursively deletes directory <path> by running 'rm -r' through systemf().
+Paths rejected by check_path_shell_safe() are refused with -1; otherwise the
+result of systemf() is returned. */
+{
+    int unsafe = check_path_shell_safe(path);
+    if (!unsafe) {
+        return systemf(alloc, "rm -r '%s'", path);
+    }
+    outf("path_out is unsafe: %s", path);
+    return -1;
+}
+
+#ifdef _WIN32
+#include <direct.h>
+#endif
+
+static int s_mkdir(const char* path, int mode)
+/* Portable wrapper for creating directory <path>. <mode> is ignored on
+Windows, where _mkdir() takes no mode argument. */
+{
+#ifdef _WIN32
+    (void) mode;
+    return _mkdir(path);
+#else
+    return mkdir(path, mode);
+#endif
+}
+
+
+int extract_docx_write_template(
+        extract_alloc_t* alloc,
+        extract_astring_t* contentss,
+        int contentss_num,
+        images_t* images,
+        const char* path_template,
+        const char* path_out,
+        int preserve_dir
+        )
+/* Creates docx file <path_out> from template docx file <path_template>.
+
+The template is unzipped into directory <path_out>.dir, the archive members
+that extract_docx_content_item() knows about are rewritten, each image in
+<images> is written into word/media/, and the directory is zipped into
+<path_out>. Uses the 'rm', 'unzip' and 'zip' shell commands.
+
+contentss, contentss_num
+    Strings handed to extract_docx_content_item() for word/document.xml.
+images
+    Image types, ids, names and data.
+path_template
+    Template docx file. Must not contain a single quote (EINVAL).
+path_out
+    Output file. Must pass check_path_shell_safe() (EINVAL otherwise).
+preserve_dir
+    If non-zero, <path_out>.dir is not deleted afterwards.
+
+Returns 0 on success. On failure returns non-zero: the value from systemf()
+if unzip or zip failed, otherwise -1. */
+{
+    int e = -1;
+    int rc;
+    int i;
+    char* path_tempdir = NULL;
+    char* path = NULL;
+    char* text = NULL;
+    char* text2 = NULL;
+
+    assert(path_out);
+    assert(path_template);
+
+    if (check_path_shell_safe(path_out)) {
+        outf("path_out is unsafe: %s", path_out);
+        goto end;
+    }
+    /* <path_template> is placed inside single quotes in the unzip command
+    below, so a single quote in it would break out of the quoting. */
+    if (strchr(path_template, '\'')) {
+        outf("path_template is unsafe: %s", path_template);
+        errno = EINVAL;
+        goto end;
+    }
+
+    outf("images->images_num=%i", images->images_num);
+    if (extract_asprintf(alloc, &path_tempdir, "%s.dir", path_out) < 0) goto end;
+    /* Remove any stale directory; only a negative systemf() result is treated
+    as failure, since rm fails harmlessly if the directory does not exist. */
+    if (systemf(alloc, "rm -r '%s' 2>/dev/null", path_tempdir) < 0) goto end;
+
+    if (s_mkdir(path_tempdir, 0777)) {
+        outf("Failed to create directory: %s", path_tempdir);
+        goto end;
+    }
+
+    outf("Unzipping template document '%s' to tempdir: %s",
+            path_template, path_tempdir);
+    /* Keep the command status in <rc> so that <e> stays at -1 and later
+    failures are not reported as success. */
+    rc = systemf(alloc, "unzip -q -d '%s' '%s'", path_tempdir, path_template);
+    if (rc) {
+        outf("Failed to unzip %s into %s",
+                path_template, path_tempdir);
+        e = rc;
+        goto end;
+    }
+
+    /* Might be nice to iterate through all items in path_tempdir, but for now
+    we look at just the items that we know extract_docx_content_item() will
+    modify. */
+
+    {
+        const char* names[] = {
+                "word/document.xml",
+                "[Content_Types].xml",
+                "word/_rels/document.xml.rels",
+                };
+        int names_num = sizeof(names) / sizeof(names[0]);
+        for (i=0; i<names_num; ++i) {
+            const char* name = names[i];
+            extract_free(alloc, &path);
+            extract_free(alloc, &text);
+            extract_free(alloc, &text2);
+            if (extract_asprintf(alloc, &path, "%s/%s", path_tempdir, name) < 0) goto end;
+            if (read_all_path(alloc, path, &text)) goto end;
+
+            if (extract_docx_content_item(
+                    alloc,
+                    contentss,
+                    contentss_num,
+                    images,
+                    name,
+                    text,
+                    &text2
+                    )) goto end;
+            {
+                /* NULL <text2> means the template text is used unchanged. */
+                const char* text3 = (text2) ? text2 : text;
+                if (write_all(text3, strlen(text3), path)) goto end;
+            }
+        }
+    }
+
+    /* Copy images into <path_tempdir>/word/media/. */
+    extract_free(alloc, &path);
+    if (extract_asprintf(alloc, &path, "%s/word/media", path_tempdir) < 0) goto end;
+    if (s_mkdir(path, 0777)) goto end;
+
+    for (i=0; i<images->images_num; ++i) {
+        image_t* image = &images->images[i];
+        extract_free(alloc, &path);
+        if (extract_asprintf(alloc, &path, "%s/word/media/%s", path_tempdir, image->name) < 0) goto end;
+        if (write_all(image->data, image->data_size, path)) goto end;
+    }
+
+    outf("Zipping tempdir to create %s", path_out);
+    {
+        /* The zip command runs inside <path_tempdir>, which is a sibling of
+        <path_out>, so the output is addressed as ../<leaf>. */
+        const char* path_out_leaf = strrchr(path_out, '/');
+        if (!path_out_leaf) path_out_leaf = path_out;
+        rc = systemf(alloc, "cd '%s' && zip -q -r -D '../%s' .", path_tempdir, path_out_leaf);
+        if (rc) {
+            outf("Zip command failed to convert '%s' directory into output file: %s",
+                    path_tempdir, path_out);
+            e = rc;
+            goto end;
+        }
+    }
+
+    if (!preserve_dir) {
+        if (remove_directory(alloc, path_tempdir)) goto end;
+    }
+
+    e = 0;
+
+    end:
+    outf("e=%i", e);
+    extract_free(alloc, &path_tempdir);
+    extract_free(alloc, &path);
+    extract_free(alloc, &text);
+    extract_free(alloc, &text2);
+
+    if (e) {
+        outf("Failed to create %s", path_out);
+    }
+    return e;
+}
diff --git a/extract/src/docx.h b/extract/src/docx.h
new file mode 100644
index 00000000..6e26568f
--- /dev/null
+++ b/extract/src/docx.h
@@ -0,0 +1,84 @@
+#ifndef ARTIFEX_EXTRACT_DOCX_H
+#define ARTIFEX_EXTRACT_DOCX_H
+
+/* Only for internal use by extract code. */
+
+/* Things for creating docx files. */
+
+int extract_document_to_docx_content(
+ extract_alloc_t* alloc,
+ document_t* document,
+ int spacing,
+ int rotation,
+ int images,
+ extract_astring_t* content
+ );
+/* Puts into *content a string containing all paragraphs in *document in docx
+XML format.
+
+NOTE(review): <spacing>, <rotation> and <images> are presumably flags that
+enable the corresponding features in the generated XML - confirm against the
+implementation.
+
+The resulting string can be passed (as an element of <contentss>) to
+extract_docx_content_item() or extract_docx_write_template() to be inserted
+into a docx archive's word/document.xml. */
+
+
+int extract_docx_write_template(
+ extract_alloc_t* alloc,
+ extract_astring_t* contentss,
+ int contentss_num,
+ images_t* images,
+ const char* path_template,
+ const char* path_out,
+ int preserve_dir
+ );
+/* Creates a new docx file using a provided template document.
+
+Uses the 'rm', 'unzip' and 'zip' commands internally.
+
+contentss
+contentss_num
+ Array of strings to be inserted, in order, into word/document.xml.
+images
+ Information about images. Each image's data is written into the archive's
+ word/media/ directory under the image's name.
+path_template
+ Name of docx file to use as a template.
+path_out
+ Name of docx file to create. Must not contain single-quote, double quote,
+ space or ".." sequence - these will force EINVAL error because they could
+ make internal shell commands unsafe.
+preserve_dir
+ If true, we don't delete the temporary directory <path_out>.dir containing
+ unzipped docx content.
+*/
+
+
+int extract_docx_content_item(
+ extract_alloc_t* alloc,
+ extract_astring_t* contentss,
+ int contentss_num,
+ images_t* images,
+ const char* name,
+ const char* text,
+ char** text2
+ );
+/* Determines content of <name> in docx archive.
+
+contentss
+contentss_num
+ Array of strings to insert, in order, if <name> is word/document.xml.
+images
+ Information about images. If <name> is word/_rels/document.xml.rels we
+ insert relationship information mapping from image ids to image names;
+ the content inserted into word/document.xml should presumably already
+ contain reference ids for images (confirm against the code that generates
+ <contentss>). If <name> is [Content_Types].xml we insert information about
+ image types.
+name
+ Path within the docx zip archive.
+text
+ Content of <name> in template docx file.
+text2
+ Out-param. Set to NULL if <text> should be used unchanged. Otherwise set to
+ point to desired text, allocated via <alloc>, which caller should free with
+ extract_free().
+*/
+
+#endif
diff --git a/extract/src/docx_template.c b/extract/src/docx_template.c
new file mode 100644
index 00000000..73ab5b71
--- /dev/null
+++ b/extract/src/docx_template.c
@@ -0,0 +1,910 @@
+/* THIS IS AUTO-GENERATED CODE, DO NOT EDIT. */
+
+#include "docx_template.h"
+
+const docx_template_item_t docx_template_items[] =
+{
+ {
+ "[Content_Types].xml",
+ ""
+ "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n"
+ ""
+ "<Types xmlns=\"http://schemas.openxmlformats.org/package/2006/content-types\">"
+ "<Default Extension=\"rels\" ContentType=\"application/vnd.openxmlformats-package.relationships+xml\"/>"
+ "<Default Extension=\"xml\" ContentType=\"application/xml\"/>"
+ "<Override PartName=\"/word/document.xml\" ContentType=\"application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml\"/>"
+ "<Override PartName=\"/word/styles.xml\" ContentType=\"application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml\"/>"
+ "<Override PartName=\"/word/settings.xml\" ContentType=\"application/vnd.openxmlformats-officedocument.wordprocessingml.settings+xml\"/>"
+ "<Override PartName=\"/word/webSettings.xml\" ContentType=\"application/vnd.openxmlformats-officedocument.wordprocessingml.webSettings+xml\"/>"
+ "<Override PartName=\"/word/fontTable.xml\" ContentType=\"application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml\"/>"
+ "<Override PartName=\"/word/theme/theme1.xml\" ContentType=\"application/vnd.openxmlformats-officedocument.theme+xml\"/>"
+ "<Override PartName=\"/docProps/core.xml\" ContentType=\"application/vnd.openxmlformats-package.core-properties+xml\"/>"
+ "<Override PartName=\"/docProps/app.xml\" ContentType=\"application/vnd.openxmlformats-officedocument.extended-properties+xml\"/></Types>"
+ },
+
+ {
+ "_rels/.rels",
+ ""
+ "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n"
+ ""
+ "<Relationships xmlns=\"http://schemas.openxmlformats.org/package/2006/relationships\">"
+ "<Relationship Id=\"rId3\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties\" Target=\"docProps/app.xml\"/>"
+ "<Relationship Id=\"rId2\" Type=\"http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties\" Target=\"docProps/core.xml\"/>"
+ "<Relationship Id=\"rId1\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument\" Target=\"word/document.xml\"/></Relationships>"
+ },
+
+ {
+ "docProps/app.xml",
+ ""
+ "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n"
+ ""
+ "<Properties xmlns=\"http://schemas.openxmlformats.org/officeDocument/2006/extended-properties\" xmlns:vt=\"http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes\">"
+ "<Template>Normal.dotm</Template>"
+ "<TotalTime>3</TotalTime>"
+ "<Pages>1</Pages>"
+ "<Words>2</Words>"
+ "<Characters>18</Characters>"
+ "<Application>Microsoft Office Word</Application>"
+ "<DocSecurity>0</DocSecurity>"
+ "<Lines>1</Lines>"
+ "<Paragraphs>1</Paragraphs>"
+ "<ScaleCrop>false</ScaleCrop>"
+ "<Company></Company>"
+ "<LinksUpToDate>false</LinksUpToDate>"
+ "<CharactersWithSpaces>19</CharactersWithSpaces>"
+ "<SharedDoc>false</SharedDoc>"
+ "<HyperlinksChanged>false</HyperlinksChanged>"
+ "<AppVersion>16.0000</AppVersion></Properties>"
+ },
+
+ {
+ "docProps/core.xml",
+ ""
+ "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n"
+ ""
+ "<cp:coreProperties xmlns:cp=\"http://schemas.openxmlformats.org/package/2006/metadata/core-properties\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:dcterms=\"http://purl.org/dc/terms/\" xmlns:dcmitype=\"http://purl.org/dc/dcmitype/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">"
+ "<dc:title></dc:title>"
+ "<dc:subject></dc:subject>"
+ "<dc:creator></dc:creator>"
+ "<cp:keywords></cp:keywords>"
+ "<dc:description></dc:description>"
+ "<cp:lastModifiedBy></cp:lastModifiedBy>"
+ "<cp:revision>1</cp:revision>"
+ "<dcterms:created xsi:type=\"dcterms:W3CDTF\">2020-09-25T17:04:00Z</dcterms:created>"
+ "<dcterms:modified xsi:type=\"dcterms:W3CDTF\">2020-09-25T17:07:00Z</dcterms:modified></cp:coreProperties>"
+ },
+
+ {
+ "word/document.xml",
+ ""
+ "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n"
+ ""
+ "<w:document xmlns:wpc=\"http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas\" xmlns:cx=\"http://schemas.microsoft.com/office/drawing/2014/chartex\" xmlns:cx1=\"http://schemas.microsoft.com/office/drawing/2015/9/8/chartex\" xmlns:cx2=\"http://schemas.microsoft.com/office/drawing/2015/10/21/chartex\" xmlns:cx3=\"http://schemas.microsoft.com/office/drawing/2016/5/9/chartex\" xmlns:cx4=\"http://schemas.microsoft.com/office/drawing/2016/5/10/chartex\" xmlns:cx5=\"http://schemas.microsoft.com/office/drawing/2016/5/11/chartex\" xmlns:cx6=\"http://schemas.microsoft.com/office/drawing/2016/5/12/chartex\" xmlns:cx7=\"http://schemas.microsoft.com/office/drawing/2016/5/13/chartex\" xmlns:cx8=\"http://schemas.microsoft.com/office/drawing/2016/5/14/chartex\" xmlns:mc=\"http://schemas.openxmlformats.org/markup-compatibility/2006\" xmlns:aink=\"http://schemas.microsoft.com/office/drawing/2016/ink\" xmlns:am3d=\"http://schemas.microsoft.com/office/drawing/2017/model3d\" xmlns:o=\"urn:schemas-microsoft-com:office:office\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:m=\"http://schemas.openxmlformats.org/officeDocument/2006/math\" xmlns:v=\"urn:schemas-microsoft-com:vml\" xmlns:wp14=\"http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing\" xmlns:wp=\"http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing\" xmlns:w10=\"urn:schemas-microsoft-com:office:word\" xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" xmlns:w14=\"http://schemas.microsoft.com/office/word/2010/wordml\" xmlns:w15=\"http://schemas.microsoft.com/office/word/2012/wordml\" xmlns:w16cex=\"http://schemas.microsoft.com/office/word/2018/wordml/cex\" xmlns:w16cid=\"http://schemas.microsoft.com/office/word/2016/wordml/cid\" xmlns:w16=\"http://schemas.microsoft.com/office/word/2018/wordml\" xmlns:w16se=\"http://schemas.microsoft.com/office/word/2015/wordml/symex\" 
xmlns:wpg=\"http://schemas.microsoft.com/office/word/2010/wordprocessingGroup\" xmlns:wpi=\"http://schemas.microsoft.com/office/word/2010/wordprocessingInk\" xmlns:wne=\"http://schemas.microsoft.com/office/word/2006/wordml\" xmlns:wps=\"http://schemas.microsoft.com/office/word/2010/wordprocessingShape\" mc:Ignorable=\"w14 w15 w16se w16cid w16 w16cex wp14\">"
+ "<w:body>"
+ "<w:p w14:paraId=\"7C58A6F1\" w14:textId=\"3E2CAE3F\" w:rsidR=\"00610D78\" w:rsidRDefault=\"007F4427\">"
+ "<w:r>"
+ "<w:t>Hello world</w:t></w:r></w:p>"
+ "<w:p w14:paraId=\"53256C58\" w14:textId=\"13022069\" w:rsidR=\"007F4427\" w:rsidRDefault=\"007F4427\">"
+ "<w:r>"
+ "<w:rPr>"
+ "<w:noProof/></w:rPr>"
+ "<mc:AlternateContent>"
+ "<mc:Choice Requires=\"wps\">"
+ "<w:drawing>"
+ "<wp:anchor distT=\"0\" distB=\"0\" distL=\"114300\" distR=\"114300\" simplePos=\"0\" relativeHeight=\"251659264\" behindDoc=\"0\" locked=\"0\" layoutInCell=\"1\" allowOverlap=\"1\" wp14:anchorId=\"53A210D1\" wp14:editId=\"2B7E8016\">"
+ "<wp:simplePos x=\"0\" y=\"0\"/>"
+ "<wp:positionH relativeFrom=\"column\">"
+ "<wp:posOffset>904875</wp:posOffset></wp:positionH>"
+ "<wp:positionV relativeFrom=\"paragraph\">"
+ "<wp:posOffset>619125</wp:posOffset></wp:positionV>"
+ "<wp:extent cx=\"3228975\" cy=\"2286000\"/>"
+ "<wp:effectExtent l=\"381000\" t=\"723900\" r=\"371475\" b=\"723900\"/>"
+ "<wp:wrapNone/>"
+ "<wp:docPr id=\"1\" name=\"Text Box 1\"/>"
+ "<wp:cNvGraphicFramePr/>"
+ "<a:graphic xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\">"
+ "<a:graphicData uri=\"http://schemas.microsoft.com/office/word/2010/wordprocessingShape\">"
+ "<wps:wsp>"
+ "<wps:cNvSpPr txBox=\"1\"/>"
+ "<wps:spPr>"
+ "<a:xfrm rot=\"19547867\">"
+ "<a:off x=\"0\" y=\"0\"/>"
+ "<a:ext cx=\"3228975\" cy=\"2286000\"/></a:xfrm>"
+ "<a:prstGeom prst=\"rect\">"
+ "<a:avLst/></a:prstGeom>"
+ "<a:solidFill>"
+ "<a:schemeClr val=\"lt1\"/></a:solidFill>"
+ "<a:ln w=\"6350\">"
+ "<a:solidFill>"
+ "<a:prstClr val=\"black\"/></a:solidFill></a:ln></wps:spPr>"
+ "<wps:txbx>"
+ "<w:txbxContent>"
+ "<w:p w14:paraId=\"31597E69\" w14:textId=\"2903B1F1\" w:rsidR=\"007F4427\" w:rsidRDefault=\"007F4427\">"
+ "<w:r>"
+ "<w:t>Hello. Qwerty. World</w:t></w:r></w:p>"
+ "<w:p w14:paraId=\"0BD8A985\" w14:textId=\"1BFB8248\" w:rsidR=\"007F4427\" w:rsidRDefault=\"007F4427\">"
+ "<w:proofErr w:type=\"spellStart\"/>"
+ "<w:r>"
+ "<w:t>mupdf</w:t></w:r>"
+ "<w:proofErr w:type=\"spellEnd\"/></w:p></w:txbxContent></wps:txbx>"
+ "<wps:bodyPr rot=\"0\" spcFirstLastPara=\"0\" vertOverflow=\"overflow\" horzOverflow=\"overflow\" vert=\"horz\" wrap=\"square\" lIns=\"91440\" tIns=\"45720\" rIns=\"91440\" bIns=\"45720\" numCol=\"1\" spcCol=\"0\" rtlCol=\"0\" fromWordArt=\"0\" anchor=\"t\" anchorCtr=\"0\" forceAA=\"0\" compatLnSpc=\"1\">"
+ "<a:prstTxWarp prst=\"textNoShape\">"
+ "<a:avLst/></a:prstTxWarp>"
+ "<a:noAutofit/></wps:bodyPr></wps:wsp></a:graphicData></a:graphic></wp:anchor></w:drawing></mc:Choice>"
+ "<mc:Fallback>"
+ "<w:pict>"
+ "<v:shapetype w14:anchorId=\"53A210D1\" id=\"_x0000_t202\" coordsize=\"21600,21600\" o:spt=\"202\" path=\"m,l,21600r21600,l21600,xe\">"
+ "<v:stroke joinstyle=\"miter\"/>"
+ "<v:path gradientshapeok=\"t\" o:connecttype=\"rect\"/></v:shapetype>"
+ "<v:shape id=\"Text Box 1\" o:spid=\"_x0000_s1026\" type=\"#_x0000_t202\" style=\"position:absolute;margin-left:71.25pt;margin-top:48.75pt;width:254.25pt;height:180pt;rotation:-2241476fd;z-index:251659264;visibility:visible;mso-wrap-style:square;mso-wrap-distance-left:9pt;mso-wrap-distance-top:0;mso-wrap-distance-right:9pt;mso-wrap-distance-bottom:0;mso-position-horizontal:absolute;mso-position-horizontal-relative:text;mso-position-vertical:absolute;mso-position-vertical-relative:text;v-text-anchor:top\" o:gfxdata=\"UEsDBBQABgAIAAAAIQC2gziS/gAAAOEBAAATAAAAW0NvbnRlbnRfVHlwZXNdLnhtbJSRQU7DMBBF&#xA;90jcwfIWJU67QAgl6YK0S0CoHGBkTxKLZGx5TGhvj5O2G0SRWNoz/78nu9wcxkFMGNg6quQqL6RA&#xA;0s5Y6ir5vt9lD1JwBDIwOMJKHpHlpr69KfdHjyxSmriSfYz+USnWPY7AufNIadK6MEJMx9ApD/oD&#xA;OlTrorhX2lFEilmcO2RdNtjC5xDF9pCuTyYBB5bi6bQ4syoJ3g9WQ0ymaiLzg5KdCXlKLjvcW893&#xA;SUOqXwnz5DrgnHtJTxOsQfEKIT7DmDSUCaxw7Rqn8787ZsmRM9e2VmPeBN4uqYvTtW7jvijg9N/y&#xA;JsXecLq0q+WD6m8AAAD//wMAUEsDBBQABgAIAAAAIQA4/SH/1gAAAJQBAAALAAAAX3JlbHMvLnJl&#xA;bHOkkMFqwzAMhu+DvYPRfXGawxijTi+j0GvpHsDYimMaW0Yy2fr2M4PBMnrbUb/Q94l/f/hMi1qR&#xA;JVI2sOt6UJgd+ZiDgffL8ekFlFSbvV0oo4EbChzGx4f9GRdb25HMsYhqlCwG5lrLq9biZkxWOiqY&#xA;22YiTra2kYMu1l1tQD30/bPm3wwYN0x18gb45AdQl1tp5j/sFB2T0FQ7R0nTNEV3j6o9feQzro1i&#xA;OWA14Fm+Q8a1a8+Bvu/d/dMb2JY5uiPbhG/ktn4cqGU/er3pcvwCAAD//wMAUEsDBBQABgAIAAAA&#xA;IQDQg5pQVgIAALEEAAAOAAAAZHJzL2Uyb0RvYy54bWysVE1v2zAMvQ/YfxB0X+2k+WiDOEXWosOA&#xA;oi3QDj0rstwYk0VNUmJ3v35PipMl3U7DLgJFPj+Rj6TnV12j2VY5X5Mp+OAs50wZSWVtXgv+7fn2&#xA;0wVnPghTCk1GFfxNeX61+Phh3tqZGtKadKkcA4nxs9YWfB2CnWWZl2vVCH9GVhkEK3KNCLi616x0&#xA;ogV7o7Nhnk+yllxpHUnlPbw3uyBfJP6qUjI8VJVXgemCI7eQTpfOVTyzxVzMXp2w61r2aYh/yKIR&#xA;tcGjB6obEQTbuPoPqqaWjjxV4UxSk1FV1VKlGlDNIH9XzdNaWJVqgTjeHmTy/49W3m8fHatL9I4z&#xA;Ixq06Fl1gX2mjg2iOq31M4CeLGChgzsie7+HMxbdVa5hjiDu4HI8ml5MpkkLVMcAh+xvB6kjt4Tz&#xA;fDi8uJyOOZOIwZ7keWpGtmOLrNb58EVRw6JRcIdeJlqxvfMBGQC6h0S4J12Xt7XW6RLnR11rx7YC&#xA;ndch5YwvTlDasLbgk/NxnohPYpH68P1KC/k9Vn3KgJs2cEaNdlpEK3SrrhdoReUbdEvSQAZv5W0N&#xA;3jvhw6NwGDQ4sTzhAU
elCclQb3G2Jvfzb/6IR/8R5azF4Bbc/9gIpzjTXw0m43IwGsVJT5fReDrE&#xA;xR1HVscRs2muCQqh+8gumREf9N6sHDUv2LFlfBUhYSTeLnjYm9dht07YUamWywTCbFsR7syTlZF6&#xA;383n7kU42/czYBTuaT/iYvaurTts/NLQchOoqlPPo8A7VXvdsRepLf0Ox8U7vifU7z/N4hcAAAD/&#xA;/wMAUEsDBBQABgAIAAAAIQBh17L63wAAAAoBAAAPAAAAZHJzL2Rvd25yZXYueG1sTI9BT4NAEIXv&#xA;Jv6HzZh4s0ubgpayNIboSW3Syg9Y2BGI7CyyS0v99Y4nPU3ezMub72W72fbihKPvHClYLiIQSLUz&#xA;HTUKyvfnuwcQPmgyuneECi7oYZdfX2U6Ne5MBzwdQyM4hHyqFbQhDKmUvm7Rar9wAxLfPtxodWA5&#xA;NtKM+szhtperKEqk1R3xh1YPWLRYfx4nq8APVfz9VQxPb+WUNC+vZbGPDhelbm/mxy2IgHP4M8Mv&#xA;PqNDzkyVm8h40bNer2K2Ktjc82RDEi+5XKVgHfNG5pn8XyH/AQAA//8DAFBLAQItABQABgAIAAAA&#xA;IQC2gziS/gAAAOEBAAATAAAAAAAAAAAAAAAAAAAAAABbQ29udGVudF9UeXBlc10ueG1sUEsBAi0A&#xA;FAAGAAgAAAAhADj9If/WAAAAlAEAAAsAAAAAAAAAAAAAAAAALwEAAF9yZWxzLy5yZWxzUEsBAi0A&#xA;FAAGAAgAAAAhANCDmlBWAgAAsQQAAA4AAAAAAAAAAAAAAAAALgIAAGRycy9lMm9Eb2MueG1sUEsB&#xA;Ai0AFAAGAAgAAAAhAGHXsvrfAAAACgEAAA8AAAAAAAAAAAAAAAAAsAQAAGRycy9kb3ducmV2Lnht&#xA;bFBLBQYAAAAABAAEAPMAAAC8BQAAAAA=&#xA;\" fillcolor=\"white [3201]\" strokeweight=\".5pt\">"
+ "<v:textbox>"
+ "<w:txbxContent>"
+ "<w:p w14:paraId=\"31597E69\" w14:textId=\"2903B1F1\" w:rsidR=\"007F4427\" w:rsidRDefault=\"007F4427\">"
+ "<w:r>"
+ "<w:t>Hello. Qwerty. World</w:t></w:r></w:p>"
+ "<w:p w14:paraId=\"0BD8A985\" w14:textId=\"1BFB8248\" w:rsidR=\"007F4427\" w:rsidRDefault=\"007F4427\">"
+ "<w:proofErr w:type=\"spellStart\"/>"
+ "<w:r>"
+ "<w:t>mupdf</w:t></w:r>"
+ "<w:proofErr w:type=\"spellEnd\"/></w:p></w:txbxContent></v:textbox></v:shape></w:pict></mc:Fallback></mc:AlternateContent></w:r>"
+ "<w:r>"
+ "<w:t>qwerty</w:t></w:r></w:p>"
+ "<w:sectPr w:rsidR=\"007F4427\">"
+ "<w:pgSz w:w=\"11906\" w:h=\"16838\"/>"
+ "<w:pgMar w:top=\"1440\" w:right=\"1440\" w:bottom=\"1440\" w:left=\"1440\" w:header=\"708\" w:footer=\"708\" w:gutter=\"0\"/>"
+ "<w:cols w:space=\"708\"/>"
+ "<w:docGrid w:linePitch=\"360\"/></w:sectPr></w:body></w:document>"
+ },
+
+ {
+ "word/fontTable.xml",
+ ""
+ "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n"
+ ""
+ "<w:fonts xmlns:mc=\"http://schemas.openxmlformats.org/markup-compatibility/2006\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" xmlns:w14=\"http://schemas.microsoft.com/office/word/2010/wordml\" xmlns:w15=\"http://schemas.microsoft.com/office/word/2012/wordml\" xmlns:w16cex=\"http://schemas.microsoft.com/office/word/2018/wordml/cex\" xmlns:w16cid=\"http://schemas.microsoft.com/office/word/2016/wordml/cid\" xmlns:w16=\"http://schemas.microsoft.com/office/word/2018/wordml\" xmlns:w16se=\"http://schemas.microsoft.com/office/word/2015/wordml/symex\" mc:Ignorable=\"w14 w15 w16se w16cid w16 w16cex\">"
+ "<w:font w:name=\"Calibri\">"
+ "<w:panose1 w:val=\"020F0502020204030204\"/>"
+ "<w:charset w:val=\"00\"/>"
+ "<w:family w:val=\"swiss\"/>"
+ "<w:pitch w:val=\"variable\"/>"
+ "<w:sig w:usb0=\"E4002EFF\" w:usb1=\"C000247B\" w:usb2=\"00000009\" w:usb3=\"00000000\" w:csb0=\"000001FF\" w:csb1=\"00000000\"/></w:font>"
+ "<w:font w:name=\"Times New Roman\">"
+ "<w:panose1 w:val=\"02020603050405020304\"/>"
+ "<w:charset w:val=\"00\"/>"
+ "<w:family w:val=\"roman\"/>"
+ "<w:pitch w:val=\"variable\"/>"
+ "<w:sig w:usb0=\"E0002EFF\" w:usb1=\"C000785B\" w:usb2=\"00000009\" w:usb3=\"00000000\" w:csb0=\"000001FF\" w:csb1=\"00000000\"/></w:font>"
+ "<w:font w:name=\"Calibri Light\">"
+ "<w:panose1 w:val=\"020F0302020204030204\"/>"
+ "<w:charset w:val=\"00\"/>"
+ "<w:family w:val=\"swiss\"/>"
+ "<w:pitch w:val=\"variable\"/>"
+ "<w:sig w:usb0=\"E4002EFF\" w:usb1=\"C000247B\" w:usb2=\"00000009\" w:usb3=\"00000000\" w:csb0=\"000001FF\" w:csb1=\"00000000\"/></w:font></w:fonts>"
+ },
+
+ {
+ "word/settings.xml",
+ ""
+ "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n"
+ ""
+ "<w:settings xmlns:mc=\"http://schemas.openxmlformats.org/markup-compatibility/2006\" xmlns:o=\"urn:schemas-microsoft-com:office:office\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:m=\"http://schemas.openxmlformats.org/officeDocument/2006/math\" xmlns:v=\"urn:schemas-microsoft-com:vml\" xmlns:w10=\"urn:schemas-microsoft-com:office:word\" xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" xmlns:w14=\"http://schemas.microsoft.com/office/word/2010/wordml\" xmlns:w15=\"http://schemas.microsoft.com/office/word/2012/wordml\" xmlns:w16cex=\"http://schemas.microsoft.com/office/word/2018/wordml/cex\" xmlns:w16cid=\"http://schemas.microsoft.com/office/word/2016/wordml/cid\" xmlns:w16=\"http://schemas.microsoft.com/office/word/2018/wordml\" xmlns:w16se=\"http://schemas.microsoft.com/office/word/2015/wordml/symex\" xmlns:sl=\"http://schemas.openxmlformats.org/schemaLibrary/2006/main\" mc:Ignorable=\"w14 w15 w16se w16cid w16 w16cex\">"
+ "<w:zoom w:percent=\"100\"/>"
+ "<w:proofState w:spelling=\"clean\" w:grammar=\"clean\"/>"
+ "<w:defaultTabStop w:val=\"720\"/>"
+ "<w:characterSpacingControl w:val=\"doNotCompress\"/>"
+ "<w:compat>"
+ "<w:compatSetting w:name=\"compatibilityMode\" w:uri=\"http://schemas.microsoft.com/office/word\" w:val=\"15\"/>"
+ "<w:compatSetting w:name=\"overrideTableStyleFontSizeAndJustification\" w:uri=\"http://schemas.microsoft.com/office/word\" w:val=\"1\"/>"
+ "<w:compatSetting w:name=\"enableOpenTypeFeatures\" w:uri=\"http://schemas.microsoft.com/office/word\" w:val=\"1\"/>"
+ "<w:compatSetting w:name=\"doNotFlipMirrorIndents\" w:uri=\"http://schemas.microsoft.com/office/word\" w:val=\"1\"/>"
+ "<w:compatSetting w:name=\"differentiateMultirowTableHeaders\" w:uri=\"http://schemas.microsoft.com/office/word\" w:val=\"1\"/>"
+ "<w:compatSetting w:name=\"useWord2013TrackBottomHyphenation\" w:uri=\"http://schemas.microsoft.com/office/word\" w:val=\"0\"/></w:compat>"
+ "<w:rsids>"
+ "<w:rsidRoot w:val=\"007F4427\"/>"
+ "<w:rsid w:val=\"00255448\"/>"
+ "<w:rsid w:val=\"007F4427\"/></w:rsids>"
+ "<m:mathPr>"
+ "<m:mathFont m:val=\"Cambria Math\"/>"
+ "<m:brkBin m:val=\"before\"/>"
+ "<m:brkBinSub m:val=\"--\"/>"
+ "<m:smallFrac m:val=\"0\"/>"
+ "<m:dispDef/>"
+ "<m:lMargin m:val=\"0\"/>"
+ "<m:rMargin m:val=\"0\"/>"
+ "<m:defJc m:val=\"centerGroup\"/>"
+ "<m:wrapIndent m:val=\"1440\"/>"
+ "<m:intLim m:val=\"subSup\"/>"
+ "<m:naryLim m:val=\"undOvr\"/></m:mathPr>"
+ "<w:themeFontLang w:val=\"en-GB\"/>"
+ "<w:clrSchemeMapping w:bg1=\"light1\" w:t1=\"dark1\" w:bg2=\"light2\" w:t2=\"dark2\" w:accent1=\"accent1\" w:accent2=\"accent2\" w:accent3=\"accent3\" w:accent4=\"accent4\" w:accent5=\"accent5\" w:accent6=\"accent6\" w:hyperlink=\"hyperlink\" w:followedHyperlink=\"followedHyperlink\"/>"
+ "<w:shapeDefaults>"
+ "<o:shapedefaults v:ext=\"edit\" spidmax=\"1026\"/>"
+ "<o:shapelayout v:ext=\"edit\">"
+ "<o:idmap v:ext=\"edit\" data=\"1\"/></o:shapelayout></w:shapeDefaults>"
+ "<w:decimalSymbol w:val=\".\"/>"
+ "<w:listSeparator w:val=\",\"/>"
+ "<w14:docId w14:val=\"32E52EF8\"/>"
+ "<w15:chartTrackingRefBased/>"
+ "<w15:docId w15:val=\"{A10F59F7-497D-44D4-A338-47719734E7A0}\"/></w:settings>"
+ },
+
+ {
+ "word/styles.xml",
+ ""
+ "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n"
+ ""
+ "<w:styles xmlns:mc=\"http://schemas.openxmlformats.org/markup-compatibility/2006\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" xmlns:w14=\"http://schemas.microsoft.com/office/word/2010/wordml\" xmlns:w15=\"http://schemas.microsoft.com/office/word/2012/wordml\" xmlns:w16cex=\"http://schemas.microsoft.com/office/word/2018/wordml/cex\" xmlns:w16cid=\"http://schemas.microsoft.com/office/word/2016/wordml/cid\" xmlns:w16=\"http://schemas.microsoft.com/office/word/2018/wordml\" xmlns:w16se=\"http://schemas.microsoft.com/office/word/2015/wordml/symex\" mc:Ignorable=\"w14 w15 w16se w16cid w16 w16cex\">"
+ "<w:docDefaults>"
+ "<w:rPrDefault>"
+ "<w:rPr>"
+ "<w:rFonts w:asciiTheme=\"minorHAnsi\" w:eastAsiaTheme=\"minorHAnsi\" w:hAnsiTheme=\"minorHAnsi\" w:cstheme=\"minorBidi\"/>"
+ "<w:sz w:val=\"22\"/>"
+ "<w:szCs w:val=\"22\"/>"
+ "<w:lang w:val=\"en-GB\" w:eastAsia=\"en-US\" w:bidi=\"ar-SA\"/></w:rPr></w:rPrDefault>"
+ "<w:pPrDefault>"
+ "<w:pPr>"
+ "<w:spacing w:after=\"160\" w:line=\"259\" w:lineRule=\"auto\"/></w:pPr></w:pPrDefault></w:docDefaults>"
+ "<w:latentStyles w:defLockedState=\"0\" w:defUIPriority=\"99\" w:defSemiHidden=\"0\" w:defUnhideWhenUsed=\"0\" w:defQFormat=\"0\" w:count=\"376\">"
+ "<w:lsdException w:name=\"Normal\" w:uiPriority=\"0\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"heading 1\" w:uiPriority=\"9\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"heading 2\" w:semiHidden=\"1\" w:uiPriority=\"9\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"heading 3\" w:semiHidden=\"1\" w:uiPriority=\"9\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"heading 4\" w:semiHidden=\"1\" w:uiPriority=\"9\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"heading 5\" w:semiHidden=\"1\" w:uiPriority=\"9\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"heading 6\" w:semiHidden=\"1\" w:uiPriority=\"9\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"heading 7\" w:semiHidden=\"1\" w:uiPriority=\"9\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"heading 8\" w:semiHidden=\"1\" w:uiPriority=\"9\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"heading 9\" w:semiHidden=\"1\" w:uiPriority=\"9\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"index 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"index 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"index 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"index 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"index 5\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"index 6\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"index 7\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"index 8\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"index 9\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"toc 1\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"toc 2\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"toc 3\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"toc 4\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"toc 5\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"toc 6\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"toc 7\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"toc 8\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"toc 9\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Normal Indent\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"footnote text\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"annotation text\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"header\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"footer\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"index heading\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"caption\" w:semiHidden=\"1\" w:uiPriority=\"35\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"table of figures\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"envelope address\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"envelope return\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"footnote reference\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"annotation reference\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"line number\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"page number\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"endnote reference\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"endnote text\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"table of authorities\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"macro\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"toa heading\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List Bullet\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List Number\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List 5\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List Bullet 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List Bullet 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List Bullet 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List Bullet 5\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List Number 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List Number 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List Number 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List Number 5\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Title\" w:uiPriority=\"10\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"Closing\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Signature\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Default Paragraph Font\" w:semiHidden=\"1\" w:uiPriority=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Body Text\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Body Text Indent\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List Continue\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List Continue 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List Continue 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List Continue 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"List Continue 5\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Message Header\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Subtitle\" w:uiPriority=\"11\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"Salutation\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Date\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Body Text First Indent\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Body Text First Indent 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Note Heading\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Body Text 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Body Text 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Body Text Indent 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Body Text Indent 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Block Text\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Hyperlink\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"FollowedHyperlink\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Strong\" w:uiPriority=\"22\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"Emphasis\" w:uiPriority=\"20\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"Document Map\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Plain Text\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"E-mail Signature\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"HTML Top of Form\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"HTML Bottom of Form\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Normal (Web)\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"HTML Acronym\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"HTML Address\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"HTML Cite\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"HTML Code\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"HTML Definition\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"HTML Keyboard\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"HTML Preformatted\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"HTML Sample\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"HTML Typewriter\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"HTML Variable\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Normal Table\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"annotation subject\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"No List\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Outline List 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Outline List 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Outline List 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Simple 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Simple 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Simple 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Classic 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Classic 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Classic 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Classic 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Colorful 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Colorful 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Colorful 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Columns 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Columns 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Columns 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Columns 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Columns 5\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Grid 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Grid 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Grid 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Grid 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Grid 5\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Grid 6\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Grid 7\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Grid 8\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table List 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table List 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table List 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table List 4\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table List 5\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table List 6\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table List 7\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table List 8\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table 3D effects 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table 3D effects 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table 3D effects 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Contemporary\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Elegant\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Professional\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Subtle 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Subtle 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Web 1\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Web 2\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Web 3\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Balloon Text\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Table Grid\" w:uiPriority=\"39\"/>"
+ "<w:lsdException w:name=\"Table Theme\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Placeholder Text\" w:semiHidden=\"1\"/>"
+ "<w:lsdException w:name=\"No Spacing\" w:uiPriority=\"1\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"Light Shading\" w:uiPriority=\"60\"/>"
+ "<w:lsdException w:name=\"Light List\" w:uiPriority=\"61\"/>"
+ "<w:lsdException w:name=\"Light Grid\" w:uiPriority=\"62\"/>"
+ "<w:lsdException w:name=\"Medium Shading 1\" w:uiPriority=\"63\"/>"
+ "<w:lsdException w:name=\"Medium Shading 2\" w:uiPriority=\"64\"/>"
+ "<w:lsdException w:name=\"Medium List 1\" w:uiPriority=\"65\"/>"
+ "<w:lsdException w:name=\"Medium List 2\" w:uiPriority=\"66\"/>"
+ "<w:lsdException w:name=\"Medium Grid 1\" w:uiPriority=\"67\"/>"
+ "<w:lsdException w:name=\"Medium Grid 2\" w:uiPriority=\"68\"/>"
+ "<w:lsdException w:name=\"Medium Grid 3\" w:uiPriority=\"69\"/>"
+ "<w:lsdException w:name=\"Dark List\" w:uiPriority=\"70\"/>"
+ "<w:lsdException w:name=\"Colorful Shading\" w:uiPriority=\"71\"/>"
+ "<w:lsdException w:name=\"Colorful List\" w:uiPriority=\"72\"/>"
+ "<w:lsdException w:name=\"Colorful Grid\" w:uiPriority=\"73\"/>"
+ "<w:lsdException w:name=\"Light Shading Accent 1\" w:uiPriority=\"60\"/>"
+ "<w:lsdException w:name=\"Light List Accent 1\" w:uiPriority=\"61\"/>"
+ "<w:lsdException w:name=\"Light Grid Accent 1\" w:uiPriority=\"62\"/>"
+ "<w:lsdException w:name=\"Medium Shading 1 Accent 1\" w:uiPriority=\"63\"/>"
+ "<w:lsdException w:name=\"Medium Shading 2 Accent 1\" w:uiPriority=\"64\"/>"
+ "<w:lsdException w:name=\"Medium List 1 Accent 1\" w:uiPriority=\"65\"/>"
+ "<w:lsdException w:name=\"Revision\" w:semiHidden=\"1\"/>"
+ "<w:lsdException w:name=\"List Paragraph\" w:uiPriority=\"34\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"Quote\" w:uiPriority=\"29\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"Intense Quote\" w:uiPriority=\"30\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"Medium List 2 Accent 1\" w:uiPriority=\"66\"/>"
+ "<w:lsdException w:name=\"Medium Grid 1 Accent 1\" w:uiPriority=\"67\"/>"
+ "<w:lsdException w:name=\"Medium Grid 2 Accent 1\" w:uiPriority=\"68\"/>"
+ "<w:lsdException w:name=\"Medium Grid 3 Accent 1\" w:uiPriority=\"69\"/>"
+ "<w:lsdException w:name=\"Dark List Accent 1\" w:uiPriority=\"70\"/>"
+ "<w:lsdException w:name=\"Colorful Shading Accent 1\" w:uiPriority=\"71\"/>"
+ "<w:lsdException w:name=\"Colorful List Accent 1\" w:uiPriority=\"72\"/>"
+ "<w:lsdException w:name=\"Colorful Grid Accent 1\" w:uiPriority=\"73\"/>"
+ "<w:lsdException w:name=\"Light Shading Accent 2\" w:uiPriority=\"60\"/>"
+ "<w:lsdException w:name=\"Light List Accent 2\" w:uiPriority=\"61\"/>"
+ "<w:lsdException w:name=\"Light Grid Accent 2\" w:uiPriority=\"62\"/>"
+ "<w:lsdException w:name=\"Medium Shading 1 Accent 2\" w:uiPriority=\"63\"/>"
+ "<w:lsdException w:name=\"Medium Shading 2 Accent 2\" w:uiPriority=\"64\"/>"
+ "<w:lsdException w:name=\"Medium List 1 Accent 2\" w:uiPriority=\"65\"/>"
+ "<w:lsdException w:name=\"Medium List 2 Accent 2\" w:uiPriority=\"66\"/>"
+ "<w:lsdException w:name=\"Medium Grid 1 Accent 2\" w:uiPriority=\"67\"/>"
+ "<w:lsdException w:name=\"Medium Grid 2 Accent 2\" w:uiPriority=\"68\"/>"
+ "<w:lsdException w:name=\"Medium Grid 3 Accent 2\" w:uiPriority=\"69\"/>"
+ "<w:lsdException w:name=\"Dark List Accent 2\" w:uiPriority=\"70\"/>"
+ "<w:lsdException w:name=\"Colorful Shading Accent 2\" w:uiPriority=\"71\"/>"
+ "<w:lsdException w:name=\"Colorful List Accent 2\" w:uiPriority=\"72\"/>"
+ "<w:lsdException w:name=\"Colorful Grid Accent 2\" w:uiPriority=\"73\"/>"
+ "<w:lsdException w:name=\"Light Shading Accent 3\" w:uiPriority=\"60\"/>"
+ "<w:lsdException w:name=\"Light List Accent 3\" w:uiPriority=\"61\"/>"
+ "<w:lsdException w:name=\"Light Grid Accent 3\" w:uiPriority=\"62\"/>"
+ "<w:lsdException w:name=\"Medium Shading 1 Accent 3\" w:uiPriority=\"63\"/>"
+ "<w:lsdException w:name=\"Medium Shading 2 Accent 3\" w:uiPriority=\"64\"/>"
+ "<w:lsdException w:name=\"Medium List 1 Accent 3\" w:uiPriority=\"65\"/>"
+ "<w:lsdException w:name=\"Medium List 2 Accent 3\" w:uiPriority=\"66\"/>"
+ "<w:lsdException w:name=\"Medium Grid 1 Accent 3\" w:uiPriority=\"67\"/>"
+ "<w:lsdException w:name=\"Medium Grid 2 Accent 3\" w:uiPriority=\"68\"/>"
+ "<w:lsdException w:name=\"Medium Grid 3 Accent 3\" w:uiPriority=\"69\"/>"
+ "<w:lsdException w:name=\"Dark List Accent 3\" w:uiPriority=\"70\"/>"
+ "<w:lsdException w:name=\"Colorful Shading Accent 3\" w:uiPriority=\"71\"/>"
+ "<w:lsdException w:name=\"Colorful List Accent 3\" w:uiPriority=\"72\"/>"
+ "<w:lsdException w:name=\"Colorful Grid Accent 3\" w:uiPriority=\"73\"/>"
+ "<w:lsdException w:name=\"Light Shading Accent 4\" w:uiPriority=\"60\"/>"
+ "<w:lsdException w:name=\"Light List Accent 4\" w:uiPriority=\"61\"/>"
+ "<w:lsdException w:name=\"Light Grid Accent 4\" w:uiPriority=\"62\"/>"
+ "<w:lsdException w:name=\"Medium Shading 1 Accent 4\" w:uiPriority=\"63\"/>"
+ "<w:lsdException w:name=\"Medium Shading 2 Accent 4\" w:uiPriority=\"64\"/>"
+ "<w:lsdException w:name=\"Medium List 1 Accent 4\" w:uiPriority=\"65\"/>"
+ "<w:lsdException w:name=\"Medium List 2 Accent 4\" w:uiPriority=\"66\"/>"
+ "<w:lsdException w:name=\"Medium Grid 1 Accent 4\" w:uiPriority=\"67\"/>"
+ "<w:lsdException w:name=\"Medium Grid 2 Accent 4\" w:uiPriority=\"68\"/>"
+ "<w:lsdException w:name=\"Medium Grid 3 Accent 4\" w:uiPriority=\"69\"/>"
+ "<w:lsdException w:name=\"Dark List Accent 4\" w:uiPriority=\"70\"/>"
+ "<w:lsdException w:name=\"Colorful Shading Accent 4\" w:uiPriority=\"71\"/>"
+ "<w:lsdException w:name=\"Colorful List Accent 4\" w:uiPriority=\"72\"/>"
+ "<w:lsdException w:name=\"Colorful Grid Accent 4\" w:uiPriority=\"73\"/>"
+ "<w:lsdException w:name=\"Light Shading Accent 5\" w:uiPriority=\"60\"/>"
+ "<w:lsdException w:name=\"Light List Accent 5\" w:uiPriority=\"61\"/>"
+ "<w:lsdException w:name=\"Light Grid Accent 5\" w:uiPriority=\"62\"/>"
+ "<w:lsdException w:name=\"Medium Shading 1 Accent 5\" w:uiPriority=\"63\"/>"
+ "<w:lsdException w:name=\"Medium Shading 2 Accent 5\" w:uiPriority=\"64\"/>"
+ "<w:lsdException w:name=\"Medium List 1 Accent 5\" w:uiPriority=\"65\"/>"
+ "<w:lsdException w:name=\"Medium List 2 Accent 5\" w:uiPriority=\"66\"/>"
+ "<w:lsdException w:name=\"Medium Grid 1 Accent 5\" w:uiPriority=\"67\"/>"
+ "<w:lsdException w:name=\"Medium Grid 2 Accent 5\" w:uiPriority=\"68\"/>"
+ "<w:lsdException w:name=\"Medium Grid 3 Accent 5\" w:uiPriority=\"69\"/>"
+ "<w:lsdException w:name=\"Dark List Accent 5\" w:uiPriority=\"70\"/>"
+ "<w:lsdException w:name=\"Colorful Shading Accent 5\" w:uiPriority=\"71\"/>"
+ "<w:lsdException w:name=\"Colorful List Accent 5\" w:uiPriority=\"72\"/>"
+ "<w:lsdException w:name=\"Colorful Grid Accent 5\" w:uiPriority=\"73\"/>"
+ "<w:lsdException w:name=\"Light Shading Accent 6\" w:uiPriority=\"60\"/>"
+ "<w:lsdException w:name=\"Light List Accent 6\" w:uiPriority=\"61\"/>"
+ "<w:lsdException w:name=\"Light Grid Accent 6\" w:uiPriority=\"62\"/>"
+ "<w:lsdException w:name=\"Medium Shading 1 Accent 6\" w:uiPriority=\"63\"/>"
+ "<w:lsdException w:name=\"Medium Shading 2 Accent 6\" w:uiPriority=\"64\"/>"
+ "<w:lsdException w:name=\"Medium List 1 Accent 6\" w:uiPriority=\"65\"/>"
+ "<w:lsdException w:name=\"Medium List 2 Accent 6\" w:uiPriority=\"66\"/>"
+ "<w:lsdException w:name=\"Medium Grid 1 Accent 6\" w:uiPriority=\"67\"/>"
+ "<w:lsdException w:name=\"Medium Grid 2 Accent 6\" w:uiPriority=\"68\"/>"
+ "<w:lsdException w:name=\"Medium Grid 3 Accent 6\" w:uiPriority=\"69\"/>"
+ "<w:lsdException w:name=\"Dark List Accent 6\" w:uiPriority=\"70\"/>"
+ "<w:lsdException w:name=\"Colorful Shading Accent 6\" w:uiPriority=\"71\"/>"
+ "<w:lsdException w:name=\"Colorful List Accent 6\" w:uiPriority=\"72\"/>"
+ "<w:lsdException w:name=\"Colorful Grid Accent 6\" w:uiPriority=\"73\"/>"
+ "<w:lsdException w:name=\"Subtle Emphasis\" w:uiPriority=\"19\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"Intense Emphasis\" w:uiPriority=\"21\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"Subtle Reference\" w:uiPriority=\"31\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"Intense Reference\" w:uiPriority=\"32\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"Book Title\" w:uiPriority=\"33\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"Bibliography\" w:semiHidden=\"1\" w:uiPriority=\"37\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"TOC Heading\" w:semiHidden=\"1\" w:uiPriority=\"39\" w:unhideWhenUsed=\"1\" w:qFormat=\"1\"/>"
+ "<w:lsdException w:name=\"Plain Table 1\" w:uiPriority=\"41\"/>"
+ "<w:lsdException w:name=\"Plain Table 2\" w:uiPriority=\"42\"/>"
+ "<w:lsdException w:name=\"Plain Table 3\" w:uiPriority=\"43\"/>"
+ "<w:lsdException w:name=\"Plain Table 4\" w:uiPriority=\"44\"/>"
+ "<w:lsdException w:name=\"Plain Table 5\" w:uiPriority=\"45\"/>"
+ "<w:lsdException w:name=\"Grid Table Light\" w:uiPriority=\"40\"/>"
+ "<w:lsdException w:name=\"Grid Table 1 Light\" w:uiPriority=\"46\"/>"
+ "<w:lsdException w:name=\"Grid Table 2\" w:uiPriority=\"47\"/>"
+ "<w:lsdException w:name=\"Grid Table 3\" w:uiPriority=\"48\"/>"
+ "<w:lsdException w:name=\"Grid Table 4\" w:uiPriority=\"49\"/>"
+ "<w:lsdException w:name=\"Grid Table 5 Dark\" w:uiPriority=\"50\"/>"
+ "<w:lsdException w:name=\"Grid Table 6 Colorful\" w:uiPriority=\"51\"/>"
+ "<w:lsdException w:name=\"Grid Table 7 Colorful\" w:uiPriority=\"52\"/>"
+ "<w:lsdException w:name=\"Grid Table 1 Light Accent 1\" w:uiPriority=\"46\"/>"
+ "<w:lsdException w:name=\"Grid Table 2 Accent 1\" w:uiPriority=\"47\"/>"
+ "<w:lsdException w:name=\"Grid Table 3 Accent 1\" w:uiPriority=\"48\"/>"
+ "<w:lsdException w:name=\"Grid Table 4 Accent 1\" w:uiPriority=\"49\"/>"
+ "<w:lsdException w:name=\"Grid Table 5 Dark Accent 1\" w:uiPriority=\"50\"/>"
+ "<w:lsdException w:name=\"Grid Table 6 Colorful Accent 1\" w:uiPriority=\"51\"/>"
+ "<w:lsdException w:name=\"Grid Table 7 Colorful Accent 1\" w:uiPriority=\"52\"/>"
+ "<w:lsdException w:name=\"Grid Table 1 Light Accent 2\" w:uiPriority=\"46\"/>"
+ "<w:lsdException w:name=\"Grid Table 2 Accent 2\" w:uiPriority=\"47\"/>"
+ "<w:lsdException w:name=\"Grid Table 3 Accent 2\" w:uiPriority=\"48\"/>"
+ "<w:lsdException w:name=\"Grid Table 4 Accent 2\" w:uiPriority=\"49\"/>"
+ "<w:lsdException w:name=\"Grid Table 5 Dark Accent 2\" w:uiPriority=\"50\"/>"
+ "<w:lsdException w:name=\"Grid Table 6 Colorful Accent 2\" w:uiPriority=\"51\"/>"
+ "<w:lsdException w:name=\"Grid Table 7 Colorful Accent 2\" w:uiPriority=\"52\"/>"
+ "<w:lsdException w:name=\"Grid Table 1 Light Accent 3\" w:uiPriority=\"46\"/>"
+ "<w:lsdException w:name=\"Grid Table 2 Accent 3\" w:uiPriority=\"47\"/>"
+ "<w:lsdException w:name=\"Grid Table 3 Accent 3\" w:uiPriority=\"48\"/>"
+ "<w:lsdException w:name=\"Grid Table 4 Accent 3\" w:uiPriority=\"49\"/>"
+ "<w:lsdException w:name=\"Grid Table 5 Dark Accent 3\" w:uiPriority=\"50\"/>"
+ "<w:lsdException w:name=\"Grid Table 6 Colorful Accent 3\" w:uiPriority=\"51\"/>"
+ "<w:lsdException w:name=\"Grid Table 7 Colorful Accent 3\" w:uiPriority=\"52\"/>"
+ "<w:lsdException w:name=\"Grid Table 1 Light Accent 4\" w:uiPriority=\"46\"/>"
+ "<w:lsdException w:name=\"Grid Table 2 Accent 4\" w:uiPriority=\"47\"/>"
+ "<w:lsdException w:name=\"Grid Table 3 Accent 4\" w:uiPriority=\"48\"/>"
+ "<w:lsdException w:name=\"Grid Table 4 Accent 4\" w:uiPriority=\"49\"/>"
+ "<w:lsdException w:name=\"Grid Table 5 Dark Accent 4\" w:uiPriority=\"50\"/>"
+ "<w:lsdException w:name=\"Grid Table 6 Colorful Accent 4\" w:uiPriority=\"51\"/>"
+ "<w:lsdException w:name=\"Grid Table 7 Colorful Accent 4\" w:uiPriority=\"52\"/>"
+ "<w:lsdException w:name=\"Grid Table 1 Light Accent 5\" w:uiPriority=\"46\"/>"
+ "<w:lsdException w:name=\"Grid Table 2 Accent 5\" w:uiPriority=\"47\"/>"
+ "<w:lsdException w:name=\"Grid Table 3 Accent 5\" w:uiPriority=\"48\"/>"
+ "<w:lsdException w:name=\"Grid Table 4 Accent 5\" w:uiPriority=\"49\"/>"
+ "<w:lsdException w:name=\"Grid Table 5 Dark Accent 5\" w:uiPriority=\"50\"/>"
+ "<w:lsdException w:name=\"Grid Table 6 Colorful Accent 5\" w:uiPriority=\"51\"/>"
+ "<w:lsdException w:name=\"Grid Table 7 Colorful Accent 5\" w:uiPriority=\"52\"/>"
+ "<w:lsdException w:name=\"Grid Table 1 Light Accent 6\" w:uiPriority=\"46\"/>"
+ "<w:lsdException w:name=\"Grid Table 2 Accent 6\" w:uiPriority=\"47\"/>"
+ "<w:lsdException w:name=\"Grid Table 3 Accent 6\" w:uiPriority=\"48\"/>"
+ "<w:lsdException w:name=\"Grid Table 4 Accent 6\" w:uiPriority=\"49\"/>"
+ "<w:lsdException w:name=\"Grid Table 5 Dark Accent 6\" w:uiPriority=\"50\"/>"
+ "<w:lsdException w:name=\"Grid Table 6 Colorful Accent 6\" w:uiPriority=\"51\"/>"
+ "<w:lsdException w:name=\"Grid Table 7 Colorful Accent 6\" w:uiPriority=\"52\"/>"
+ "<w:lsdException w:name=\"List Table 1 Light\" w:uiPriority=\"46\"/>"
+ "<w:lsdException w:name=\"List Table 2\" w:uiPriority=\"47\"/>"
+ "<w:lsdException w:name=\"List Table 3\" w:uiPriority=\"48\"/>"
+ "<w:lsdException w:name=\"List Table 4\" w:uiPriority=\"49\"/>"
+ "<w:lsdException w:name=\"List Table 5 Dark\" w:uiPriority=\"50\"/>"
+ "<w:lsdException w:name=\"List Table 6 Colorful\" w:uiPriority=\"51\"/>"
+ "<w:lsdException w:name=\"List Table 7 Colorful\" w:uiPriority=\"52\"/>"
+ "<w:lsdException w:name=\"List Table 1 Light Accent 1\" w:uiPriority=\"46\"/>"
+ "<w:lsdException w:name=\"List Table 2 Accent 1\" w:uiPriority=\"47\"/>"
+ "<w:lsdException w:name=\"List Table 3 Accent 1\" w:uiPriority=\"48\"/>"
+ "<w:lsdException w:name=\"List Table 4 Accent 1\" w:uiPriority=\"49\"/>"
+ "<w:lsdException w:name=\"List Table 5 Dark Accent 1\" w:uiPriority=\"50\"/>"
+ "<w:lsdException w:name=\"List Table 6 Colorful Accent 1\" w:uiPriority=\"51\"/>"
+ "<w:lsdException w:name=\"List Table 7 Colorful Accent 1\" w:uiPriority=\"52\"/>"
+ "<w:lsdException w:name=\"List Table 1 Light Accent 2\" w:uiPriority=\"46\"/>"
+ "<w:lsdException w:name=\"List Table 2 Accent 2\" w:uiPriority=\"47\"/>"
+ "<w:lsdException w:name=\"List Table 3 Accent 2\" w:uiPriority=\"48\"/>"
+ "<w:lsdException w:name=\"List Table 4 Accent 2\" w:uiPriority=\"49\"/>"
+ "<w:lsdException w:name=\"List Table 5 Dark Accent 2\" w:uiPriority=\"50\"/>"
+ "<w:lsdException w:name=\"List Table 6 Colorful Accent 2\" w:uiPriority=\"51\"/>"
+ "<w:lsdException w:name=\"List Table 7 Colorful Accent 2\" w:uiPriority=\"52\"/>"
+ "<w:lsdException w:name=\"List Table 1 Light Accent 3\" w:uiPriority=\"46\"/>"
+ "<w:lsdException w:name=\"List Table 2 Accent 3\" w:uiPriority=\"47\"/>"
+ "<w:lsdException w:name=\"List Table 3 Accent 3\" w:uiPriority=\"48\"/>"
+ "<w:lsdException w:name=\"List Table 4 Accent 3\" w:uiPriority=\"49\"/>"
+ "<w:lsdException w:name=\"List Table 5 Dark Accent 3\" w:uiPriority=\"50\"/>"
+ "<w:lsdException w:name=\"List Table 6 Colorful Accent 3\" w:uiPriority=\"51\"/>"
+ "<w:lsdException w:name=\"List Table 7 Colorful Accent 3\" w:uiPriority=\"52\"/>"
+ "<w:lsdException w:name=\"List Table 1 Light Accent 4\" w:uiPriority=\"46\"/>"
+ "<w:lsdException w:name=\"List Table 2 Accent 4\" w:uiPriority=\"47\"/>"
+ "<w:lsdException w:name=\"List Table 3 Accent 4\" w:uiPriority=\"48\"/>"
+ "<w:lsdException w:name=\"List Table 4 Accent 4\" w:uiPriority=\"49\"/>"
+ "<w:lsdException w:name=\"List Table 5 Dark Accent 4\" w:uiPriority=\"50\"/>"
+ "<w:lsdException w:name=\"List Table 6 Colorful Accent 4\" w:uiPriority=\"51\"/>"
+ "<w:lsdException w:name=\"List Table 7 Colorful Accent 4\" w:uiPriority=\"52\"/>"
+ "<w:lsdException w:name=\"List Table 1 Light Accent 5\" w:uiPriority=\"46\"/>"
+ "<w:lsdException w:name=\"List Table 2 Accent 5\" w:uiPriority=\"47\"/>"
+ "<w:lsdException w:name=\"List Table 3 Accent 5\" w:uiPriority=\"48\"/>"
+ "<w:lsdException w:name=\"List Table 4 Accent 5\" w:uiPriority=\"49\"/>"
+ "<w:lsdException w:name=\"List Table 5 Dark Accent 5\" w:uiPriority=\"50\"/>"
+ "<w:lsdException w:name=\"List Table 6 Colorful Accent 5\" w:uiPriority=\"51\"/>"
+ "<w:lsdException w:name=\"List Table 7 Colorful Accent 5\" w:uiPriority=\"52\"/>"
+ "<w:lsdException w:name=\"List Table 1 Light Accent 6\" w:uiPriority=\"46\"/>"
+ "<w:lsdException w:name=\"List Table 2 Accent 6\" w:uiPriority=\"47\"/>"
+ "<w:lsdException w:name=\"List Table 3 Accent 6\" w:uiPriority=\"48\"/>"
+ "<w:lsdException w:name=\"List Table 4 Accent 6\" w:uiPriority=\"49\"/>"
+ "<w:lsdException w:name=\"List Table 5 Dark Accent 6\" w:uiPriority=\"50\"/>"
+ "<w:lsdException w:name=\"List Table 6 Colorful Accent 6\" w:uiPriority=\"51\"/>"
+ "<w:lsdException w:name=\"List Table 7 Colorful Accent 6\" w:uiPriority=\"52\"/>"
+ "<w:lsdException w:name=\"Mention\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Smart Hyperlink\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Hashtag\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Unresolved Mention\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/>"
+ "<w:lsdException w:name=\"Smart Link\" w:semiHidden=\"1\" w:unhideWhenUsed=\"1\"/></w:latentStyles>"
+ "<w:style w:type=\"paragraph\" w:default=\"1\" w:styleId=\"Normal\">"
+ "<w:name w:val=\"Normal\"/>"
+ "<w:qFormat/></w:style>"
+ "<w:style w:type=\"character\" w:default=\"1\" w:styleId=\"DefaultParagraphFont\">"
+ "<w:name w:val=\"Default Paragraph Font\"/>"
+ "<w:uiPriority w:val=\"1\"/>"
+ "<w:semiHidden/>"
+ "<w:unhideWhenUsed/></w:style>"
+ "<w:style w:type=\"table\" w:default=\"1\" w:styleId=\"TableNormal\">"
+ "<w:name w:val=\"Normal Table\"/>"
+ "<w:uiPriority w:val=\"99\"/>"
+ "<w:semiHidden/>"
+ "<w:unhideWhenUsed/>"
+ "<w:tblPr>"
+ "<w:tblInd w:w=\"0\" w:type=\"dxa\"/>"
+ "<w:tblCellMar>"
+ "<w:top w:w=\"0\" w:type=\"dxa\"/>"
+ "<w:left w:w=\"108\" w:type=\"dxa\"/>"
+ "<w:bottom w:w=\"0\" w:type=\"dxa\"/>"
+ "<w:right w:w=\"108\" w:type=\"dxa\"/></w:tblCellMar></w:tblPr></w:style>"
+ "<w:style w:type=\"numbering\" w:default=\"1\" w:styleId=\"NoList\">"
+ "<w:name w:val=\"No List\"/>"
+ "<w:uiPriority w:val=\"99\"/>"
+ "<w:semiHidden/>"
+ "<w:unhideWhenUsed/></w:style></w:styles>"
+ },
+
+ {
+ "word/webSettings.xml",
+ ""
+ "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n"
+ ""
+ "<w:webSettings xmlns:mc=\"http://schemas.openxmlformats.org/markup-compatibility/2006\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" xmlns:w14=\"http://schemas.microsoft.com/office/word/2010/wordml\" xmlns:w15=\"http://schemas.microsoft.com/office/word/2012/wordml\" xmlns:w16cex=\"http://schemas.microsoft.com/office/word/2018/wordml/cex\" xmlns:w16cid=\"http://schemas.microsoft.com/office/word/2016/wordml/cid\" xmlns:w16=\"http://schemas.microsoft.com/office/word/2018/wordml\" xmlns:w16se=\"http://schemas.microsoft.com/office/word/2015/wordml/symex\" mc:Ignorable=\"w14 w15 w16se w16cid w16 w16cex\">"
+ "<w:optimizeForBrowser/>"
+ "<w:allowPNG/></w:webSettings>"
+ },
+
+ {
+ "word/_rels/document.xml.rels",
+ ""
+ "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n"
+ ""
+ "<Relationships xmlns=\"http://schemas.openxmlformats.org/package/2006/relationships\">"
+ "<Relationship Id=\"rId3\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/webSettings\" Target=\"webSettings.xml\"/>"
+ "<Relationship Id=\"rId2\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/settings\" Target=\"settings.xml\"/>"
+ "<Relationship Id=\"rId1\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles\" Target=\"styles.xml\"/>"
+ "<Relationship Id=\"rId5\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/theme\" Target=\"theme/theme1.xml\"/>"
+ "<Relationship Id=\"rId4\" Type=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable\" Target=\"fontTable.xml\"/></Relationships>"
+ },
+
+ {
+ "word/theme/theme1.xml",
+ ""
+ "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\r\n"
+ ""
+ "<a:theme xmlns:a=\"http://schemas.openxmlformats.org/drawingml/2006/main\" name=\"Office Theme\">"
+ "<a:themeElements>"
+ "<a:clrScheme name=\"Office\">"
+ "<a:dk1>"
+ "<a:sysClr val=\"windowText\" lastClr=\"000000\"/></a:dk1>"
+ "<a:lt1>"
+ "<a:sysClr val=\"window\" lastClr=\"FFFFFF\"/></a:lt1>"
+ "<a:dk2>"
+ "<a:srgbClr val=\"44546A\"/></a:dk2>"
+ "<a:lt2>"
+ "<a:srgbClr val=\"E7E6E6\"/></a:lt2>"
+ "<a:accent1>"
+ "<a:srgbClr val=\"4472C4\"/></a:accent1>"
+ "<a:accent2>"
+ "<a:srgbClr val=\"ED7D31\"/></a:accent2>"
+ "<a:accent3>"
+ "<a:srgbClr val=\"A5A5A5\"/></a:accent3>"
+ "<a:accent4>"
+ "<a:srgbClr val=\"FFC000\"/></a:accent4>"
+ "<a:accent5>"
+ "<a:srgbClr val=\"5B9BD5\"/></a:accent5>"
+ "<a:accent6>"
+ "<a:srgbClr val=\"70AD47\"/></a:accent6>"
+ "<a:hlink>"
+ "<a:srgbClr val=\"0563C1\"/></a:hlink>"
+ "<a:folHlink>"
+ "<a:srgbClr val=\"954F72\"/></a:folHlink></a:clrScheme>"
+ "<a:fontScheme name=\"Office\">"
+ "<a:majorFont>"
+ "<a:latin typeface=\"Calibri Light\" panose=\"020F0302020204030204\"/>"
+ "<a:ea typeface=\"\"/>"
+ "<a:cs typeface=\"\"/>"
+ "<a:font script=\"Jpan\" typeface=\"游ゴシック Light\"/>"
+ "<a:font script=\"Hang\" typeface=\"맑은 고딕\"/>"
+ "<a:font script=\"Hans\" typeface=\"等线 Light\"/>"
+ "<a:font script=\"Hant\" typeface=\"新細明體\"/>"
+ "<a:font script=\"Arab\" typeface=\"Times New Roman\"/>"
+ "<a:font script=\"Hebr\" typeface=\"Times New Roman\"/>"
+ "<a:font script=\"Thai\" typeface=\"Angsana New\"/>"
+ "<a:font script=\"Ethi\" typeface=\"Nyala\"/>"
+ "<a:font script=\"Beng\" typeface=\"Vrinda\"/>"
+ "<a:font script=\"Gujr\" typeface=\"Shruti\"/>"
+ "<a:font script=\"Khmr\" typeface=\"MoolBoran\"/>"
+ "<a:font script=\"Knda\" typeface=\"Tunga\"/>"
+ "<a:font script=\"Guru\" typeface=\"Raavi\"/>"
+ "<a:font script=\"Cans\" typeface=\"Euphemia\"/>"
+ "<a:font script=\"Cher\" typeface=\"Plantagenet Cherokee\"/>"
+ "<a:font script=\"Yiii\" typeface=\"Microsoft Yi Baiti\"/>"
+ "<a:font script=\"Tibt\" typeface=\"Microsoft Himalaya\"/>"
+ "<a:font script=\"Thaa\" typeface=\"MV Boli\"/>"
+ "<a:font script=\"Deva\" typeface=\"Mangal\"/>"
+ "<a:font script=\"Telu\" typeface=\"Gautami\"/>"
+ "<a:font script=\"Taml\" typeface=\"Latha\"/>"
+ "<a:font script=\"Syrc\" typeface=\"Estrangelo Edessa\"/>"
+ "<a:font script=\"Orya\" typeface=\"Kalinga\"/>"
+ "<a:font script=\"Mlym\" typeface=\"Kartika\"/>"
+ "<a:font script=\"Laoo\" typeface=\"DokChampa\"/>"
+ "<a:font script=\"Sinh\" typeface=\"Iskoola Pota\"/>"
+ "<a:font script=\"Mong\" typeface=\"Mongolian Baiti\"/>"
+ "<a:font script=\"Viet\" typeface=\"Times New Roman\"/>"
+ "<a:font script=\"Uigh\" typeface=\"Microsoft Uighur\"/>"
+ "<a:font script=\"Geor\" typeface=\"Sylfaen\"/>"
+ "<a:font script=\"Armn\" typeface=\"Arial\"/>"
+ "<a:font script=\"Bugi\" typeface=\"Leelawadee UI\"/>"
+ "<a:font script=\"Bopo\" typeface=\"Microsoft JhengHei\"/>"
+ "<a:font script=\"Java\" typeface=\"Javanese Text\"/>"
+ "<a:font script=\"Lisu\" typeface=\"Segoe UI\"/>"
+ "<a:font script=\"Mymr\" typeface=\"Myanmar Text\"/>"
+ "<a:font script=\"Nkoo\" typeface=\"Ebrima\"/>"
+ "<a:font script=\"Olck\" typeface=\"Nirmala UI\"/>"
+ "<a:font script=\"Osma\" typeface=\"Ebrima\"/>"
+ "<a:font script=\"Phag\" typeface=\"Phagspa\"/>"
+ "<a:font script=\"Syrn\" typeface=\"Estrangelo Edessa\"/>"
+ "<a:font script=\"Syrj\" typeface=\"Estrangelo Edessa\"/>"
+ "<a:font script=\"Syre\" typeface=\"Estrangelo Edessa\"/>"
+ "<a:font script=\"Sora\" typeface=\"Nirmala UI\"/>"
+ "<a:font script=\"Tale\" typeface=\"Microsoft Tai Le\"/>"
+ "<a:font script=\"Talu\" typeface=\"Microsoft New Tai Lue\"/>"
+ "<a:font script=\"Tfng\" typeface=\"Ebrima\"/></a:majorFont>"
+ "<a:minorFont>"
+ "<a:latin typeface=\"Calibri\" panose=\"020F0502020204030204\"/>"
+ "<a:ea typeface=\"\"/>"
+ "<a:cs typeface=\"\"/>"
+ "<a:font script=\"Jpan\" typeface=\"游明朝\"/>"
+ "<a:font script=\"Hang\" typeface=\"맑은 고딕\"/>"
+ "<a:font script=\"Hans\" typeface=\"等线\"/>"
+ "<a:font script=\"Hant\" typeface=\"新細明體\"/>"
+ "<a:font script=\"Arab\" typeface=\"Arial\"/>"
+ "<a:font script=\"Hebr\" typeface=\"Arial\"/>"
+ "<a:font script=\"Thai\" typeface=\"Cordia New\"/>"
+ "<a:font script=\"Ethi\" typeface=\"Nyala\"/>"
+ "<a:font script=\"Beng\" typeface=\"Vrinda\"/>"
+ "<a:font script=\"Gujr\" typeface=\"Shruti\"/>"
+ "<a:font script=\"Khmr\" typeface=\"DaunPenh\"/>"
+ "<a:font script=\"Knda\" typeface=\"Tunga\"/>"
+ "<a:font script=\"Guru\" typeface=\"Raavi\"/>"
+ "<a:font script=\"Cans\" typeface=\"Euphemia\"/>"
+ "<a:font script=\"Cher\" typeface=\"Plantagenet Cherokee\"/>"
+ "<a:font script=\"Yiii\" typeface=\"Microsoft Yi Baiti\"/>"
+ "<a:font script=\"Tibt\" typeface=\"Microsoft Himalaya\"/>"
+ "<a:font script=\"Thaa\" typeface=\"MV Boli\"/>"
+ "<a:font script=\"Deva\" typeface=\"Mangal\"/>"
+ "<a:font script=\"Telu\" typeface=\"Gautami\"/>"
+ "<a:font script=\"Taml\" typeface=\"Latha\"/>"
+ "<a:font script=\"Syrc\" typeface=\"Estrangelo Edessa\"/>"
+ "<a:font script=\"Orya\" typeface=\"Kalinga\"/>"
+ "<a:font script=\"Mlym\" typeface=\"Kartika\"/>"
+ "<a:font script=\"Laoo\" typeface=\"DokChampa\"/>"
+ "<a:font script=\"Sinh\" typeface=\"Iskoola Pota\"/>"
+ "<a:font script=\"Mong\" typeface=\"Mongolian Baiti\"/>"
+ "<a:font script=\"Viet\" typeface=\"Arial\"/>"
+ "<a:font script=\"Uigh\" typeface=\"Microsoft Uighur\"/>"
+ "<a:font script=\"Geor\" typeface=\"Sylfaen\"/>"
+ "<a:font script=\"Armn\" typeface=\"Arial\"/>"
+ "<a:font script=\"Bugi\" typeface=\"Leelawadee UI\"/>"
+ "<a:font script=\"Bopo\" typeface=\"Microsoft JhengHei\"/>"
+ "<a:font script=\"Java\" typeface=\"Javanese Text\"/>"
+ "<a:font script=\"Lisu\" typeface=\"Segoe UI\"/>"
+ "<a:font script=\"Mymr\" typeface=\"Myanmar Text\"/>"
+ "<a:font script=\"Nkoo\" typeface=\"Ebrima\"/>"
+ "<a:font script=\"Olck\" typeface=\"Nirmala UI\"/>"
+ "<a:font script=\"Osma\" typeface=\"Ebrima\"/>"
+ "<a:font script=\"Phag\" typeface=\"Phagspa\"/>"
+ "<a:font script=\"Syrn\" typeface=\"Estrangelo Edessa\"/>"
+ "<a:font script=\"Syrj\" typeface=\"Estrangelo Edessa\"/>"
+ "<a:font script=\"Syre\" typeface=\"Estrangelo Edessa\"/>"
+ "<a:font script=\"Sora\" typeface=\"Nirmala UI\"/>"
+ "<a:font script=\"Tale\" typeface=\"Microsoft Tai Le\"/>"
+ "<a:font script=\"Talu\" typeface=\"Microsoft New Tai Lue\"/>"
+ "<a:font script=\"Tfng\" typeface=\"Ebrima\"/></a:minorFont></a:fontScheme>"
+ "<a:fmtScheme name=\"Office\">"
+ "<a:fillStyleLst>"
+ "<a:solidFill>"
+ "<a:schemeClr val=\"phClr\"/></a:solidFill>"
+ "<a:gradFill rotWithShape=\"1\">"
+ "<a:gsLst>"
+ "<a:gs pos=\"0\">"
+ "<a:schemeClr val=\"phClr\">"
+ "<a:lumMod val=\"110000\"/>"
+ "<a:satMod val=\"105000\"/>"
+ "<a:tint val=\"67000\"/></a:schemeClr></a:gs>"
+ "<a:gs pos=\"50000\">"
+ "<a:schemeClr val=\"phClr\">"
+ "<a:lumMod val=\"105000\"/>"
+ "<a:satMod val=\"103000\"/>"
+ "<a:tint val=\"73000\"/></a:schemeClr></a:gs>"
+ "<a:gs pos=\"100000\">"
+ "<a:schemeClr val=\"phClr\">"
+ "<a:lumMod val=\"105000\"/>"
+ "<a:satMod val=\"109000\"/>"
+ "<a:tint val=\"81000\"/></a:schemeClr></a:gs></a:gsLst>"
+ "<a:lin ang=\"5400000\" scaled=\"0\"/></a:gradFill>"
+ "<a:gradFill rotWithShape=\"1\">"
+ "<a:gsLst>"
+ "<a:gs pos=\"0\">"
+ "<a:schemeClr val=\"phClr\">"
+ "<a:satMod val=\"103000\"/>"
+ "<a:lumMod val=\"102000\"/>"
+ "<a:tint val=\"94000\"/></a:schemeClr></a:gs>"
+ "<a:gs pos=\"50000\">"
+ "<a:schemeClr val=\"phClr\">"
+ "<a:satMod val=\"110000\"/>"
+ "<a:lumMod val=\"100000\"/>"
+ "<a:shade val=\"100000\"/></a:schemeClr></a:gs>"
+ "<a:gs pos=\"100000\">"
+ "<a:schemeClr val=\"phClr\">"
+ "<a:lumMod val=\"99000\"/>"
+ "<a:satMod val=\"120000\"/>"
+ "<a:shade val=\"78000\"/></a:schemeClr></a:gs></a:gsLst>"
+ "<a:lin ang=\"5400000\" scaled=\"0\"/></a:gradFill></a:fillStyleLst>"
+ "<a:lnStyleLst>"
+ "<a:ln w=\"6350\" cap=\"flat\" cmpd=\"sng\" algn=\"ctr\">"
+ "<a:solidFill>"
+ "<a:schemeClr val=\"phClr\"/></a:solidFill>"
+ "<a:prstDash val=\"solid\"/>"
+ "<a:miter lim=\"800000\"/></a:ln>"
+ "<a:ln w=\"12700\" cap=\"flat\" cmpd=\"sng\" algn=\"ctr\">"
+ "<a:solidFill>"
+ "<a:schemeClr val=\"phClr\"/></a:solidFill>"
+ "<a:prstDash val=\"solid\"/>"
+ "<a:miter lim=\"800000\"/></a:ln>"
+ "<a:ln w=\"19050\" cap=\"flat\" cmpd=\"sng\" algn=\"ctr\">"
+ "<a:solidFill>"
+ "<a:schemeClr val=\"phClr\"/></a:solidFill>"
+ "<a:prstDash val=\"solid\"/>"
+ "<a:miter lim=\"800000\"/></a:ln></a:lnStyleLst>"
+ "<a:effectStyleLst>"
+ "<a:effectStyle>"
+ "<a:effectLst/></a:effectStyle>"
+ "<a:effectStyle>"
+ "<a:effectLst/></a:effectStyle>"
+ "<a:effectStyle>"
+ "<a:effectLst>"
+ "<a:outerShdw blurRad=\"57150\" dist=\"19050\" dir=\"5400000\" algn=\"ctr\" rotWithShape=\"0\">"
+ "<a:srgbClr val=\"000000\">"
+ "<a:alpha val=\"63000\"/></a:srgbClr></a:outerShdw></a:effectLst></a:effectStyle></a:effectStyleLst>"
+ "<a:bgFillStyleLst>"
+ "<a:solidFill>"
+ "<a:schemeClr val=\"phClr\"/></a:solidFill>"
+ "<a:solidFill>"
+ "<a:schemeClr val=\"phClr\">"
+ "<a:tint val=\"95000\"/>"
+ "<a:satMod val=\"170000\"/></a:schemeClr></a:solidFill>"
+ "<a:gradFill rotWithShape=\"1\">"
+ "<a:gsLst>"
+ "<a:gs pos=\"0\">"
+ "<a:schemeClr val=\"phClr\">"
+ "<a:tint val=\"93000\"/>"
+ "<a:satMod val=\"150000\"/>"
+ "<a:shade val=\"98000\"/>"
+ "<a:lumMod val=\"102000\"/></a:schemeClr></a:gs>"
+ "<a:gs pos=\"50000\">"
+ "<a:schemeClr val=\"phClr\">"
+ "<a:tint val=\"98000\"/>"
+ "<a:satMod val=\"130000\"/>"
+ "<a:shade val=\"90000\"/>"
+ "<a:lumMod val=\"103000\"/></a:schemeClr></a:gs>"
+ "<a:gs pos=\"100000\">"
+ "<a:schemeClr val=\"phClr\">"
+ "<a:shade val=\"63000\"/>"
+ "<a:satMod val=\"120000\"/></a:schemeClr></a:gs></a:gsLst>"
+ "<a:lin ang=\"5400000\" scaled=\"0\"/></a:gradFill></a:bgFillStyleLst></a:fmtScheme></a:themeElements>"
+ "<a:objectDefaults/>"
+ "<a:extraClrSchemeLst/>"
+ "<a:extLst>"
+ "<a:ext uri=\"{05A4C25C-085E-4340-85A3-A5531E510DB2}\">"
+ "<thm15:themeFamily xmlns:thm15=\"http://schemas.microsoft.com/office/thememl/2012/main\" name=\"Office Theme\" id=\"{62F939B6-93AF-4DB8-9C6B-D6C7DFDC589F}\" vid=\"{4A3C46E8-61CC-4603-A589-7422A47A8E4A}\"/></a:ext></a:extLst></a:theme>"
+ },
+
+};
+
+int docx_template_items_num = 11;
diff --git a/extract/src/docx_template.h b/extract/src/docx_template.h
new file mode 100644
index 00000000..8a73d5b2
--- /dev/null
+++ b/extract/src/docx_template.h
@@ -0,0 +1,17 @@
+#ifndef EXTRACT_DOCX_TEMPLATE_H
+#define EXTRACT_DOCX_TEMPLATE_H
+
+/* THIS IS AUTO-GENERATED CODE, DO NOT EDIT. */
+
+
+typedef struct
+{
+ const char* name; /* Name of item in docx archive. */
+ const char* text; /* Contents of item in docx archive. */
+} docx_template_item_t;
+
+extern const docx_template_item_t docx_template_items[];
+extern int docx_template_items_num;
+
+
+#endif
diff --git a/extract/src/docx_template_build.py b/extract/src/docx_template_build.py
new file mode 100755
index 00000000..b528bcb5
--- /dev/null
+++ b/extract/src/docx_template_build.py
@@ -0,0 +1,210 @@
+#! /usr/bin/env python3
+
+'''
+Creates C code for creating docx files using internal template docx content.
+
+Args:
+
+ -i <docx-path>
+ Set template docx file to extract from.
+
+ -o <out-path>
+ Set name of output files.
+
+ We write to <out-path>.c and <out-path>.h.
+'''
+
+import io
+import os
+import re
+import sys
+import textwrap
+
+
+def system(command):
+ '''
+ Like os.system() but raises exception if command fails.
+ '''
+ e = os.system(command)
+ if e:
+ print(f'command failed: {command}')
+ assert 0
+
+def read(path):
+ '''
+ Returns contents of file. We assume it is utf-8.
+ '''
+ with open(path, 'rb') as f:
+ raw = f.read()
+ return raw.decode('utf-8')
+
+def write(text, path):
+ '''
+ Writes text to file.
+ '''
+ parent = os.path.dirname(path)
+ if parent:
+ os.makedirs(parent, exist_ok=True)
+ with open(path, 'w') as f:
+ f.write(text)
+
+def write_if_diff(text, path):
+ try:
+ old = read(path)
+ except Exception:
+ old = None
+ if text != old:
+ write(text, path)
+
+def check_path_safe(path):
+ '''
+ Raises exception unless path consists only of characters and sequences that
+ are known to be safe for shell commands.
+ '''
+ if '..' in path:
+ raise Exception(f'Path is unsafe because contains "..": {path!r}')
+ for c in path:
+ if not c.isalnum() and c not in '/._-':
+ #print(f'unsafe character {c} in: {path}')
+ raise Exception(f'Path is unsafe because contains "{c}": {path!r}')
+
+def path_safe(path):
+ '''
+ Returns True if path is safe else False.
+ '''
+ try:
+ check_path_safe(path)
+ except Exception:
+ return False
+ else:
+ return True
+
+assert not path_safe('foo;rm -rf *')
+assert not path_safe('..')
+assert path_safe('foo/bar.x')
+
+
+def main():
+
+ path_in = None
+ path_out = None
+ args = iter(sys.argv[1:])
+ while 1:
+ try: arg = next(args)
+ except StopIteration: break
+ if arg == '-h' or arg == '--help':
+ print(__doc__)
+ return
+ elif arg == '--docx-pretty':
+ d = next(args)
+ for dirpath, dirnames, filenames in os.walk(d):
+ for filename in filenames:
+ if not filename.endswith('.xml'):
+ continue
+ path = os.path.join(dirpath, filename)
+ system(f'xmllint --format {path} > {path}-')
+ system(f'mv {path}- {path}')
+ elif arg == '-i':
+ path_in = next(args)
+ elif arg == '-o':
+ path_out = next(args)
+ else:
+ assert 0
+
+ if not path_in:
+ return
+
+ if not path_in:
+ raise Exception('Need to specify -i <docx-path>')
+ if not path_out:
+ raise Exception('Need to specify -o <out-path>')
+
+ check_path_safe(path_in)
+ check_path_safe(path_out)
+ path_temp = f'{path_in}.dir'
+ os.system(f'rm -r "{path_temp}" 2>/dev/null')
+ system(f'unzip -q -d {path_temp} {path_in}')
+
+ out_c = io.StringIO()
+ out_c.write(f'/* THIS IS AUTO-GENERATED CODE, DO NOT EDIT. */\n')
+ out_c.write(f'\n')
+ out_c.write(f'#include "{os.path.basename(path_out)}.h"\n')
+ out_c.write(f'\n')
+
+
+ out_c.write('const docx_template_item_t docx_template_items[] =\n')
+ out_c.write(f'{{\n')
+
+ num_items = 0
+ for dirpath, dirnames, filenames in os.walk(path_temp):
+ dirnames.sort()
+
+ if 0:
+ # Write code to create directory item in zip. This isn't recognised by zipinfo, and doesn't
+ # make Word like the file.
+ #
+ name = dirpath[ len(path_temp)+1: ]
+ if name:
+ if not name.endswith('/'):
+ name += '/'
+ out_c3.write(f' if (extract_zip_write_file(zip, NULL, 0, "{name}")) goto end;\n')
+
+ for filename in sorted(filenames):
+ num_items += 1
+ path = os.path.join(dirpath, filename)
+ name = path[ len(path_temp)+1: ]
+ text = read(os.path.join(dirpath, filename))
+ #print(f'first line is: %r' % text.split("\n")[0])
+ text = text.replace('"', '\\"')
+
+ # Looks like template files use \r\n when we interpret them as
+ # utf-8, so we preserve this in the generated strings.
+ #
+ text = text.replace('\r\n', '\\r\\n"\n "')
+
+ # Split on '<' to avoid overly-long lines, which break windows
+ # compiler.
+ #
+ text = re.sub('([<][^/])', '"\n "\\1', text)
+
+ # Remove name of document creator.
+ #
+ for tag in 'dc:creator', 'cp:lastModifiedBy':
+ text = re.sub(f'[<]{tag}[>][^<]*[<]/{tag}[>]', f'<{tag}></{tag}>', text)
+
+ out_c.write(f' {{\n')
+ out_c.write(f' "{name}",\n')
+ out_c.write(f' "{text}"\n')
+ out_c.write(f' }},\n')
+ out_c.write(f' \n')
+
+ out_c.write(f'}};\n')
+ out_c.write(f'\n')
+ out_c.write(f'int docx_template_items_num = {num_items};\n')
+
+ out_c = out_c.getvalue()
+ write_if_diff(out_c, f'{path_out}.c')
+
+ out_h = io.StringIO()
+ out_h.write(f'#ifndef EXTRACT_DOCX_TEMPLATE_H\n')
+ out_h.write(f'#define EXTRACT_DOCX_TEMPLATE_H\n')
+ out_h.write(f'\n')
+ out_h.write(f'/* THIS IS AUTO-GENERATED CODE, DO NOT EDIT. */\n')
+ out_h.write(f'\n')
+ out_h.write(f'\n')
+ out_h.write(f'typedef struct\n')
+ out_h.write(f'{{\n')
+ out_h.write(f' const char* name; /* Name of item in docx archive. */\n')
+ out_h.write(f' const char* text; /* Contents of item in docx archive. */\n')
+ out_h.write(f'}} docx_template_item_t;\n')
+ out_h.write(f'\n')
+ out_h.write(f'extern const docx_template_item_t docx_template_items[];\n')
+ out_h.write(f'extern int docx_template_items_num;\n')
+ out_h.write(f'\n')
+ out_h.write(f'\n')
+ out_h.write(f'#endif\n')
+ write_if_diff(out_h.getvalue(), f'{path_out}.h')
+ #os.system(f'rm -r "{path_temp}"')
+
+if __name__ == '__main__':
+ main()
diff --git a/extract/src/extract-exe.c b/extract/src/extract-exe.c
new file mode 100644
index 00000000..d3ac81d0
--- /dev/null
+++ b/extract/src/extract-exe.c
@@ -0,0 +1,244 @@
+/* Command-line programme for extract_ API. */
+
+#include "../include/extract.h"
+#include "../include/extract_alloc.h"
+
+#include "memento.h"
+#include "outf.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+/* Moves *i on to the next arg and sets *out to point to it.
+
+Returns 0 on success. If there is no next arg, prints a diagnostic naming the
+current arg, sets errno to EINVAL and returns -1 without changing *i or *out.
+*/
+static int arg_next_string(char** argv, int argc, int* i, const char** out)
+{
+    const int next = *i + 1;
+    if (next < argc) {
+        *i = next;
+        *out = argv[next];
+        return 0;
+    }
+    printf("Expected arg after: %s\n", argv[*i]);
+    errno = EINVAL;
+    return -1;
+}
+
+/* Error-detecting equivalent to *out = atoi(argv[++i]).
+
+Returns 0 on success, having moved *i on to the next arg and set *out to its
+value.
+
+Returns -1 with errno set to EINVAL, after printing a diagnostic, if there is
+no next arg, or if the next arg is not entirely a decimal integer that fits in
+an int. In this case *i and *out are unchanged.
+
+We use strtol() rather than atoi() because atoi() cannot report errors: it
+returns 0 for non-numeric text and has undefined behaviour on overflow.
+*/
+static int arg_next_int(char** argv, int argc, int* i, int* out)
+{
+    const char* text;
+    char* end = NULL;
+    long value;
+    int errno_saved;
+
+    if (*i + 1 >= argc) {
+        printf("Expected integer arg after: %s\n", argv[*i]);
+        errno = EINVAL;
+        return -1;
+    }
+    text = argv[*i + 1];
+
+    /* strtol() only reports out-of-range values via errno, so we must clear
+    it first; we restore the caller's value on success. */
+    errno_saved = errno;
+    errno = 0;
+    value = strtol(text, &end, 10);
+    if (end == text                 /* No digits found. */
+            || *end != '\0'         /* Trailing junk after number. */
+            || errno == ERANGE      /* Doesn't fit in a long. */
+            || value < INT_MIN
+            || value > INT_MAX
+            ) {
+        printf("Expected integer arg after: %s (got '%s')\n", argv[*i], text);
+        errno = EINVAL;
+        return -1;
+    }
+    errno = errno_saved;
+    *i += 1;
+    *out = (int) value;
+    return 0;
+}
+
+/* Allocation callback passed to extract_alloc_create() by main(); forwards to
+realloc(). <state> must be the (void*) 123 cookie that main() registers, which
+lets us check that the state pointer is passed through unchanged. */
+static void* s_realloc(void* state, void* prev, size_t size)
+{
+    void* const expected_state = (void*) 123;
+    assert(state == expected_state);
+    return realloc(prev, size);
+}
+
+/* Entry point for the command-line programme.
+
+Parses args (run with -h for details), reads intermediate data from the -i
+file, processes it, and then optionally writes raw content (--o-content)
+and/or a docx file (-o), using either an external template (-t) or the
+internal one.
+
+Returns 0 on success, otherwise prints a diagnostic including errno and
+returns 1. */
+int main(int argc, char** argv)
+{
+    int e = -1;
+    const char* docx_out_path = NULL;
+    const char* input_path = NULL;
+    const char* docx_template_path = NULL;
+    const char* content_path = NULL;
+    int preserve_dir = 0;
+    int spacing = 1;
+    int rotation = 1;
+    int autosplit = 0;
+    int images = 1;
+    int alloc_stats = 0;
+    int alloc_exp_min = 0;
+    int alloc_exp_min_set = 0;
+    int i;
+
+    extract_alloc_t* alloc = NULL;
+    extract_buffer_t* out_buffer = NULL;
+    extract_buffer_t* intermediate = NULL;
+    extract_t* extract = NULL;
+
+    /* Create an allocator so we test the allocation code. */
+    if (extract_alloc_create(s_realloc, (void*) 123, &alloc))
+    {
+        assert(0);
+    }
+
+    for (i=1; i<argc; ++i) {
+        const char* arg = argv[i];
+        if (!strcmp(arg, "-h") || !strcmp(arg, "--help")) {
+            printf(
+                    "Converts intermediate data from mupdf or gs into a docx file.\n"
+                    "\n"
+                    "We require a file containing XML output from one of these commands:\n"
+                    " mutool draw -F xmltext ...\n"
+                    " gs -sDEVICE=txtwrite -dTextFormat=4 ...\n"
+                    "\n"
+                    "We also requires a template docx file.\n"
+                    "\n"
+                    "Args:\n"
+                    " --alloc-exp-min <bytes>\n"
+                    " Internal: set exponential allocation with minimum alloc size.\n"
+                    " --autosplit 0|1\n"
+                    " If 1, we initially split spans when y coordinate changes. This\n"
+                    " stresses our handling of spans when input is from mupdf.\n"
+                    " -i <intermediate-path>\n"
+                    " Path of XML file containing intermediate text spans.\n"
+                    " -o <docx-path>\n"
+                    " If specified, we generate the specified docx file.\n"
+                    " --o-content <path>\n"
+                    " If specified, we write raw docx content to <path>; this is the\n"
+                    " text that we embed inside the template word/document.xml file\n"
+                    " when generating the docx file.\n"
+                    " -p 0|1\n"
+                    " If 1 and -t <docx-template> is specified, we preserve the\n"
+                    " uncompressed <docx-path>.lib/ directory.\n"
+                    " -r 0|1\n"
+                    " If 1, we we output rotated text inside a rotated drawing. Otherwise\n"
+                    " output text is always horizontal.\n"
+                    " -s 0|1\n"
+                    " If 1, we insert extra vertical space between paragraphs and extra\n"
+                    " vertical space between paragraphs that had different ctm matrices\n"
+                    " in the original document.\n"
+                    " -t <docx-template>\n"
+                    " If specified we use <docx-template> as template. Otherwise we use\n"
+                    " an internal template.\n"
+                    " -v <verbose>\n"
+                    " Set verbose level.\n"
+                    " --v-alloc 0|1\n"
+                    " If 1, we show alloc stats.\n"
+                    );
+            /* If -h is the only/last arg we are done; otherwise carry on. */
+            if (i + 1 == argc) {
+                e = 0;
+                goto end;
+            }
+        }
+        else if (!strcmp(arg, "--alloc-exp-min")) {
+            /* <extract> is still NULL while we are parsing args, so we only
+            record the requested size here and apply it after extract_begin()
+            below. */
+            if (arg_next_int(argv, argc, &i, &alloc_exp_min)) goto end;
+            alloc_exp_min_set = 1;
+        }
+        else if (!strcmp(arg, "--autosplit")) {
+            if (arg_next_int(argv, argc, &i, &autosplit)) goto end;
+        }
+        else if (!strcmp(arg, "-i")) {
+            if (arg_next_string(argv, argc, &i, &input_path)) goto end;
+        }
+        else if (!strcmp(arg, "-o")) {
+            if (arg_next_string(argv, argc, &i, &docx_out_path)) goto end;
+        }
+        else if (!strcmp(arg, "--o-content")) {
+            if (arg_next_string(argv, argc, &i, &content_path)) goto end;
+        }
+        else if (!strcmp(arg, "-p")) {
+            if (arg_next_int(argv, argc, &i, &preserve_dir)) goto end;
+        }
+        else if (!strcmp(arg, "-r")) {
+            if (arg_next_int(argv, argc, &i, &rotation)) goto end;
+        }
+        else if (!strcmp(arg, "-s")) {
+            if (arg_next_int(argv, argc, &i, &spacing)) goto end;
+        }
+        else if (!strcmp(arg, "-t")) {
+            if (arg_next_string(argv, argc, &i, &docx_template_path)) goto end;
+        }
+        else if (!strcmp(arg, "-v")) {
+            int verbose;
+            if (arg_next_int(argv, argc, &i, &verbose)) goto end;
+            outf_verbose_set(verbose);
+            outf("Have changed verbose to %i", verbose);
+        }
+        else if (!strcmp(arg, "--v-alloc")) {
+            if (arg_next_int(argv, argc, &i, &alloc_stats)) goto end;
+        }
+        else {
+            printf("Unrecognised arg: '%s'\n", arg);
+            errno = EINVAL;
+            goto end;
+        }
+
+        assert(i < argc);
+    }
+
+    if (!input_path) {
+        printf("-i <input-path> not specified.\n");
+        errno = EINVAL;
+        goto end;
+    }
+
+    if (extract_buffer_open_file(alloc, input_path, 0 /*writable*/, &intermediate)) {
+        printf("Failed to open intermediate file: %s\n", input_path);
+        goto end;
+    }
+
+    if (extract_begin(alloc, &extract)) goto end;
+    if (alloc_exp_min_set) {
+        /* Apply --alloc-exp-min now that we have a valid <extract>. */
+        outf("Calling alloc_set_min_alloc_size(%i)", alloc_exp_min);
+        extract_exp_min(extract, alloc_exp_min);
+    }
+    if (extract_read_intermediate(extract, intermediate, autosplit)) goto end;
+    if (extract_process(extract, spacing, rotation, images)) goto end;
+
+    if (content_path) {
+        if (extract_buffer_open_file(alloc, content_path, 1 /*writable*/, &out_buffer)) goto end;
+        if (extract_write_content(extract, out_buffer)) goto end;
+        if (extract_buffer_close(&out_buffer)) goto end;
+    }
+    if (docx_out_path) {
+        if (docx_template_path) {
+            if (extract_write_template(
+                    extract,
+                    docx_template_path,
+                    docx_out_path,
+                    preserve_dir
+                    )) {
+                printf("Failed to create docx file: %s\n", docx_out_path);
+                goto end;
+            }
+        }
+        else {
+            if (extract_buffer_open_file(alloc, docx_out_path, 1 /*writable*/, &out_buffer)) goto end;
+            if (extract_write(extract, out_buffer)) {
+                printf("Failed to create docx file: %s\n", docx_out_path);
+                goto end;
+            }
+            if (extract_buffer_close(&out_buffer)) goto end;
+        }
+    }
+
+    e = 0;
+    end:
+
+    /* Release whatever is still open, on both success and failure paths. */
+    extract_buffer_close(&intermediate);
+    extract_buffer_close(&out_buffer);
+    extract_end(&extract);
+
+    if (e) {
+        printf("Failed (errno=%i): %s\n", errno, strerror(errno));
+        return 1;
+    }
+
+    extract_internal_end();
+
+    if (alloc_stats) {
+        extract_alloc_stats_t* stats = extract_alloc_stats(alloc);
+        printf("Alloc stats: num_malloc=%i num_realloc=%i num_free=%i num_libc_realloc=%i\n",
+                stats->num_malloc,
+                stats->num_realloc,
+                stats->num_free,
+                stats->num_libc_realloc
+                );
+    }
+
+    extract_alloc_destroy(&alloc);
+    assert(alloc == NULL);
+
+    printf("Finished.\n");
+    return 0;
+}
diff --git a/extract/src/extract.c b/extract/src/extract.c
new file mode 100644
index 00000000..adb3565e
--- /dev/null
+++ b/extract/src/extract.c
@@ -0,0 +1,1226 @@
+#include "../include/extract.h"
+#include "../include/extract_alloc.h"
+
+#include "astring.h"
+#include "document.h"
+#include "docx.h"
+#include "docx_template.h"
+#include "mem.h"
+#include "memento.h"
+#include "outf.h"
+#include "xml.h"
+#include "zip.h"
+
+
+#include <assert.h>
+#include <errno.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+
+
+/* Returns the square root of the absolute value of the determinant of the
+a,b,c,d part of <m>. */
+double matrix_expansion(matrix_t m)
+{
+    const double determinant = m.a * m.d - m.b * m.c;
+    return sqrt(fabs(determinant));
+}
+
+
+/* Sets the position, advance and character-code fields of <item> to zero. */
+static void char_init(char_t* item)
+{
+    /* Character code and advance. */
+    item->ucs = 0;
+    item->adv = 0;
+
+    /* Positions. */
+    item->x = 0;
+    item->y = 0;
+    item->pre_x = 0;
+    item->pre_y = 0;
+}
+
+
+/* Returns a string describing <span>, for use in diagnostics.
+
+The text is built in a function-static extract_astring_t which is freed at the
+start of every call, so the returned pointer is only usable until the next call
+to this function.
+
+If <span> is NULL we only free the static string and return NULL. */
+const char* span_string(extract_alloc_t* alloc, span_t* span)
+{
+ static extract_astring_t ret = {0};
+ double x0 = 0;
+ double y0 = 0;
+ double x1 = 0;
+ double y1 = 0;
+ int c0 = 0;
+ int c1 = 0;
+ int i;
+ /* Discard the text built by the previous call. */
+ extract_astring_free(alloc, &ret);
+ if (!span) {
+ /* This frees our internal data, and is used by extract_internal_end().
+ */
+ return NULL;
+ }
+ /* Code and position of first and last chars; these stay as zero if the
+ span is empty. */
+ if (span->chars_num) {
+ c0 = span->chars[0].ucs;
+ x0 = span->chars[0].x;
+ y0 = span->chars[0].y;
+ c1 = span->chars[span->chars_num-1].ucs;
+ x1 = span->chars[span->chars_num-1].x;
+ y1 = span->chars[span->chars_num-1].y;
+ }
+ /* NOTE(review): return values of extract_astring_cat() and
+ extract_astring_catc() are ignored throughout; confirm whether they can
+ fail. */
+ {
+ char buffer[200];
+ /* Summary line. snprintf() truncates if the text does not fit in
+ buffer[].
+ NOTE(review): %s is given span->font_name, which page_span_append()
+ initialises to NULL; confirm it is always set before we are called. */
+ snprintf(buffer, sizeof(buffer),
+ "span chars_num=%i (%c:%f,%f)..(%c:%f,%f) font=%s:(%f,%f) wmode=%i chars_num=%i: ",
+ span->chars_num,
+ c0, x0, y0,
+ c1, x1, y1,
+ span->font_name,
+ span->trm.a,
+ span->trm.d,
+ span->wmode,
+ span->chars_num
+ );
+ extract_astring_cat(alloc, &ret, buffer);
+ /* Position and advance of each char. */
+ for (i=0; i<span->chars_num; ++i) {
+ snprintf(
+ buffer,
+ sizeof(buffer),
+ " i=%i {x=%f adv=%f}",
+ i,
+ span->chars[i].x,
+ span->chars[i].adv
+ );
+ extract_astring_cat(alloc, &ret, buffer);
+ }
+ }
+ /* Finally the span's characters inside double quotes. Each ucs value is
+ cast to a single char. */
+ extract_astring_cat(alloc, &ret, ": ");
+ extract_astring_catc(alloc, &ret, '"');
+ for (i=0; i<span->chars_num; ++i) {
+ extract_astring_catc(alloc, &ret, (char) span->chars[i].ucs);
+ }
+ extract_astring_catc(alloc, &ret, '"');
+ return ret.chars;
+}
+
+/* Appends a new char_t to <span>, initialised by char_init() and with its ucs
+field set to <c>.
+
+Returns 0 on success, or -1 if growing span->chars[] failed, in which case
+span->chars_num is unchanged. */
+int span_append_c(extract_alloc_t* alloc, span_t* span, int c)
+{
+    char_t* added;
+
+    /* Grow span->chars[] by one element. */
+    if (extract_realloc2(
+            alloc,
+            &span->chars,
+            sizeof(*span->chars) * span->chars_num,
+            sizeof(*span->chars) * (span->chars_num + 1)
+            )) {
+        return -1;
+    }
+
+    added = &span->chars[span->chars_num];
+    char_init(added);
+    added->ucs = c;
+    span->chars_num += 1;
+    return 0;
+}
+
+/* Returns pointer to the last char_t in <span>, which must contain at least
+one char. */
+char_t* span_char_last(span_t* span)
+{
+    int last;
+    assert(span->chars_num > 0);
+    last = span->chars_num - 1;
+    return span->chars + last;
+}
+
+/* Unused but useful to keep code here.
+
+NOTE(review): this disabled code looks out of date - it calls
+extract_astring_free(), extract_astring_cat() and span_string() without the
+extract_alloc_t* arg that span_string() itself uses, so would presumably need
+updating before being re-enabled. */
+#if 0
+/* Returns static string containing info about line_t. */
+static const char* line_string(line_t* line)
+{
+ static extract_astring_t ret = {0};
+ char buffer[32];
+ extract_astring_free(&ret);
+ snprintf(buffer, sizeof(buffer), "line spans_num=%i:", line->spans_num);
+ extract_astring_cat(&ret, buffer);
+ int i;
+ for (i=0; i<line->spans_num; ++i) {
+ extract_astring_cat(&ret, " ");
+ extract_astring_cat(&ret, span_string(line->spans[i]));
+ }
+ return ret.chars;
+}
+#endif
+
+/* Returns last span in a line. The line must contain at least one span. */
+span_t* line_span_last(line_t* line)
+{
+ assert(line->spans_num > 0);
+ return line->spans[line->spans_num - 1];
+}
+
+/* Returns first span in a line. The line must contain at least one span. */
+span_t* line_span_first(line_t* line)
+{
+    assert(line->spans_num > 0);
+    return *line->spans;
+}
+
+/* Frees everything owned by <page>: its spans (with their chars and font
+names), lines, paragraphs and images. Does not free <page> itself. Does
+nothing if <page> is NULL.
+
+NULL entries in page->spans[], page->lines[] and page->paragraphs[] are
+tolerated. */
+static void page_free(extract_alloc_t* alloc, page_t* page)
+{
+    int s;
+    int l;
+    int p;
+    int i;
+    if (!page) return;
+
+    for (s=0; s<page->spans_num; ++s) {
+        span_t* span = page->spans[s];
+        if (span) {
+            extract_free(alloc, &span->chars);
+            extract_free(alloc, &span->font_name);
+        }
+        extract_free(alloc, &span);
+    }
+    extract_free(alloc, &page->spans);
+
+    for (l=0; l<page->lines_num; ++l) {
+        line_t* line = page->lines[l];
+        /* We only free the line->spans[] array, not the spans that it points
+        to, because these have already been freed via page->spans. We check
+        for NULL for consistency with spans and paragraphs. */
+        if (line) extract_free(alloc, &line->spans);
+        extract_free(alloc, &line);
+    }
+    extract_free(alloc, &page->lines);
+
+    for (p=0; p<page->paragraphs_num; ++p) {
+        paragraph_t* paragraph = page->paragraphs[p];
+        /* As above, we only free the array of line pointers. */
+        if (paragraph) extract_free(alloc, &paragraph->lines);
+        extract_free(alloc, &paragraph);
+    }
+    extract_free(alloc, &page->paragraphs);
+
+    /* page->images[] is an array of image_t structs, not pointers, so we free
+    each image's members and then the array itself. */
+    for (i=0; i<page->images_num; ++i) {
+        extract_free(alloc, &page->images[i].data);
+        extract_free(alloc, &page->images[i].type);
+        extract_free(alloc, &page->images[i].id);
+        extract_free(alloc, &page->images[i].name);
+    }
+    extract_free(alloc, &page->images);
+}
+
+static span_t* page_span_append(extract_alloc_t* alloc, page_t* page)
+/* Appends a new empty span_t to a page_t and returns it. The new span has
+NULL font_name and chars, and zero chars_num; other fields are not set here.
+
+Returns NULL if allocation fails, in which case <page> still has its original
+number of spans. */
+{
+    span_t* added;
+
+    if (extract_malloc(alloc, &added, sizeof(*added))) return NULL;
+    added->chars_num = 0;
+    added->chars = NULL;
+    added->font_name = NULL;
+
+    /* Make room for one more pointer in page->spans[]. */
+    if (extract_realloc2(
+            alloc,
+            &page->spans,
+            sizeof(*page->spans) * page->spans_num,
+            sizeof(*page->spans) * (page->spans_num + 1)
+            )) {
+        /* Don't leak the span that we were unable to attach. */
+        extract_free(alloc, &added);
+        return NULL;
+    }
+
+    page->spans[page->spans_num] = added;
+    page->spans_num += 1;
+    return added;
+}
+
+
+/* Frees everything owned by <images> and resets its counts to zero. Does not
+free <images> itself.
+
+For each image we free the type, name and id strings, and release the image
+data by calling the image's data_free callback if it has one. */
+static void extract_images_free(extract_alloc_t* alloc, images_t* images)
+{
+    int i;
+    for (i=0; i<images->images_num; ++i) {
+        image_t* image = &images->images[i];
+        extract_free(alloc, &image->type);
+        extract_free(alloc, &image->name);
+        extract_free(alloc, &image->id);
+        if (image->data_free) {
+            image->data_free(image->data_free_handle, image->data);
+        }
+        /* We must not free <image> itself: it is an element of the
+        images->images[] array of image_t structs, not a separate
+        allocation. The array is freed as a whole below. */
+    }
+    extract_free(alloc, &images->images);
+
+    /* Entries in images->imagetypes[] are copies of image->type pointers
+    (see extract_document_images()), already freed above, so we only free the
+    array. */
+    extract_free(alloc, &images->imagetypes);
+    images->images_num = 0;
+    images->imagetypes_num = 0;
+}
+
+
+static int extract_document_images(extract_alloc_t* alloc, document_t* document, images_t* o_images)
+/* Moves image_t's from document->page[] to *o_images.
+
+On return document->page[].images* will be NULL etc.
+
+We also build o_images->imagetypes[], containing one entry for each distinct
+image type string; these entries are the same pointers as image_t::type, not
+copies.
+
+Returns 0 on success. Returns -1 if an allocation fails, in which case
+*o_images is not modified.
+
+NOTE(review): on the error path the partially-built local <images> is neither
+freed nor returned, and images already moved into it have been NULLed in their
+page, so they look to be leaked. Also, if growing imagetypes[] fails, the image
+copied just beforehand is referenced by both <images> and its page. Confirm and
+fix together with the callers' cleanup.
+*/
+{
+ int e = -1;
+ int p;
+ images_t images = {0};
+ outf("images.images_num=%i", images.images_num);
+ for (p=0; p<document->pages_num; ++p) {
+ page_t* page = document->pages[p];
+ int i;
+ for (i=0; i<page->images_num; ++i) {
+ image_t* image;
+ /* Grow images.images[] by one element. */
+ if (extract_realloc2(
+ alloc,
+ &images.images,
+ sizeof(image_t) * images.images_num,
+ sizeof(image_t) * (images.images_num + 1)
+ )) goto end;
+ image = &page->images[i];
+ outf("p=%i i=%i image->name=%s image->id=%s", p, i, image->name, image->id);
+ assert(image->name);
+ /* Shallow copy, so the pointers inside the image_t are now shared
+ until we NULL the originals below. */
+ images.images[images.images_num] = *image;
+ images.images_num += 1;
+
+ /* Add image type if we haven't seen it before. */
+ {
+ int it;
+ /* NOTE(review): once imagetypes[] is non-empty this strcmp() uses
+ image->type before the assert(image->type) below. */
+ for (it=0; it<images.imagetypes_num; ++it) {
+ outf("it=%i images.imagetypes[it]=%s image->type=%s",
+ it, images.imagetypes[it], image->type);
+ if (!strcmp(images.imagetypes[it], image->type)) {
+ break;
+ }
+ }
+ /* Loop ran to completion, so this type is new. */
+ if (it == images.imagetypes_num) {
+ if (extract_realloc2(
+ alloc,
+ &images.imagetypes,
+ sizeof(char*) * images.imagetypes_num,
+ sizeof(char*) * (images.imagetypes_num + 1)
+ )) goto end;
+ assert(image->type);
+ images.imagetypes[images.imagetypes_num] = image->type;
+ images.imagetypes_num += 1;
+ outf("have added images.imagetypes_num=%i", images.imagetypes_num);
+ }
+ }
+
+ /* We've taken ownership of image->* so NULL the original values
+ here to ensure we can't use things after free. */
+ image->type = NULL;
+ image->name = NULL;
+ image->id = NULL;
+ image->data = NULL;
+ image->data_size = 0;
+ }
+ /* All of this page's images have been moved, so free the now-empty
+ array. */
+ extract_free(alloc, &page->images);
+ page->images_num = 0;
+ }
+ e = 0;
+ end:
+ if (e) {
+ }
+ else {
+ *o_images = images;
+ }
+ return e;
+}
+
+/* Frees all pages in <document> and resets it to have no pages. Does not free
+<document> itself. Does nothing if <document> is NULL. */
+static void extract_document_free(extract_alloc_t* alloc, document_t* document)
+{
+    int page_index;
+    if (document) {
+        for (page_index = 0; page_index < document->pages_num; page_index += 1) {
+            page_t* page = document->pages[page_index];
+            /* Free the page's contents, then the page_t itself. */
+            page_free(alloc, page);
+            extract_free(alloc, &page);
+        }
+        extract_free(alloc, &document->pages);
+        document->pages_num = 0;
+        document->pages = NULL;
+    }
+}
+
+
+/* Returns -1 if x is negative, +1 if x is positive, otherwise 0. */
+static int s_sign(double x)
+{
+    /* Each comparison evaluates to 0 or 1, so the difference is -1, 0 or +1. */
+    return (x > 0) - (x < 0);
+}
+
+/* Compares the a, b, c and d members of two matrices, in that order; the e
+and f members are ignored.
+
+Returns the sign (-1 or +1) of lhs-rhs for the first member that differs, or 0
+if none differ. */
+int matrix_cmp4(const matrix_t* lhs, const matrix_t* rhs)
+{
+    int order;
+    if ((order = s_sign(lhs->a - rhs->a)) != 0) return order;
+    if ((order = s_sign(lhs->b - rhs->b)) != 0) return order;
+    if ((order = s_sign(lhs->c - rhs->c)) != 0) return order;
+    return s_sign(lhs->d - rhs->d);
+}
+
+
+/* Returns <p> with its x and y transformed by the a,b,c,d part of <m>; the e
+and f members of <m> are not used. */
+static point_t multiply_matrix_point(matrix_t m, point_t p)
+{
+    /* Take copies of both inputs so that updating p.x does not affect the
+    calculation of p.y. */
+    const double old_x = p.x;
+    const double old_y = p.y;
+    p.x = m.a * old_x + m.c * old_y;
+    p.y = m.b * old_x + m.d * old_y;
+    return p;
+}
+
+/* Parses six whitespace-separated floating-point numbers from <text> into the
+a, b, c, d, e and f members of <matrix>.
+
+Returns 0 on success. Returns -1 with errno set to EINVAL if <text> is NULL or
+fewer than six numbers could be read; in the latter case some members of
+<matrix> may already have been written. */
+static int s_matrix_read(const char* text, matrix_t* matrix)
+{
+    int fields_read;
+    if (text == NULL) {
+        outf("text is NULL in s_matrix_read()");
+        errno = EINVAL;
+        return -1;
+    }
+    fields_read = sscanf(
+            text,
+            "%lf %lf %lf %lf %lf %lf",
+            &matrix->a,
+            &matrix->b,
+            &matrix->c,
+            &matrix->d,
+            &matrix->e,
+            &matrix->f
+            );
+    if (fields_read == 6) {
+        return 0;
+    }
+    errno = EINVAL;
+    return -1;
+}
+
+
+/* Puts <document> into the empty state: no pages array and a page count of
+zero. Nothing is freed. */
+static void s_document_init(document_t* document)
+{
+    document->pages_num = 0;
+    document->pages = NULL;
+}
+
+
+static int page_span_end_clean(extract_alloc_t* alloc, page_t* page)
+/* Does preliminary processing of the end of the last span in a page; intended
+to be called as we load span information.
+
+Looks at last two char_t's in last span_t of <page>, and either
+leaves unchanged, or removes space in last-but-one position, or moves last
+char_t into a new span_t.
+
+<page> must contain at least one span and that span must contain at least one
+char_t (both are asserted).
+
+Returns 0 on success. Returns -1 if appending the new span, or allocating its
+font name or char array, fails. */
+{
+    int ret = -1;
+    span_t* span;
+    char_t* char_;
+    double font_size;
+    double x;
+    double y;
+    double err_x;
+    double err_y;
+    point_t dir;
+
+    assert(page->spans_num);
+    span = page->spans[page->spans_num-1];
+    assert(span->chars_num);
+
+    /* Last two char_t's are char_[-2] and char_[-1]. */
+    char_ = &span->chars[span->chars_num];
+
+    if (span->chars_num == 1) {
+        /* Nothing to compare the single char against. */
+        return 0;
+    }
+
+    font_size = matrix_expansion(span->trm)
+            * matrix_expansion(span->ctm);
+
+    /* Unit vector along the writing direction, transformed by trm. */
+    if (span->wmode) {
+        dir.x = 0;
+        dir.y = 1;
+    }
+    else {
+        dir.x = 1;
+        dir.y = 0;
+    }
+    dir = multiply_matrix_point(span->trm, dir);
+
+    /* (x, y) is where the last char would start if it directly followed the
+    penultimate char; err_x/err_y is how far it actually is from there, scaled
+    by font size. */
+    x = char_[-2].pre_x + char_[-2].adv * dir.x;
+    y = char_[-2].pre_y + char_[-2].adv * dir.y;
+
+    err_x = (char_[-1].pre_x - x) / font_size;
+    err_y = (char_[-1].pre_y - y) / font_size;
+
+    if (span->chars_num >= 2 && span->chars[span->chars_num-2].ucs == ' ') {
+        int remove_penultimate_space = 0;
+        if (err_x < -span->chars[span->chars_num-2].adv / 2
+                && err_x > -span->chars[span->chars_num-2].adv
+                ) {
+            remove_penultimate_space = 1;
+        }
+        if ((char_[-1].pre_x - char_[-2].pre_x) / font_size < char_[-1].adv / 10) {
+            outfx(
+                    "removing penultimate space because space very narrow:"
+                    "char_[-1].pre_x-char_[-2].pre_x=%f font_size=%f"
+                    " char_[-1].adv=%f",
+                    char_[-1].pre_x - char_[-2].pre_x,
+                    font_size,
+                    char_[-1].adv
+                    );
+            remove_penultimate_space = 1;
+        }
+        if (remove_penultimate_space) {
+            /* This character overlaps with previous space
+            character. We discard previous space character - these
+            sometimes seem to appear in the middle of words for some
+            reason. */
+            outfx("removing space before final char in: %s",
+                    span_string(span));
+            span->chars[span->chars_num-2] = span->chars[span->chars_num-1];
+            span->chars_num -= 1;
+            outfx("span is now: %s", span_string(span));
+            return 0;
+        }
+    }
+    else if (fabs(err_x) > 0.01 || fabs(err_y) > 0.01) {
+        /* This character doesn't seem to be a continuation of
+        previous characters, so split into two spans. This often
+        splits text incorrectly, but this is corrected later when
+        we join spans into lines. */
+        outfx(
+                "Splitting last char into new span. font_size=%f dir.x=%f"
+                " char[-1].pre=(%f, %f) err=(%f, %f): %s",
+                font_size,
+                dir.x,
+                char_[-1].pre_x,
+                char_[-1].pre_y,
+                err_x,
+                err_y,
+                span_string2(span)
+                );
+        {
+            span_t* span2 = page_span_append(alloc, page);
+            if (!span2) goto end;
+            *span2 = *span;
+            /* The struct copy leaves span2 sharing span's heap pointers. Drop
+            them straight away so that, if an allocation below fails, the two
+            spans never both claim ownership of the same font_name/chars. */
+            span2->font_name = NULL;
+            span2->chars = NULL;
+            span2->chars_num = 0;
+            if (extract_strdup(alloc, span->font_name, &span2->font_name)) goto end;
+            if (extract_malloc(alloc, &span2->chars, sizeof(char_t) * 1)) goto end;
+            span2->chars_num = 1;
+            span2->chars[0] = char_[-1];
+            span->chars_num -= 1;
+        }
+        return 0;
+    }
+    ret = 0;
+    end:
+    return ret;
+}
+
+
+/* State for one extraction session; created by extract_begin() and destroyed
+by extract_end(). */
+struct extract_t
+{
+ /* Allocator passed to extract_begin(); used for the allocations made on
+ behalf of this extract_t. */
+ extract_alloc_t* alloc;
+
+ /* Pages (with their spans and images) added so far. extract_process() frees
+ the pages once it has generated content from them. */
+ document_t document;
+
+ int num_spans_split;
+ /* Number of extra spans from page_span_end_clean(). */
+
+ int num_spans_autosplit;
+ /* Number of extra spans from autosplit=1. */
+
+ double span_offset_x;
+ double span_offset_y;
+ /* Only used if autosplit is non-zero. (extract_span_begin() resets both to
+ zero and extract_add_char() only changes them when autosplit is set.) */
+
+ int image_n;
+ /* Used to generate unique ids for images. */
+
+ /* List of strings that are the generated docx content for each page. When
+ zip_* can handle appending of data, we will be able to remove this list. */
+ extract_astring_t* contentss;
+ int contentss_num;
+
+ /* Images gathered from the document by extract_process(); written out by
+ extract_write(). */
+ images_t images;
+};
+
+
+
+/* Creates a new extract_t whose allocations go through <alloc>.
+
+On success returns 0 with *pextract pointing at the new state: zero-filled,
+with an empty document and the image counter starting at 10. On allocation
+failure returns -1 with *pextract set to NULL. */
+int extract_begin(
+        extract_alloc_t* alloc,
+        extract_t** pextract
+        )
+{
+    extract_t* created;
+
+    if (extract_malloc(alloc, &created, sizeof(*created))) {
+        *pextract = NULL;
+        return -1;
+    }
+
+    extract_bzero(created, sizeof(*created));
+    created->alloc = alloc;
+    s_document_init(&created->document);
+
+    /* Start at 10 because template document might use some low-numbered IDs.
+    */
+    created->image_n = 10;
+
+    *pextract = created;
+    return 0;
+}
+
+/* Image-data free callback handed to extract_add_image() by
+extract_read_intermediate(). <handle> is unused.
+
+NOTE(review): this calls free(), whereas extract_read_intermediate() obtains
+the data with extract_malloc(extract->alloc, ...); confirm the two agree when
+a custom allocator is in use. */
+static void image_free_fn(void* handle, void* image_data)
+{
+    free(image_data);
+    (void) handle;
+}
+
+/* Reads intermediate XML from <buffer> and adds the pages, spans, chars and
+images it describes to <extract>. <autosplit> is passed unchanged to
+extract_add_char() for every char.
+
+Returns 0 on success, otherwise -1 (errno is set on the failure paths handled
+directly in this function). Pages/spans added before a failure are left in
+<extract>. */
+int extract_read_intermediate(extract_t* extract, extract_buffer_t* buffer, int autosplit)
+{
+    int ret = -1;
+
+    document_t* document = &extract->document;
+    char* image_data = NULL;
+    int num_spans = 0;
+
+    extract_xml_tag_t tag;
+    extract_xml_tag_init(&tag);
+
+    if (extract_xml_pparse_init(extract->alloc, buffer, NULL /*first_line*/)) {
+        outf("Failed to read start of intermediate data: %s", strerror(errno));
+        goto end;
+    }
+    /* Data read from <buffer> is expected to be XML looking like:
+
+    <page>
+        <span>
+            <char ...>
+            <char ...>
+            ...
+        </span>
+        <span>
+            ...
+        </span>
+        ...
+    </page>
+    <page>
+        ...
+    </page>
+    ...
+
+    We convert this into a list of page_t's, each containing a list of
+    span_t's, each containing a list of char_t's.
+
+    While doing this, we do some within-span processing by calling
+    page_span_end_clean():
+        Remove spurious spaces.
+        Split spans in two where there seem to be large gaps between glyphs.
+    */
+    for(;;) {
+        page_t* page;
+        int e = extract_xml_pparse_next(buffer, &tag);
+        if (e == 1) break; /* EOF. */
+        if (e) goto end;
+        if (!strcmp(tag.name, "?xml")) {
+            /* We simply skip this if we find it. As of 2020-07-31, mutool adds
+            this header to mupdf raw output, but gs txtwrite does not include
+            it. */
+            continue;
+        }
+        if (strcmp(tag.name, "page")) {
+            outf("Expected <page> but tag.name='%s'", tag.name);
+            errno = ESRCH;
+            goto end;
+        }
+        outfx("loading spans for page %i...", document->pages_num);
+        if (extract_page_begin(extract)) goto end;
+        page = extract->document.pages[extract->document.pages_num-1];
+        if (!page) goto end;
+
+        for(;;) {
+            if (extract_xml_pparse_next(buffer, &tag)) goto end;
+            if (!strcmp(tag.name, "/page")) {
+                num_spans += page->spans_num;
+                break;
+            }
+            if (!strcmp(tag.name, "image")) {
+                const char* type = extract_xml_tag_attributes_find(&tag, "type");
+                if (!type) {
+                    errno = EINVAL;
+                    goto end;
+                }
+                outf("image type=%s", type);
+                if (!strcmp(type, "pixmap")) {
+                    /* Pixmap images are only validated (one <line y=...>
+                    element per row, in order); no image is added for them. */
+                    int w;
+                    int h;
+                    int y;
+                    if (extract_xml_tag_attributes_find_int(&tag, "w", &w)) goto end;
+                    if (extract_xml_tag_attributes_find_int(&tag, "h", &h)) goto end;
+                    for (y=0; y<h; ++y) {
+                        int yy;
+                        if (extract_xml_pparse_next(buffer, &tag)) goto end;
+                        if (strcmp(tag.name, "line")) {
+                            outf("Expected <line> but tag.name='%s'", tag.name);
+                            errno = ESRCH;
+                            goto end;
+                        }
+                        if (extract_xml_tag_attributes_find_int(&tag, "y", &yy)) goto end;
+                        if (yy != y) {
+                            outf("Expected <line y=%i> but found <line y=%i>", y, yy);
+                            errno = ESRCH;
+                            goto end;
+                        }
+                        if (extract_xml_pparse_next(buffer, &tag)) goto end;
+                        if (strcmp(tag.name, "/line")) {
+                            outf("Expected </line> but tag.name='%s'", tag.name);
+                            errno = ESRCH;
+                            goto end;
+                        }
+                    }
+                }
+                else {
+                    /* Compressed. The tag's text is the image data as
+                    lower-case hex digit pairs; spaces and newlines between
+                    pairs are skipped. */
+                    size_t image_data_size;
+                    const char* text_start;
+                    const char* c;
+                    size_t i;
+                    if (extract_xml_tag_attributes_find_size(&tag, "datasize", &image_data_size)) goto end;
+                    if (extract_malloc(extract->alloc, &image_data, image_data_size)) goto end;
+                    /* Treat missing text as empty so that we report a decode
+                    error below rather than dereferencing NULL. */
+                    text_start = (tag.text.chars) ? tag.text.chars : "";
+                    c = text_start;
+                    /* Test the size before each write so that datasize=0
+                    never stores into image_data. A terminating NUL in the
+                    text is not a hex digit, so short input ends in
+                    compressed_error instead of reading past the string. */
+                    for (i=0; i<image_data_size;) {
+                        int byte = 0;
+                        int cc;
+                        cc = *c;
+                        c += 1;
+                        if (cc == ' ' || cc == '\n') continue;
+                        if (cc >= '0' && cc <= '9') byte += cc-'0';
+                        else if (cc >= 'a' && cc <= 'f') byte += 10 + cc - 'a';
+                        else goto compressed_error;
+                        byte *= 16;
+
+                        cc = *c;
+                        c += 1;
+                        if (cc >= '0' && cc <= '9') byte += cc-'0';
+                        else if (cc >= 'a' && cc <= 'f') byte += 10 + cc - 'a';
+                        else goto compressed_error;
+
+                        image_data[i] = (char) byte;
+                        i += 1;
+                        continue;
+
+                        compressed_error:
+                        outf("Unrecognised hex character '%x' at offset %lli in image data", cc, (long long) (c-text_start));
+                        errno = EINVAL;
+                        goto end;
+                    }
+                    if (extract_add_image(
+                            extract,
+                            type,
+                            0 /*x*/,
+                            0 /*y*/,
+                            0 /*w*/,
+                            0 /*h*/,
+                            image_data,
+                            image_data_size,
+                            image_free_fn,
+                            NULL
+                            ))
+                    {
+                        goto end;
+                    }
+                    /* The image now refers to the data, so don't free it at
+                    <end>. */
+                    image_data = NULL;
+                }
+                if (extract_xml_pparse_next(buffer, &tag)) goto end;
+                if (strcmp(tag.name, "/image")) {
+                    outf("Expected </image> but tag.name='%s'", tag.name);
+                    errno = ESRCH;
+                    goto end;
+                }
+                continue;
+            }
+            if (strcmp(tag.name, "span")) {
+                outf("Expected <span> but tag.name='%s'", tag.name);
+                errno = ESRCH;
+                goto end;
+            }
+
+            {
+                matrix_t ctm;
+                matrix_t trm;
+                const char* font_name;
+                const char* font_name2;
+                int font_bold;
+                int font_italic;
+                int wmode;
+                if (s_matrix_read(extract_xml_tag_attributes_find(&tag, "ctm"), &ctm)) goto end;
+                if (s_matrix_read(extract_xml_tag_attributes_find(&tag, "trm"), &trm)) goto end;
+                font_name = extract_xml_tag_attributes_find(&tag, "font_name");
+                if (!font_name) {
+                    outf("Failed to find attribute 'font_name'");
+                    errno = EINVAL;
+                    goto end;
+                }
+                /* Skip anything up to and including the first '+' in the
+                font name. */
+                font_name2 = strchr(font_name, '+');
+                if (font_name2) font_name = font_name2 + 1;
+                font_bold = strstr(font_name, "-Bold") ? 1 : 0;
+                font_italic = strstr(font_name, "-Oblique") ? 1 : 0;
+                if (extract_xml_tag_attributes_find_int(&tag, "wmode", &wmode)) goto end;
+                if (extract_span_begin(
+                        extract,
+                        font_name,
+                        font_bold,
+                        font_italic,
+                        wmode,
+                        ctm.a,
+                        ctm.b,
+                        ctm.c,
+                        ctm.d,
+                        ctm.e,
+                        ctm.f,
+                        trm.a,
+                        trm.b,
+                        trm.c,
+                        trm.d,
+                        trm.e,
+                        trm.f
+                        )) goto end;
+
+                for(;;) {
+                    double x;
+                    double y;
+                    double adv;
+                    unsigned ucs;
+
+                    if (extract_xml_pparse_next(buffer, &tag)) {
+                        outf("Failed to find <char or </span");
+                        goto end;
+                    }
+                    if (!strcmp(tag.name, "/span")) {
+                        break;
+                    }
+                    if (strcmp(tag.name, "char")) {
+                        errno = ESRCH;
+                        outf("Expected <char> but tag.name='%s'", tag.name);
+                        goto end;
+                    }
+
+                    if (extract_xml_tag_attributes_find_double(&tag, "x", &x)) goto end;
+                    if (extract_xml_tag_attributes_find_double(&tag, "y", &y)) goto end;
+                    if (extract_xml_tag_attributes_find_double(&tag, "adv", &adv)) goto end;
+                    if (extract_xml_tag_attributes_find_uint(&tag, "ucs", &ucs)) goto end;
+
+                    if (extract_add_char(extract, x, y, ucs, adv, autosplit)) goto end;
+                }
+
+                extract_xml_tag_free(extract->alloc, &tag);
+            }
+        }
+        if (extract_page_end(extract)) goto end;
+        outf("page=%i page->num_spans=%i",
+                document->pages_num, page->spans_num);
+    }
+
+    outf("num_spans=%i num_spans_split=%i num_spans_autosplit=%i",
+            num_spans,
+            extract->num_spans_split,
+            extract->num_spans_autosplit
+            );
+
+    ret = 0;
+
+    end:
+    extract_xml_tag_free(extract->alloc, &tag);
+    extract_free(extract->alloc, &image_data);
+
+    return ret;
+}
+
+
+/* Appends a new span to the last page of <extract> and fills in its font
+name, style flags, writing mode and ctm/trm matrices. Also resets the
+autosplit offsets (span_offset_x/y) to zero.
+
+Any prefix of <font_name> up to and including the first '+' is dropped before
+the name is copied. font_bold, font_italic and wmode are stored as 0 or 1.
+
+There must already be a page (asserted). Returns 0 on success, -1 if the span
+could not be appended or the font name could not be copied. */
+int extract_span_begin(
+        extract_t* extract,
+        const char* font_name,
+        int font_bold,
+        int font_italic,
+        int wmode,
+        double ctm_a,
+        double ctm_b,
+        double ctm_c,
+        double ctm_d,
+        double ctm_e,
+        double ctm_f,
+        double trm_a,
+        double trm_b,
+        double trm_c,
+        double trm_d,
+        double trm_e,
+        double trm_f
+        )
+{
+    page_t* page;
+    span_t* span;
+    const char* plus;
+    const char* base_name;
+
+    assert(extract->document.pages_num > 0);
+    page = extract->document.pages[extract->document.pages_num-1];
+    span = page_span_append(extract->alloc, page);
+    if (!span) return -1;
+
+    span->ctm.a = ctm_a;    span->trm.a = trm_a;
+    span->ctm.b = ctm_b;    span->trm.b = trm_b;
+    span->ctm.c = ctm_c;    span->trm.c = trm_c;
+    span->ctm.d = ctm_d;    span->trm.d = trm_d;
+    span->ctm.e = ctm_e;    span->trm.e = trm_e;
+    span->ctm.f = ctm_f;    span->trm.f = trm_f;
+
+    plus = strchr(font_name, '+');
+    base_name = font_name;
+    if (plus) base_name = plus + 1;
+    if (extract_strdup(extract->alloc, base_name, &span->font_name)) return -1;
+
+    span->font_bold = (font_bold != 0);
+    span->font_italic = (font_italic != 0);
+    span->wmode = (wmode != 0);
+    extract->span_offset_x = 0;
+    extract->span_offset_y = 0;
+    return 0;
+}
+
+
+/* Appends a char with position (x, y), unicode value <ucs> and advance <adv>
+to the last span of the last page, then runs page_span_end_clean() on the
+page.
+
+If <autosplit> is non-zero and <y> differs from the current span_offset_y,
+the span's ctm.e/ctm.f are recomputed and the offsets are moved to (x, y); if
+the span already holds chars, a new span (copying the old span's settings and
+font name) is started first.
+
+Returns 0 on success, -1 on failure. */
+int extract_add_char(
+        extract_t* extract,
+        double x,
+        double y,
+        unsigned ucs,
+        double adv,
+        int autosplit
+        )
+{
+    page_t* page = extract->document.pages[extract->document.pages_num-1];
+    span_t* span = page->spans[page->spans_num - 1];
+    char_t* added;
+    int spans_before;
+
+    if (autosplit && y - extract->span_offset_y != 0) {
+        const double ctm_e = span->ctm.e + span->ctm.a * (x - extract->span_offset_x)
+                + span->ctm.b * (y - extract->span_offset_y);
+        const double ctm_f = span->ctm.f + span->ctm.c * (x - extract->span_offset_x)
+                + span->ctm.d * (y - extract->span_offset_y);
+        extract->span_offset_x = x;
+        extract->span_offset_y = y;
+        outfx("autosplit: char_pre_y=%f offset_y=%f",
+                char_pre_y, offset_y);
+        outfx(
+                "autosplit: changing ctm.{e,f} from (%f, %f) to (%f, %f)",
+                span->ctm.e,
+                span->ctm.f,
+                ctm_e, ctm_f
+                );
+        if (span->chars_num > 0) {
+            /* Current span already has content, so continue in a new span
+            that starts out as a copy of it, minus the chars. */
+            span_t* previous = span;
+            extract->num_spans_autosplit += 1;
+            span = page_span_append(extract->alloc, page);
+            if (!span) return -1;
+            *span = *previous;
+            span->chars = NULL;
+            span->chars_num = 0;
+            if (extract_strdup(extract->alloc, previous->font_name, &span->font_name)) return -1;
+        }
+        span->ctm.e = ctm_e;
+        span->ctm.f = ctm_f;
+        outfx("autosplit: char_pre_y=%f offset_y=%f",
+                char_pre_y, offset_y);
+    }
+
+    if (span_append_c(extract->alloc, span, 0 /*c*/)) return -1;
+    added = &span->chars[span->chars_num - 1];
+
+    added->ucs = ucs;
+    added->adv = adv;
+
+    /* pre_x/pre_y are relative to the autosplit offsets; x/y are those values
+    put through the span's ctm. */
+    added->pre_x = x - extract->span_offset_x;
+    added->pre_y = y - extract->span_offset_y;
+
+    added->x = span->ctm.a * added->pre_x + span->ctm.b * added->pre_y;
+    added->y = span->ctm.c * added->pre_x + span->ctm.d * added->pre_y;
+    added->x += span->ctm.e;
+    added->y += span->ctm.f;
+
+    /* page_span_end_clean() may move the new char into a span of its own;
+    count it if so. */
+    spans_before = page->spans_num;
+    if (page_span_end_clean(extract->alloc, page)) return -1;
+    if (page->spans_num != spans_before) {
+        extract->num_spans_split += 1;
+    }
+    return 0;
+}
+
+
+/* Finishes the last span of the last page. If no chars were added to it, the
+span is removed from the page and freed, together with the font name that
+extract_span_begin() copied for it. Always returns 0. */
+int extract_span_end(extract_t* extract)
+{
+    page_t* page = extract->document.pages[extract->document.pages_num-1];
+    span_t* span = page->spans[page->spans_num - 1];
+    if (span->chars_num == 0) {
+        /* Calling code called extract_span_begin() then extract_span_end()
+        without any call to extract_add_char(). Our joining code assumes that
+        all spans are non-empty, so we need to delete this span. */
+        /* Release what the span owns before the span itself, otherwise the
+        strdup'd font name is leaked. */
+        extract_free(extract->alloc, &span->font_name);
+        extract_free(extract->alloc, &span->chars);
+        extract_free(extract->alloc, &page->spans[page->spans_num - 1]);
+        page->spans_num -= 1;
+    }
+    return 0;
+}
+
+
+/* Appends an image of the given <type> to the last page of <extract>.
+
+<data>/<data_size> and <data_free>/<data_free_handle> are stored in the new
+image_t; an id ("rId<N>") and a name ("image<N>.<type>") are generated from
+an incrementing counter. x, y, w and h are currently ignored.
+
+Returns 0 on success. On failure returns -1 and <data> is left untouched: it
+still belongs to the caller, who supplied it along with its own <data_free>
+function, so it must not be released here through our allocator (doing so
+also led to a double free in extract_read_intermediate(), which frees its
+buffer itself when this function fails). */
+int extract_add_image(
+        extract_t* extract,
+        const char* type,
+        double x,
+        double y,
+        double w,
+        double h,
+        char* data,
+        size_t data_size,
+        extract_image_data_free data_free,
+        void* data_free_handle
+        )
+{
+    int e = -1;
+    page_t* page = extract->document.pages[extract->document.pages_num-1];
+    image_t image_temp = {0};
+
+    (void) x;
+    (void) y;
+    (void) w;
+    (void) h;
+
+    extract->image_n += 1;
+    image_temp.data = data;
+    image_temp.data_size = data_size;
+    image_temp.data_free = data_free;
+    image_temp.data_free_handle = data_free_handle;
+    if (extract_strdup(extract->alloc, type, &image_temp.type)) goto end;
+    if (extract_asprintf(extract->alloc, &image_temp.id, "rId%i", extract->image_n) < 0) goto end;
+    if (extract_asprintf(extract->alloc, &image_temp.name, "image%i.%s", extract->image_n, image_temp.type) < 0) goto end;
+
+    if (extract_realloc2(
+            extract->alloc,
+            &page->images,
+            sizeof(image_t) * page->images_num,
+            sizeof(image_t) * (page->images_num + 1)
+            )) goto end;
+
+    page->images[page->images_num] = image_temp;
+    page->images_num += 1;
+    outf("page->images_num=%i", page->images_num);
+
+    e = 0;
+
+    end:
+
+    if (e) {
+        /* Only free the strings we allocated ourselves; see header comment
+        for why <data> is not freed. */
+        extract_free(extract->alloc, &image_temp.type);
+        extract_free(extract->alloc, &image_temp.id);
+        extract_free(extract->alloc, &image_temp.name);
+    }
+
+    return e;
+}
+
+/* Appends a new empty page_t to extract->document.
+
+Returns 0 on success, or -1 if either the page or the enlarged pages array
+could not be allocated (in which case the document is unchanged). */
+int extract_page_begin(extract_t* extract)
+{
+    page_t* page;
+    if (extract_malloc(extract->alloc, &page, sizeof(page_t))) return -1;
+    page->spans = NULL;
+    page->spans_num = 0;
+    page->lines = NULL;
+    page->lines_num = 0;
+    page->paragraphs = NULL;
+    page->paragraphs_num = 0;
+    page->images = NULL;
+    page->images_num = 0;
+    /* The old size must be exactly the current array size. The previous
+    expression, sizeof(page_t*) * pages_num + 1, was one byte too large
+    because of operator precedence, so the allocator was told the array was
+    bigger than it really is. */
+    if (extract_realloc2(
+            extract->alloc,
+            &extract->document.pages,
+            sizeof(page_t*) * extract->document.pages_num,
+            sizeof(page_t*) * (extract->document.pages_num + 1)
+            )) {
+        extract_free(extract->alloc, &page);
+        return -1;
+    }
+    extract->document.pages[extract->document.pages_num] = page;
+    extract->document.pages_num += 1;
+    return 0;
+}
+
+
+/* Marks the end of the current page. Currently there is nothing to do, so
+this ignores <extract> and always returns 0. */
+int extract_page_end(extract_t* extract)
+{
+    (void) extract;
+
+    return 0;
+}
+
+/* Converts the pages accumulated in <extract> into docx content.
+
+Appends a new string to extract->contentss, joins the document's spans,
+writes the docx content for the document into the new string (passing
+<spacing>, <rotation> and <images> through), moves the document's images into
+extract->images, and finally frees the document's pages.
+
+Returns 0 on success, -1 on failure. On failure the new contentss entry and
+the pages are left in place. */
+int extract_process(
+        extract_t* extract,
+        int spacing,
+        int rotation,
+        int images
+        )
+{
+    extract_astring_t* content;
+
+    if (extract_realloc2(
+            extract->alloc,
+            &extract->contentss,
+            sizeof(*extract->contentss) * extract->contentss_num,
+            sizeof(*extract->contentss) * (extract->contentss_num + 1)
+            )) {
+        return -1;
+    }
+    content = &extract->contentss[extract->contentss_num];
+    extract_astring_init(content);
+    extract->contentss_num += 1;
+
+    if (extract_document_join(extract->alloc, &extract->document)) return -1;
+
+    if (extract_document_to_docx_content(
+            extract->alloc,
+            &extract->document,
+            spacing,
+            rotation,
+            images,
+            content
+            )) {
+        return -1;
+    }
+
+    if (extract_document_images(extract->alloc, &extract->document, &extract->images)) return -1;
+
+    /* Pages are no longer needed once their content and images have been
+    captured. */
+    extract_document_free(extract->alloc, &extract->document);
+
+    return 0;
+}
+
+/* Writes a complete docx (a zip archive) to <buffer>.
+
+Each item of the built-in docx template is written under its own name, using
+the text produced by extract_docx_content_item() if it returned any,
+otherwise the template's own text. Each image in extract->images is then
+written as "word/media/<name>".
+
+Returns 0 on success, -1 on failure (the zip is closed either way). */
+int extract_write(extract_t* extract, extract_buffer_t* buffer)
+{
+    int e = -1;
+    extract_zip_t* zip = NULL;
+    char* scratch = NULL;
+    int n;
+
+    if (extract_zip_open(buffer, &zip)) goto end;
+
+    n = 0;
+    while (n < docx_template_items_num) {
+        const docx_template_item_t* item = &docx_template_items[n];
+        const char* payload;
+        extract_free(extract->alloc, &scratch);
+        outf("i=%i item->name=%s", n, item->name);
+        if (extract_docx_content_item(
+                extract->alloc,
+                extract->contentss,
+                extract->contentss_num,
+                &extract->images,
+                item->name,
+                item->text,
+                &scratch
+                )) {
+            goto end;
+        }
+
+        /* A NULL result means the template text is used unchanged. */
+        payload = item->text;
+        if (scratch) payload = scratch;
+        if (extract_zip_write_file(zip, payload, strlen(payload), item->name)) goto end;
+        n += 1;
+    }
+
+    n = 0;
+    while (n < extract->images.images_num) {
+        image_t* image = &extract->images.images[n];
+        extract_free(extract->alloc, &scratch);
+        if (extract_asprintf(extract->alloc, &scratch, "word/media/%s", image->name) < 0) goto end;
+        if (extract_zip_write_file(zip, image->data, image->data_size, scratch)) goto end;
+        n += 1;
+    }
+
+    if (extract_zip_close(&zip)) goto end;
+    assert(!zip);
+
+    e = 0;
+
+    end:
+    if (e) outf("failed: %s", strerror(errno));
+    extract_free(extract->alloc, &scratch);
+    extract_zip_close(&zip);
+
+    return e;
+}
+
+/* Writes each string in extract->contentss to <buffer>, in order. Returns 0
+on success, or -1 as soon as a write fails. */
+int extract_write_content(extract_t* extract, extract_buffer_t* buffer)
+{
+    int n = 0;
+    while (n < extract->contentss_num) {
+        extract_astring_t* content = &extract->contentss[n];
+        if (extract_buffer_write(
+                buffer,
+                content->chars,
+                content->chars_num,
+                NULL /*o_actual*/
+                )) {
+            return -1;
+        }
+        n += 1;
+    }
+    return 0;
+}
+
+/* Thin wrapper: forwards the content strings and images held in <extract>,
+plus the caller's paths and <preserve_dir> flag, to
+extract_docx_write_template() and returns its result. */
+int extract_write_template(
+        extract_t* extract,
+        const char* path_template,
+        const char* path_out,
+        int preserve_dir
+        )
+{
+    int result = extract_docx_write_template(
+            extract->alloc,
+            extract->contentss,
+            extract->contentss_num,
+            &extract->images,
+            path_template,
+            path_out,
+            preserve_dir
+            );
+    return result;
+}
+
+/* Destroys *pextract: frees its document, every content string, the images
+and finally the extract_t itself (through pextract). Does nothing if
+*pextract is NULL. */
+void extract_end(extract_t** pextract)
+{
+    extract_t* extract = *pextract;
+    int n;
+
+    if (extract == NULL) return;
+
+    extract_document_free(extract->alloc, &extract->document);
+
+    n = 0;
+    while (n < extract->contentss_num) {
+        extract_astring_free(extract->alloc, &extract->contentss[n]);
+        n += 1;
+    }
+    extract_free(extract->alloc, &extract->contentss);
+
+    extract_images_free(extract->alloc, &extract->images);
+    extract_free(extract->alloc, pextract);
+}
+
+/* Calls span_string() with NULL arguments.
+NOTE(review): presumably this makes span_string() release its internal
+static buffer - confirm against span_string()'s definition. */
+void extract_internal_end(void)
+{
+    span_string(NULL, NULL);
+    return;
+}
+
+/* Forwards <size> to extract_alloc_exp_min() for the allocator used by
+<extract>. */
+void extract_exp_min(extract_t* extract, size_t size)
+{
+    extract_alloc_t* alloc = extract->alloc;
+    extract_alloc_exp_min(alloc, size);
+}
diff --git a/extract/src/join.c b/extract/src/join.c
new file mode 100644
index 00000000..bc02ea21
--- /dev/null
+++ b/extract/src/join.c
@@ -0,0 +1,951 @@
+#include "../include/extract.h"
+#include "../include/extract_alloc.h"
+
+#include "astring.h"
+#include "document.h"
+#include "mem.h"
+#include "outf.h"
+
+#include <assert.h>
+#include <math.h>
+#include <stdio.h>
+
+
+/* Returns first char_t in a span; the span must not be empty (asserted). */
+static char_t* span_char_first(span_t* span)
+{
+    assert(span->chars_num > 0);
+    return span->chars;
+}
+
+/* Returns the first char_t of the first span in <line>. */
+static char_t* line_item_first(line_t* line)
+{
+    return span_char_first(line_span_first(line));
+}
+
+/* Returns the last char_t of the last span in <line>. */
+static char_t* line_item_last(line_t* line)
+{
+    return span_char_last(line_span_last(line));
+}
+
+/* Formats all six members of <matrix> as "{a b c d e f}" into a static
+64-byte buffer and returns it. The text is truncated if it does not fit, and
+is overwritten by the next call. */
+static const char* matrix_string(const matrix_t* matrix)
+{
+    static char text[64];
+    const matrix_t m = *matrix;
+    snprintf(text, sizeof text, "{%f %f %f %f %f %f}",
+            m.a, m.b, m.c, m.d, m.e, m.f);
+    return text;
+}
+
+/* Returns total width of span: the distance between its first and last
+char_t's, plus the last char_t's advance scaled by the expansion of trm. */
+static double span_adv_total(span_t* span)
+{
+    char_t* first = span_char_first(span);
+    char_t* last = span_char_last(span);
+    double dx = last->x - first->x;
+    double dy = last->y - first->y;
+    /* Including the final advance means a single-char span does not come out
+    as zero width. */
+    double last_adv = last->adv * matrix_expansion(span->trm);
+    return sqrt(dx*dx + dy*dy) + last_adv;
+}
+
+/* Returns distance between end of <a> and beginning of <b>: the distance
+between the two chars' positions, less <a>'s advance scaled by the expansion
+of a_span->trm. */
+static double spans_adv(
+        span_t* a_span,
+        char_t* a,
+        char_t* b
+        )
+{
+    double dx = b->x - a->x;
+    double dy = b->y - a->y;
+    double gap = sqrt( dx*dx + dy*dy);
+    gap -= a->adv * matrix_expansion(a_span->trm);
+    return gap;
+}
+
+/* Returns the angle of <span> in radians, computed from its ctm as
+atan2(-ctm.c, ctm.a). */
+static double span_angle(span_t* span)
+{
+    /* Assume ctm is a rotation matrix. */
+    const double angle = atan2(-span->ctm.c, span->ctm.a);
+    outfx("ctm.a=%f ctm.b=%f ret=%f", span->ctm.a, span->ctm.b, angle);
+    return angle;
+    /* Not sure whether this is right. Inclined text seems to be done by
+    setting the ctm matrix, so not really sure what trm matrix does. An
+    alternative that assumes trm also inclines text (but maybe it only rotates
+    individual glyphs?) would be:
+        wmode == 0: atan2(span->trm.b, span->trm.a)
+        otherwise:  atan2(span->trm.d, span->trm.c)
+    */
+}
+
+/* Returns static string containing brief info about span_t: the span's chars
+(each ucs value cast to char) between double quotes. The string is held in a
+function-local static and is replaced by the next call. */
+static const char* span_string2(extract_alloc_t* alloc, span_t* span)
+{
+    static extract_astring_t text = {0};
+    int n = 0;
+    extract_astring_free(alloc, &text);
+    extract_astring_catc(alloc, &text, '"');
+    while (n < span->chars_num) {
+        extract_astring_catc(alloc, &text, (char) span->chars[n].ucs);
+        n += 1;
+    }
+    extract_astring_catc(alloc, &text, '"');
+    return text.chars;
+}
+
+/* Returns angle of <line>, taken from its first span; all spans in a line
+must have the same angle. The line must not be empty (asserted). */
+static double line_angle(line_t* line)
+{
+    span_t* first;
+    assert(line->spans_num > 0);
+    first = line->spans[0];
+    return span_angle(first);
+}
+
+/* Returns static string containing brief info about line_t: the position of
+the first char of the first span, the number of spans, then span_string2()
+of each span. The string is held in a function-local static and is replaced
+by the next call. */
+static const char* line_string2(extract_alloc_t* alloc, line_t* line)
+{
+    static extract_astring_t text = {0};
+    char header[256];
+    char_t* origin;
+    int n;
+    extract_astring_free(alloc, &text);
+    origin = &line->spans[0]->chars[0];
+    snprintf(header, sizeof(header), "line x=%f y=%f spans_num=%i:",
+            origin->x,
+            origin->y,
+            line->spans_num
+            );
+    extract_astring_cat(alloc, &text, header);
+    n = 0;
+    while (n < line->spans_num) {
+        extract_astring_cat(alloc, &text, " ");
+        extract_astring_cat(alloc, &text, span_string2(alloc, line->spans[n]));
+        n += 1;
+    }
+    return text.chars;
+}
+
+/* Returns static string containing brief info about a paragraph_t (an array
+of pointers to lines that are aligned and adjacent to each other so as to
+form a paragraph): "paragraph: " followed by line_string2() of the first
+line and, if there is more than one line, ".." and line_string2() of the
+last line. The string is held in a function-local static and is replaced by
+the next call. */
+static const char* paragraph_string(extract_alloc_t* alloc, paragraph_t* paragraph)
+{
+    static extract_astring_t text = {0};
+    extract_astring_free(alloc, &text);
+    extract_astring_cat(alloc, &text, "paragraph: ");
+    if (paragraph->lines_num == 0) {
+        return text.chars;
+    }
+    extract_astring_cat(alloc, &text, line_string2(alloc, paragraph->lines[0]));
+    if (paragraph->lines_num > 1) {
+        line_t* last = paragraph->lines[paragraph->lines_num-1];
+        extract_astring_cat(alloc, &text, "..");
+        extract_astring_cat(alloc, &text, line_string2(alloc, last));
+    }
+    return text.chars;
+}
+
+/* Returns first line in paragraph; the paragraph must not be empty
+(asserted). */
+static line_t* paragraph_line_first(const paragraph_t* paragraph)
+{
+    assert(paragraph->lines_num);
+    return *paragraph->lines;
+}
+
+/* Returns last line in paragraph; the paragraph must not be empty
+(asserted). */
+static line_t* paragraph_line_last(const paragraph_t* paragraph)
+{
+    int last;
+    assert(paragraph->lines_num);
+    last = paragraph->lines_num - 1;
+    return paragraph->lines[last];
+}
+
+
+
+/* Things for direct conversion of text spans into lines and paragraphs. */
+
+/* Returns 1 if <a> and <b> are different lines that both have spans, whose
+first spans have the same wmode and the same ctm a/b/c/d (as compared by
+matrix_cmp4()), and where the angle of <b>'s first span equals <angle_a>;
+otherwise returns 0. If <verbose> is non-zero, the two ctm's are output when
+they differ.
+
+todo: allow small epsilon? */
+static int lines_are_compatible(
+        line_t* a,
+        line_t* b,
+        double angle_a,
+        int verbose
+        )
+{
+    span_t* first_a;
+    span_t* first_b;
+
+    if (a == b) return 0;
+    if (!a->spans || !b->spans) return 0;
+
+    first_a = line_span_first(a);
+    first_b = line_span_first(b);
+
+    if (first_a->wmode != first_b->wmode) return 0;
+
+    if (matrix_cmp4(&first_a->ctm, &first_b->ctm)) {
+        if (verbose) {
+            outf("ctm's differ:");
+            outf("    %f %f %f %f %f %f",
+                    first_a->ctm.a,
+                    first_a->ctm.b,
+                    first_a->ctm.c,
+                    first_a->ctm.d,
+                    first_a->ctm.e,
+                    first_a->ctm.f
+                    );
+            outf("    %f %f %f %f %f %f",
+                    first_b->ctm.a,
+                    first_b->ctm.b,
+                    first_b->ctm.c,
+                    first_b->ctm.d,
+                    first_b->ctm.e,
+                    first_b->ctm.f
+                    );
+        }
+        return 0;
+    }
+
+    if (span_angle(first_b) != angle_a) {
+        outfx("%s:%i: angles differ");
+        return 0;
+    }
+    return 1;
+}
+
+
+/* Creates representation of span_t's that consists of a list of line_t's, with
+each line_t contains pointers to a list of span_t's.
+
+We only join spans that are at the same angle and are aligned.
+
+On entry:
+ Original value of *o_lines and *o_lines_num are ignored.
+
+ <spans> points to array of <spans_num> span_t*'s, each pointing to
+ an span_t.
+
+On exit:
+ If we succeed, we return 0, with *o_lines pointing to array of *o_lines_num
+ line_t*'s, each pointing to an line_t.
+
+ Otherwise we return -1 with errno set. *o_lines and *o_lines_num are
+ undefined.
+*/
+static int make_lines(
+ extract_alloc_t* alloc,
+ span_t** spans,
+ int spans_num,
+ line_t*** o_lines,
+ int* o_lines_num
+ )
+{
+ int ret = -1;
+
+ /* Make an line_t for each span. Then we will join some of these
+ line_t's together before returning. */
+ int lines_num = spans_num;
+ line_t** lines = NULL;
+ int a;
+ int num_compatible;
+ int num_joins;
+ if (extract_malloc(alloc, &lines, sizeof(*lines) * lines_num)) goto end;
+
+ /* Ensure we can clean up after error. */
+ for (a=0; a<lines_num; ++a) {
+ lines[a] = NULL;
+ }
+ for (a=0; a<lines_num; ++a) {
+ if (extract_malloc(alloc, &lines[a], sizeof(line_t))) goto end;
+ lines[a]->spans_num = 0;
+ if (extract_malloc(alloc, &lines[a]->spans, sizeof(span_t*) * 1)) goto end;
+ lines[a]->spans_num = 1;
+ lines[a]->spans[0] = spans[a];
+ outfx("initial line a=%i: %s", a, line_string(lines[a]));
+ }
+
+ num_compatible = 0;
+
+ /* For each line, look for nearest aligned line, and append if found. */
+ num_joins = 0;
+ for (a=0; a<lines_num; ++a) {
+ int b;
+ int verbose = 0;
+ int nearest_line_b = -1;
+ double nearest_adv = 0;
+ line_t* nearest_line = NULL;
+ span_t* span_a;
+ double angle_a;
+
+ line_t* line_a = lines[a];
+ if (!line_a) {
+ continue;
+ }
+
+ if (0 && a < 1) verbose = 1;
+ outfx("looking at line_a=%s", line_string2(line_a));
+
+ span_a = line_span_last(line_a);
+ angle_a = span_angle(span_a);
+ if (verbose) outf("a=%i angle_a=%f ctm=%s: %s",
+ a,
+ angle_a * 180/pi,
+ matrix_string(&span_a->ctm),
+ line_string2(alloc, line_a)
+ );
+
+ for (b=0; b<lines_num; ++b) {
+ line_t* line_b = lines[b];
+ if (!line_b) {
+ continue;
+ }
+ if (b == a) {
+ continue;
+ }
+ if (verbose) {
+ outf("");
+ outf("a=%i b=%i: nearest_line_b=%i nearest_adv=%f",
+ a,
+ b,
+ nearest_line_b,
+ nearest_adv
+ );
+ outf(" line_a=%s", line_string2(alloc, line_a));
+ outf(" line_b=%s", line_string2(alloc, line_b));
+ }
+ if (!lines_are_compatible(line_a, line_b, angle_a, 0*verbose)) {
+ if (verbose) outf("not compatible");
+ continue;
+ }
+
+ num_compatible += 1;
+ {
+ /* Find angle between last glyph of span_a and first glyph of
+ span_b. This detects whether the lines are lined up with each other
+ (as opposed to being at the same angle but in different lines). */
+ span_t* span_b = line_span_first(line_b);
+ double dx = span_char_first(span_b)->x - span_char_last(span_a)->x;
+ double dy = span_char_first(span_b)->y - span_char_last(span_a)->y;
+ double angle_a_b = atan2(-dy, dx);
+ const double angle_tolerance_deg = 1;
+ if (verbose) {
+ outf("delta=(%f %f) alast=(%f %f) bfirst=(%f %f): angle_a=%f angle_a_b=%f",
+ dx,
+ dy,
+ span_char_last(span_a)->x,
+ span_char_last(span_a)->y,
+ span_char_first(span_b)->x,
+ span_char_first(span_b)->y,
+ angle_a * 180 / pi,
+ angle_a_b * 180 / pi
+ );
+ }
+ /* Might want to relax this when we test on non-horizontal lines.
+ */
+ if (fabs(angle_a_b - angle_a) * 180 / pi <= angle_tolerance_deg) {
+ /* Find distance between end of line_a and beginning of line_b. */
+ double adv = spans_adv(
+ span_a,
+ span_char_last(span_a),
+ span_char_first(span_b)
+ );
+ if (verbose) outf("nearest_adv=%f. angle_a_b=%f adv=%f",
+ nearest_adv,
+ angle_a_b,
+ adv
+ );
+ if (!nearest_line || adv < nearest_adv) {
+ nearest_line = line_b;
+ nearest_adv = adv;
+ nearest_line_b = b;
+ }
+ }
+ else {
+ if (verbose) outf(
+ "angle beyond tolerance: span_a last=(%f,%f) span_b first=(%f,%f) angle_a_b=%g angle_a=%g span_a.trm{a=%f b=%f}",
+ span_char_last(span_a)->x,
+ span_char_last(span_a)->y,
+ span_char_first(span_b)->x,
+ span_char_first(span_b)->y,
+ angle_a_b * 180 / pi,
+ angle_a * 180 / pi,
+ span_a->trm.a,
+ span_a->trm.b
+ );
+ }
+ }
+ }
+
+ if (nearest_line) {
+ /* line_a and nearest_line are aligned so we can move line_b's
+ spans on to the end of line_a. */
+ span_t* span_b = line_span_first(nearest_line);
+ b = nearest_line_b;
+ if (verbose) outf("found nearest line. a=%i b=%i", a, b);
+
+ if (1
+ && span_char_last(span_a)->ucs != ' '
+ && span_char_first(span_b)->ucs != ' '
+ ) {
+ /* Find average advance of the two adjacent spans in the two
+ lines we are considering joining, so that we can decide whether
+ the distance between them is large enough to merit joining with
+ a space character). */
+ double average_adv = (
+ (span_adv_total(span_a) + span_adv_total(span_b))
+ /
+ (double) (span_a->chars_num + span_b->chars_num)
+ );
+
+ int insert_space = (nearest_adv > 0.25 * average_adv);
+ if (insert_space) {
+ /* Append space to span_a before concatenation. */
+ char_t* item;
+ if (verbose) {
+ outf("(inserted space) nearest_adv=%f average_adv=%f",
+ nearest_adv,
+ average_adv
+ );
+ outf(" a: %s", span_string(alloc, span_a));
+ outf(" b: %s", span_string(alloc, span_b));
+ }
+ if (extract_realloc2(
+ alloc,
+ &span_a->chars,
+ sizeof(char_t) * span_a->chars_num,
+ sizeof(char_t) * (span_a->chars_num + 1)
+ )) goto end;
+ item = &span_a->chars[span_a->chars_num];
+ span_a->chars_num += 1;
+ extract_bzero(item, sizeof(*item));
+ item->ucs = ' ';
+ item->adv = nearest_adv;
+ }
+
+ if (verbose) {
+ outf("Joining spans a=%i b=%i:", a, b);
+ outf(" %s", span_string2(alloc, span_a));
+ outf(" %s", span_string2(alloc, span_b));
+ }
+ if (0) {
+ /* Show details about what we're joining. */
+ outf(
+ "joining line insert_space=%i a=%i (y=%f) to line b=%i (y=%f). nearest_adv=%f average_adv=%f",
+ insert_space,
+ a,
+ span_char_last(span_a)->y,
+ b,
+ span_char_first(span_b)->y,
+ nearest_adv,
+ average_adv
+ );
+ outf("a: %s", span_string(alloc, span_a));
+ outf("b: %s", span_string(alloc, span_b));
+ }
+ }
+
+ /* We might end up with two adjacent spaces here. But removing a
+ space could result in an empty line_t, which could break various
+ assumptions elsewhere. */
+
+ if (verbose) {
+ outf("Joining spans a=%i b=%i:", a, b);
+ outf(" %s", span_string2(alloc, span_a));
+ outf(" %s", span_string2(alloc, span_b));
+ }
+ if (extract_realloc2(
+ alloc,
+ &line_a->spans,
+ sizeof(span_t*) * line_a->spans_num,
+ sizeof(span_t*) * (line_a->spans_num + nearest_line->spans_num)
+ )) goto end;
+ {
+ int k;
+ for (k=0; k<nearest_line->spans_num; ++k) {
+ line_a->spans[ line_a->spans_num + k] = nearest_line->spans[k];
+ }
+ }
+ line_a->spans_num += nearest_line->spans_num;
+
+ /* Ensure that we ignore nearest_line from now on. */
+ extract_free(alloc, &nearest_line->spans);
+ extract_free(alloc, &nearest_line);
+ outfx("setting line[b=%i] to NULL", b);
+ lines[b] = NULL;
+
+ num_joins += 1;
+
+ if (b > a) {
+ /* We haven't yet tried appending any spans to nearest_line, so
+ the new extended line_a needs checking again. */
+ a -= 1;
+ }
+ outfx("new line is:\n %s", line_string2(line_a));
+ }
+ }
+
+ {
+ /* Remove empty lines left behind after we appended pairs of lines. */
+ int from;
+ int to;
+ int lines_num_old;
+ for (from=0, to=0; from<lines_num; ++from) {
+ if (lines[from]) {
+ outfx("final line from=%i: %s",
+ from,
+ lines[from] ? line_string(lines[from]) : "NULL"
+ );
+ lines[to] = lines[from];
+ to += 1;
+ }
+ }
+ lines_num_old = lines_num;
+ lines_num = to;
+ if (extract_realloc2(
+ alloc,
+ &lines,
+ sizeof(line_t*) * lines_num_old,
+ sizeof(line_t*) * lines_num
+ )) {
+ /* Should always succeed because we're not increasing allocation size. */
+ goto end;
+ }
+ }
+
+ *o_lines = lines;
+ *o_lines_num = lines_num;
+ ret = 0;
+
+ outf("Turned %i spans into %i lines. num_compatible=%i",
+ spans_num,
+ lines_num,
+ num_compatible
+ );
+
+ end:
+ if (ret) {
+ /* Free everything. */
+ if (lines) {
+ for (a=0; a<lines_num; ++a) {
+ if (lines[a]) extract_free(alloc, &lines[a]->spans);
+ extract_free(alloc, &lines[a]);
+ }
+ }
+ extract_free(alloc, &lines);
+ }
+ return ret;
+}
+
+
+/* Returns the largest font size found among the span_t's of <line>, where a
+span's size is matrix_expansion() of its trm matrix. Returns 0 if no span has
+a size greater than zero. */
+static double line_font_size_max(line_t* line)
+{
+    double largest = 0;
+    int s = 0;
+    while (s < line->spans_num) {
+        double span_size = matrix_expansion(line->spans[s]->trm);
+        largest = (span_size > largest) ? span_size : largest;
+        s += 1;
+    }
+    return largest;
+}
+
+
+
+/* Computes the distance between two parallel lines of text, line_a and
+line_b, that both run at <angle>.
+
+            _-R
+         _-
+    A------------_P
+     \        _-
+      \    _B
+       \_-
+        Q
+
+A is the point (ax, ay).
+B is the point (bx, by).
+The angles APB and PAR are both <angle>.
+
+AR and QBP are parallel; they are the lines of text a and b respectively.
+
+AQB is a right angle and the value returned is AQ, evaluated as
+(bx-ax)*sin(angle) + (by-ay)*cos(angle). The result is signed; callers test
+its sign.
+*/
+static double line_distance(
+        double ax,
+        double ay,
+        double bx,
+        double by,
+        double angle
+        )
+{
+    return (bx - ax) * sin(angle) + (by - ay) * cos(angle);
+}
+
+
+/* A comparison function for use with qsort(), for sorting paragraphs within a
+page.
+
+<a> and <b> each point to a paragraph_t*. Only the first line of each
+paragraph is examined.
+
+Returns:
+    The result of matrix_cmp4() on the ctm matrices of the first spans, if
+    that is non-zero.
+    Otherwise 0 if the angles of the two first lines differ by more than 90
+    degrees (we give up trying to order them).
+    Otherwise -1 / +1 / 0 according to whether line_distance() from the start
+    of a's first line to the start of b's first line, measured at the average
+    of the two angles, is positive / negative / zero.
+*/
+static int paragraphs_cmp(const void* a, const void* b)
+{
+    const paragraph_t* const* a_paragraph = a;
+    const paragraph_t* const* b_paragraph = b;
+    line_t* a_line = paragraph_line_first(*a_paragraph);
+    line_t* b_line = paragraph_line_first(*b_paragraph);
+
+    span_t* a_span = line_span_first(a_line);
+    span_t* b_span = line_span_first(b_line);
+
+    /* If ctm matrices differ, always return this diff first. Note that we
+    ignore .e and .f because if data is from ghostscript then .e and .f vary
+    for each span, and we don't care about these differences. */
+    int d = matrix_cmp4(&a_span->ctm, &b_span->ctm);
+    if (d) return d;
+
+    {
+        double a_angle = line_angle(a_line);
+        double b_angle = line_angle(b_line);
+        /* Use the file's <pi> constant, as the rest of this file does,
+        rather than the rough literal 3.14 so that the cut-off really is 90
+        degrees. */
+        if (fabs(a_angle - b_angle) > pi/2) {
+            /* Give up if more than 90 deg. */
+            return 0;
+        }
+        {
+            double angle = (a_angle + b_angle) / 2;
+            double ax = line_item_first(a_line)->x;
+            double ay = line_item_first(a_line)->y;
+            double bx = line_item_first(b_line)->x;
+            double by = line_item_first(b_line)->y;
+            double distance = line_distance(ax, ay, bx, by, angle);
+            if (distance > 0) return -1;
+            if (distance < 0) return +1;
+        }
+    }
+    return 0;
+}
+
+
+/* Creates a representation of line_t's that consists of a list of
+paragraph_t's.
+
+We only join lines that are at the same angle and are adjacent.
+
+On entry:
+ Original value of *o_paragraphs and *o_paragraphs_num are ignored.
+
+ <lines> points to array of <lines_num> line_t*'s, each pointing to
+ a line_t.
+
+On exit:
+ On success, returns zero, *o_paragraphs points to array of *o_paragraphs_num
+ paragraph_t*'s, each pointing to a paragraph_t. In the
+ array, paragraph_t's with same angle are sorted.
+
+ The paragraph_t's point at the line_t's passed in <lines>; the line_t's
+ are not copied.
+
+ On failure, returns -1 with errno set. *o_paragraphs and *o_paragraphs_num
+ are undefined.
+*/
+static int make_paragraphs(
+ extract_alloc_t* alloc,
+ line_t** lines,
+ int lines_num,
+ paragraph_t*** o_paragraphs,
+ int* o_paragraphs_num
+ )
+{
+ int ret = -1;
+ int a;
+ int num_joins;
+ paragraph_t** paragraphs = NULL;
+
+ /* Start off with an paragraph_t for each line_t. */
+ int paragraphs_num = lines_num;
+ if (extract_malloc(alloc, &paragraphs, sizeof(*paragraphs) * paragraphs_num)) goto end;
+ /* Ensure we can clean up after error when setting up. */
+ for (a=0; a<paragraphs_num; ++a) {
+ paragraphs[a] = NULL;
+ }
+ /* Set up initial paragraphs. */
+ for (a=0; a<paragraphs_num; ++a) {
+ if (extract_malloc(alloc, &paragraphs[a], sizeof(paragraph_t))) goto end;
+ paragraphs[a]->lines_num = 0;
+ if (extract_malloc(alloc, &paragraphs[a]->lines, sizeof(line_t*) * 1)) goto end;
+ paragraphs[a]->lines_num = 1;
+ paragraphs[a]->lines[0] = lines[a];
+ }
+
+ /* For each paragraph a, find the nearest compatible paragraph b that
+ follows it and, if close enough, append b's lines to a. Joined-away
+ paragraphs are freed and their slot in paragraphs[] is set to NULL. */
+ num_joins = 0;
+ for (a=0; a<paragraphs_num; ++a) {
+ paragraph_t* nearest_paragraph;
+ int nearest_paragraph_b;
+ double nearest_paragraph_distance;
+ line_t* line_a;
+ double angle_a;
+ int verbose;
+ int b;
+
+ paragraph_t* paragraph_a = paragraphs[a];
+ if (!paragraph_a) {
+ /* This paragraph is empty - already been appended to a different
+ paragraph. */
+ continue;
+ }
+
+ nearest_paragraph = NULL;
+ nearest_paragraph_b = -1;
+ nearest_paragraph_distance = -1;
+ assert(paragraph_a->lines_num > 0);
+
+ /* Candidates are compared against the last line of paragraph_a. */
+ line_a = paragraph_line_last(paragraph_a);
+ angle_a = line_angle(line_a);
+
+ /* Diagnostics guarded by <verbose> below are disabled; change this to
+ non-zero when debugging. */
+ verbose = 0;
+
+ /* Look for nearest paragraph_t that could be appended to
+ paragraph_a. */
+ for (b=0; b<paragraphs_num; ++b) {
+ paragraph_t* paragraph_b = paragraphs[b];
+ line_t* line_b;
+ if (!paragraph_b) {
+ /* This paragraph is empty - already been appended to a different
+ paragraph. */
+ continue;
+ }
+ line_b = paragraph_line_first(paragraph_b);
+ if (!lines_are_compatible(line_a, line_b, angle_a, 0)) {
+ continue;
+ }
+
+ {
+ /* Distance is measured from the last item of line_a to the first
+ item of line_b, at line_a's angle. */
+ double ax = line_item_last(line_a)->x;
+ double ay = line_item_last(line_a)->y;
+ double bx = line_item_first(line_b)->x;
+ double by = line_item_first(line_b)->y;
+ double distance = line_distance(ax, ay, bx, by, angle_a);
+ if (verbose) {
+ outf(
+ "angle_a=%f a=(%f %f) b=(%f %f) delta=(%f %f) distance=%f:",
+ angle_a * 180 / pi,
+ ax, ay,
+ bx, by,
+ bx - ax,
+ by - ay,
+ distance
+ );
+ outf(" line_a=%s", line_string2(alloc, line_a));
+ outf(" line_b=%s", line_string2(alloc, line_b));
+ }
+ /* Only candidates with a positive distance are considered; keep
+ the one with the smallest distance. -1 marks 'none found yet'. */
+ if (distance > 0) {
+ if (nearest_paragraph_distance == -1
+ || distance < nearest_paragraph_distance) {
+ if (verbose) {
+ outf("updating nearest. distance=%f:", distance);
+ outf(" line_a=%s", line_string2(alloc, line_a));
+ outf(" line_b=%s", line_string2(alloc, line_b));
+ }
+ nearest_paragraph_distance = distance;
+ nearest_paragraph_b = b;
+ nearest_paragraph = paragraph_b;
+ }
+ }
+ }
+ }
+
+ if (nearest_paragraph) {
+ double line_b_size = line_font_size_max(
+ paragraph_line_first(nearest_paragraph)
+ );
+ line_t* line_b = paragraph_line_first(nearest_paragraph);
+ (void) line_b; /* Only used in outfx(). */
+ if (nearest_paragraph_distance < 1.4 * line_b_size) {
+ /* Paragraphs are close together vertically compared to maximum
+ font size of first line in second paragraph, so we'll join them
+ into a single paragraph. */
+ span_t* a_span;
+ int a_lines_num_new;
+ if (verbose) {
+ outf(
+ "joing paragraphs. a=(%f,%f) b=(%f,%f) nearest_paragraph_distance=%f line_b_size=%f",
+ line_item_last(line_a)->x,
+ line_item_last(line_a)->y,
+ line_item_first(line_b)->x,
+ line_item_first(line_b)->y,
+ nearest_paragraph_distance,
+ line_b_size
+ );
+ outf(" %s", paragraph_string(alloc, paragraph_a));
+ outf(" %s", paragraph_string(alloc, nearest_paragraph));
+ outf("paragraph_a ctm=%s",
+ matrix_string(&paragraph_a->lines[0]->spans[0]->ctm)
+ );
+ outf("paragraph_a trm=%s",
+ matrix_string(&paragraph_a->lines[0]->spans[0]->trm)
+ );
+ }
+ /* Join these two paragraph_t's. */
+ a_span = line_span_last(line_a);
+ if (span_char_last(a_span)->ucs == '-') {
+ /* remove trailing '-' at end of prev line. char_t doesn't
+ contain any malloc-heap pointers so this doesn't leak. */
+ a_span->chars_num -= 1;
+ }
+ else {
+ /* Insert space before joining adjacent lines. */
+ char_t* c_prev;
+ char_t* c;
+ if (span_append_c(alloc, line_span_last(line_a), ' ')) goto end;
+ /* Give the new space a position derived from the previous
+ character's position and advance. */
+ c_prev = &a_span->chars[ a_span->chars_num-2];
+ c = &a_span->chars[ a_span->chars_num-1];
+ c->x = c_prev->x + c_prev->adv * a_span->ctm.a;
+ c->y = c_prev->y + c_prev->adv * a_span->ctm.c;
+ }
+
+ /* Grow paragraph_a's lines array and copy nearest_paragraph's line
+ pointers on to the end. */
+ a_lines_num_new = paragraph_a->lines_num + nearest_paragraph->lines_num;
+ if (extract_realloc2(
+ alloc,
+ &paragraph_a->lines,
+ sizeof(line_t*) * paragraph_a->lines_num,
+ sizeof(line_t*) * a_lines_num_new
+ )) goto end;
+ {
+ int i;
+ for (i=0; i<nearest_paragraph->lines_num; ++i) {
+ paragraph_a->lines[paragraph_a->lines_num + i]
+ = nearest_paragraph->lines[i];
+ }
+ }
+ paragraph_a->lines_num = a_lines_num_new;
+
+ /* Ensure that we skip nearest_paragraph in future. */
+ extract_free(alloc, &nearest_paragraph->lines);
+ extract_free(alloc, &nearest_paragraph);
+ paragraphs[nearest_paragraph_b] = NULL;
+
+ num_joins += 1;
+ outfx(
+ "have joined paragraph a=%i to snearest_paragraph_b=%i",
+ a,
+ nearest_paragraph_b
+ );
+
+ if (nearest_paragraph_b > a) {
+ /* We haven't yet tried appending any paragraphs to
+ nearest_paragraph_b, so the new extended paragraph_a needs
+ checking again. */
+ a -= 1;
+ }
+ }
+ else {
+ outfx(
+ "Not joining paragraphs. nearest_paragraph_distance=%f line_b_size=%f",
+ nearest_paragraph_distance,
+ line_b_size
+ );
+ }
+ }
+ }
+
+ {
+ /* Remove empty paragraphs. */
+ int from;
+ int to;
+ int paragraphs_num_old;
+ /* Compact the non-NULL entries to the front of the array. */
+ for (from=0, to=0; from<paragraphs_num; ++from) {
+ if (paragraphs[from]) {
+ paragraphs[to] = paragraphs[from];
+ to += 1;
+ }
+ }
+ outfx("paragraphs_num=%i => %i", paragraphs_num, to);
+ paragraphs_num_old = paragraphs_num;
+ paragraphs_num = to;
+ if (extract_realloc2(
+ alloc,
+ &paragraphs,
+ sizeof(paragraph_t*) * paragraphs_num_old,
+ sizeof(paragraph_t*) * paragraphs_num
+ )) {
+ /* Should always succeed because we're not increasing allocation size, but
+ can fail with memento squeeze. */
+ goto end;
+ }
+ }
+
+ /* Sort paragraphs so they appear in correct order, using paragraphs_cmp().
+ */
+ qsort(
+ paragraphs,
+ paragraphs_num,
+ sizeof(paragraph_t*), paragraphs_cmp
+ );
+
+ *o_paragraphs = paragraphs;
+ *o_paragraphs_num = paragraphs_num;
+ ret = 0;
+ outf("Turned %i lines into %i paragraphs",
+ lines_num,
+ paragraphs_num
+ );
+
+
+ end:
+
+ /* On failure free each remaining paragraph_t and its lines array, then the
+ paragraphs array itself. The line_t's themselves are not freed here. */
+ if (ret) {
+ if (paragraphs) {
+ for (a=0; a<paragraphs_num; ++a) {
+ if (paragraphs[a]) extract_free(alloc, &paragraphs[a]->lines);
+ extract_free(alloc, &paragraphs[a]);
+ }
+ }
+ extract_free(alloc, &paragraphs);
+ }
+ return ret;
+}
+
+/* For each page in <document>, joins the page's spans into lines with
+make_lines() and then joins those lines into paragraphs with
+make_paragraphs(). A line is a list of spans that are at the same angle and on
+the same line. A paragraph is a list of lines that are at the same angle and
+close together.
+
+Returns 0 if every page was processed. Returns -1 as soon as either step fails
+for a page; later pages are then left unprocessed. */
+int extract_document_join(extract_alloc_t* alloc, document_t* document)
+{
+    int page_index;
+
+    for (page_index = 0; page_index < document->pages_num; page_index += 1) {
+        page_t* page = document->pages[page_index];
+        outf("processing page %i: num_spans=%i", page_index, page->spans_num);
+
+        if (make_lines(
+                alloc,
+                page->spans,
+                page->spans_num,
+                &page->lines,
+                &page->lines_num
+                )) {
+            return -1;
+        }
+
+        if (make_paragraphs(
+                alloc,
+                page->lines,
+                page->lines_num,
+                &page->paragraphs,
+                &page->paragraphs_num
+                )) {
+            return -1;
+        }
+    }
+
+    return 0;
+}
diff --git a/extract/src/mem.c b/extract/src/mem.c
new file mode 100644
index 00000000..83b5032c
--- /dev/null
+++ b/extract/src/mem.c
@@ -0,0 +1,51 @@
+#include "../include/extract_alloc.h"
+
+#include "mem.h"
+
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#ifdef _MSC_VER
+ #include "compat_va_copy.h"
+#endif
+
+
+/* Sets the <len> bytes starting at <b> to zero. */
+void extract_bzero(void *b, size_t len)
+{
+    (void) memset(b, 0, len);
+}
+
+/* Formats <format> with the arguments in <va> into a newly allocated,
+zero-terminated string.
+
+On success, *out points to a buffer obtained from extract_malloc() and the
+return value is the length of the string, not counting the terminating zero.
+
+On failure, returns a negative value: the vsnprintf() result if measuring the
+output fails, or -1 if extract_malloc() fails.
+
+<va> is consumed by the measuring pass; the second pass uses a private copy,
+which is released with va_end() on every path. */
+int extract_vasprintf(extract_alloc_t* alloc, char** out, const char* format, va_list va)
+{
+    int n;
+    int n2;
+    va_list va2;
+
+    /* We need to traverse the arguments twice, so take a copy up front. */
+    va_copy(va2, va);
+
+    /* First pass: find out how many bytes are required. */
+    n = vsnprintf(NULL, 0, format, va);
+    if (n < 0) {
+        va_end(va2);
+        return n;
+    }
+    if (extract_malloc(alloc, out, n + 1)) {
+        va_end(va2);
+        return -1;
+    }
+
+    /* Second pass: actually write the string. */
+    n2 = vsnprintf(*out, n + 1, format, va2);
+    va_end(va2);
+    assert(n2 == n);
+    return n2;
+}
+
+
+/* Variadic front end for extract_vasprintf(): formats <format> and the
+following arguments into a newly allocated string stored in *out, and returns
+whatever extract_vasprintf() returns. */
+int extract_asprintf(extract_alloc_t* alloc, char** out, const char* format, ...)
+{
+    va_list args;
+    int length;
+
+    va_start(args, format);
+    length = extract_vasprintf(alloc, out, format, args);
+    va_end(args);
+
+    return length;
+}
+
+/* Copies the zero-terminated string <s> into a buffer obtained from
+extract_malloc() and stores the buffer in *o_out. Returns 0 on success, or -1
+if the allocation fails. */
+int extract_strdup(extract_alloc_t* alloc, const char* s, char** o_out)
+{
+    /* Number of bytes to copy, including the terminating zero. */
+    const size_t bytes = 1 + strlen(s);
+
+    if (extract_malloc(alloc, o_out, bytes)) {
+        return -1;
+    }
+    memcpy(*o_out, s, bytes);
+    return 0;
+}
diff --git a/extract/src/mem.h b/extract/src/mem.h
new file mode 100644
index 00000000..59729b1a
--- /dev/null
+++ b/extract/src/mem.h
@@ -0,0 +1,14 @@
+#ifndef EXTRACT_MEM_H
+#define EXTRACT_MEM_H
+
+/* Small memory and string helpers that allocate via an extract_alloc_t.
+
+NOTE(review): the declarations below use extract_alloc_t, which this header
+does not itself declare; mem.c includes ../include/extract_alloc.h before
+including this header. */
+
+#include <stdarg.h>
+#include <string.h>
+
+/* Sets <len> bytes starting at <b> to zero. */
+void extract_bzero(void *b, size_t len);
+
+/* Format into a newly allocated string stored in *out. Return the length of
+the string (not counting the terminating zero), or a negative value on
+failure. extract_asprintf() is the variadic form of extract_vasprintf(). */
+int extract_vasprintf(extract_alloc_t* alloc, char** out, const char* format, va_list va);
+int extract_asprintf(extract_alloc_t* alloc, char** out, const char* format, ...);
+
+/* Stores a newly allocated copy of zero-terminated string <s> in *o_out.
+Returns 0 on success or -1 if allocation fails. */
+int extract_strdup(extract_alloc_t* alloc, const char* s, char** o_out);
+
+#endif
diff --git a/extract/src/memento.c b/extract/src/memento.c
new file mode 100644
index 00000000..e62744be
--- /dev/null
+++ b/extract/src/memento.c
@@ -0,0 +1,3574 @@
+/* Copyright (C) 2009-2020 Artifex Software, Inc.
+ All Rights Reserved.
+
+ This software is provided AS-IS with no warranty, either express or
+ implied.
+
+ This software is distributed under license and may not be copied, modified
+ or distributed except as expressly authorized under the terms of that
+ license. Refer to licensing information at http://www.artifex.com
+ or contact Artifex Software, Inc., 1305 Grant Avenue - Suite 200,
+ Novato, CA 94945, U.S.A., +1(415)492-9861, for further information.
+*/
+
+/* Inspired by Fortify by Simon P Bullen. */
+
+/* Set the following if you're only looking for leaks, not memory overwrites
+ * to speed the operation */
+/* #define MEMENTO_LEAKONLY */
+
+/* Set the following to keep extra details about the history of blocks */
+#define MEMENTO_DETAILS
+
+/* Don't keep blocks around if they'd mean losing more than a quarter of
+ * the freelist. */
+#define MEMENTO_FREELIST_MAX_SINGLE_BLOCK (MEMENTO_FREELIST_MAX/4)
+
+#define COMPILING_MEMENTO_C
+
+/* SHUT UP, MSVC. I KNOW WHAT I AM DOING. */
+#define _CRT_SECURE_NO_WARNINGS
+
+/* We have some GS specific tweaks; more for the GS build environment than
+ * anything else. */
+/* #define MEMENTO_GS_HACKS */
+
+#ifdef MEMENTO_GS_HACKS
+/* For GS we include malloc_.h. Anyone else would just include memento.h */
+#include "malloc_.h"
+#include "memory_.h"
+int atexit(void (*)(void));
+#else
+#ifdef MEMENTO_MUPDF_HACKS
+#include "mupdf/memento.h"
+#else
+#include "memento.h"
+#endif
+#include <stdio.h>
+#endif
+#ifndef _MSC_VER
+#include <stdint.h>
+#include <limits.h>
+#include <unistd.h>
+#endif
+
+#include <errno.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
+
+#ifdef __ANDROID__
+#define MEMENTO_ANDROID
+#include <stdio.h>
+#endif
+
+/* Hacks to portably print large sizes */
+/* FMTZ is the printf conversion used for sizes, FMTZ_CAST is the type that
+ * values are cast to before being passed for FMTZ, and FMTP is the
+ * conversion used for pointers. */
+#ifdef _MSC_VER
+#define FMTZ "%llu"
+#define FMTZ_CAST _int64
+#define FMTP "0x%p"
+#else
+#define FMTZ "%zu"
+#define FMTZ_CAST size_t
+#define FMTP "%p"
+#endif
+
+/* UB(x) keeps the low 8 bits of x, as an intptr_t.
+ * B2I(x) repeats that byte in each of the low 4 byte positions.
+ * B2P(x) additionally copies the 4 byte pattern 32 bits higher (done as two
+ * shifts of 16) and converts the result to a pointer. */
+#define UB(x) ((intptr_t)((x) & 0xFF))
+#define B2I(x) (UB(x) | (UB(x)<<8) | (UB(x)<<16) | (UB(x)<<24))
+#define B2P(x) ((void *)(B2I(x) | ((B2I(x)<<16)<<16)))
+/* For each of the fill bytes MEMENTO_PREFILL, MEMENTO_POSTFILL,
+ * MEMENTO_ALLOCFILL and MEMENTO_FREEFILL (not defined in this section), the
+ * macros below build the same byte repeated to fill an unsigned char,
+ * unsigned short, unsigned int and pointer-sized value respectively. */
+#define MEMENTO_PREFILL_UBYTE ((unsigned char)(MEMENTO_PREFILL))
+#define MEMENTO_PREFILL_USHORT (((unsigned short)MEMENTO_PREFILL_UBYTE) | (((unsigned short)MEMENTO_PREFILL_UBYTE)<<8))
+#define MEMENTO_PREFILL_UINT (((unsigned int)MEMENTO_PREFILL_USHORT) | (((unsigned int)MEMENTO_PREFILL_USHORT)<<16))
+#define MEMENTO_PREFILL_PTR (void *)(((uintptr_t)MEMENTO_PREFILL_UINT) | ((((uintptr_t)MEMENTO_PREFILL_UINT)<<16)<<16))
+#define MEMENTO_POSTFILL_UBYTE ((unsigned char)(MEMENTO_POSTFILL))
+#define MEMENTO_POSTFILL_USHORT (((unsigned short)MEMENTO_POSTFILL_UBYTE) | (((unsigned short)MEMENTO_POSTFILL_UBYTE)<<8))
+#define MEMENTO_POSTFILL_UINT (((unsigned int)MEMENTO_POSTFILL_USHORT) | (((unsigned int)MEMENTO_POSTFILL_USHORT)<<16))
+#define MEMENTO_POSTFILL_PTR (void *)(((uintptr_t)MEMENTO_POSTFILL_UINT) | ((((uintptr_t)MEMENTO_POSTFILL_UINT)<<16)<<16))
+#define MEMENTO_ALLOCFILL_UBYTE ((unsigned char)(MEMENTO_ALLOCFILL))
+#define MEMENTO_ALLOCFILL_USHORT (((unsigned short)MEMENTO_ALLOCFILL_UBYTE) | (((unsigned short)MEMENTO_ALLOCFILL_UBYTE)<<8))
+#define MEMENTO_ALLOCFILL_UINT (((unsigned int)MEMENTO_ALLOCFILL_USHORT) | (((unsigned int)MEMENTO_ALLOCFILL_USHORT)<<16))
+#define MEMENTO_ALLOCFILL_PTR (void *)(((uintptr_t)MEMENTO_ALLOCFILL_UINT) | ((((uintptr_t)MEMENTO_ALLOCFILL_UINT)<<16)<<16))
+#define MEMENTO_FREEFILL_UBYTE ((unsigned char)(MEMENTO_FREEFILL))
+#define MEMENTO_FREEFILL_USHORT (((unsigned short)MEMENTO_FREEFILL_UBYTE) | (((unsigned short)MEMENTO_FREEFILL_UBYTE)<<8))
+#define MEMENTO_FREEFILL_UINT (((unsigned int)MEMENTO_FREEFILL_USHORT) | (((unsigned int)MEMENTO_FREEFILL_USHORT)<<16))
+#define MEMENTO_FREEFILL_PTR (void *)(((uintptr_t)MEMENTO_FREEFILL_UINT) | ((((uintptr_t)MEMENTO_FREEFILL_UINT)<<16)<<16))
+
+#ifdef MEMENTO
+
+#ifndef MEMENTO_CPP_EXTRAS_ONLY
+
+#ifdef MEMENTO_ANDROID
+#include <android/log.h>
+
+static char log_buffer[4096];
+static int log_fill = 0;
+
+static char log_buffer2[4096];
+
+/* fprintf() replacement for Android that sends output to the Android log
+ * with tag "memento" instead of to <file>, which is ignored.
+ *
+ * The formatted text is accumulated in log_buffer and emitted one line at a
+ * time: a log entry is written each time a '\n' is seen or log_buffer
+ * becomes full. Text after the last '\n' stays in log_buffer until a later
+ * call completes the line.
+ *
+ * Uses static buffers, so formatted output longer than log_buffer2 is
+ * truncated. Always returns 0. */
+static int
+android_fprintf(FILE *file, const char *fmt, ...)
+{
+    va_list args;
+    char *p, *q;
+
+    (void)file;
+
+    va_start(args, fmt);
+    vsnprintf(log_buffer2, sizeof(log_buffer2)-1, fmt, args);
+    va_end(args);
+
+    /* Ensure we are always null terminated */
+    log_buffer2[sizeof(log_buffer2)-1] = 0;
+
+    p = log_buffer2;
+    q = p;
+    do
+    {
+        /* Find the end of the string, or the next \n */
+        while (*p && *p != '\n')
+            p++;
+
+        /* We need to output from q to p. Limit ourselves to what
+         * will fit in the existing buffer. */
+        if (p - q >= sizeof(log_buffer)-1 - log_fill)
+            p = q + sizeof(log_buffer)-1 - log_fill;
+
+        memcpy(&log_buffer[log_fill], q, p-q);
+        log_fill += p-q;
+        if (*p == '\n')
+        {
+            log_buffer[log_fill] = 0;
+            __android_log_print(ANDROID_LOG_ERROR, "memento", "%s", log_buffer);
+            usleep(1);
+            log_fill = 0;
+            p++; /* Skip over the \n */
+        }
+        else if (log_fill >= sizeof(log_buffer)-1)
+        {
+            /* Buffer is full: terminate using log_buffer's own size (not
+             * log_buffer2's) and flush what we have. */
+            log_buffer[sizeof(log_buffer)-1] = 0;
+            __android_log_print(ANDROID_LOG_ERROR, "memento", "%s", log_buffer);
+            usleep(1);
+            log_fill = 0;
+        }
+        q = p;
+    }
+    while (*p);
+
+    return 0;
+}
+
+#define fprintf android_fprintf
+#define MEMENTO_STACKTRACE_METHOD 3
+#endif
+
+/* _WIN64 defined implies _WIN32 will be */
+#ifdef _WIN32
+#include <windows.h>
+
+/* fprintf() replacement for Windows: writes the formatted text to <file>
+ * with vfprintf() and also passes it to OutputDebugStringA().
+ *
+ * The copy sent to OutputDebugStringA() is formatted into a fixed-size
+ * buffer and so is truncated if it does not fit.
+ *
+ * Returns the result of vfprintf(). */
+static int
+windows_fprintf(FILE *file, const char *fmt, ...)
+{
+    va_list args;
+    char text[4096];
+    int ret;
+
+    va_start(args, fmt);
+    ret = vfprintf(file, fmt, args);
+    va_end(args);
+
+    va_start(args, fmt);
+    vsnprintf(text, sizeof(text), fmt, args);
+    va_end(args);
+
+    /* Ensure we are always null terminated, even with C runtimes whose
+     * vsnprintf() does not terminate on truncation. */
+    text[sizeof(text)-1] = 0;
+    OutputDebugStringA(text);
+
+    return ret;
+}
+
+#define fprintf windows_fprintf
+#endif
+
+#ifndef MEMENTO_STACKTRACE_METHOD
+/* Pick a default stack trace method: 2 on Windows, otherwise 1 when building
+ * with a GNU C compatible compiler. _WIN32 is tested first so that the macro
+ * is defined only once when both _WIN32 and __GNUC__ are defined. */
+#if defined(_WIN32)
+#define MEMENTO_STACKTRACE_METHOD 2
+#elif defined(__GNUC__)
+#define MEMENTO_STACKTRACE_METHOD 1
+#endif
+#endif
+
+#if defined(__linux__) || defined(__OpenBSD__)
+#define MEMENTO_HAS_FORK
+#elif defined(__APPLE__) && defined(__MACH__)
+#define MEMENTO_HAS_FORK
+#endif
+
+/* Define the underlying allocators, just in case */
+void *MEMENTO_UNDERLYING_MALLOC(size_t);
+void MEMENTO_UNDERLYING_FREE(void *);
+void *MEMENTO_UNDERLYING_REALLOC(void *,size_t);
+void *MEMENTO_UNDERLYING_CALLOC(size_t,size_t);
+
+/* And some other standard functions we use. We don't include the header
+ * files, just in case they pull in unexpected others. */
+int atoi(const char *);
+char *getenv(const char *);
+
+/* How far to search for pointers in each block when calculating nestings */
+/* mupdf needs at least 34000ish (sizeof(fz_shade))/ */
+#define MEMENTO_PTRSEARCH 65536
+
+#ifndef MEMENTO_MAXPATTERN
+#define MEMENTO_MAXPATTERN 0
+#endif
+
+#ifdef MEMENTO_GS_HACKS
+#include "valgrind.h"
+#else
+#ifdef HAVE_VALGRIND
+#include "valgrind/memcheck.h"
+#else
+#define VALGRIND_MAKE_MEM_NOACCESS(p,s) do { } while (0==1)
+#define VALGRIND_MAKE_MEM_UNDEFINED(p,s) do { } while (0==1)
+#define VALGRIND_MAKE_MEM_DEFINED(p,s) do { } while (0==1)
+#endif
+#endif
+
+/* Sizes, in bytes, of the pre and post areas.
+ * NOTE(review): presumably these are the guard areas placed before and after
+ * each user block - confirm against the allocation code. */
+enum {
+ Memento_PreSize = 16,
+ Memento_PostSize = 16
+};
+
+/* Some compile time checks */
+/* Each member is an array whose size is -1 if the condition named by the
+ * member is violated, which makes the declaration fail to compile. The
+ * struct itself is not otherwise used in this section. */
+typedef struct
+{
+ char MEMENTO_PRESIZE_MUST_BE_A_MULTIPLE_OF_4[Memento_PreSize & 3 ? -1 : 1];
+ char MEMENTO_POSTSIZE_MUST_BE_A_MULTIPLE_OF_4[Memento_PostSize & 3 ? -1 : 1];
+ char MEMENTO_POSTSIZE_MUST_BE_AT_LEAST_4[Memento_PostSize >= 4 ? 1 : -1];
+ char MEMENTO_PRESIZE_MUST_BE_AT_LEAST_4[Memento_PreSize >= 4 ? 1 : -1];
+} MEMENTO_SANITY_CHECK_STRUCT;
+
+#define MEMENTO_UINT32 unsigned int
+#define MEMENTO_UINT16 unsigned short
+
+#define MEMENTO_PREFILL_UINT32 ((MEMENTO_UINT32)(MEMENTO_PREFILL | (MEMENTO_PREFILL <<8) | (MEMENTO_PREFILL <<16) |(MEMENTO_PREFILL <<24)))
+#define MEMENTO_POSTFILL_UINT16 ((MEMENTO_UINT16)(MEMENTO_POSTFILL | (MEMENTO_POSTFILL<<8)))
+#define MEMENTO_POSTFILL_UINT32 ((MEMENTO_UINT32)(MEMENTO_POSTFILL | (MEMENTO_POSTFILL<<8) | (MEMENTO_POSTFILL<<16) |(MEMENTO_POSTFILL<<24)))
+#define MEMENTO_FREEFILL_UINT16 ((MEMENTO_UINT16)(MEMENTO_FREEFILL | (MEMENTO_FREEFILL<<8)))
+#define MEMENTO_FREEFILL_UINT32 ((MEMENTO_UINT32)(MEMENTO_FREEFILL | (MEMENTO_FREEFILL<<8) | (MEMENTO_FREEFILL<<16) |(MEMENTO_FREEFILL<<24)))
+
+/* Per-block flags. Each value is a distinct single bit.
+ * NOTE(review): presumably these are OR-ed together in
+ * Memento_BlkHeader.flags - confirm. */
+enum {
+ Memento_Flag_OldBlock = 1,
+ Memento_Flag_HasParent = 2,
+ Memento_Flag_BreakOnFree = 4,
+ Memento_Flag_BreakOnRealloc = 8,
+ Memento_Flag_Freed = 16,
+ Memento_Flag_KnownLeak = 32,
+ Memento_Flag_Reported = 64
+};
+
+/* Kinds of event. The values are consecutive from 0 and are in the same
+ * order as the names in eventType[] below, so the two must be kept in
+ * step. */
+enum {
+ Memento_EventType_malloc = 0,
+ Memento_EventType_calloc = 1,
+ Memento_EventType_realloc = 2,
+ Memento_EventType_free = 3,
+ Memento_EventType_new = 4,
+ Memento_EventType_delete = 5,
+ Memento_EventType_newArray = 6,
+ Memento_EventType_deleteArray = 7,
+ Memento_EventType_takeRef = 8,
+ Memento_EventType_dropRef = 9,
+ Memento_EventType_reference = 10,
+ Memento_EventType_strdup = 11,
+ Memento_EventType_asprintf = 12,
+ Memento_EventType_vasprintf = 13
+};
+
+/* Printable name for each Memento_EventType_* value, indexed by that
+ * value. */
+static const char *eventType[] =
+{
+ "malloc",
+ "calloc",
+ "realloc",
+ "free",
+ "new",
+ "delete",
+ "new[]",
+ "delete[]",
+ "takeRef",
+ "dropRef",
+ "reference",
+ "strdup",
+ "asprintf",
+ "vasprintf"
+};
+
+/* When we list leaked blocks at the end of execution, we search for pointers
+ * between blocks in order to be able to give a nice nested view.
+ * Unfortunately, if you have are running your own allocator (such as
+ * postscript's chunk allocator) you can often find that the header of the
+ * block always contains pointers to next or previous blocks. This tends to
+ * mean the nesting displayed is "uninteresting" at best :)
+ *
+ * As a hack to get around this, we have a define MEMENTO_SKIP_SEARCH that
+ * indicates how many bytes to skip over at the start of the chunk.
+ * This may cause us to miss true nestings, but such is life...
+ */
+#ifndef MEMENTO_SEARCH_SKIP
+#ifdef MEMENTO_GS_HACKS
+#define MEMENTO_SEARCH_SKIP (2*sizeof(void *))
+#else
+#define MEMENTO_SEARCH_SKIP 0
+#endif
+#endif
+
+#define MEMENTO_CHILD_MAGIC ((Memento_BlkHeader *)('M' | ('3' << 8) | ('m' << 16) | ('3' << 24)))
+#define MEMENTO_SIBLING_MAGIC ((Memento_BlkHeader *)('n' | ('t' << 8) | ('0' << 16) | ('!' << 24)))
+
+#ifdef MEMENTO_DETAILS
+typedef struct Memento_BlkDetails Memento_BlkDetails;
+
+/* One entry in a singly linked list of extra details kept for a block when
+ * MEMENTO_DETAILS is defined.
+ * NOTE(review): presumably <type> is a Memento_EventType_* value, <count> is
+ * the number of entries in <stack>, and <stack> is over-allocated beyond its
+ * declared single element - confirm against the code that creates these. */
+struct Memento_BlkDetails
+{
+ Memento_BlkDetails *next;
+ char type;
+ char count;
+ int sequence;
+ void *stack[1];
+};
+#endif /* MEMENTO_DETAILS */
+
+typedef struct Memento_BlkHeader Memento_BlkHeader;
+
+/* Header for a block. MEMBLK_TOBLK() and MEMBLK_FROMBLK() later in this file
+ * convert between a header and the address immediately after it, so <preblk>
+ * must remain the last member. */
+struct Memento_BlkHeader
+{
+ size_t rawsize;
+ int sequence;
+ int lastCheckedOK;
+ int flags;
+ Memento_BlkHeader *next;
+ Memento_BlkHeader *prev; /* Reused as 'parent' when printing nested list */
+
+ const char *label;
+
+ /* Entries for nesting display calculations. Set to magic
+ * values at all other time. */
+ Memento_BlkHeader *child;
+ Memento_BlkHeader *sibling;
+
+#ifdef MEMENTO_DETAILS
+ /* Head of this block's list of Memento_BlkDetails, and
+ * NOTE(review): presumably the address of the last <next> pointer so that
+ * entries can be appended - confirm. */
+ Memento_BlkDetails *details;
+ Memento_BlkDetails **details_tail;
+#endif
+
+ char preblk[Memento_PreSize];
+};
+
+/* In future this could (should) be a smarter data structure, like, say,
+ * splay trees. For now, we use a list.
+ */
+/* A list of blocks, identified by its first and last Memento_BlkHeader. */
+typedef struct Memento_Blocks
+{
+ Memento_BlkHeader *head;
+ Memento_BlkHeader *tail;
+} Memento_Blocks;
+
+/* What sort of Mutex should we use? */
+/* Exactly one of three implementations is selected below. Each provides the
+ * Memento_mutex type, Memento_initMutex() to initialise one, and the
+ * MEMENTO_DO_LOCK() / MEMENTO_DO_UNLOCK() macros. In the two locking
+ * implementations the macros operate on memento.mutex, i.e. on the mutex
+ * member of the global <memento> structure defined later in this file. */
+#ifdef MEMENTO_LOCKLESS
+/* No locking at all: the mutex is a dummy int, initialisation does nothing
+ * and the lock/unlock macros expand to empty statements. */
+typedef int Memento_mutex;
+
+static void Memento_initMutex(Memento_mutex *m)
+{
+ (void)m;
+}
+
+#define MEMENTO_DO_LOCK() do { } while (0)
+#define MEMENTO_DO_UNLOCK() do { } while (0)
+
+#else
+#if defined(_WIN32) || defined(_WIN64)
+/* Windows */
+/* Uses a CRITICAL_SECTION. */
+typedef CRITICAL_SECTION Memento_mutex;
+
+static void Memento_initMutex(Memento_mutex *m)
+{
+ InitializeCriticalSection(m);
+}
+
+#define MEMENTO_DO_LOCK() \
+ EnterCriticalSection(&memento.mutex)
+#define MEMENTO_DO_UNLOCK() \
+ LeaveCriticalSection(&memento.mutex)
+
+#else
+/* Everything else: uses a pthread mutex with default attributes. The return
+ * values of the pthread calls are not checked. */
+#include <pthread.h>
+typedef pthread_mutex_t Memento_mutex;
+
+static void Memento_initMutex(Memento_mutex *m)
+{
+ pthread_mutex_init(m, NULL);
+}
+
+#define MEMENTO_DO_LOCK() \
+ pthread_mutex_lock(&memento.mutex)
+#define MEMENTO_DO_UNLOCK() \
+ pthread_mutex_unlock(&memento.mutex)
+
+#endif
+#endif
+
+/* A pair of ints delimiting a range.
+ * NOTE(review): presumably a range of sequence numbers used by the
+ * <squeezes> array below; whether <end> is inclusive is not visible here -
+ * confirm. */
+typedef struct {
+ int begin;
+ int end;
+} Memento_range;
+
+/* And our global structure */
+/* The single file-scope instance of Memento's state. It holds the lists of
+ * used and free blocks, assorted settings and counters, and the mutex that
+ * MEMENTO_DO_LOCK() / MEMENTO_DO_UNLOCK() operate on. Having static storage
+ * duration, all members start out as zero.
+ * NOTE(review): the meaning of the individual settings and counters is not
+ * established by this section; see the code that reads and writes them. */
+static struct {
+ int inited;
+ Memento_Blocks used;
+ Memento_Blocks free;
+ size_t freeListSize;
+ int sequence;
+ int paranoia;
+ int paranoidAt;
+ int countdown;
+ int lastChecked;
+ int breakAt;
+ int failAt;
+ int failing;
+ int nextFailAt;
+ int squeezeAt;
+ int squeezing;
+ int segv;
+ int pattern;
+ int nextPattern;
+ int patternBit;
+ int leaking;
+ int hideMultipleReallocs;
+ int abortOnLeak;
+ int abortOnCorruption;
+ size_t maxMemory;
+ size_t alloc;
+ size_t peakAlloc;
+ size_t totalAlloc;
+ size_t numMallocs;
+ size_t numFrees;
+ size_t numReallocs;
+ Memento_mutex mutex;
+ Memento_range *squeezes;
+ int squeezes_num;
+ int squeezes_pos;
+} memento;
+
+#define MEMENTO_EXTRASIZE (sizeof(Memento_BlkHeader) + Memento_PostSize)
+
+/* Round up size S to the next multiple of N (where N is a power of 2).
+ * Both arguments are parenthesized in the expansion so that arbitrary
+ * expressions can be passed; note that N is evaluated twice. */
+#define MEMENTO_ROUNDUP(S,N) (((S) + (N)-1)&~((N)-1))
+
+#define MEMBLK_SIZE(s) MEMENTO_ROUNDUP(s + MEMENTO_EXTRASIZE, MEMENTO_MAXALIGN)
+
+#define MEMBLK_FROMBLK(B) (&((Memento_BlkHeader*)(void *)(B))[-1])
+#define MEMBLK_TOBLK(B) ((void*)(&((Memento_BlkHeader*)(void*)(B))[1]))
+#define MEMBLK_POSTPTR(B) \
+ (&((unsigned char *)(void *)(B))[(B)->rawsize + sizeof(Memento_BlkHeader)])
+
+enum
+{
+ SkipStackBackTraceLevels = 4
+};
+
+#if defined(MEMENTO_STACKTRACE_METHOD) && MEMENTO_STACKTRACE_METHOD == 1
+extern size_t backtrace(void **, int);
+extern void backtrace_symbols_fd(void **, size_t, int);
+extern char **backtrace_symbols(void **, size_t);
+
+#define MEMENTO_BACKTRACE_MAX 256
+static void (*print_stack_value)(void *address);
+
+/* Libbacktrace gubbins - relies on us having libdl to load the .so */
+#ifdef HAVE_LIBDL
+#include <dlfcn.h>
+
+typedef void (*backtrace_error_callback) (void *data, const char *msg, int errnum);
+
+typedef struct backtrace_state *(*backtrace_create_state_type)(
+ const char *filename, int threaded,
+ backtrace_error_callback error_callback, void *data);
+
+typedef int (*backtrace_full_callback) (void *data, uintptr_t pc,
+ const char *filename, int lineno,
+ const char *function);
+
+typedef int (*backtrace_pcinfo_type)(struct backtrace_state *state,
+ uintptr_t pc,
+ backtrace_full_callback callback,
+ backtrace_error_callback error_callback,
+ void *data);
+
+typedef void (*backtrace_syminfo_callback) (void *data, uintptr_t pc,
+ const char *symname,
+ uintptr_t symval,
+ uintptr_t symsize);
+
+typedef int (*backtrace_syminfo_type)(struct backtrace_state *state,
+ uintptr_t addr,
+ backtrace_syminfo_callback callback,
+ backtrace_error_callback error_callback,
+ void *data);
+
+static backtrace_syminfo_type backtrace_syminfo;
+static backtrace_create_state_type backtrace_create_state;
+static backtrace_pcinfo_type backtrace_pcinfo;
+static struct backtrace_state *my_backtrace_state;
+static void *libbt;
+static char backtrace_exe[4096];
+static void *current_addr;
+
+/* Error callback that deliberately does nothing; all arguments are
+ * ignored. */
+static void error2_cb(void *data, const char *msg, int errnum)
+{
+    (void)errnum;
+    (void)msg;
+    (void)data;
+}
+
+/* Symbol-info callback: prints <pc> in hex followed by <symname> (or "?" if
+ * <symname> is NULL) to stderr. The address is zero-padded to 8 hex digits
+ * when pointers are 4 bytes and to 16 otherwise. <data>, <symval> and
+ * <symsize> are ignored. */
+static void syminfo_cb(void *data, uintptr_t pc, const char *symname, uintptr_t symval, uintptr_t symsize)
+{
+    (void)data;
+    (void)symval;
+    (void)symsize;
+    /* %lx requires an unsigned long argument; uintptr_t is not guaranteed
+     * to be that type, so convert explicitly. */
+    if (sizeof(void *) == 4)
+        fprintf(stderr, " 0x%08lx %s\n", (unsigned long)pc, symname?symname:"?");
+    else
+        fprintf(stderr, " 0x%016lx %s\n", (unsigned long)pc, symname?symname:"?");
+}
+
+/* Error callback: ignores its arguments and instead asks
+ * backtrace_syminfo() to describe <current_addr>, with syminfo_cb() as the
+ * result callback and error2_cb() as the error callback. */
+static void error_cb(void *data, const char *msg, int errnum)
+{
+    uintptr_t address = (uintptr_t)current_addr;
+
+    (void)errnum;
+    (void)msg;
+    (void)data;
+
+    backtrace_syminfo(my_backtrace_state, address, syminfo_cb, error2_cb, NULL);
+}
+
+/* Full-info callback: prints <pc> in hex followed by "function(file:line)"
+ * to stderr, substituting "?" for a NULL <fn> or <fname>. The address is
+ * zero-padded to 8 hex digits when pointers are 4 bytes and to 16
+ * otherwise. <data> is ignored. Always returns 0. */
+static int full_cb(void *data, uintptr_t pc, const char *fname, int line, const char *fn)
+{
+    (void)data;
+    /* %lx requires an unsigned long argument; uintptr_t is not guaranteed
+     * to be that type, so convert explicitly. */
+    if (sizeof(void *) == 4)
+        fprintf(stderr, " 0x%08lx %s(%s:%d)\n", (unsigned long)pc, fn?fn:"?", fname?fname:"?", line);
+    else
+        fprintf(stderr, " 0x%016lx %s(%s:%d)\n", (unsigned long)pc, fn?fn:"?", fname?fname:"?", line);
+    return 0;
+}
+
+/* Prints information about <addr> using backtrace_pcinfo(), with full_cb()
+ * as the result callback and error_cb() as the error callback. <addr> is
+ * first stored in <current_addr>, which error_cb() reads. */
+static void print_stack_libbt(void *addr)
+{
+    uintptr_t pc = (uintptr_t)addr;
+
+    current_addr = addr;
+    backtrace_pcinfo(my_backtrace_state, pc, full_cb, error_cb, NULL);
+}
+
+/* Fallback used when libbacktrace could not be set up: prints the
+ * backtrace_symbols() description of <addr> to stderr, or the raw address in
+ * hex (8 digits when pointers are 4 bytes, otherwise 16) if no description
+ * is available. */
+static void print_stack_libbt_failed(void *addr)
+{
+    char **strings;
+#if 0
+    /* Let's use a hack from Julian Smith to call gdb to extract the information */
+    /* Disabled for now, as I can't make this work. */
+    static char command[1024];
+    int e;
+    static int gdb_invocation_failed = 0;
+
+    if (gdb_invocation_failed == 0)
+    {
+        snprintf(command, sizeof(command),
+                 //"gdb -q --batch -p=%i -ex 'info line *%p' -ex quit 2>/dev/null",
+                 "gdb -q --batch -p=%i -ex 'info line *%p' -ex quit 2>/dev/null| egrep -v '(Thread debugging using)|(Using host libthread_db library)|(A debugging session is active)|(will be detached)|(Quit anyway)|(No such file or directory)|(^0x)|(^$)'",
+                 getpid(), addr);
+        printf("%s\n", command);
+        e = system(command);
+        if (e == 0)
+            return; /* That'll do! */
+        gdb_invocation_failed = 1; /* If it's failed once, it'll probably keep failing. */
+    }
+#endif
+
+    /* We couldn't even get gdb! Make do. */
+    strings = backtrace_symbols(&addr, 1);
+
+    if (strings == NULL || strings[0] == NULL)
+    {
+        /* %lx requires an unsigned long argument; uintptr_t is not
+         * guaranteed to be that type, so convert explicitly. */
+        if (sizeof(void *) == 4)
+            fprintf(stderr, " [0x%08lx]\n", (unsigned long)(uintptr_t)addr);
+        else
+            fprintf(stderr, " [0x%016lx]\n", (unsigned long)(uintptr_t)addr);
+    }
+    else
+    {
+        fprintf(stderr, " %s\n", strings[0]);
+    }
+    /* Call the real free(), bypassing any function-like macro of that
+     * name. */
+    (free)(strings);
+}
+
+/* One-shot attempt to load libbacktrace at run time and switch
+ * print_stack_value over to it.
+ *
+ * Returns 1 if libbacktrace was loaded and initialised by this call.
+ * Returns 0 if loading failed (print_stack_value is then pointed at
+ * print_stack_libbt_failed) and also on every call after the first,
+ * whatever the first call's outcome was.
+ */
+static int init_libbt(void)
+{
+    /* Locations tried, in order, until one loads. */
+    static const char *const candidates[] = {
+        "libbacktrace.so",
+        "/opt/lib/libbacktrace.so",
+        "/lib/libbacktrace.so",
+        "/usr/lib/libbacktrace.so",
+        "/usr/local/lib/libbacktrace.so"
+    };
+    static int libbt_inited = 0;
+    size_t i;
+
+    if (libbt_inited)
+        return 0;
+    libbt_inited = 1;
+
+    libbt = NULL;
+    for (i = 0; libbt == NULL && i < sizeof(candidates)/sizeof(candidates[0]); i++)
+        libbt = dlopen(candidates[i], RTLD_LAZY);
+    if (libbt == NULL)
+        goto fail;
+
+    backtrace_create_state = dlsym(libbt, "backtrace_create_state");
+    backtrace_syminfo = dlsym(libbt, "backtrace_syminfo");
+    backtrace_pcinfo = dlsym(libbt, "backtrace_pcinfo");
+
+    if (backtrace_create_state == NULL ||
+        backtrace_syminfo == NULL ||
+        backtrace_pcinfo == NULL)
+    {
+        goto fail;
+    }
+
+    my_backtrace_state = backtrace_create_state(backtrace_exe,
+                                                1 /*BACKTRACE_SUPPORTS_THREADS*/,
+                                                error_cb,
+                                                NULL);
+    if (my_backtrace_state == NULL)
+        goto fail;
+
+    print_stack_value = print_stack_libbt;
+
+    return 1;
+
+ fail:
+    fprintf(stderr,
+            "MEMENTO: libbacktrace.so failed to load; backtraces will be sparse.\n"
+            "MEMENTO: See memento.h for how to rectify this.\n");
+    /* Forget every entry point, including backtrace_pcinfo, so that no
+     * half-initialised libbacktrace state can be used later. */
+    libbt = NULL;
+    backtrace_create_state = NULL;
+    backtrace_syminfo = NULL;
+    backtrace_pcinfo = NULL;
+    print_stack_value = print_stack_libbt_failed;
+    return 0;
+}
+#endif
+
+/* Default stack-address printer.
+ *
+ * Describes addr with backtrace_symbols(). When HAVE_LIBDL is defined and
+ * the description contains no ':' it is assumed to be of the form
+ * "path [address]"; the path is copied into backtrace_exe, libbacktrace
+ * is initialised via init_libbt(), and the address is then printed again
+ * through whichever printer print_stack_value now points at.
+ */
+static void print_stack_default(void *addr)
+{
+    char **strings = backtrace_symbols(&addr, 1);
+
+    if (strings == NULL || strings[0] == NULL)
+    {
+        fprintf(stderr, " ["FMTP"]\n", addr);
+    }
+#ifdef HAVE_LIBDL
+    else if (strchr(strings[0], ':') == NULL)
+    {
+        /* Probably a "path [address]" format string */
+        char *s = strchr(strings[0], ' ');
+
+        if (s != NULL && s != strings[0])
+        {
+            /* NOTE(review): the capacity of backtrace_exe is not visible
+             * here; this copy assumes the path fits - confirm. */
+            memcpy(backtrace_exe, strings[0], s - strings[0]);
+            backtrace_exe[s-strings[0]] = 0;
+            init_libbt();
+            print_stack_value(addr);
+        }
+        else
+        {
+            /* No usable path prefix (no space at all, or a leading
+             * space): a NULL s must never reach the length arithmetic
+             * above, so just show the raw description. */
+            fprintf(stderr, " %s\n", strings[0]);
+        }
+    }
+#endif
+    else
+    {
+        fprintf(stderr, " %s\n", strings[0]);
+    }
+    free(strings);
+}
+
+/* Select print_stack_default as the initial per-address printer. */
+static void Memento_initStacktracer(void)
+{
+    print_stack_value = print_stack_default;
+}
+
+/* Capture the current call stack into stack[] using backtrace().
+ *
+ * *skip is set to SkipStackBackTraceLevels, the number of leading entries
+ * the caller should ignore. Returns the number of entries remaining after
+ * those, or 0 if no more than that many were captured.
+ */
+static int Memento_getStacktrace(void **stack, int *skip)
+{
+    size_t captured = backtrace(&stack[0], MEMENTO_BACKTRACE_MAX);
+
+    *skip = SkipStackBackTraceLevels;
+    return (captured > SkipStackBackTraceLevels)
+        ? (int)(captured-SkipStackBackTraceLevels)
+        : 0;
+}
+
+/* Print numberOfFrames addresses from stack[], in order, through the
+ * currently selected print_stack_value printer. */
+static void Memento_showStacktrace(void **stack, int numberOfFrames)
+{
+    int frame = 0;
+
+    while (frame < numberOfFrames)
+    {
+        print_stack_value(stack[frame]);
+        frame++;
+    }
+}
+#elif defined(MEMENTO_STACKTRACE_METHOD) && MEMENTO_STACKTRACE_METHOD == 2
+#include <Windows.h>
+
+/* We use DbgHelp.dll rather than DbgHelp.lib. This avoids us needing
+ * extra link time complications, and enables us to fall back gracefully
+ * if the DLL cannot be found.
+ *
+ * To achieve this we have our own potted versions of the required types
+ * inline here.
+ */
+/* Integer type used for addresses passed to the line lookup: 64 bits
+ * wide when building for _WIN64, otherwise a plain DWORD. */
+#ifdef _WIN64
+typedef DWORD64 DWORD_NATIVESIZED;
+#else
+typedef DWORD DWORD_NATIVESIZED;
+#endif
+
+/* Size of the stack arrays used to hold captured backtraces. */
+#define MEMENTO_BACKTRACE_MAX 64
+
+/* Signature of the capture routine resolved at run time as
+ * "RtlCaptureStackBackTrace" from kernel32.dll. */
+typedef USHORT (__stdcall *My_CaptureStackBackTraceType)(__in ULONG, __in ULONG, __out PVOID*, __out_opt PULONG);
+
+/* Local copy of the line-information structure filled in by the
+ * SymGetLineFromAddr / SymGetLineFromAddr64 lookup. */
+typedef struct MY_IMAGEHLP_LINE {
+ DWORD SizeOfStruct;
+ PVOID Key;
+ DWORD LineNumber;
+ PCHAR FileName;
+ DWORD_NATIVESIZED Address;
+} MY_IMAGEHLP_LINE, *MY_PIMAGEHLP_LINE;
+
+/* Signature of the address-to-line lookup resolved from Dbghelp.dll. */
+typedef BOOL (__stdcall *My_SymGetLineFromAddrType)(HANDLE hProcess, DWORD_NATIVESIZED dwAddr, PDWORD pdwDisplacement, MY_PIMAGEHLP_LINE Line);
+
+/* Local copy of the symbol-information structure filled in by
+ * SymFromAddr. Name is the first byte of a longer buffer; the caller
+ * allocates the extra space and records its size in MaxNameLen. */
+typedef struct MY_SYMBOL_INFO {
+ ULONG SizeOfStruct;
+ ULONG TypeIndex; // Type Index of symbol
+ ULONG64 Reserved[2];
+ ULONG info;
+ ULONG Size;
+ ULONG64 ModBase; // Base Address of module containing this symbol
+ ULONG Flags;
+ ULONG64 Value; // Value of symbol, ValuePresent should be 1
+ ULONG64 Address; // Address of symbol including base address of module
+ ULONG Register; // register holding value or pointer to value
+ ULONG Scope; // scope of the symbol
+ ULONG Tag; // pdb classification
+ ULONG NameLen; // Actual length of name
+ ULONG MaxNameLen;
+ CHAR Name[1]; // Name of symbol
+} MY_SYMBOL_INFO, *MY_PSYMBOL_INFO;
+
+/* Signatures of the remaining Dbghelp.dll entry points resolved at run
+ * time ("SymFromAddr" and "SymInitialize"). */
+typedef BOOL (__stdcall *My_SymFromAddrType)(HANDLE hProcess, DWORD64 Address, PDWORD64 Displacement, MY_PSYMBOL_INFO Symbol);
+typedef BOOL (__stdcall *My_SymInitializeType)(HANDLE hProcess, PSTR UserSearchPath, BOOL fInvadeProcess);
+
+/* Entry points and process handle filled in by Memento_initStacktracer().
+ * Memento_CaptureStackBackTrace stays NULL (or is reset to NULL) when
+ * stack tracing is unavailable. */
+static My_CaptureStackBackTraceType Memento_CaptureStackBackTrace;
+static My_SymGetLineFromAddrType Memento_SymGetLineFromAddr;
+static My_SymFromAddrType Memento_SymFromAddr;
+static My_SymInitializeType Memento_SymInitialize;
+static HANDLE Memento_process;
+
+/* Resolve, at run time, the kernel32.dll and Dbghelp.dll entry points
+ * needed for stack traces, then initialise the symbol handler for the
+ * current process.
+ *
+ * If Dbghelp.dll or any of its entry points cannot be found,
+ * Memento_CaptureStackBackTrace is reset to NULL, which is what
+ * Memento_getStacktrace() tests before capturing anything.
+ */
+static void Memento_initStacktracer(void)
+{
+    HMODULE kernel32;
+    HMODULE dbghelp;
+#ifdef _WIN64
+    const char *lineLookupName = "SymGetLineFromAddr64";
+#else
+    const char *lineLookupName = "SymGetLineFromAddr";
+#endif
+
+    kernel32 = LoadLibrary("kernel32.dll");
+    if (kernel32 == NULL)
+        return;
+    Memento_CaptureStackBackTrace = (My_CaptureStackBackTraceType)(GetProcAddress(kernel32, "RtlCaptureStackBackTrace"));
+    if (Memento_CaptureStackBackTrace == NULL)
+        return;
+
+    dbghelp = LoadLibrary("Dbghelp.dll");
+    if (dbghelp == NULL)
+        goto disable;
+
+    Memento_SymGetLineFromAddr = (My_SymGetLineFromAddrType)(GetProcAddress(dbghelp, lineLookupName));
+    if (Memento_SymGetLineFromAddr == NULL)
+        goto disable;
+
+    Memento_SymFromAddr = (My_SymFromAddrType)(GetProcAddress(dbghelp, "SymFromAddr"));
+    if (Memento_SymFromAddr == NULL)
+        goto disable;
+
+    Memento_SymInitialize = (My_SymInitializeType)(GetProcAddress(dbghelp, "SymInitialize"));
+    if (Memento_SymInitialize == NULL)
+        goto disable;
+
+    Memento_process = GetCurrentProcess();
+    Memento_SymInitialize(Memento_process, NULL, TRUE);
+    return;
+
+disable:
+    /* Without the full set of Dbghelp entry points, tracing is off. */
+    Memento_CaptureStackBackTrace = NULL;
+}
+
+/* Capture the current call stack into stack[].
+ *
+ * The leading SkipStackBackTraceLevels frames are dropped by the capture
+ * call itself, so *skip is always set to 0. Returns the number of frames
+ * stored, or 0 if stack capture is unavailable.
+ */
+static int Memento_getStacktrace(void **stack, int *skip)
+{
+    /* Set *skip before any early return: callers index stack[] with it
+     * even when no frames were captured. */
+    *skip = 0;
+
+    if (Memento_CaptureStackBackTrace == NULL)
+        return 0;
+
+    /* Limit us to 63 levels due to windows bug */
+    return Memento_CaptureStackBackTrace(SkipStackBackTraceLevels, 63-SkipStackBackTraceLevels, stack, NULL);
+}
+
+/* Print numberOfFrames addresses from stack[] to stderr as
+ * "symbol in file:line".
+ *
+ * A symbol or line lookup that fails is printed as "?" (line 0) rather
+ * than reading structure fields the lookup never filled in. Nothing is
+ * printed if the Dbghelp entry points were never resolved.
+ */
+static void Memento_showStacktrace(void **stack, int numberOfFrames)
+{
+    MY_IMAGEHLP_LINE line;
+    int i;
+    char symbol_buffer[sizeof(MY_SYMBOL_INFO) + 1024 + 1];
+    MY_SYMBOL_INFO *symbol = (MY_SYMBOL_INFO *)symbol_buffer;
+
+    /* Initialisation may have failed; never call through NULL. */
+    if (Memento_SymFromAddr == NULL || Memento_SymGetLineFromAddr == NULL)
+        return;
+
+    symbol->MaxNameLen = 1024;
+    symbol->SizeOfStruct = sizeof(MY_SYMBOL_INFO);
+    line.SizeOfStruct = sizeof(MY_IMAGEHLP_LINE);
+    for (i = 0; i < numberOfFrames; i++)
+    {
+        DWORD64 dwDisplacement64;
+        DWORD dwDisplacement;
+        const char *name = "?";
+        const char *file = "?";
+        int lineNumber = 0;
+
+        if (Memento_SymFromAddr(Memento_process, (DWORD64)(stack[i]), &dwDisplacement64, symbol))
+            name = symbol->Name;
+        if (Memento_SymGetLineFromAddr(Memento_process, (DWORD_NATIVESIZED)(stack[i]), &dwDisplacement, &line) &&
+            line.FileName != NULL)
+        {
+            file = line.FileName;
+            lineNumber = (int)line.LineNumber;
+        }
+        fprintf(stderr, " %s in %s:%d\n", name, file, lineNumber);
+    }
+}
+#elif defined(MEMENTO_STACKTRACE_METHOD) && MEMENTO_STACKTRACE_METHOD == 3
+
+#include <unwind.h>
+#include <dlfcn.h>
+
+/* From cxxabi.h */
+extern char* __cxa_demangle(const char* mangled_name,
+ char* output_buffer,
+ size_t* length,
+ int* status);
+
+/* This stack trace method needs no one-time setup. */
+static void Memento_initStacktracer(void)
+{
+}
+
+#define MEMENTO_BACKTRACE_MAX 256
+
+/* State shared with unwind_populate_callback while walking the stack. */
+typedef struct
+{
+ int count; /* number of addresses stored in addr[] so far */
+ void **addr; /* caller-supplied array receiving the addresses */
+} my_unwind_details;
+
+/* _Unwind_Backtrace() callback: append the instruction pointer of the
+ * frame described by context to the my_unwind_details passed as arg.
+ * Returns _URC_END_OF_STACK once MEMENTO_BACKTRACE_MAX addresses have
+ * been stored, otherwise _URC_NO_REASON.
+ */
+static _Unwind_Reason_Code unwind_populate_callback(struct _Unwind_Context *context,
+                                                    void *arg)
+{
+    my_unwind_details *details = (my_unwind_details *)arg;
+    int slot = details->count;
+
+    if (slot < MEMENTO_BACKTRACE_MAX)
+    {
+        details->addr[slot] = (void *)_Unwind_GetIP(context);
+        details->count++;
+        return _URC_NO_REASON;
+    }
+
+    return _URC_END_OF_STACK;
+}
+
+/* Capture the current call stack into stack[] with _Unwind_Backtrace().
+ *
+ * If more than SkipStackBackTraceLevels frames were captured, *skip is
+ * set to that number and the count of the remaining frames is returned.
+ * Otherwise *skip is set to 0 and 0 is returned.
+ */
+static int Memento_getStacktrace(void **stack, int *skip)
+{
+    my_unwind_details uw;
+
+    uw.count = 0;
+    uw.addr = stack;
+    *skip = 0;
+
+    /* Collect the backtrace. Deliberately only unwind once,
+     * and avoid using malloc etc until this completes just
+     * in case. */
+    _Unwind_Backtrace(unwind_populate_callback, &uw);
+
+    if (uw.count > SkipStackBackTraceLevels)
+    {
+        *skip = SkipStackBackTraceLevels;
+        return uw.count-SkipStackBackTraceLevels;
+    }
+    return 0;
+}
+
+/* Print numberOfFrames addresses from stack[] to stderr.
+ *
+ * Each address that dladdr() can describe is printed as
+ * "[address]symbol(+0xoffset)", using the demangled symbol name when
+ * __cxa_demangle() succeeds; other addresses are printed as "[address]".
+ */
+static void Memento_showStacktrace(void **stack, int numberOfFrames)
+{
+    int i;
+
+    for (i = 0; i < numberOfFrames; i++)
+    {
+        Dl_info info;
+        if (dladdr(stack[i], &info))
+        {
+            int status = 0;
+            const char *sym = info.dli_sname ? info.dli_sname : "<unknown>";
+            char *demangled = __cxa_demangle(sym, NULL, 0, &status);
+            /* Subtract as integers: arithmetic on void * is not
+             * standard C, and %x expects an unsigned int. */
+            unsigned int offset = (unsigned int)((uintptr_t)stack[i] - (uintptr_t)info.dli_saddr);
+            fprintf(stderr, " ["FMTP"]%s(+0x%x)\n", stack[i], demangled && status == 0 ? demangled : sym, offset);
+            free(demangled);
+        }
+        else
+        {
+            fprintf(stderr, " ["FMTP"]\n", stack[i]);
+        }
+    }
+}
+
+#else
+/* No stack trace method is configured, so there is nothing to set up. */
+static void Memento_initStacktracer(void)
+{
+}
+
+/* Stub used when no stack trace method is configured: captures nothing,
+ * sets *skip to 0 and returns 0 frames. */
+static int Memento_getStacktrace(void **stack, int *skip)
+{
+    (void)stack;
+
+    *skip = 0;
+    return 0;
+}
+
+/* Stub used when no stack trace method is configured: prints nothing. */
+static void Memento_showStacktrace(void **stack, int numberOfFrames)
+{
+    (void)stack;
+    (void)numberOfFrames;
+}
+#endif /* MEMENTO_STACKTRACE_METHOD */
+
+#ifdef MEMENTO_DETAILS
+/* Record an event of the given type against block head: capture the
+ * current stack trace and append a new Memento_BlkDetails record,
+ * stamped with the current memento.sequence, to the end of the block's
+ * details list. Does nothing if head is NULL or the record cannot be
+ * allocated.
+ */
+static void Memento_storeDetails(Memento_BlkHeader *head, int type)
+{
+ void *stack[MEMENTO_BACKTRACE_MAX];
+ Memento_BlkDetails *details;
+ int count;
+ int skip;
+
+ if (head == NULL)
+ return;
+
+#ifdef MEMENTO_STACKTRACE_METHOD
+ count = Memento_getStacktrace(stack, &skip);
+#else
+ skip = 0;
+ count = 0;
+#endif
+
+ /* NOTE(review): the size presumably relies on Memento_BlkDetails ending
+ * in a one-element stack[] array, hence count-1 extra slots; with
+ * count == 0 the unsigned arithmetic wraps so the request is one pointer
+ * smaller than the struct - confirm against the struct definition. */
+ details = MEMENTO_UNDERLYING_MALLOC(sizeof(*details) + (count-1) * sizeof(void *));
+ if (details == NULL)
+ return;
+
+ /* Keep only the frames after the ones the tracer asked us to skip. */
+ if (count)
+ memcpy(&details->stack, &stack[skip], count * sizeof(void *));
+
+ details->type = (char)type;
+ /* NOTE(review): count is narrowed to char here; a frame count above
+ * CHAR_MAX would not survive the conversion - confirm this cannot occur
+ * for the configured MEMENTO_BACKTRACE_MAX. */
+ details->count = (char)count;
+ details->sequence = memento.sequence;
+ details->next = NULL;
+ /* Link the record in through the block's tail pointer, opening and
+ * closing valgrind access to that field around the update. */
+ VALGRIND_MAKE_MEM_DEFINED(&head->details_tail, sizeof(head->details_tail));
+ *head->details_tail = details;
+ head->details_tail = &details->next;
+ VALGRIND_MAKE_MEM_NOACCESS(&head->details_tail, sizeof(head->details_tail));
+}
+#endif
+
+/* Print a backtrace of the caller to stderr.
+ *
+ * Up to two of the frames that Memento_getStacktrace() asks callers to
+ * skip are included, followed by every frame after them. Does nothing
+ * when no stack trace method is configured or no frames were captured.
+ */
+void (Memento_bt)(void)
+{
+#ifdef MEMENTO_STACKTRACE_METHOD
+    void *stack[MEMENTO_BACKTRACE_MAX];
+    int count;
+    int skip;
+    int first;
+
+    count = Memento_getStacktrace(stack, &skip);
+    if (count <= 0)
+        return;
+
+    /* Start two frames before the normal starting point, but never
+     * before the beginning of the array (skip may be less than 2). */
+    first = (skip >= 2 ? skip-2 : 0);
+
+    /* count is the number of frames from stack[skip] onwards, so the
+     * number available from stack[first] is count plus the frames that
+     * were added back. */
+    Memento_showStacktrace(&stack[first], count + (skip-first));
+#endif
+}
+
+/* Print a backtrace to stderr, omitting skip2 frames in addition to the
+ * ones Memento_getStacktrace() already asks callers to skip. Does nothing
+ * when no stack trace method is configured or no frames remain.
+ */
+static void Memento_bt_internal(int skip2)
+{
+#ifdef MEMENTO_STACKTRACE_METHOD
+    void *stack[MEMENTO_BACKTRACE_MAX];
+    int count;
+    int skip;
+
+    count = Memento_getStacktrace(stack, &skip);
+
+    /* count already excludes the first skip frames, so only skip2 more
+     * need to be taken off it. */
+    if (count-skip2 <= 0)
+        return;
+    Memento_showStacktrace(&stack[skip+skip2], count-skip2);
+#else
+    (void)skip2;
+#endif
+}
+
+static int Memento_checkAllMemoryLocked(void);
+
+/* Externally visible hook that exists so a debugger breakpoint can be
+ * placed on it; as compiled it does nothing. */
+void Memento_breakpoint(void)
+{
+#if 0 /* Enable this to force automatic breakpointing */
+#ifndef NDEBUG
+#ifdef _MSC_VER
+    __asm int 3;
+#endif
+#endif
+#endif
+}
+
+static void Memento_init(void);
+
+/* Take the Memento lock, running Memento_init() first if it has not run
+ * yet (memento.inited is still zero). */
+#define MEMENTO_LOCK() \
+do { if (!memento.inited) Memento_init(); MEMENTO_DO_LOCK(); } while (0)
+
+/* Release the Memento lock. */
+#define MEMENTO_UNLOCK() \
+do { MEMENTO_DO_UNLOCK(); } while (0)
+
+/* Call Memento_breakpoint() from code that holds the lock: the lock is
+ * dropped around the call and re-taken afterwards.
+ * Do this as a macro to prevent another level in the callstack,
+ * which is annoying while stepping. */
+#define Memento_breakpointLocked() \
+do { MEMENTO_UNLOCK(); Memento_breakpoint(); MEMENTO_LOCK(); } while (0)
+
+/* Insert block b at the head of list blks.
+ *
+ * Unless MEMENTO_LEAKONLY is defined, the guard areas before and after
+ * the payload are refilled with their fill bytes. The payload is then
+ * marked for valgrind according to type (0: malloc, contents undefined;
+ * 1: free, no access; anything else: left alone) and the header itself
+ * is marked inaccessible.
+ */
+static void Memento_addBlockHead(Memento_Blocks *blks,
+ Memento_BlkHeader *b,
+ int type)
+{
+ /* An empty list gets b as its tail as well as its head. */
+ if (blks->tail == NULL)
+ blks->tail = b;
+ b->next = blks->head;
+ b->prev = NULL;
+ if (blks->head)
+ {
+ /* Open access to the old head's prev field just long enough to
+ * point it back at b. */
+ VALGRIND_MAKE_MEM_DEFINED(&blks->head->prev, sizeof(blks->head->prev));
+ blks->head->prev = b;
+ VALGRIND_MAKE_MEM_NOACCESS(&blks->head->prev, sizeof(blks->head->prev));
+ }
+ blks->head = b;
+#ifndef MEMENTO_LEAKONLY
+ memset(b->preblk, MEMENTO_PREFILL, Memento_PreSize);
+ memset(MEMBLK_POSTPTR(b), MEMENTO_POSTFILL, Memento_PostSize);
+#endif
+ VALGRIND_MAKE_MEM_NOACCESS(MEMBLK_POSTPTR(b), Memento_PostSize);
+ if (type == 0) { /* malloc */
+ VALGRIND_MAKE_MEM_UNDEFINED(MEMBLK_TOBLK(b), b->rawsize);
+ } else if (type == 1) { /* free */
+ VALGRIND_MAKE_MEM_NOACCESS(MEMBLK_TOBLK(b), b->rawsize);
+ }
+ VALGRIND_MAKE_MEM_NOACCESS(b, sizeof(Memento_BlkHeader));
+}
+
+/* Append block b to the tail of list blks.
+ *
+ * Unless MEMENTO_LEAKONLY is defined, the guard areas before and after
+ * the payload are refilled with their fill bytes. The payload is then
+ * marked for valgrind according to type (0: malloc, contents undefined;
+ * 1: free, no access; anything else: left alone); finally the header and
+ * the list's tail pointer are marked inaccessible.
+ */
+static void Memento_addBlockTail(Memento_Blocks *blks,
+ Memento_BlkHeader *b,
+ int type)
+{
+ VALGRIND_MAKE_MEM_DEFINED(&blks->tail, sizeof(Memento_BlkHeader *));
+ /* An empty list gets b as its head as well as its tail. */
+ if (blks->head == NULL)
+ blks->head = b;
+ b->prev = blks->tail;
+ b->next = NULL;
+ if (blks->tail) {
+ /* Open access to the old tail's next field just long enough to
+ * point it at b. */
+ VALGRIND_MAKE_MEM_DEFINED(&blks->tail->next, sizeof(blks->tail->next));
+ blks->tail->next = b;
+ VALGRIND_MAKE_MEM_NOACCESS(&blks->tail->next, sizeof(blks->tail->next));
+ }
+ blks->tail = b;
+#ifndef MEMENTO_LEAKONLY
+ memset(b->preblk, MEMENTO_PREFILL, Memento_PreSize);
+ memset(MEMBLK_POSTPTR(b), MEMENTO_POSTFILL, Memento_PostSize);
+#endif
+ VALGRIND_MAKE_MEM_NOACCESS(MEMBLK_POSTPTR(b), Memento_PostSize);
+ if (type == 0) { /* malloc */
+ VALGRIND_MAKE_MEM_UNDEFINED(MEMBLK_TOBLK(b), b->rawsize);
+ } else if (type == 1) { /* free */
+ VALGRIND_MAKE_MEM_NOACCESS(MEMBLK_TOBLK(b), b->rawsize);
+ }
+ VALGRIND_MAKE_MEM_NOACCESS(b, sizeof(Memento_BlkHeader));
+ VALGRIND_MAKE_MEM_NOACCESS(&blks->tail, sizeof(Memento_BlkHeader *));
+}
+
+/* Results accumulated by the block-checking callbacks below. */
+typedef struct BlkCheckData {
+ int found; /* set once a block has been checked */
+ int preCorrupt; /* guard area before the payload was overwritten */
+ int postCorrupt; /* guard area after the payload was overwritten */
+ int freeCorrupt; /* a freed block's payload was overwritten */
+ size_t index; /* payload offset of the first overwritten byte of a freed block */
+} BlkCheckData;
+
+#ifndef MEMENTO_LEAKONLY
+/* Block-walk callback: check the guard areas before and after block b's
+ * payload against their fill patterns.
+ *
+ * arg points to a BlkCheckData; preCorrupt and/or postCorrupt are set
+ * when a guard area does not match, and found is always set. If no
+ * corruption flag (including freeCorrupt) is set afterwards, the block's
+ * lastCheckedOK is updated to the current sequence number.
+ * Always returns 0.
+ */
+static int Memento_Internal_checkAllocedBlock(Memento_BlkHeader *b, void *arg)
+{
+ int i;
+ MEMENTO_UINT32 *ip;
+ unsigned char *p;
+ BlkCheckData *data = (BlkCheckData *)arg;
+
+ /* The pre-guard is compared a 32-bit word at a time. */
+ ip = (MEMENTO_UINT32 *)(void *)(b->preblk);
+ i = Memento_PreSize>>2;
+ do {
+ if (*ip++ != MEMENTO_PREFILL_UINT32)
+ goto pre_corrupt;
+ } while (--i);
+ /* The if (0) body is reachable only through the goto above. */
+ if (0) {
+pre_corrupt:
+ data->preCorrupt = 1;
+ }
+ /* Postfill may not be aligned, so have to be slower */
+ p = MEMBLK_POSTPTR(b);
+ i = Memento_PostSize-4;
+ /* Consume a leading byte and/or halfword until p is 4-byte aligned. */
+ if ((intptr_t)p & 1)
+ {
+ if (*p++ != MEMENTO_POSTFILL)
+ goto post_corrupt;
+ i--;
+ }
+ if ((intptr_t)p & 2)
+ {
+ if (*(MEMENTO_UINT16 *)p != MEMENTO_POSTFILL_UINT16)
+ goto post_corrupt;
+ p += 2;
+ i -= 2;
+ }
+ /* Whole words; i is biased by -4, so it goes negative once fewer
+ * than four bytes remain. */
+ do {
+ if (*(MEMENTO_UINT32 *)p != MEMENTO_POSTFILL_UINT32)
+ goto post_corrupt;
+ p += 4;
+ i -= 4;
+ } while (i >= 0);
+ /* The low two bits of i now give the number of trailing bytes. */
+ if (i & 2)
+ {
+ if (*(MEMENTO_UINT16 *)p != MEMENTO_POSTFILL_UINT16)
+ goto post_corrupt;
+ p += 2;
+ }
+ if (i & 1)
+ {
+ if (*p != MEMENTO_POSTFILL)
+ goto post_corrupt;
+ }
+ /* As above: reachable only through the gotos. */
+ if (0) {
+post_corrupt:
+ data->postCorrupt = 1;
+ }
+ if ((data->freeCorrupt | data->preCorrupt | data->postCorrupt) == 0) {
+ b->lastCheckedOK = memento.sequence;
+ }
+ data->found |= 1;
+ return 0;
+}
+
+/* Block-walk callback for freed blocks: verify that the whole payload of
+ * b still holds the free-fill pattern, then check its guard areas.
+ *
+ * arg points to a BlkCheckData. If any payload byte differs from
+ * MEMENTO_FREEFILL, freeCorrupt is set and index receives the payload
+ * offset of the first such byte. Returns whatever
+ * Memento_Internal_checkAllocedBlock() returns for the same block.
+ */
+static int Memento_Internal_checkFreedBlock(Memento_BlkHeader *b, void *arg)
+{
+    size_t i;
+    unsigned char *p;
+    BlkCheckData *data = (BlkCheckData *)arg;
+
+    p = MEMBLK_TOBLK(b); /* p will always be aligned */
+    i = b->rawsize;
+
+    /* Attempt to speed this up by checking an (aligned) int at a time.
+     * i counts the bytes still to be checked and is unsigned, so the
+     * loop condition is written so that it can never step below zero
+     * and run on past the end of the payload. */
+    while (i >= 4) {
+        if (*(MEMENTO_UINT32 *)p != MEMENTO_FREEFILL_UINT32)
+            break;
+        p += 4;
+        i -= 4;
+    }
+
+    /* Byte at a time: either the last few bytes, or the word that
+     * failed above so that the exact offending byte is found. */
+    while (i) {
+        if (*p++ != (unsigned char)MEMENTO_FREEFILL)
+            break;
+        i--;
+    }
+    if (i) {
+        data->freeCorrupt = 1;
+        data->index = b->rawsize-i;
+    }
+    return Memento_Internal_checkAllocedBlock(b, arg);
+}
+#endif /* MEMENTO_LEAKONLY */
+
+/* Unlink block b from the doubly linked list blks, fixing up the
+ * neighbours' links and the list's head and tail. b's header is marked
+ * accessible for valgrind and is left that way; b's own next/prev fields
+ * are not modified.
+ */
+static void Memento_removeBlock(Memento_Blocks *blks,
+ Memento_BlkHeader *b)
+{
+ VALGRIND_MAKE_MEM_DEFINED(b, sizeof(*b));
+ if (b->next) {
+ /* Briefly open the successor's prev field to bypass b. */
+ VALGRIND_MAKE_MEM_DEFINED(&b->next->prev, sizeof(b->next->prev));
+ b->next->prev = b->prev;
+ VALGRIND_MAKE_MEM_NOACCESS(&b->next->prev, sizeof(b->next->prev));
+ }
+ if (b->prev) {
+ /* Briefly open the predecessor's next field to bypass b. */
+ VALGRIND_MAKE_MEM_DEFINED(&b->prev->next, sizeof(b->prev->next));
+ b->prev->next = b->next;
+ VALGRIND_MAKE_MEM_NOACCESS(&b->prev->next, sizeof(b->prev->next));
+ }
+ if (blks->tail == b)
+ blks->tail = b->prev;
+ if (blks->head == b)
+ blks->head = b->next;
+}
+
+/* Hand a block back to the underlying allocator. When MEMENTO_DETAILS is
+ * defined, every record on the block's details list is released first.
+ */
+static void free_block(Memento_BlkHeader *head)
+{
+#ifdef MEMENTO_DETAILS
+    Memento_BlkDetails *record;
+    Memento_BlkDetails *following;
+
+    for (record = head->details; record; record = following)
+    {
+        /* Fetch the link before the record itself is released. */
+        following = record->next;
+        MEMENTO_UNDERLYING_FREE(record);
+    }
+#endif
+    MEMENTO_UNDERLYING_FREE(head);
+}
+
+/* Reserve room for space more bytes on the list of freed blocks.
+ *
+ * Returns 0, changing nothing, if space exceeds
+ * MEMENTO_FREELIST_MAX_SINGLE_BLOCK. Otherwise adds space to
+ * memento.freeListSize, releases blocks from the head of the free list
+ * until the total is within MEMENTO_FREELIST_MAX, and returns 1.
+ */
+static int Memento_Internal_makeSpace(size_t space)
+{
+ /* If too big, it can never go on the freelist */
+ if (space > MEMENTO_FREELIST_MAX_SINGLE_BLOCK)
+ return 0;
+ /* Pretend we added it on. */
+ memento.freeListSize += space;
+ /* Ditch blocks until it fits within our limit */
+ /* NOTE(review): the loop dereferences memento.free.head without a NULL
+ * check, so it relies on freeListSize accurately reflecting the list
+ * contents; the new head's prev field is also not updated here -
+ * confirm both are acceptable. */
+ while (memento.freeListSize > MEMENTO_FREELIST_MAX) {
+ Memento_BlkHeader *head = memento.free.head;
+ VALGRIND_MAKE_MEM_DEFINED(head, sizeof(*head));
+ memento.free.head = head->next;
+ memento.freeListSize -= MEMBLK_SIZE(head->rawsize);
+ free_block(head);
+ }
+ /* Make sure we haven't just completely emptied the free list */
+ /* (This should never happen, but belt and braces...) */
+ if (memento.free.head == NULL)
+ memento.free.tail = NULL;
+ return 1;
+}
+
+/* Call app(block, arg) for every block on list blks, from head to tail.
+ *
+ * Each block's header, payload and post-guard are made accessible to
+ * valgrind for the duration of the call; afterwards the post-guard and
+ * header are marked inaccessible again. Iteration stops at the first
+ * non-zero result from app, which is returned; otherwise returns 0.
+ */
+static int Memento_appBlocks(Memento_Blocks *blks,
+ int (*app)(Memento_BlkHeader *,
+ void *),
+ void *arg)
+{
+ Memento_BlkHeader *head = blks->head;
+ Memento_BlkHeader *next;
+ int result;
+ while (head) {
+ VALGRIND_MAKE_MEM_DEFINED(head, sizeof(Memento_BlkHeader));
+ VALGRIND_MAKE_MEM_DEFINED(MEMBLK_TOBLK(head),
+ head->rawsize + Memento_PostSize);
+ result = app(head, arg);
+ /* Read the link while the header is still accessible. */
+ next = head->next;
+ VALGRIND_MAKE_MEM_NOACCESS(MEMBLK_POSTPTR(head), Memento_PostSize);
+ VALGRIND_MAKE_MEM_NOACCESS(head, sizeof(Memento_BlkHeader));
+ if (result)
+ return result;
+ head = next;
+ }
+ return 0;
+}
+
+#ifndef MEMENTO_LEAKONLY
+/* Distrustful - check the block is a real one */
+/* Call app(b, arg), but only if b is actually found on list blks.
+ *
+ * The list is walked from its head looking for b; if it is found, app is
+ * applied to it with the block made accessible to valgrind and its
+ * result returned. Returns 0 without calling app if b is not on the list.
+ */
+static int Memento_appBlockUser(Memento_Blocks *blks,
+ int (*app)(Memento_BlkHeader *,
+ void *),
+ void *arg,
+ Memento_BlkHeader *b)
+{
+ Memento_BlkHeader *head = blks->head;
+ Memento_BlkHeader *next;
+ int result;
+ /* Walk forward until b is reached or the list runs out. */
+ /* NOTE(review): headers visited here are made DEFINED but only their
+ * post-guards are returned to NOACCESS - confirm that is intended. */
+ while (head && head != b) {
+ VALGRIND_MAKE_MEM_DEFINED(head, sizeof(Memento_BlkHeader));
+ next = head->next;
+ VALGRIND_MAKE_MEM_NOACCESS(MEMBLK_POSTPTR(head), Memento_PostSize);
+ head = next;
+ }
+ /* NOTE(review): this also holds when both are NULL (b == NULL and the
+ * list exhausted), in which case head is dereferenced below - confirm
+ * callers never pass a NULL b. */
+ if (head == b) {
+ VALGRIND_MAKE_MEM_DEFINED(head, sizeof(Memento_BlkHeader));
+ VALGRIND_MAKE_MEM_DEFINED(MEMBLK_TOBLK(head),
+ head->rawsize + Memento_PostSize);
+ result = app(head, arg);
+ VALGRIND_MAKE_MEM_NOACCESS(MEMBLK_POSTPTR(head), Memento_PostSize);
+ VALGRIND_MAKE_MEM_NOACCESS(head, sizeof(Memento_BlkHeader));
+ return result;
+ }
+ return 0;
+}
+
+/* Call app(b, arg) on a single block without checking that b is on any
+ * list (blks is unused). The block is made accessible to valgrind for
+ * the call and its post-guard and header are marked inaccessible
+ * afterwards. Returns app's result.
+ */
+static int Memento_appBlock(Memento_Blocks *blks,
+ int (*app)(Memento_BlkHeader *,
+ void *),
+ void *arg,
+ Memento_BlkHeader *b)
+{
+ int result;
+ (void)blks;
+ VALGRIND_MAKE_MEM_DEFINED(b, sizeof(Memento_BlkHeader));
+ VALGRIND_MAKE_MEM_DEFINED(MEMBLK_TOBLK(b),
+ b->rawsize + Memento_PostSize);
+ result = app(b, arg);
+ VALGRIND_MAKE_MEM_NOACCESS(MEMBLK_POSTPTR(b), Memento_PostSize);
+ VALGRIND_MAKE_MEM_NOACCESS(b, sizeof(Memento_BlkHeader));
+ return result;
+}
+#endif /* MEMENTO_LEAKONLY */
+
+/* Print a one-line description of block b to stderr (no newline):
+ * payload address, size and sequence number, then the label if there is
+ * one (preceded by the character space) and a "(Known Leak)" marker if
+ * that flag is set. Returns the block's sequence number.
+ */
+static int showBlock(Memento_BlkHeader *b, int space)
+{
+ int seq;
+ VALGRIND_MAKE_MEM_DEFINED(b, sizeof(Memento_BlkHeader));
+ fprintf(stderr, FMTP":(size=" FMTZ ",num=%d)",
+ MEMBLK_TOBLK(b), (FMTZ_CAST)b->rawsize, b->sequence);
+ if (b->label)
+ fprintf(stderr, "%c(%s)", space, b->label);
+ if (b->flags & Memento_Flag_KnownLeak)
+ fprintf(stderr, "(Known Leak)");
+ /* Copy the value out before the header becomes inaccessible again. */
+ seq = b->sequence;
+ VALGRIND_MAKE_MEM_NOACCESS(b, sizeof(Memento_BlkHeader));
+ return seq;
+}
+
+/* Print block b on its own line on stderr, indented for nesting depth n.
+ *
+ * The indent is n+1 columns. Each full 40 columns of that is shown as a
+ * single '*' instead, so the remainder printed as spaces is at most 40.
+ */
+static void blockDisplay(Memento_BlkHeader *b, int n)
+{
+    n++;
+    while (n > 40)
+    {
+        fprintf(stderr, "*");
+        n -= 40;
+    }
+    /* Let printf generate the padding: an empty string right-justified
+     * in a field of width n is exactly n spaces. This does not depend
+     * on the length of a hand-counted string literal of spaces. */
+    if (n > 0)
+        fprintf(stderr, "%*s", n, "");
+    showBlock(b, '\t');
+    fprintf(stderr, "\n");
+}
+
+/* Block-walk callback: print block b and add it to the running totals.
+ *
+ * arg points to an array of two size_t values: [0] counts the blocks
+ * seen and [1] sums their payload sizes. Always returns 0.
+ */
+static int Memento_listBlock(Memento_BlkHeader *b,
+                             void *arg)
+{
+    size_t *totals = (size_t *)arg;
+    size_t *numBlocks = &totals[0];
+    size_t *numBytes = &totals[1];
+
+    blockDisplay(b, 0);
+    *numBlocks += 1;
+    /* The header is only readable between these two annotations. */
+    VALGRIND_MAKE_MEM_DEFINED(b, sizeof(Memento_BlkHeader));
+    *numBytes += b->rawsize;
+    VALGRIND_MAKE_MEM_NOACCESS(b, sizeof(Memento_BlkHeader));
+    return 0;
+}
+
+/* Print block b and everything reachable from it through the child and
+ * sibling links, indenting each block according to its depth.
+ *
+ * Siblings and only-children are handled by looping; recursion is used
+ * only for the children of a block that also has a sibling.
+ */
+static void doNestedDisplay(Memento_BlkHeader *b,
+ int depth)
+{
+ /* Try and avoid recursion if we can help it */
+ do {
+ Memento_BlkHeader *c = NULL;
+ blockDisplay(b, depth);
+ VALGRIND_MAKE_MEM_DEFINED(b, sizeof(Memento_BlkHeader));
+ if (b->sibling) {
+ /* Continue along the sibling chain at this depth and remember
+ * the children for the recursive call below. */
+ c = b->child;
+ b = b->sibling;
+ } else {
+ /* No sibling: descend into the children in place. */
+ b = b->child;
+ depth++;
+ }
+ /* NOTE(review): b has already been advanced, so this annotation is
+ * applied to the next block (possibly NULL) rather than the one just
+ * read - confirm whether the previous block was intended. */
+ VALGRIND_MAKE_MEM_NOACCESS(b, sizeof(Memento_BlkHeader));
+ if (c)
+ doNestedDisplay(c, depth+1);
+ } while (b);
+}
+
+/* qsort()/bsearch() comparison function for arrays of pointers.
+ *
+ * a_ and b_ each point at an element (itself a pointer). Returns a
+ * negative value, zero or a positive value according to whether the
+ * first pointer is below, equal to or above the second.
+ *
+ * The result is derived from comparisons rather than from the pointer
+ * difference: a difference does not fit in an int when pointers are
+ * wider than int, and truncating it can give the wrong sign or a false
+ * zero.
+ */
+static int ptrcmp(const void *a_, const void *b_)
+{
+    const char *a = *(const char *const *)a_;
+    const char *b = *(const char *const *)b_;
+
+    return (a > b) - (a < b);
+}
+
+/* List every block in use on stderr as a tree, nesting each block under
+ * the first other block found to contain a pointer to it, followed by
+ * the total number and size of the blocks.
+ *
+ * The prev, child and sibling fields of the headers are borrowed to hold
+ * the tree while it is built and are restored before returning.
+ *
+ * Returns 0 on success, or 1 if the temporary array of block addresses
+ * could not be allocated (in which case nothing has been printed).
+ */
+static
+int Memento_listBlocksNested(void)
+{
+    int count, i;
+    size_t size;
+    Memento_BlkHeader *b, *prev;
+    void **blocks, *minptr, *maxptr;
+    intptr_t mask;
+
+    /* Count the blocks */
+    count = 0;
+    size = 0;
+    for (b = memento.used.head; b; b = b->next) {
+        VALGRIND_MAKE_MEM_DEFINED(b, sizeof(*b));
+        size += b->rawsize;
+        count++;
+    }
+
+    /* With no blocks there is no tree to build; report the (zero)
+     * totals without forming an address from a NULL header. */
+    if (count == 0) {
+        fprintf(stderr, " Total number of blocks = %d\n", count);
+        fprintf(stderr, " Total size of blocks = "FMTZ"\n", (FMTZ_CAST)size);
+        return 0;
+    }
+
+    /* Make our block list */
+    blocks = MEMENTO_UNDERLYING_MALLOC(sizeof(void *) * count);
+    if (blocks == NULL)
+        return 1;
+
+    /* Populate our block list */
+    b = memento.used.head;
+    minptr = maxptr = MEMBLK_TOBLK(b);
+    mask = (intptr_t)minptr;
+    for (i = 0; b; b = b->next, i++) {
+        void *p = MEMBLK_TOBLK(b);
+        /* mask keeps the address bits that are set in every block. */
+        mask &= (intptr_t)p;
+        if (p < minptr)
+            minptr = p;
+        if (p > maxptr)
+            maxptr = p;
+        blocks[i] = p;
+        b->flags &= ~Memento_Flag_HasParent;
+        b->child = NULL;
+        b->sibling = NULL;
+        b->prev = NULL; /* parent */
+    }
+    qsort(blocks, count, sizeof(void *), ptrcmp);
+
+    /* Now, calculate tree */
+    for (b = memento.used.head; b; b = b->next) {
+        char *p = MEMBLK_TOBLK(b);
+        size_t end = (b->rawsize < MEMENTO_PTRSEARCH ? b->rawsize : MEMENTO_PTRSEARCH);
+        size_t z;
+        VALGRIND_MAKE_MEM_DEFINED(p, end);
+        /* A block smaller than a pointer cannot hold one. Skip it
+         * here, because the unsigned subtraction below would otherwise
+         * wrap round and send the scan far beyond the block. */
+        if (end < sizeof(void *))
+            continue;
+        end -= sizeof(void *)-1;
+        for (z = MEMENTO_SEARCH_SKIP; z < end; z += sizeof(void *)) {
+            void *q = *(void **)(&p[z]);
+            void **r;
+
+            /* Do trivial checks on pointer */
+            if ((mask & (intptr_t)q) != mask || q < minptr || q > maxptr)
+                continue;
+
+            /* Search for pointer */
+            r = bsearch(&q, blocks, count, sizeof(void *), ptrcmp);
+            if (r) {
+                /* Found child */
+                Memento_BlkHeader *child = MEMBLK_FROMBLK(*r);
+                Memento_BlkHeader *parent;
+
+                /* We're assuming tree structure, not graph - ignore second
+                 * and subsequent pointers. */
+                if (child->prev != NULL) /* parent */
+                    continue;
+                if (child->flags & Memento_Flag_HasParent)
+                    continue;
+
+                /* Not interested in pointers to ourself! */
+                if (child == b)
+                    continue;
+
+                /* We're also assuming acyclicness here. If this is one of
+                 * our parents, ignore it. */
+                parent = b->prev; /* parent */
+                while (parent != NULL && parent != child)
+                    parent = parent->prev; /* parent */
+                if (parent == child)
+                    continue;
+
+                child->sibling = b->child;
+                b->child = child;
+                child->prev = b; /* parent */
+                child->flags |= Memento_Flag_HasParent;
+            }
+        }
+    }
+
+    /* Now display with nesting */
+    for (b = memento.used.head; b; b = b->next) {
+        if ((b->flags & Memento_Flag_HasParent) == 0)
+            doNestedDisplay(b, 0);
+    }
+    fprintf(stderr, " Total number of blocks = %d\n", count);
+    fprintf(stderr, " Total size of blocks = "FMTZ"\n", (FMTZ_CAST)size);
+
+    MEMENTO_UNDERLYING_FREE(blocks);
+
+    /* Now put the blocks back for valgrind, and restore the prev
+     * and magic values. */
+    prev = NULL;
+    for (b = memento.used.head; b;) {
+        Memento_BlkHeader *next = b->next;
+        b->prev = prev;
+        b->child = MEMENTO_CHILD_MAGIC;
+        b->sibling = MEMENTO_SIBLING_MAGIC;
+        prev = b;
+        VALGRIND_MAKE_MEM_NOACCESS(b, sizeof(*b));
+        b = next;
+    }
+
+    return 0;
+}
+
+/* Print every block currently in use to stderr, under the Memento lock.
+ * The nested (tree) listing is tried first; if it reports failure, a
+ * flat listing with its own totals is printed instead.
+ */
+void Memento_listBlocks(void)
+{
+    MEMENTO_LOCK();
+    fprintf(stderr, "Allocated blocks:\n");
+    if (Memento_listBlocksNested() != 0)
+    {
+        /* [0]: number of blocks, [1]: total size */
+        size_t totals[2] = { 0, 0 };
+
+        Memento_appBlocks(&memento.used, Memento_listBlock, &totals[0]);
+        fprintf(stderr, " Total number of blocks = "FMTZ"\n", (FMTZ_CAST)totals[0]);
+        fprintf(stderr, " Total size of blocks = "FMTZ"\n", (FMTZ_CAST)totals[1]);
+    }
+    MEMENTO_UNLOCK();
+}
+
+/* Block-walk callback: list block b only if it has not been listed
+ * before, and mark it with Memento_Flag_OldBlock so that it is not
+ * listed again. arg is passed through to Memento_listBlock(). Returns 0
+ * for a block that was already marked, otherwise Memento_listBlock()'s
+ * result.
+ */
+static int Memento_listNewBlock(Memento_BlkHeader *b,
+                                void *arg)
+{
+    if ((b->flags & Memento_Flag_OldBlock) == 0)
+    {
+        b->flags |= Memento_Flag_OldBlock;
+        return Memento_listBlock(b, arg);
+    }
+    return 0;
+}
+
+/* Print, under the Memento lock, the blocks in use that have not been
+ * shown by an earlier call, followed by their number and total size.
+ */
+void Memento_listNewBlocks(void)
+{
+    /* [0]: number of blocks, [1]: total size */
+    size_t totals[2];
+
+    MEMENTO_LOCK();
+    totals[1] = 0;
+    totals[0] = 0;
+    fprintf(stderr, "Blocks allocated and still extant since last list:\n");
+    Memento_appBlocks(&memento.used, Memento_listNewBlock, &totals[0]);
+    fprintf(stderr, " Total number of blocks = "FMTZ"\n", (FMTZ_CAST)totals[0]);
+    fprintf(stderr, " Total size of blocks = "FMTZ"\n", (FMTZ_CAST)totals[1]);
+    MEMENTO_UNLOCK();
+}
+
+/* Print the cumulative allocation statistics held in memento to stderr:
+ * total and peak bytes, the malloc/free/realloc counts, and the average
+ * allocation size (reported as 0 when there have been no mallocs).
+ */
+static void Memento_endStats(void)
+{
+    fprintf(stderr, "Total memory malloced = "FMTZ" bytes\n", (FMTZ_CAST)memento.totalAlloc);
+    fprintf(stderr, "Peak memory malloced = "FMTZ" bytes\n", (FMTZ_CAST)memento.peakAlloc);
+    fprintf(stderr, FMTZ" mallocs, "FMTZ" frees, "FMTZ" reallocs\n",
+            (FMTZ_CAST)memento.numMallocs,
+            (FMTZ_CAST)memento.numFrees,
+            (FMTZ_CAST)memento.numReallocs);
+    if (memento.numMallocs != 0)
+        fprintf(stderr, "Average allocation size "FMTZ" bytes\n",
+                (FMTZ_CAST)(memento.totalAlloc/memento.numMallocs));
+    else
+        fprintf(stderr, "Average allocation size "FMTZ" bytes\n", (FMTZ_CAST)0);
+}
+
+/* Print the number of bytes currently allocated, followed by the
+ * cumulative statistics from Memento_endStats(), while holding the
+ * Memento lock.
+ */
+void Memento_stats(void)
+{
+    MEMENTO_LOCK();
+    fprintf(stderr, "Current memory malloced = "FMTZ" bytes\n",
+            (FMTZ_CAST)memento.alloc);
+    Memento_endStats();
+    MEMENTO_UNLOCK();
+}
+
+#ifdef MEMENTO_DETAILS
+/* Block-walk callback: print block b's address, size, sequence number
+ * and label, followed by each event recorded against it together with
+ * the stack trace stored for that event. arg is unused.
+ * Always returns 0.
+ */
+static int showInfo(Memento_BlkHeader *b, void *arg)
+{
+ Memento_BlkDetails *details;
+
+ (void)arg;
+
+ fprintf(stderr, FMTP":(size="FMTZ",num=%d)",
+ MEMBLK_TOBLK(b), (FMTZ_CAST)b->rawsize, b->sequence);
+ if (b->label)
+ fprintf(stderr, " (%s)", b->label);
+ fprintf(stderr, "\nEvents:\n");
+
+ for (details = b->details; details; details = details->next)
+ {
+ /* When hideMultipleReallocs is set, a realloc event immediately
+ * followed by another realloc event is not shown, so only the last
+ * of a run of reallocs is printed. */
+ if (memento.hideMultipleReallocs &&
+ details->type == Memento_EventType_realloc &&
+ details->next &&
+ details->next->type == Memento_EventType_realloc) {
+ continue;
+ }
+ fprintf(stderr, " Event %d (%s)\n", details->sequence, eventType[(int)details->type]);
+ Memento_showStacktrace(details->stack, details->count);
+ }
+ return 0;
+}
+#endif
+
+/* When MEMENTO_DETAILS is defined, print the recorded event history of
+ * every block in use to stderr, under the Memento lock. Otherwise this
+ * does nothing.
+ */
+void Memento_listBlockInfo(void)
+{
+#ifdef MEMENTO_DETAILS
+    MEMENTO_LOCK();
+    fprintf(stderr, "Details of allocated blocks:\n");
+    Memento_appBlocks(&memento.used, showInfo, NULL);
+    MEMENTO_UNLOCK();
+#endif
+}
+
+/* Report whether any block still in use is a genuine leak.
+ *
+ * Returns 1 as soon as a block without Memento_Flag_KnownLeak is found
+ * on the used list, or 0 if every remaining block carries that flag
+ * (including when the list is empty).
+ */
+static int Memento_nonLeakBlocksLeaked(void)
+{
+ Memento_BlkHeader *blk = memento.used.head;
+ while (blk)
+ {
+ Memento_BlkHeader *next;
+ int leaked;
+ VALGRIND_MAKE_MEM_DEFINED(blk, sizeof(*blk));
+ leaked = ((blk->flags & Memento_Flag_KnownLeak) == 0);
+ next = blk->next;
+ /* NOTE(review): this repeats the DEFINED annotation above; other
+ * list walkers in this file end with NOACCESS - confirm which was
+ * intended. */
+ VALGRIND_MAKE_MEM_DEFINED(blk, sizeof(*blk));
+ if (leaked)
+ return 1;
+ blk = next;
+ }
+ return 0;
+}
+
+/* End-of-run report, registered with atexit() by Memento_init().
+ *
+ * Checks all memory; unless a SEGV was recorded, prints the final
+ * statistics and, if any block without the known-leak flag is still
+ * allocated, lists the leaked blocks. It then reports the squeeze/SEGV
+ * status and the MEMENTO_* values describing the run, and calls abort()
+ * if blocks were leaked and abortOnLeak is set.
+ */
+void Memento_fin(void)
+{
+    Memento_checkAllMemory();
+
+    if (!memento.segv && (Memento_endStats(), Memento_nonLeakBlocksLeaked()))
+    {
+        Memento_listBlocks();
+#ifdef MEMENTO_DETAILS
+        fprintf(stderr, "\n");
+        Memento_listBlockInfo();
+#endif
+        Memento_breakpoint();
+    }
+
+    if (memento.squeezing)
+    {
+        const char *segvNote = memento.segv ? " (with SEGV)" : "";
+
+        if (memento.pattern != 0)
+            fprintf(stderr, "Memory squeezing @ %d (%d) complete%s\n", memento.squeezeAt, memento.pattern, segvNote);
+        else
+            fprintf(stderr, "Memory squeezing @ %d complete%s\n", memento.squeezeAt, segvNote);
+    }
+    else if (memento.segv)
+    {
+        fprintf(stderr, "Memento completed (with SEGV)\n");
+    }
+
+    if (memento.failing)
+    {
+        fprintf(stderr, "MEMENTO_FAILAT=%d\n", memento.failAt);
+        fprintf(stderr, "MEMENTO_PATTERN=%d\n", memento.pattern);
+    }
+
+    if (memento.nextFailAt != 0)
+    {
+        fprintf(stderr, "MEMENTO_NEXTFAILAT=%d\n", memento.nextFailAt);
+        fprintf(stderr, "MEMENTO_NEXTPATTERN=%d\n", memento.nextPattern);
+    }
+
+    /* The leak test is only consulted when abortOnLeak does not rule
+     * the abort out, but it is always evaluated first, as before. */
+    if (Memento_nonLeakBlocksLeaked() && memento.abortOnLeak)
+    {
+        fprintf(stderr, "Calling abort() because blocks were leaked and MEMENTO_ABORT_ON_LEAK is set.\n");
+        abort();
+    }
+}
+
+/* Parses an integer from the start of <text> with strtol(), base 0 (so
+ * decimal, 0x-prefixed hex and 0-prefixed octal are all accepted).
+ *
+ * Params:
+ *  text:
+ *      string to parse.
+ *  out:
+ *      receives the parsed value, converted to int.
+ *  relative:
+ *      *relative is set to 1 if <text> begins with '+' or '-', else 0.
+ *  end:
+ *      *end is set to the first character after the number.
+ *
+ * Returns 0 on success. If strtol() sets errno or consumes no characters,
+ * prints a message to stderr and returns -1.
+ */
+static int read_number(const char *text, int *out, int *relative, char **end)
+{
+    long parsed;
+
+    *relative = (text[0] == '+' || text[0] == '-') ? 1 : 0;
+
+    errno = 0;
+    parsed = strtol(text, end, 0 /*base*/);
+    *out = (int)parsed;
+    if (errno == 0 && *end != text)
+    {
+        if (0)
+            fprintf(stderr, "text='%s': *out=%i *relative=%i\n",
+                    text, *out, *relative);
+        return 0;
+    }
+
+    fprintf(stderr, "Failed to parse number at start of '%s'.\n", text);
+    return -1;
+}
+
+/* Parses <number> optionally followed by a signed adjustment from the
+ * start of <text>, i.e. <number> or <number>+<delta> or <number>-<delta>.
+ *
+ * E.g. text='1234+2' sets *out=1236 and text='1234-1' sets *out=1233.
+ *
+ * Params:
+ *  text:
+ *      string to parse.
+ *  out:
+ *      receives the resulting value.
+ *  end:
+ *      *end is set to the first character after what was parsed.
+ *
+ * Returns 0 on success, else -1 (including when the base number itself
+ * begins with '+' or '-').
+ */
+static int read_number_delta(const char *text, int *out, char **end)
+{
+    int isRelative;
+    int err = read_number(text, out, &isRelative, end);
+
+    if (err)
+        return err;
+    if (isRelative) {
+        fprintf(stderr, "Base number should not start with '+' or '-' at start of '%s'.\n",
+                text);
+        return -1;
+    }
+
+    /* A sign immediately after the base number introduces the delta. */
+    if (*end && (**end == '-' || **end == '+')) {
+        int delta;
+
+        err = read_number(*end, &delta, &isRelative, end);
+        if (err)
+            return err;
+        *out += delta;
+    }
+    if (0) fprintf(stderr, "text='%s': *out=%i\n", text, *out);
+
+    return 0;
+}
+
+/* Parses a half-open range [*begin, *end) from the start of <text>.
+ *
+ * E.g.:
+ *  text='115867-2' sets *begin=115865 *end=115866.
+ *  text='115867-1..+3' sets *begin=115866 *end=115869.
+ *
+ * Accepted forms of <range>:
+ *  <value>                 - *begin=value, *end=*begin+1.
+ *  <value1>..<value2>      - *begin=value1, *end=value2.
+ *  <value>..+<number>      - *begin=value, *end=*begin+number.
+ * where <value> is one of:
+ *  <number>
+ *  <number>+<number>
+ *  <number>-<number>
+ * and <number> is [0-9]+.
+ *
+ * *string_end is set to the first character not consumed.
+ *
+ * Returns 0 on success, else -1 (including when the range would end
+ * before it begins).
+ */
+static int read_number_range(const char *text, int *begin, int *end, char **string_end)
+{
+    int hasUpperBound;
+    int err = read_number_delta(text, begin, string_end);
+
+    if (err)
+        return err;
+
+    hasUpperBound = (string_end && (*string_end)[0] == '.' && (*string_end)[1] == '.');
+    if (!hasUpperBound) {
+        *end = *begin + 1;
+    } else {
+        int isRelative;
+
+        err = read_number((*string_end) + 2, end, &isRelative, string_end);
+        if (err)
+            return err;
+        /* "..+N" (or "..-N") gives the end relative to the beginning. */
+        if (isRelative)
+            *end += *begin;
+    }
+
+    if (*end < *begin) {
+        fprintf(stderr, "Range %i..%i has negative extent, at start of '%s'.\n",
+                *begin, *end, text);
+        return -1;
+    }
+    if (0) fprintf(stderr, "text='%s': *begin=%i *end=%i\n", text, *begin, *end);
+
+    return 0;
+}
+
+/* Parses a comma-separated list of ranges and appends each one to
+ * memento.squeezes.
+ *
+ * Format: <range>[,<range>]*
+ *
+ * For description of <range>, see read_number_range() above.
+ *
+ * E.g.:
+ *  MEMENTO_SQUEEZES=1234-2..+4,2345,2350..+2
+ *
+ * Returns 0 on success, or -1 on a parse error or allocation failure.
+ * Ranges added before the failure remain in memento.squeezes.
+ */
+static int Memento_add_squeezes(const char *text)
+{
+    int e = 0;
+    for(;;) {
+        int begin;
+        int end;
+        char *string_end;
+        void *grown;
+
+        if (!*text)
+            break;
+        e = read_number_range(text, &begin, &end, &string_end);
+        if (e)
+            break;
+        if (*string_end && *string_end != ',') {
+            fprintf(stderr, "Expecting comma at start of '%s'.\n", string_end);
+            e = -1;
+            break;
+        }
+        fprintf(stderr, "Adding squeeze range %i..%i.\n",
+                begin, end);
+
+        /* Grow through a temporary so that a failed reallocation
+         * neither loses the existing array nor leaves squeezes_num
+         * counting an entry that does not exist. */
+        grown = MEMENTO_UNDERLYING_REALLOC(
+                memento.squeezes,
+                (memento.squeezes_num + 1) * sizeof(*memento.squeezes)
+                );
+        if (!grown) {
+            fprintf(stderr, "Failed to allocate memory for memento.squeezes_num=%i\n",
+                    memento.squeezes_num + 1);
+            e = -1;
+            break;
+        }
+        memento.squeezes = grown;
+        memento.squeezes[memento.squeezes_num].begin = begin;
+        memento.squeezes[memento.squeezes_num].end = end;
+        memento.squeezes_num += 1;
+
+        if (*string_end == 0)
+            break;
+        text = string_end + 1;
+    }
+
+    return e;
+}
+
+/* Return the value of environment variable name converted with atoi(),
+ * or 0 if the variable is not set. */
+static int Memento_envAsInt(const char *name)
+{
+    const char *value = getenv(name);
+
+    return (value ? atoi(value) : 0);
+}
+
+/* One-time initialisation of the global memento state.
+ *
+ * Clears the state, reads the MEMENTO_* environment variables, registers
+ * Memento_fin() to run at exit, and sets up the mutex and the stack
+ * tracer. Exits the process with status 1 if MEMENTO_SQUEEZES is set but
+ * cannot be parsed.
+ */
+static void Memento_init(void)
+{
+    char *env;
+
+    memset(&memento, 0, sizeof(memento));
+    memento.inited = 1;
+    memento.used.head = NULL;
+    memento.used.tail = NULL;
+    memento.free.head = NULL;
+    memento.free.tail = NULL;
+    memento.sequence = 0;
+    memento.countdown = 1024;
+    memento.squeezes = NULL;
+    memento.squeezes_num = 0;
+    memento.squeezes_pos = 0;
+
+    memento.failAt = Memento_envAsInt("MEMENTO_FAILAT");
+    memento.breakAt = Memento_envAsInt("MEMENTO_BREAKAT");
+
+    /* An unset (or zero) paranoia level becomes -1024. */
+    memento.paranoia = Memento_envAsInt("MEMENTO_PARANOIA");
+    if (memento.paranoia == 0)
+        memento.paranoia = -1024;
+
+    memento.paranoidAt = Memento_envAsInt("MEMENTO_PARANOIDAT");
+    memento.squeezeAt = Memento_envAsInt("MEMENTO_SQUEEZEAT");
+    memento.pattern = Memento_envAsInt("MEMENTO_PATTERN");
+    memento.hideMultipleReallocs = Memento_envAsInt("MEMENTO_HIDE_MULTIPLE_REALLOCS");
+    memento.abortOnLeak = Memento_envAsInt("MEMENTO_ABORT_ON_LEAK");
+    memento.abortOnCorruption = Memento_envAsInt("MEMENTO_ABORT_ON_CORRUPTION");
+
+    env = getenv("MEMENTO_SQUEEZES");
+    if (env) {
+        fprintf(stderr, "Parsing squeeze ranges in MEMENTO_SQUEEZES=%s\n", env);
+        if (Memento_add_squeezes(env)) {
+            fprintf(stderr, "Failed to parse MEMENTO_SQUEEZES=%s\n", env);
+            exit(1);
+        }
+    }
+
+    memento.maxMemory = Memento_envAsInt("MEMENTO_MAXMEMORY");
+
+    atexit(Memento_fin);
+
+    Memento_initMutex(&memento.mutex);
+
+    Memento_initStacktracer();
+
+    Memento_breakpoint();
+}
+
+/* Query/result record passed to Memento_containsAddr() when searching a
+ * block list for the block that contains a given address. */
+typedef struct findBlkData {
+ /* In: the address being looked up. */
+ void *addr;
+ /* Out: header of the matching block; callers preset this to NULL. */
+ Memento_BlkHeader *blk;
+ /* Out: where addr lies relative to blk: 1 = inside the user data,
+  * 2 = before the user data (header/preguard), 3 = in the postguard.
+  * Callers preset this to 0. */
+ int flags;
+} findBlkData;
+
+/* Block-list callback: tests whether the address in the findBlkData pointed
+ * to by 'arg' falls within block 'b' or its guard regions. On a match,
+ * records the block and region (1 = user data, 2 = before the user data,
+ * 3 = postguard) and returns 1; otherwise returns 0. */
+static int Memento_containsAddr(Memento_BlkHeader *b,
+                                void *arg)
+{
+    findBlkData *query = (findBlkData *)arg;
+    void *user_start = MEMBLK_TOBLK(b);
+    char *user_end = (char *)user_start + b->rawsize;
+    int region = 0;
+
+    if ((user_start <= query->addr) && ((void *)user_end > query->addr))
+        region = 1;
+    else if (((void *)b <= query->addr) && (user_start > query->addr))
+        region = 2;
+    else if (((void *)user_end <= query->addr) &&
+             ((void *)(user_end + Memento_PostSize) > query->addr))
+        region = 3;
+
+    if (region == 0)
+        return 0;
+
+    query->blk = b;
+    query->flags = region;
+    return 1;
+}
+
+/* Show the recorded details of any block (allocated, then freed) that
+ * contains 'addr'. Only functional when built with MEMENTO_DETAILS;
+ * otherwise prints a notice to stdout. */
+void Memento_info(void *addr)
+{
+#ifdef MEMENTO_DETAILS
+    findBlkData query;
+
+    query.addr = addr;
+
+    MEMENTO_LOCK();
+
+    /* Search the live blocks first... */
+    query.blk = NULL;
+    query.flags = 0;
+    Memento_appBlocks(&memento.used, Memento_containsAddr, &query);
+    if (query.blk != NULL)
+        showInfo(query.blk, NULL);
+
+    /* ...then the retained freed blocks. */
+    query.blk = NULL;
+    query.flags = 0;
+    Memento_appBlocks(&memento.free, Memento_containsAddr, &query);
+    if (query.blk != NULL)
+        showInfo(query.blk, NULL);
+
+    MEMENTO_UNLOCK();
+#else
+    printf("Memento not compiled with details support\n");
+#endif
+}
+
+#ifdef MEMENTO_HAS_FORK
+#include <unistd.h>
+#include <sys/wait.h>
+#include <time.h>
+#ifdef MEMENTO_STACKTRACE_METHOD
+#if MEMENTO_STACKTRACE_METHOD == 1
+#include <signal.h>
+#endif
+#endif
+
+/* FIXME: Find some portable way of getting this */
+/* MacOSX has 10240, Ubuntu seems to have 256 */
+#ifndef OPEN_MAX
+#define OPEN_MAX 10240
+#endif
+
+/* stashed_map[j] = i means that file descriptor i-1 was duplicated to j.
+ * A value of 0 means j is not a stashed duplicate. Filled in by squeeze()
+ * before forking and cleared again when the parent restores the fds. */
+int stashed_map[OPEN_MAX];
+
+/* Signal handler used when forking is available: reports the fault, records
+ * it in memento.segv, prints a backtrace and terminates the process. */
+static void Memento_signal(int sig)
+{
+    (void)sig;
+
+    fputs("SEGV at:\n", stderr);
+    memento.segv = 1;
+    Memento_bt_internal(0);
+
+    exit(1);
+}
+
+/* Fork-based memory squeezing.
+ *
+ * Forks the process at the current allocation event. The child returns 1
+ * (so the caller fails this allocation) and carries on running with
+ * memento.squeezing set; the parent waits for the child to finish (killing
+ * it after a timeout), restores its file descriptors and returns 0 (so the
+ * allocation proceeds).
+ *
+ * Also returns 1 without forking when the pattern bits are exhausted.
+ */
+static int squeeze(void)
+{
+    pid_t pid;
+    int i;
+    /* Initialised so the report below is well defined even if the wait
+     * loop exits without waitpid() ever storing a status. */
+    int status = 0;
+
+    if (memento.patternBit < 0)
+        return 1;
+    if (memento.squeezing && memento.patternBit >= MEMENTO_MAXPATTERN)
+        return 1;
+
+    if (memento.patternBit == 0)
+        memento.squeezeAt = memento.sequence;
+
+    if (!memento.squeezing) {
+        fprintf(stderr, "Memory squeezing @ %d\n", memento.squeezeAt);
+    } else
+        fprintf(stderr, "Memory squeezing @ %d (%x,%x)\n", memento.squeezeAt, memento.pattern, memento.patternBit);
+
+    /* When we fork below, the child is going to snaffle all our file pointers
+     * and potentially corrupt them. Let's make copies of all of them before
+     * we fork, so we can restore them when we restart. */
+    for (i = 0; i < OPEN_MAX; i++) {
+        if (stashed_map[i] == 0) {
+            int j = dup(i);
+            if (j >= OPEN_MAX) {
+                /* Too large to record in stashed_map; writing it would
+                 * overflow the array, so give the duplicate up. */
+                close(j);
+            } else if (j >= 0) {
+                stashed_map[j] = i+1;
+            }
+        }
+    }
+
+    fprintf(stderr, "Failing at:\n");
+    Memento_bt_internal(2);
+    pid = fork();
+    if (pid == 0) {
+        /* Child */
+        signal(SIGSEGV, Memento_signal);
+        /* Close the dup-licated fds to avoid them getting corrupted by faulty
+         * code. */
+        for (i = 0; i < OPEN_MAX; i++) {
+            if (stashed_map[i] != 0) {
+                /* We close duplicated fds, just in case child has some bad
+                 * code that modifies/closes random fds. */
+                close(i);
+            }
+        }
+        /* In the child, we always fail the next allocation. */
+        if (memento.patternBit == 0) {
+            memento.patternBit = 1;
+        } else
+            memento.patternBit <<= 1;
+        memento.squeezing = 1;
+
+        /* This is necessary to allow Memento_failThisEventLocked() near the
+         * end to do 'return squeeze();'. */
+        memento.squeezes_num = 0;
+
+        return 1;
+    }
+
+    /* In the parent if we hit another allocation, pass it (and record the
+     * fact we passed it in the pattern. */
+    memento.pattern |= memento.patternBit;
+    memento.patternBit <<= 1;
+
+    if (pid < 0) {
+        /* No child exists. Do not call waitpid(-1, ...), which would reap
+         * an unrelated child of the application. */
+        fprintf(stderr, "fork() failed; unable to squeeze at this event.\n");
+    } else {
+        /* Wait for pid to finish, with a timeout. */
+        struct timespec tm = { 0, 10 * 1000 * 1000 }; /* 10ms = 100th sec */
+        int timeout = 30 * 1000 * 1000; /* time out in microseconds! */
+        while (waitpid(pid, &status, WNOHANG) == 0) {
+            nanosleep(&tm, NULL);
+            timeout -= (int)(tm.tv_nsec/1000);
+            /* Back off exponentially, capped just below one second. */
+            tm.tv_nsec *= 2;
+            if (tm.tv_nsec > 999999999)
+                tm.tv_nsec = 999999999;
+            if (timeout <= 0) {
+                char text[32];
+                fprintf(stderr, "Child is taking a long time to die. Killing it.\n");
+                sprintf(text, "kill %d", (int)pid);
+                system(text);
+                break;
+            }
+        }
+    }
+
+    if (status != 0) {
+        fprintf(stderr, "Child status=%d\n", status);
+    }
+
+    /* Put the files back */
+    for (i = 0; i < OPEN_MAX; i++) {
+        if (stashed_map[i] != 0) {
+            dup2(i, stashed_map[i]-1);
+            close(i);
+            stashed_map[i] = 0;
+        }
+    }
+
+    return 0;
+}
+#else
+#include <signal.h>
+
+/* Signal handler used when forking is unavailable. Records the fault in
+ * memento.segv, then either exits or runs Memento_fin(). */
+static void Memento_signal(int sig)
+{
+    (void)sig;
+    memento.segv = 1;
+
+    /* Simply returning leaves the signal unhandled, so the OS launches
+     * whatever JIT debugging system it provides; run Memento_fin() first so
+     * that something useful is reported. If MEMENTO_NOJIT is set, exit
+     * instead to avoid the JIT (and get the usual atexit handling). */
+    if (getenv("MEMENTO_NOJIT") == NULL) {
+        Memento_fin();
+        return;
+    }
+    exit(1);
+}
+
+/* Stub used when fork() is unavailable: reports that squeezing cannot be
+ * done and returns 0, so the current allocation is not failed. */
+static int squeeze(void)
+{
+    fputs("Memento memory squeezing disabled as no fork!\n", stderr);
+    return 0;
+}
+#endif
+
+/* Switch Memento into failing mode at the current event. Does nothing if
+ * already failing. Resets the failure pattern, installs Memento_signal for
+ * SIGSEGV and SIGABRT and triggers a breakpoint. */
+static void Memento_startFailing(void)
+{
+    if (memento.failing)
+        return;
+
+    fprintf(stderr, "Starting to fail...\n");
+    Memento_bt();
+    fflush(stderr);
+
+    memento.failing = 1;
+    memento.failAt = memento.sequence;
+    memento.nextFailAt = memento.sequence+1;
+    memento.pattern = 0;
+    memento.patternBit = 0;
+
+    signal(SIGSEGV, Memento_signal);
+    signal(SIGABRT, Memento_signal);
+    Memento_breakpointLocked();
+}
+
+/* Advance the event sequence number, running the periodic full memory check
+ * when the countdown expires. Returns 1 if the new sequence number equals
+ * memento.breakAt (the caller is expected to break), else 0. */
+static int Memento_event(void)
+{
+    memento.sequence++;
+
+    /* From event paranoidAt onwards, check on every event. */
+    if ((memento.paranoidAt != 0) && (memento.sequence >= memento.paranoidAt)) {
+        memento.paranoia = 1;
+        memento.countdown = 1;
+    }
+
+    memento.countdown--;
+    if (memento.countdown == 0) {
+        Memento_checkAllMemoryLocked();
+        if (memento.paranoia > 0) {
+            memento.countdown = memento.paranoia;
+        } else {
+            /* Negative paranoia: wait -paranoia events, then double the
+             * interval (guarding against overflow). */
+            memento.countdown = -memento.paranoia;
+            if (memento.paranoia > INT_MIN/2)
+                memento.paranoia *= 2;
+        }
+    }
+
+    if (memento.sequence != memento.breakAt)
+        return 0;
+
+    fprintf(stderr, "Breaking at event %d\n", memento.breakAt);
+    return 1;
+}
+
+/* Return the current event sequence number (read without locking). */
+int Memento_sequence(void)
+{
+    int current = memento.sequence;
+
+    return current;
+}
+
+/* Set the event number at which Memento should break. Returns 'event'. */
+int Memento_breakAt(int event)
+{
+    MEMENTO_LOCK();
+    memento.breakAt = event;
+    MEMENTO_UNLOCK();
+
+    return event;
+}
+
+/* Map a user pointer to its block header. Returns NULL if 'ptr' is NULL or
+ * does not lie within any block on the used list. */
+static void *safe_find_block(void *ptr)
+{
+    Memento_BlkHeader *header;
+    findBlkData query;
+    int looks_valid;
+
+    if (ptr == NULL)
+        return NULL;
+
+    header = MEMBLK_FROMBLK(ptr);
+    /* Sometimes wrapping allocators can mean Memento_label
+     * is called with a value within the block, rather than
+     * at the start of the block. If we detect this, find it
+     * the slow way. */
+    VALGRIND_MAKE_MEM_DEFINED(&header->child, sizeof(header->child));
+    VALGRIND_MAKE_MEM_DEFINED(&header->sibling, sizeof(header->sibling));
+    looks_valid = (header->child == MEMENTO_CHILD_MAGIC &&
+                   header->sibling == MEMENTO_SIBLING_MAGIC);
+    VALGRIND_MAKE_MEM_NOACCESS(&header->child, sizeof(header->child));
+    VALGRIND_MAKE_MEM_NOACCESS(&header->sibling, sizeof(header->sibling));
+    if (looks_valid)
+        return header;
+
+    /* Magic values absent: search the used list for a containing block. */
+    query.addr = ptr;
+    query.blk = NULL;
+    query.flags = 0;
+    Memento_appBlocks(&memento.used, Memento_containsAddr, &query);
+    return query.blk;
+}
+
+/* Attach 'label' to the block containing 'ptr', if one can be found. The
+ * label pointer is stored as-is, not copied. Returns 'ptr'. */
+void *Memento_label(void *ptr, const char *label)
+{
+    if (ptr != NULL) {
+        Memento_BlkHeader *header;
+
+        MEMENTO_LOCK();
+        header = safe_find_block(ptr);
+        if (header != NULL) {
+            VALGRIND_MAKE_MEM_DEFINED(&header->label, sizeof(header->label));
+            header->label = label;
+            VALGRIND_MAKE_MEM_NOACCESS(&header->label, sizeof(header->label));
+        }
+        MEMENTO_UNLOCK();
+    }
+    return ptr;
+}
+
+/* Count one event under the lock, breaking if it is the breakAt event. */
+void Memento_tick(void)
+{
+    MEMENTO_LOCK();
+    if (Memento_event()) {
+        Memento_breakpointLocked();
+    }
+    MEMENTO_UNLOCK();
+}
+
+/* Count one event and decide whether the caller should fail it.
+ *
+ * Returns non-zero if the current allocation should be made to fail, else 0.
+ * The in-file callers (Memento_failThisEvent, do_malloc, do_realloc) invoke
+ * this with the Memento lock held.
+ *
+ * Order of decisions: configured squeeze ranges (MEMENTO_SQUEEZES), then
+ * failAt, then squeezeAt, then the failure pattern bits.
+ */
+static int Memento_failThisEventLocked(void)
+{
+ int failThisOne;
+
+ if (Memento_event()) Memento_breakpointLocked();
+
+ /* Squeeze ranges are only consulted outside a squeeze child; squeeze()
+  * zeroes squeezes_num in the child. */
+ if (!memento.squeezing && memento.squeezes_num) {
+ /* Move to next relevant squeeze region if appropriate. */
+ for ( ; memento.squeezes_pos != memento.squeezes_num; memento.squeezes_pos++) {
+ if (memento.sequence < memento.squeezes[memento.squeezes_pos].end)
+ break;
+ }
+
+ /* See whether memento.sequence is within this squeeze region. */
+ if (memento.squeezes_pos < memento.squeezes_num) {
+ int begin = memento.squeezes[memento.squeezes_pos].begin;
+ int end = memento.squeezes[memento.squeezes_pos].end;
+ if (memento.sequence >= begin && memento.sequence < end) {
+ if (1) {
+ fprintf(stderr,
+ "squeezes match memento.sequence=%i: memento.squeezes_pos=%i/%i %i..%i\n",
+ memento.sequence,
+ memento.squeezes_pos,
+ memento.squeezes_num,
+ memento.squeezes[memento.squeezes_pos].begin,
+ memento.squeezes[memento.squeezes_pos].end
+ );
+ }
+ return squeeze();
+ }
+ }
+ }
+
+ /* A failAt of 0 means "not set". */
+ if ((memento.sequence >= memento.failAt) && (memento.failAt != 0))
+ Memento_startFailing();
+ /* squeezeAt only applies when no explicit squeeze ranges are configured. */
+ if ((memento.squeezes_num==0) && (memento.sequence >= memento.squeezeAt) && (memento.squeezeAt != 0))
+ return squeeze();
+
+ if (!memento.failing)
+ return 0;
+ /* Fail this event unless the pattern has the current bit set. */
+ failThisOne = ((memento.patternBit & memento.pattern) == 0);
+ /* If we are failing, and we've reached the end of the pattern and we've
+ * still got bits available in the pattern word, and we haven't already
+ * set a nextPattern, then extend the pattern. */
+ if (memento.failing &&
+ ((~(memento.patternBit-1) & memento.pattern) == 0) &&
+ (memento.patternBit != 0) &&
+ memento.nextPattern == 0)
+ {
+ /* We'll fail this one, and set the 'next' one to pass it. */
+ memento.nextFailAt = memento.failAt;
+ memento.nextPattern = memento.pattern | memento.patternBit;
+ }
+ /* Advance to the next pattern bit (starting from bit 0 if unset). */
+ memento.patternBit = (memento.patternBit ? memento.patternBit << 1 : 1);
+
+ return failThisOne;
+}
+
+/* Public, locking wrapper around Memento_failThisEventLocked(): counts one
+ * event and returns non-zero if it should be made to fail. Initialises
+ * Memento on first use. */
+int Memento_failThisEvent(void)
+{
+    int should_fail;
+
+    if (!memento.inited) {
+        Memento_init();
+    }
+
+    MEMENTO_LOCK();
+    should_fail = Memento_failThisEventLocked();
+    MEMENTO_UNLOCK();
+
+    return should_fail;
+}
+
+/* Core allocation routine shared by the malloc-like entry points.
+ *
+ * s:         number of user bytes requested.
+ * eventType: Memento_EventType_* value, recorded only when built with
+ *            MEMENTO_DETAILS.
+ *
+ * Returns a pointer to the user area, or NULL when: the event is chosen to
+ * fail (errno = ENOMEM), s is 0, the maxMemory limit would be exceeded
+ * (errno = ENOMEM), or the underlying allocator fails (errno not set here).
+ * The in-file callers invoke this with the Memento lock held.
+ */
+static void *do_malloc(size_t s, int eventType)
+{
+ Memento_BlkHeader *memblk;
+ size_t smem = MEMBLK_SIZE(s);
+
+ (void)eventType;
+
+ /* The event is counted (and may be failed) even for zero-sized requests. */
+ if (Memento_failThisEventLocked()) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ if (s == 0)
+ return NULL;
+
+ memento.numMallocs++;
+
+ /* A maxMemory of 0 means no limit. */
+ if (memento.maxMemory != 0 && memento.alloc + s > memento.maxMemory) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ memblk = MEMENTO_UNDERLYING_MALLOC(smem);
+ if (memblk == NULL)
+ return NULL;
+
+ /* Update the running, total and peak byte counts. */
+ memento.alloc += s;
+ memento.totalAlloc += s;
+ if (memento.peakAlloc < memento.alloc)
+ memento.peakAlloc = memento.alloc;
+#ifndef MEMENTO_LEAKONLY
+ /* Fill the user area with a known pattern. */
+ memset(MEMBLK_TOBLK(memblk), MEMENTO_ALLOCFILL, s);
+#endif
+ memblk->rawsize = s;
+ memblk->sequence = memento.sequence;
+ memblk->lastCheckedOK = memblk->sequence;
+ memblk->flags = 0;
+ memblk->label = 0;
+ /* These magic values are what safe_find_block() and checkBlock() use to
+  * recognise a valid header. */
+ memblk->child = MEMENTO_CHILD_MAGIC;
+ memblk->sibling = MEMENTO_SIBLING_MAGIC;
+#ifdef MEMENTO_DETAILS
+ memblk->details = NULL;
+ memblk->details_tail = &memblk->details;
+ Memento_storeDetails(memblk, eventType);
+#endif /* MEMENTO_DETAILS */
+ Memento_addBlockHead(&memento.used, memblk, 0);
+
+ if (memento.leaking > 0)
+ memblk->flags |= Memento_Flag_KnownLeak;
+
+ return MEMBLK_TOBLK(memblk);
+}
+
+/* Tracked equivalent of strdup(): returns a newly allocated copy of 'text'
+ * (which must not be NULL), or NULL if the allocation fails. */
+char *Memento_strdup(const char *text)
+{
+    size_t nbytes = strlen(text) + 1; /* includes the terminating NUL */
+    char *copy;
+
+    if (!memento.inited)
+        Memento_init();
+
+    MEMENTO_LOCK();
+    copy = do_malloc(nbytes, Memento_EventType_strdup);
+    MEMENTO_UNLOCK();
+
+    if (copy == NULL)
+        return NULL;
+
+    memcpy(copy, text, nbytes);
+    return copy;
+}
+
+/* Tracked equivalent of asprintf(): formats into a newly allocated buffer
+ * stored in *ret. Returns the vsnprintf() result of the final formatting, a
+ * negative value if measuring the output fails (*ret untouched), or -1 if
+ * the allocation fails (*ret set to NULL). */
+int Memento_asprintf(char **ret, const char *format, ...)
+{
+    va_list args;
+    int needed;
+    int written;
+
+    if (!memento.inited)
+        Memento_init();
+
+    /* First pass: measure the output. */
+    va_start(args, format);
+    needed = vsnprintf(NULL, 0, format, args);
+    va_end(args);
+    if (needed < 0)
+        return needed;
+
+    MEMENTO_LOCK();
+    *ret = do_malloc(needed+1, Memento_EventType_asprintf);
+    MEMENTO_UNLOCK();
+    if (*ret == NULL)
+        return -1;
+
+    /* Second pass: format into the buffer. */
+    va_start(args, format);
+    written = vsnprintf(*ret, needed + 1, format, args);
+    va_end(args);
+
+    return written;
+}
+
+/* Tracked equivalent of vasprintf(): formats into a newly allocated buffer
+ * stored in *ret. Returns the vsnprintf() result of the final formatting, a
+ * negative value if measuring the output fails (*ret untouched), or -1 if
+ * the allocation fails (*ret set to NULL). */
+int Memento_vasprintf(char **ret, const char *format, va_list ap)
+{
+    int length;
+    va_list ap_again;
+
+    /* 'ap' is consumed by the measuring pass, so keep a copy for the
+     * formatting pass. */
+    va_copy(ap_again, ap);
+
+    if (!memento.inited)
+        Memento_init();
+
+    length = vsnprintf(NULL, 0, format, ap);
+    if (length < 0) {
+        va_end(ap_again);
+        return length;
+    }
+
+    MEMENTO_LOCK();
+    *ret = do_malloc(length+1, Memento_EventType_vasprintf);
+    MEMENTO_UNLOCK();
+    if (*ret == NULL) {
+        va_end(ap_again);
+        return -1;
+    }
+
+    length = vsnprintf(*ret, length + 1, format, ap_again);
+    va_end(ap_again);
+
+    return length;
+}
+
+/* Tracked equivalent of malloc(). Returns NULL on failure or when s is 0. */
+void *Memento_malloc(size_t s)
+{
+    void *user_ptr;
+
+    if (!memento.inited) {
+        Memento_init();
+    }
+
+    MEMENTO_LOCK();
+    user_ptr = do_malloc(s, Memento_EventType_malloc);
+    MEMENTO_UNLOCK();
+
+    return user_ptr;
+}
+
+/* Tracked equivalent of calloc(): allocates n*s zero-filled bytes.
+ *
+ * Returns NULL on failure or when the total size is 0. If n*s does not fit
+ * in a size_t the request is refused with errno = ENOMEM rather than
+ * silently allocating a too-small block; that refusal is not counted as a
+ * Memento event.
+ */
+void *Memento_calloc(size_t n, size_t s)
+{
+    void *block;
+    size_t total;
+
+    if (!memento.inited)
+        Memento_init();
+
+    /* Detect multiplication overflow before computing the byte count. */
+    if (s != 0 && n > (size_t)-1 / s) {
+        errno = ENOMEM;
+        return NULL;
+    }
+    total = n * s;
+
+    MEMENTO_LOCK();
+    block = do_malloc(total, Memento_EventType_calloc);
+    MEMENTO_UNLOCK();
+    if (block)
+        memset(block, 0, total);
+
+    return block;
+}
+
+/* Record a reference-style event against 'blk'. Only does anything when
+ * built with MEMENTO_DETAILS. */
+static void do_reference(Memento_BlkHeader *blk, int event)
+{
+#ifdef MEMENTO_DETAILS
+    Memento_storeDetails(blk, event);
+#else
+    (void)blk;
+    (void)event;
+#endif /* MEMENTO_DETAILS */
+}
+
+/* Check whether the pointer value itself is one of Memento's fill patterns,
+ * which suggests it was read from guard, uninitialised or freed memory.
+ *
+ * Returns 1 (after printing a diagnostic to stderr) if 'blk' equals a fill
+ * pattern, else 0. NULL is accepted and yields 0.
+ */
+int Memento_checkPointerOrNull(void *blk)
+{
+    if (blk == NULL)
+        return 0;
+    if (blk == MEMENTO_PREFILL_PTR)
+        fprintf(stderr, "Prefill value found as pointer - buffer underrun?\n");
+    else if (blk == MEMENTO_POSTFILL_PTR)
+        fprintf(stderr, "Postfill value found as pointer - buffer overrun?\n");
+    else if (blk == MEMENTO_ALLOCFILL_PTR)
+        fprintf(stderr, "Allocfill value found as pointer - use of uninitialised value?\n");
+    else if (blk == MEMENTO_FREEFILL_PTR)
+        /* Name the pattern that actually matched (was "Allocfill"). */
+        fprintf(stderr, "Freefill value found as pointer - use after free?\n");
+    else
+        return 0;
+#ifdef MEMENTO_DETAILS
+    fprintf(stderr, "Current backtrace:\n");
+    Memento_bt();
+    fprintf(stderr, "History:\n");
+    Memento_info(blk);
+#endif
+    return 1;
+}
+
+/* Check the pointer value (via Memento_checkPointerOrNull) and then the
+ * byte it points at against Memento's fill patterns.
+ *
+ * Returns 1 (after printing a diagnostic and triggering a breakpoint) if
+ * the byte matches a fill pattern, else 0. NULL is accepted and yields 0.
+ */
+int Memento_checkBytePointerOrNull(void *blk)
+{
+    unsigned char i;
+    if (blk == NULL)
+        return 0;
+    Memento_checkPointerOrNull(blk);
+
+    i = *(unsigned char *)blk;
+
+    if (i == MEMENTO_PREFILL_UBYTE)
+        fprintf(stderr, "Prefill value found - buffer underrun?\n");
+    else if (i == MEMENTO_POSTFILL_UBYTE)
+        fprintf(stderr, "Postfill value found - buffer overrun?\n");
+    else if (i == MEMENTO_ALLOCFILL_UBYTE)
+        fprintf(stderr, "Allocfill value found - use of uninitialised value?\n");
+    else if (i == MEMENTO_FREEFILL_UBYTE)
+        /* Name the pattern that actually matched (was "Allocfill"). */
+        fprintf(stderr, "Freefill value found - use after free?\n");
+    else
+        return 0;
+#ifdef MEMENTO_DETAILS
+    fprintf(stderr, "Current backtrace:\n");
+    Memento_bt();
+    fprintf(stderr, "History:\n");
+    Memento_info(blk);
+#endif
+    Memento_breakpoint();
+    return 1;
+}
+
+/* Check the pointer value (via Memento_checkPointerOrNull) and then the
+ * unsigned short it points at against Memento's fill patterns.
+ *
+ * Returns 1 (after printing a diagnostic and triggering a breakpoint) if
+ * the value matches a fill pattern, else 0. NULL is accepted and yields 0.
+ */
+int Memento_checkShortPointerOrNull(void *blk)
+{
+    unsigned short i;
+    if (blk == NULL)
+        return 0;
+    Memento_checkPointerOrNull(blk);
+
+    i = *(unsigned short *)blk;
+
+    if (i == MEMENTO_PREFILL_USHORT)
+        fprintf(stderr, "Prefill value found - buffer underrun?\n");
+    else if (i == MEMENTO_POSTFILL_USHORT)
+        fprintf(stderr, "Postfill value found - buffer overrun?\n");
+    else if (i == MEMENTO_ALLOCFILL_USHORT)
+        fprintf(stderr, "Allocfill value found - use of uninitialised value?\n");
+    else if (i == MEMENTO_FREEFILL_USHORT)
+        /* Name the pattern that actually matched (was "Allocfill"). */
+        fprintf(stderr, "Freefill value found - use after free?\n");
+    else
+        return 0;
+#ifdef MEMENTO_DETAILS
+    fprintf(stderr, "Current backtrace:\n");
+    Memento_bt();
+    fprintf(stderr, "History:\n");
+    Memento_info(blk);
+#endif
+    Memento_breakpoint();
+    return 1;
+}
+
+/* Check the pointer value (via Memento_checkPointerOrNull) and then the
+ * unsigned int it points at against Memento's fill patterns.
+ *
+ * Returns 1 (after printing a diagnostic and triggering a breakpoint) if
+ * the value matches a fill pattern, else 0. NULL is accepted and yields 0.
+ */
+int Memento_checkIntPointerOrNull(void *blk)
+{
+    unsigned int i;
+    if (blk == NULL)
+        return 0;
+    Memento_checkPointerOrNull(blk);
+
+    i = *(unsigned int *)blk;
+
+    if (i == MEMENTO_PREFILL_UINT)
+        fprintf(stderr, "Prefill value found - buffer underrun?\n");
+    else if (i == MEMENTO_POSTFILL_UINT)
+        fprintf(stderr, "Postfill value found - buffer overrun?\n");
+    else if (i == MEMENTO_ALLOCFILL_UINT)
+        fprintf(stderr, "Allocfill value found - use of uninitialised value?\n");
+    else if (i == MEMENTO_FREEFILL_UINT)
+        /* Name the pattern that actually matched (was "Allocfill"). */
+        fprintf(stderr, "Freefill value found - use after free?\n");
+    else
+        return 0;
+#ifdef MEMENTO_DETAILS
+    fprintf(stderr, "Current backtrace:\n");
+    Memento_bt();
+    fprintf(stderr, "History:\n");
+    Memento_info(blk);
+#endif
+    Memento_breakpoint();
+    return 1;
+}
+
+/* Record a takeRef event against the block containing 'blk', under the
+ * lock. Returns 'blk'. */
+static void *do_takeRef(void *blk)
+{
+    Memento_BlkHeader *header;
+
+    MEMENTO_LOCK();
+    header = safe_find_block(blk);
+    do_reference(header, Memento_EventType_takeRef);
+    MEMENTO_UNLOCK();
+
+    return blk;
+}
+
+/* Count an event and, for a non-NULL 'blk', check the byte it points at for
+ * fill patterns and record a takeRef. Returns 'blk'. */
+void *Memento_takeByteRef(void *blk)
+{
+    if (!memento.inited)
+        Memento_init();
+
+    if (Memento_event())
+        Memento_breakpoint();
+
+    if (blk != NULL) {
+        (void)Memento_checkBytePointerOrNull(blk);
+        blk = do_takeRef(blk);
+    }
+    return blk;
+}
+
+/* Count an event and, for a non-NULL 'blk', check the short it points at
+ * for fill patterns and record a takeRef. Returns 'blk'. */
+void *Memento_takeShortRef(void *blk)
+{
+    if (!memento.inited)
+        Memento_init();
+
+    if (Memento_event())
+        Memento_breakpoint();
+
+    if (blk != NULL) {
+        (void)Memento_checkShortPointerOrNull(blk);
+        blk = do_takeRef(blk);
+    }
+    return blk;
+}
+
+/* Count an event and, for a non-NULL 'blk', check the int it points at for
+ * fill patterns and record a takeRef. Returns 'blk'. */
+void *Memento_takeIntRef(void *blk)
+{
+    if (!memento.inited)
+        Memento_init();
+
+    if (Memento_event())
+        Memento_breakpoint();
+
+    if (blk != NULL) {
+        (void)Memento_checkIntPointerOrNull(blk);
+        blk = do_takeRef(blk);
+    }
+    return blk;
+}
+
+/* Count an event and, for a non-NULL 'blk', record a takeRef against its
+ * block. Returns 'blk'. */
+void *Memento_takeRef(void *blk)
+{
+    if (!memento.inited)
+        Memento_init();
+
+    if (Memento_event())
+        Memento_breakpoint();
+
+    if (blk != NULL)
+        blk = do_takeRef(blk);
+    return blk;
+}
+
+/* Record a dropRef event against the block containing 'blk', under the
+ * lock. Returns 'blk'. */
+static void *do_dropRef(void *blk)
+{
+    Memento_BlkHeader *header;
+
+    MEMENTO_LOCK();
+    header = safe_find_block(blk);
+    do_reference(header, Memento_EventType_dropRef);
+    MEMENTO_UNLOCK();
+
+    return blk;
+}
+
+/* Count an event and, for a non-NULL 'blk', check the byte it points at for
+ * fill patterns and record a dropRef. Returns 'blk'. */
+void *Memento_dropByteRef(void *blk)
+{
+    if (!memento.inited)
+        Memento_init();
+
+    if (Memento_event())
+        Memento_breakpoint();
+
+    if (blk != NULL) {
+        Memento_checkBytePointerOrNull(blk);
+        blk = do_dropRef(blk);
+    }
+    return blk;
+}
+
+/* Count an event and, for a non-NULL 'blk', check the short it points at
+ * for fill patterns and record a dropRef. Returns 'blk'. */
+void *Memento_dropShortRef(void *blk)
+{
+    if (!memento.inited)
+        Memento_init();
+
+    if (Memento_event())
+        Memento_breakpoint();
+
+    if (blk != NULL) {
+        Memento_checkShortPointerOrNull(blk);
+        blk = do_dropRef(blk);
+    }
+    return blk;
+}
+
+/* Count an event and, for a non-NULL 'blk', check the int it points at for
+ * fill patterns and record a dropRef. Returns 'blk'. */
+void *Memento_dropIntRef(void *blk)
+{
+    if (!memento.inited)
+        Memento_init();
+
+    if (Memento_event())
+        Memento_breakpoint();
+
+    if (blk != NULL) {
+        Memento_checkIntPointerOrNull(blk);
+        blk = do_dropRef(blk);
+    }
+    return blk;
+}
+
+/* Count an event and, for a non-NULL 'blk', record a dropRef against its
+ * block. Returns 'blk'. */
+void *Memento_dropRef(void *blk)
+{
+    if (!memento.inited)
+        Memento_init();
+
+    if (Memento_event())
+        Memento_breakpoint();
+
+    if (blk != NULL)
+        blk = do_dropRef(blk);
+    return blk;
+}
+
+/* Count an event, then record 'adjust' takeRef events (if positive) or
+ * -adjust dropRef events (if negative) against the block containing 'blk'.
+ * Returns 'blk'; a NULL 'blk' only counts the event.
+ *
+ * Like the other ref functions, this initialises Memento on first use so
+ * that the event counter and mutex are valid before they are touched.
+ */
+void *Memento_adjustRef(void *blk, int adjust)
+{
+    if (!memento.inited)
+        Memento_init();
+
+    if (Memento_event()) Memento_breakpoint();
+
+    if (blk == NULL)
+        return NULL;
+
+    while (adjust > 0)
+    {
+        do_takeRef(blk);
+        adjust--;
+    }
+    while (adjust < 0)
+    {
+        do_dropRef(blk);
+        adjust++;
+    }
+
+    return blk;
+}
+
+/* Record a generic reference event against the block containing 'blk'.
+ * Returns 'blk'; NULL is returned immediately, before any initialisation. */
+void *Memento_reference(void *blk)
+{
+    Memento_BlkHeader *header;
+
+    if (blk == NULL)
+        return NULL;
+
+    if (!memento.inited)
+        Memento_init();
+
+    MEMENTO_LOCK();
+    header = safe_find_block(blk);
+    do_reference(header, Memento_EventType_reference);
+    MEMENTO_UNLOCK();
+
+    return blk;
+}
+
+/* Validate a block header supplied by the user, going through
+ * Memento_appBlockUser() (the slow but safe route) instead of trusting the
+ * header contents. 'action' names the operation for diagnostics. Returns 1
+ * if the block is unknown or its guards are corrupt, else 0. Always 0 when
+ * built with MEMENTO_LEAKONLY. */
+static int checkBlockUser(Memento_BlkHeader *memblk, const char *action)
+{
+#ifndef MEMENTO_LEAKONLY
+    BlkCheckData result;
+
+    memset(&result, 0, sizeof(result));
+    Memento_appBlockUser(&memento.used, Memento_Internal_checkAllocedBlock,
+                         &result, memblk);
+
+    if (!result.found) {
+        /* Not a block we know about. */
+        fprintf(stderr, "Attempt to %s block ", action);
+        showBlock(memblk, 32);
+        fprintf(stderr, "\n");
+        Memento_breakpointLocked();
+        return 1;
+    }
+
+    if (result.preCorrupt || result.postCorrupt) {
+        fprintf(stderr, "Block ");
+        showBlock(memblk, ' ');
+        fprintf(stderr, " found to be corrupted on %s!\n", action);
+        if (result.preCorrupt)
+            fprintf(stderr, "Preguard corrupted\n");
+        if (result.postCorrupt)
+            fprintf(stderr, "Postguard corrupted\n");
+        fprintf(stderr, "Block last checked OK at allocation %d. Now %d.\n",
+                memblk->lastCheckedOK, memento.sequence);
+        /* Break only the first time this block is reported. */
+        if (!(memblk->flags & Memento_Flag_Reported)) {
+            memblk->flags |= Memento_Flag_Reported;
+            Memento_breakpointLocked();
+        }
+        return 1;
+    }
+#endif
+    return 0;
+}
+
+/* Validate a block header before an operation named by 'action'. First
+ * checks the header's magic values, then (unless MEMENTO_LEAKONLY) that the
+ * block is on the used list with intact guards. Returns 1 (after reporting)
+ * if any check fails, else 0. */
+static int checkBlock(Memento_BlkHeader *memblk, const char *action)
+{
+#ifndef MEMENTO_LEAKONLY
+    BlkCheckData result;
+#endif
+    int magic_ok = (memblk->child == MEMENTO_CHILD_MAGIC &&
+                    memblk->sibling == MEMENTO_SIBLING_MAGIC);
+
+    if (!magic_ok) {
+        /* Header does not look like one of ours. */
+        fprintf(stderr, "Attempt to %s invalid block ", action);
+        showBlock(memblk, 32);
+        fprintf(stderr, "\n");
+        Memento_breakpointLocked();
+        return 1;
+    }
+
+#ifndef MEMENTO_LEAKONLY
+    memset(&result, 0, sizeof(result));
+    Memento_appBlock(&memento.used, Memento_Internal_checkAllocedBlock,
+                     &result, memblk);
+
+    if (!result.found) {
+        /* Not a block we know about. */
+        fprintf(stderr, "Attempt to %s block ", action);
+        showBlock(memblk, 32);
+        fprintf(stderr, "\n");
+        Memento_breakpointLocked();
+        return 1;
+    }
+
+    if (result.preCorrupt || result.postCorrupt) {
+        fprintf(stderr, "Block ");
+        showBlock(memblk, ' ');
+        fprintf(stderr, " found to be corrupted on %s!\n", action);
+        if (result.preCorrupt)
+            fprintf(stderr, "Preguard corrupted\n");
+        if (result.postCorrupt)
+            fprintf(stderr, "Postguard corrupted\n");
+        fprintf(stderr, "Block last checked OK at allocation %d. Now %d.\n",
+                memblk->lastCheckedOK, memento.sequence);
+        /* Break only the first time this block is reported. */
+        if (!(memblk->flags & Memento_Flag_Reported)) {
+            memblk->flags |= Memento_Flag_Reported;
+            Memento_breakpointLocked();
+        }
+        return 1;
+    }
+#endif
+    return 0;
+}
+
+/* Core free routine shared by Memento_free() and Memento_realloc().
+ *
+ * blk:       user pointer to release; NULL is a no-op (after the event has
+ *            been counted).
+ * eventType: Memento_EventType_* value, recorded only when built with
+ *            MEMENTO_DETAILS.
+ *
+ * If the block fails validation it is NOT freed; the process aborts instead
+ * when memento.abortOnCorruption is set. The in-file callers invoke this
+ * with the Memento lock held.
+ */
+static void do_free(void *blk, int eventType)
+{
+ Memento_BlkHeader *memblk;
+
+ (void)eventType;
+
+ if (Memento_event()) Memento_breakpointLocked();
+
+ if (blk == NULL)
+ return;
+
+ memblk = MEMBLK_FROMBLK(blk);
+ VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(*memblk));
+ if (checkBlock(memblk, "free"))
+ {
+ if (memento.abortOnCorruption) {
+ fprintf(stderr, "*** memblk corrupted, calling abort()\n");
+ abort();
+ }
+ return;
+ }
+
+#ifdef MEMENTO_DETAILS
+ Memento_storeDetails(memblk, eventType);
+#endif
+
+ VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(*memblk));
+ if (memblk->flags & Memento_Flag_BreakOnFree)
+ Memento_breakpointLocked();
+
+ /* Update statistics and unlink the block from the used list. */
+ memento.alloc -= memblk->rawsize;
+ memento.numFrees++;
+
+ Memento_removeBlock(&memento.used, memblk);
+
+ VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(*memblk));
+ /* If Memento_Internal_makeSpace() reports success, keep the block on the
+  * free list filled with MEMENTO_FREEFILL; otherwise release it now. */
+ if (Memento_Internal_makeSpace(MEMBLK_SIZE(memblk->rawsize))) {
+ VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(*memblk));
+ VALGRIND_MAKE_MEM_DEFINED(MEMBLK_TOBLK(memblk),
+ memblk->rawsize + Memento_PostSize);
+#ifndef MEMENTO_LEAKONLY
+ memset(MEMBLK_TOBLK(memblk), MEMENTO_FREEFILL, memblk->rawsize);
+#endif
+ memblk->flags |= Memento_Flag_Freed;
+ Memento_addBlockTail(&memento.free, memblk, 1);
+ } else {
+ free_block(memblk);
+ }
+}
+
+/* Tracked equivalent of free(). Initialises Memento on first use and
+ * performs the free under the lock. */
+void Memento_free(void *blk)
+{
+    if (!memento.inited) {
+        Memento_init();
+    }
+
+    MEMENTO_LOCK();
+    do_free(blk, Memento_EventType_free);
+    MEMENTO_UNLOCK();
+}
+
+/* Core realloc routine for an existing block.
+ *
+ * blk:     user pointer to resize; it is dereferenced via MEMBLK_FROMBLK, and
+ *          Memento_realloc() only passes non-NULL pointers here.
+ * newsize: new user size in bytes (Memento_realloc() handles 0 itself).
+ * type:    Memento_EventType_* value, recorded only with MEMENTO_DETAILS.
+ *
+ * Returns the new user pointer, or NULL when: the event is chosen to fail,
+ * the block fails validation, or the maxMemory limit would be exceeded (all
+ * with errno = ENOMEM); or when the underlying realloc fails (the original
+ * block is then put back on the used list). The in-file caller invokes this
+ * with the Memento lock held.
+ */
+static void *do_realloc(void *blk, size_t newsize, int type)
+{
+ Memento_BlkHeader *memblk, *newmemblk;
+ size_t newsizemem;
+ int flags;
+
+ if (Memento_failThisEventLocked()) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ memblk = MEMBLK_FROMBLK(blk);
+ VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(*memblk));
+ if (checkBlock(memblk, "realloc")) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+#ifdef MEMENTO_DETAILS
+ Memento_storeDetails(memblk, type);
+#endif
+
+ VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(*memblk));
+ if (memblk->flags & Memento_Flag_BreakOnRealloc)
+ Memento_breakpointLocked();
+
+ VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(*memblk));
+ /* A maxMemory of 0 means no limit. */
+ if (memento.maxMemory != 0 && memento.alloc - memblk->rawsize + newsize > memento.maxMemory) {
+ errno = ENOMEM;
+ return NULL;
+ }
+
+ newsizemem = MEMBLK_SIZE(newsize);
+ /* Unlink before reallocating, since the header may move. */
+ Memento_removeBlock(&memento.used, memblk);
+ VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(*memblk));
+ /* Saved before the realloc and written back into the new header below. */
+ flags = memblk->flags;
+ newmemblk = MEMENTO_UNDERLYING_REALLOC(memblk, newsizemem);
+ if (newmemblk == NULL)
+ {
+ /* The old block is still valid; put it back. */
+ Memento_addBlockHead(&memento.used, memblk, 2);
+ return NULL;
+ }
+ memento.numReallocs++;
+ memento.totalAlloc += newsize;
+ /* newmemblk->rawsize still holds the OLD size at this point. */
+ memento.alloc -= newmemblk->rawsize;
+ memento.alloc += newsize;
+ if (memento.peakAlloc < memento.alloc)
+ memento.peakAlloc = memento.alloc;
+ newmemblk->flags = flags;
+#ifndef MEMENTO_LEAKONLY
+ /* When growing, fill the newly added tail with the alloc pattern. */
+ if (newmemblk->rawsize < newsize) {
+ char *newbytes = ((char *)MEMBLK_TOBLK(newmemblk))+newmemblk->rawsize;
+ VALGRIND_MAKE_MEM_DEFINED(newbytes, newsize - newmemblk->rawsize);
+ memset(newbytes, MEMENTO_ALLOCFILL, newsize - newmemblk->rawsize);
+ VALGRIND_MAKE_MEM_UNDEFINED(newbytes, newsize - newmemblk->rawsize);
+ }
+#endif
+ newmemblk->rawsize = newsize;
+#ifndef MEMENTO_LEAKONLY
+ /* Rewrite both guards; the postguard position depends on the new size. */
+ VALGRIND_MAKE_MEM_DEFINED(newmemblk->preblk, Memento_PreSize);
+ memset(newmemblk->preblk, MEMENTO_PREFILL, Memento_PreSize);
+ VALGRIND_MAKE_MEM_UNDEFINED(newmemblk->preblk, Memento_PreSize);
+ VALGRIND_MAKE_MEM_DEFINED(MEMBLK_POSTPTR(newmemblk), Memento_PostSize);
+ memset(MEMBLK_POSTPTR(newmemblk), MEMENTO_POSTFILL, Memento_PostSize);
+ VALGRIND_MAKE_MEM_UNDEFINED(MEMBLK_POSTPTR(newmemblk), Memento_PostSize);
+#endif
+ Memento_addBlockHead(&memento.used, newmemblk, 2);
+ return MEMBLK_TOBLK(newmemblk);
+}
+
+/* Tracked equivalent of realloc().
+ *
+ * A NULL 'blk' behaves as an allocation of 'newsize' bytes; a non-NULL
+ * 'blk' with newsize 0 frees the block and returns NULL. Whenever NULL is
+ * returned from the allocating paths, errno is set to ENOMEM. */
+void *Memento_realloc(void *blk, size_t newsize)
+{
+    void *result;
+
+    if (!memento.inited)
+        Memento_init();
+
+    if (blk != NULL && newsize == 0) {
+        /* Shrinking to nothing is a free. */
+        MEMENTO_LOCK();
+        do_free(blk, Memento_EventType_realloc);
+        MEMENTO_UNLOCK();
+        return NULL;
+    }
+
+    MEMENTO_LOCK();
+    if (blk == NULL)
+        result = do_malloc(newsize, Memento_EventType_realloc);
+    else
+        result = do_realloc(blk, newsize, Memento_EventType_realloc);
+    MEMENTO_UNLOCK();
+
+    if (!result)
+        errno = ENOMEM;
+    return result;
+}
+
+/* Check a single user block for validity and guard corruption. Returns 0
+ * for NULL or a healthy block, non-zero if a problem was reported. */
+int Memento_checkBlock(void *blk)
+{
+    int problem;
+
+    if (blk == NULL)
+        return 0;
+
+    MEMENTO_LOCK();
+    problem = checkBlockUser(MEMBLK_FROMBLK(blk), "check");
+    MEMENTO_UNLOCK();
+
+    return problem;
+}
+
+#ifndef MEMENTO_LEAKONLY
+/* Block-list callback used by Memento_checkAllMemoryLocked() for each block
+ * on the used list. Checks the block's guards and reports any corruption to
+ * stderr; otherwise stamps the block as checked OK at the current sequence.
+ * 'arg' is a BlkCheckData accumulating state across blocks. Always
+ * returns 0. */
+static int Memento_Internal_checkAllAlloced(Memento_BlkHeader *memblk, void *arg)
+{
+ BlkCheckData *data = (BlkCheckData *)arg;
+
+ Memento_Internal_checkAllocedBlock(memblk, data);
+ if (data->preCorrupt || data->postCorrupt) {
+ /* Bit 2 of 'found' records that the section heading has been printed. */
+ if ((data->found & 2) == 0) {
+ fprintf(stderr, "Allocated blocks:\n");
+ data->found |= 2;
+ }
+ fprintf(stderr, " Block ");
+ showBlock(memblk, ' ');
+ if (data->preCorrupt) {
+ fprintf(stderr, " Preguard ");
+ }
+ if (data->postCorrupt) {
+ fprintf(stderr, "%s Postguard ",
+ (data->preCorrupt ? "&" : ""));
+ }
+ fprintf(stderr, "corrupted.\n "
+ "Block last checked OK at allocation %d. Now %d.\n",
+ memblk->lastCheckedOK, memento.sequence);
+ /* Reset the per-block results before the next block is examined. */
+ data->preCorrupt = 0;
+ data->postCorrupt = 0;
+ data->freeCorrupt = 0;
+ /* Break only the first time this block is reported. */
+ if ((memblk->flags & Memento_Flag_Reported) == 0)
+ {
+ memblk->flags |= Memento_Flag_Reported;
+ Memento_breakpointLocked();
+ }
+ }
+ else
+ memblk->lastCheckedOK = memento.sequence;
+ return 0;
+}
+
+/* Block-list callback used by Memento_checkAllMemoryLocked() for each block
+ * on the free list. Checks the guards and the freed contents, reporting any
+ * corruption to stderr; otherwise stamps the block as checked OK at the
+ * current sequence. 'arg' is a BlkCheckData accumulating state across
+ * blocks. Always returns 0. */
+static int Memento_Internal_checkAllFreed(Memento_BlkHeader *memblk, void *arg)
+{
+ BlkCheckData *data = (BlkCheckData *)arg;
+
+ Memento_Internal_checkFreedBlock(memblk, data);
+ if (data->preCorrupt || data->postCorrupt || data->freeCorrupt) {
+ /* Bit 4 of 'found' records that the section heading has been printed. */
+ if ((data->found & 4) == 0) {
+ fprintf(stderr, "Freed blocks:\n");
+ data->found |= 4;
+ }
+ fprintf(stderr, " ");
+ showBlock(memblk, ' ');
+ if (data->freeCorrupt) {
+ /* data->index is used as the offset of the corruption within the
+  * user area. */
+ fprintf(stderr, " index %d (address "FMTP") onwards", (int)data->index,
+ &((char *)MEMBLK_TOBLK(memblk))[data->index]);
+ if (data->preCorrupt) {
+ fprintf(stderr, "+ preguard");
+ }
+ if (data->postCorrupt) {
+ fprintf(stderr, "+ postguard");
+ }
+ } else {
+ if (data->preCorrupt) {
+ fprintf(stderr, " preguard");
+ }
+ if (data->postCorrupt) {
+ fprintf(stderr, "%s Postguard",
+ (data->preCorrupt ? "+" : ""));
+ }
+ }
+ VALGRIND_MAKE_MEM_DEFINED(memblk, sizeof(Memento_BlkHeader));
+ fprintf(stderr, " corrupted.\n"
+ " Block last checked OK at allocation %d. Now %d.\n",
+ memblk->lastCheckedOK, memento.sequence);
+ /* Break only the first time this block is reported. */
+ if ((memblk->flags & Memento_Flag_Reported) == 0)
+ {
+ memblk->flags |= Memento_Flag_Reported;
+ Memento_breakpointLocked();
+ }
+ VALGRIND_MAKE_MEM_NOACCESS(memblk, sizeof(Memento_BlkHeader));
+ /* Reset the per-block results before the next block is examined. */
+ data->preCorrupt = 0;
+ data->postCorrupt = 0;
+ data->freeCorrupt = 0;
+ }
+ else
+ memblk->lastCheckedOK = memento.sequence;
+ return 0;
+}
+#endif /* MEMENTO_LEAKONLY */
+
+/* Check every block on the used and free lists for corruption. Returns the
+ * accumulated 'found' flags from the check (bit 2 set if a corrupt allocated
+ * block was reported, bit 4 if a corrupt freed block was). Always 0 when
+ * built with MEMENTO_LEAKONLY. */
+static int Memento_checkAllMemoryLocked(void)
+{
+#ifdef MEMENTO_LEAKONLY
+    return 0;
+#else
+    BlkCheckData report;
+
+    memset(&report, 0, sizeof(report));
+    Memento_appBlocks(&memento.used, Memento_Internal_checkAllAlloced, &report);
+    Memento_appBlocks(&memento.free, Memento_Internal_checkAllFreed, &report);
+    return report.found;
+#endif
+}
+
+/* Check all tracked memory for corruption, taking the lock.
+ *
+ * Returns 1 (after triggering a breakpoint) if any corrupt allocated or
+ * freed block was reported, else 0. When built with MEMENTO_LEAKONLY no
+ * checking is possible and 0 is returned.
+ */
+int Memento_checkAllMemory(void)
+{
+#ifndef MEMENTO_LEAKONLY
+    int ret;
+
+    MEMENTO_LOCK();
+    ret = Memento_checkAllMemoryLocked();
+    MEMENTO_UNLOCK();
+    /* Bits 2 and 4 flag corrupt allocated and freed blocks respectively. */
+    if (ret & 6) {
+        Memento_breakpoint();
+        return 1;
+    }
+#endif
+    /* Reached in every configuration, so the function always returns a
+     * value (previously it fell off the end under MEMENTO_LEAKONLY). */
+    return 0;
+}
+
+/* Set the paranoia level and restart the check countdown at its magnitude.
+ * Returns 'i'. */
+int Memento_setParanoia(int i)
+{
+    memento.paranoia = i;
+    memento.countdown = (memento.paranoia > 0) ? memento.paranoia
+                                               : -memento.paranoia;
+    return i;
+}
+
+/* Set the event number from which Memento_event() checks memory on every
+ * event (0 disables this). Returns 'i'. */
+int Memento_paranoidAt(int i)
+{
+    memento.paranoidAt = i;
+
+    return i;
+}
+
+/* Return the sequence number stored in the header of user block 'b', or 0
+ * if 'b' is NULL. The header is read directly, without validation. */
+int Memento_getBlockNum(void *b)
+{
+    Memento_BlkHeader *header;
+
+    if (b != NULL) {
+        header = MEMBLK_FROMBLK(b);
+        return (header->sequence);
+    }
+    return 0;
+}
+
+/* Verbose wrapper around Memento_checkAllMemory(): announces the check on
+ * stderr before and after, and returns its result. */
+int Memento_check(void)
+{
+    int corrupted;
+
+    fprintf(stderr, "Checking memory\n");
+    corrupted = Memento_checkAllMemory();
+    fprintf(stderr, "Memory checked!\n");
+
+    return corrupted;
+}
+
+/* Report on stderr which allocated or freed block (if any) contains address
+ * 'a'. Returns the value returned by showBlock() for the matching block, or
+ * 0 if the address is not within any known block. */
+int Memento_find(void *a)
+{
+    findBlkData query;
+    const char *region;
+    int shown = 0;
+
+    MEMENTO_LOCK();
+
+    /* Try the allocated blocks first. */
+    query.addr = a;
+    query.blk = NULL;
+    query.flags = 0;
+    Memento_appBlocks(&memento.used, Memento_containsAddr, &query);
+    if (query.blk != NULL) {
+        region = (query.flags == 1 ? "" : (query.flags == 2 ?
+                  "preguard of " : "postguard of "));
+        fprintf(stderr, "Address "FMTP" is in %sallocated block ",
+                query.addr, region);
+        shown = showBlock(query.blk, ' ');
+        fprintf(stderr, "\n");
+        MEMENTO_UNLOCK();
+        return shown;
+    }
+
+    /* Then the retained freed blocks. */
+    query.blk = NULL;
+    query.flags = 0;
+    Memento_appBlocks(&memento.free, Memento_containsAddr, &query);
+    if (query.blk != NULL) {
+        region = (query.flags == 1 ? "" : (query.flags == 2 ?
+                  "preguard of " : "postguard of "));
+        fprintf(stderr, "Address "FMTP" is in %sfreed block ",
+                query.addr, region);
+        shown = showBlock(query.blk, ' ');
+        fprintf(stderr, "\n");
+    }
+
+    MEMENTO_UNLOCK();
+    return shown;
+}
+
+/* Arrange for a breakpoint when the allocated block containing address 'a'
+ * is freed. Reports on stderr what was (or could not be) arranged. */
+void Memento_breakOnFree(void *a)
+{
+    findBlkData query;
+    const char *region;
+
+    MEMENTO_LOCK();
+
+    query.addr = a;
+    query.blk = NULL;
+    query.flags = 0;
+    Memento_appBlocks(&memento.used, Memento_containsAddr, &query);
+    if (query.blk != NULL) {
+        region = (query.flags == 1 ? "" : (query.flags == 2 ?
+                  "preguard of " : "postguard of "));
+        fprintf(stderr, "Will stop when address "FMTP" (in %sallocated block ",
+                query.addr, region);
+        showBlock(query.blk, ' ');
+        fprintf(stderr, ") is freed\n");
+        VALGRIND_MAKE_MEM_DEFINED(query.blk, sizeof(Memento_BlkHeader));
+        query.blk->flags |= Memento_Flag_BreakOnFree;
+        VALGRIND_MAKE_MEM_NOACCESS(query.blk, sizeof(Memento_BlkHeader));
+        MEMENTO_UNLOCK();
+        return;
+    }
+
+    /* Not allocated: see whether it is in a block that is already freed. */
+    query.blk = NULL;
+    query.flags = 0;
+    Memento_appBlocks(&memento.free, Memento_containsAddr, &query);
+    if (query.blk == NULL) {
+        fprintf(stderr, "Can't stop on free; address "FMTP" is not in a known block.\n", a);
+    } else {
+        region = (query.flags == 1 ? "" : (query.flags == 2 ?
+                  "preguard of " : "postguard of "));
+        fprintf(stderr, "Can't stop on free; address "FMTP" is in %sfreed block ",
+                query.addr, region);
+        showBlock(query.blk, ' ');
+        fprintf(stderr, "\n");
+    }
+    MEMENTO_UNLOCK();
+}
+
+/* Arrange for Memento to stop when the allocated block containing <a> is
+ * freed or realloced.
+ *
+ * If <a> lies in a live block, the block is described on stderr and its
+ * header gets both Memento_Flag_BreakOnFree and
+ * Memento_Flag_BreakOnRealloc. If <a> lies in an already-freed block, or
+ * in no known block at all, a message saying so is printed and nothing is
+ * flagged. The Memento lock is held throughout. */
+void Memento_breakOnRealloc(void *a)
+{
+    findBlkData hit;
+    const char *where;
+
+    MEMENTO_LOCK();
+    hit.addr = a;
+    hit.blk = NULL;
+    hit.flags = 0;
+    Memento_appBlocks(&memento.used, Memento_containsAddr, &hit);
+    if (hit.blk == NULL) {
+        /* Not live: see whether it belongs to a freed block instead. */
+        hit.blk = NULL;
+        hit.flags = 0;
+        Memento_appBlocks(&memento.free, Memento_containsAddr, &hit);
+        if (hit.blk == NULL) {
+            fprintf(stderr, "Can't stop on free/realloc; address "FMTP" is not in a known block.\n", a);
+        } else {
+            if (hit.flags == 1)
+                where = "";
+            else if (hit.flags == 2)
+                where = "preguard of ";
+            else
+                where = "postguard of ";
+            fprintf(stderr, "Can't stop on free/realloc; address "FMTP" is in %sfreed block ",
+                    hit.addr, where);
+            showBlock(hit.blk, ' ');
+            fprintf(stderr, "\n");
+        }
+    } else {
+        if (hit.flags == 1)
+            where = "";
+        else if (hit.flags == 2)
+            where = "preguard of ";
+        else
+            where = "postguard of ";
+        fprintf(stderr, "Will stop when address "FMTP" (in %sallocated block ",
+                hit.addr, where);
+        showBlock(hit.blk, ' ');
+        fprintf(stderr, ") is freed (or realloced)\n");
+        /* The header is opened up to valgrind only for the flag update. */
+        VALGRIND_MAKE_MEM_DEFINED(hit.blk, sizeof(Memento_BlkHeader));
+        hit.blk->flags |= Memento_Flag_BreakOnFree | Memento_Flag_BreakOnRealloc;
+        VALGRIND_MAKE_MEM_NOACCESS(hit.blk, sizeof(Memento_BlkHeader));
+    }
+    MEMENTO_UNLOCK();
+}
+
+/* Store <i> as memento.failAt. If the event counter is already past that
+ * point and memento.failing is non-zero, Memento_startFailing() is called
+ * straight away. Returns <i>. */
+int Memento_failAt(int i)
+{
+    memento.failAt = i;
+    if (memento.failing != 0 && memento.sequence > memento.failAt) {
+        Memento_startFailing();
+    }
+    return i;
+}
+
+/* Store <max> in memento.maxMemory and return it. */
+size_t Memento_setMax(size_t max)
+{
+    size_t limit = max;
+
+    memento.maxMemory = limit;
+    return limit;
+}
+
+/* Bump the nesting counter memento.leaking; pair each call with
+ * Memento_stopLeaking(). */
+void Memento_startLeaking(void)
+{
+    memento.leaking += 1;
+}
+
+/* Undo one Memento_startLeaking() by decrementing memento.leaking. */
+void Memento_stopLeaking(void)
+{
+    memento.leaking -= 1;
+}
+
+/* Return the current value of memento.squeezing. */
+int Memento_squeezing(void)
+{
+    int state = memento.squeezing;
+
+    return state;
+}
+
+#endif /* MEMENTO_CPP_EXTRAS_ONLY */
+
+#ifdef __cplusplus
+/* Dumb overrides for the new and delete operators */
+
+/* Global operator new routed through Memento. Memento is initialised on
+ * first use, a zero-byte request is rounded up to one byte, and the
+ * allocation is made under the Memento lock as a 'new' event. */
+void *operator new(size_t size)
+{
+    void *mem;
+
+    if (!memento.inited)
+        Memento_init();
+
+    MEMENTO_LOCK();
+    mem = do_malloc(size != 0 ? size : 1, Memento_EventType_new);
+    MEMENTO_UNLOCK();
+
+    return mem;
+}
+
+/* Global operator delete routed through Memento. A null pointer is
+ * ignored; anything else is freed under the Memento lock as a 'delete'
+ * event. */
+void operator delete(void *pointer)
+{
+    if (pointer) {
+        MEMENTO_LOCK();
+        do_free(pointer, Memento_EventType_delete);
+        MEMENTO_UNLOCK();
+    }
+}
+
+/* Some C++ systems (apparently) don't provide new[] or delete[]
+ * operators. Provide a way to cope with this */
+#ifndef MEMENTO_CPP_NO_ARRAY_CONSTRUCTORS
+/* Array form of operator new routed through Memento. Memento is
+ * initialised on first use, a zero-byte request is rounded up to one
+ * byte, and the allocation is made under the Memento lock as a
+ * 'newArray' event. */
+void *operator new[](size_t size)
+{
+    void *mem;
+
+    if (!memento.inited)
+        Memento_init();
+
+    MEMENTO_LOCK();
+    mem = do_malloc(size != 0 ? size : 1, Memento_EventType_newArray);
+    MEMENTO_UNLOCK();
+
+    return mem;
+}
+
+/* Array form of operator delete routed through Memento.
+ *
+ * A null pointer is ignored, matching the scalar operator delete in this
+ * file and the C++ rule that deleting a null pointer has no effect; that
+ * way a null delete[] neither takes the lock nor reaches do_free().
+ * Anything else is freed under the Memento lock as a 'deleteArray'
+ * event. */
+void operator delete[](void *pointer)
+{
+    if (!pointer)
+        return;
+
+    MEMENTO_LOCK();
+    do_free(pointer, Memento_EventType_deleteArray);
+    MEMENTO_UNLOCK();
+}
+#endif /* MEMENTO_CPP_NO_ARRAY_CONSTRUCTORS */
+#endif /* __cplusplus */
+
+#else
+
+/* Just in case anyone has left some debugging code in...
+ *
+ * This side of the #else supplies do-nothing versions of the debugging
+ * entry points. Each name is wrapped in parentheses so that a
+ * function-like macro of the same name (memento.h defines such macros
+ * when MEMENTO is not defined) is not expanded here and a real function
+ * gets defined. */
+void (Memento_breakpoint)(void)
+{
+}
+
+/* The checking and configuration stubs below ignore their argument and
+ * always return 0. */
+int (Memento_checkBlock)(void *b)
+{
+ return 0;
+}
+
+int (Memento_checkAllMemory)(void)
+{
+ return 0;
+}
+
+int (Memento_check)(void)
+{
+ return 0;
+}
+
+int (Memento_setParanoia)(int i)
+{
+ return 0;
+}
+
+int (Memento_paranoidAt)(int i)
+{
+ return 0;
+}
+
+int (Memento_breakAt)(int i)
+{
+ return 0;
+}
+
+int (Memento_getBlockNum)(void *i)
+{
+ return 0;
+}
+
+int (Memento_find)(void *a)
+{
+ return 0;
+}
+
+int (Memento_failAt)(int i)
+{
+ return 0;
+}
+
+/* Nothing is tracked in this branch, so there is nothing to flag. */
+void (Memento_breakOnFree)(void *a)
+{
+}
+
+void (Memento_breakOnRealloc)(void *a)
+{
+}
+
+/* The reference-tracking stubs hand the pointer straight back. */
+void *(Memento_takeRef)(void *a)
+{
+ return a;
+}
+
+void *(Memento_dropRef)(void *a)
+{
+ return a;
+}
+
+void *(Memento_adjustRef)(void *a, int adjust)
+{
+ return a;
+}
+
+void *(Memento_reference)(void *a)
+{
+ return a;
+}
+
+#undef Memento_malloc
+#undef Memento_free
+#undef Memento_realloc
+#undef Memento_calloc
+#undef Memento_strdup
+
+/* Pass-through: allocate <size> bytes with the underlying allocator. */
+void *Memento_malloc(size_t size)
+{
+    void *blk = MEMENTO_UNDERLYING_MALLOC(size);
+
+    return blk;
+}
+
+/* Pass-through: release <b> with the underlying allocator. */
+void Memento_free(void *b)
+{
+    void *blk = b;
+
+    MEMENTO_UNDERLYING_FREE(blk);
+}
+
+/* Pass-through: resize <b> to <s> bytes with the underlying allocator. */
+void *Memento_realloc(void *b, size_t s)
+{
+    void *blk = MEMENTO_UNDERLYING_REALLOC(b, s);
+
+    return blk;
+}
+
+/* Pass-through: allocate <n> items of <s> bytes with the underlying
+ * calloc. */
+void *Memento_calloc(size_t n, size_t s)
+{
+    void *blk = MEMENTO_UNDERLYING_CALLOC(n, s);
+
+    return blk;
+}
+
+/* Hand-rolled strdup, because some compilers (early Visual Studio
+ * releases) do not provide one. Returns a copy of <s> obtained from the
+ * underlying malloc, or NULL if that allocation fails. */
+char *Memento_strdup(const char *s)
+{
+    size_t size = strlen(s) + 1; /* +1 so the terminator is copied too */
+    char *copy = MEMENTO_UNDERLYING_MALLOC(size);
+
+    if (copy == NULL)
+        return NULL;
+    memcpy(copy, s, size);
+    return copy;
+}
+
+/* Hand-rolled asprintf built on vsnprintf, for compilers without one.
+ *
+ * The arguments are formatted twice: once with no buffer to learn the
+ * length, then again into a buffer of that length plus one from the
+ * underlying malloc. On success *ret owns the buffer and the second
+ * vsnprintf's result is returned. A negative result from the measuring
+ * pass is returned as-is; an allocation failure returns -1 with *ret
+ * set to NULL. */
+int Memento_asprintf(char **ret, const char *format, ...)
+{
+    va_list args;
+    int needed;
+    int written;
+
+    /* Pass 1: measure only. */
+    va_start(args, format);
+    needed = vsnprintf(NULL, 0, format, args);
+    va_end(args);
+    if (needed < 0)
+        return needed;
+
+    *ret = MEMENTO_UNDERLYING_MALLOC(needed+1);
+    if (*ret == NULL)
+        return -1;
+
+    /* Pass 2: format for real, with a fresh va_list. */
+    va_start(args, format);
+    written = vsnprintf(*ret, needed + 1, format, args);
+    va_end(args);
+
+    return written;
+}
+
+/* Hand-rolled vasprintf built on vsnprintf, for compilers without one.
+ *
+ * <ap> is consumed by the measuring pass, so a copy is taken up front for
+ * the formatting pass. On success *ret owns a buffer from the underlying
+ * malloc and the second vsnprintf's result is returned. A negative result
+ * from the measuring pass is returned as-is; an allocation failure returns
+ * -1 with *ret set to NULL. */
+int Memento_vasprintf(char **ret, const char *format, va_list ap)
+{
+    va_list second_pass;
+    int result;
+
+    va_copy(second_pass, ap);
+
+    result = vsnprintf(NULL, 0, format, ap);
+    if (result >= 0) {
+        *ret = MEMENTO_UNDERLYING_MALLOC(result+1);
+        if (*ret != NULL)
+            result = vsnprintf(*ret, result + 1, format, second_pass);
+        else
+            result = -1;
+    }
+
+    va_end(second_pass);
+    return result;
+}
+
+/* More do-nothing stand-ins for this branch. As with the earlier stubs,
+ * the parenthesised names keep any same-named function-like macro from
+ * memento.h from being expanded in these definitions. */
+void (Memento_listBlocks)(void)
+{
+}
+
+void (Memento_listNewBlocks)(void)
+{
+}
+
+/* Ignores <max>; always returns 0. */
+size_t (Memento_setMax)(size_t max)
+{
+ return 0;
+}
+
+void (Memento_stats)(void)
+{
+}
+
+/* Ignores <label>; returns <ptr> unchanged. */
+void *(Memento_label)(void *ptr, const char *label)
+{
+ return ptr;
+}
+
+void (Memento_info)(void *addr)
+{
+}
+
+void (Memento_listBlockInfo)(void)
+{
+}
+
+void (Memento_startLeaking)(void)
+{
+}
+
+void (Memento_stopLeaking)(void)
+{
+}
+
+/* Always returns 0 in this branch. */
+int (Memento_squeezing)(void)
+{
+ return 0;
+}
+
+#endif
diff --git a/extract/src/memento.h b/extract/src/memento.h
new file mode 100644
index 00000000..2dc1271d
--- /dev/null
+++ b/extract/src/memento.h
@@ -0,0 +1,343 @@
+/* Copyright (C) 2009-2018 Artifex Software, Inc.
+ All Rights Reserved.
+
+ This software is provided AS-IS with no warranty, either express or
+ implied.
+
+ This software is distributed under license and may not be copied, modified
+ or distributed except as expressly authorized under the terms of that
+ license. Refer to licensing information at http://www.artifex.com
+ or contact Artifex Software, Inc., 1305 Grant Avenue - Suite 200,
+ Novato, CA 94945, U.S.A., +1(415)492-9861, for further information.
+*/
+
+/* Memento: A library to aid debugging of memory leaks/heap corruption.
+ *
+ * Usage (with C):
+ * First, build your project with MEMENTO defined, and include this
+ * header file wherever you use malloc, realloc or free.
+ * This header file uses macros to redirect malloc, realloc and free to
+ * Memento_malloc, Memento_realloc and Memento_free.
+ *
+ * Run your program, and all mallocs/frees/reallocs should be redirected
+ * through here. When the program exits, you will get a list of all the
+ * leaked blocks, together with some helpful statistics. You can get the
+ * same list of allocated blocks at any point during program execution by
+ * calling Memento_listBlocks();
+ *
+ * Every call to malloc/free/realloc counts as an 'allocation event'.
+ * On each event Memento increments a counter. Every block is tagged with
+ * the current counter on allocation. Every so often during program
+ * execution, the heap is checked for consistency. By default this happens
+ * after 1024 events, then after 2048 events, then after 4096 events, etc.
+ * This can be changed at runtime by using Memento_setParanoia(int level).
+ * 0 turns off such checking, 1 sets checking to happen on every event,
+ * any positive number n sets checking to happen once every n events,
+ * and any negative number n sets checking to happen after -n events, then
+ * after -2n events etc.
+ *
+ * The default paranoia level is therefore -1024.
+ *
+ * Memento keeps blocks around for a while after they have been freed, and
+ * checks them as part of these heap checks to see if they have been
+ * written to (or are freed twice etc).
+ *
+ * A given heap block can be checked for consistency (its 'pre' and
+ * 'post' guard blocks are checked to see if they have been written to)
+ * by calling Memento_checkBlock(void *blockAddress);
+ *
+ * A check of all the memory can be triggered by calling Memento_check();
+ * (or Memento_checkAllMemory(); if you'd like it to be quieter).
+ *
+ * A good place to breakpoint is Memento_breakpoint, as this will then
+ * trigger your debugger if an error is detected. This is done
+ * automatically for debug windows builds.
+ *
+ * If a block is found to be corrupt, information will be printed to the
+ * console, including the address of the block, the size of the block,
+ * the type of corruption, the number of the block and the event on which
+ * it last passed a check for correctness.
+ *
+ * If you rerun, and call Memento_paranoidAt(int event); with this number
+ * the code will wait until it reaches that event and then start
+ * checking the heap after every allocation event. Assuming it is a
+ * deterministic failure, you should then find out where in your program
+ * the error is occurring (between event x-1 and event x).
+ *
+ * Then you can rerun the program again, and call
+ * Memento_breakAt(int event); and the program will call
+ * Memento_breakpoint() when event x is reached, enabling you to step
+ * through.
+ *
+ * Memento_find(address) will tell you what block (if any) the given
+ * address is in.
+ *
+ * An example:
+ * Suppose we have a gs invocation that crashes with memory corruption.
+ * * Build with -DMEMENTO.
+ * * In your debugger put a breakpoint on Memento_breakpoint.
+ * * Run the program. It will stop in Memento_inited.
+ * * Execute Memento_setParanoia(1); (In VS use Ctrl-Alt-Q). (Note #1)
+ * * Continue execution.
+ * * It will detect the memory corruption on the next allocation event
+ * after it happens, and stop in Memento_breakpoint. The console should
+ * show something like:
+ *
+ * Freed blocks:
+ * 0x172e610(size=288,num=1415) index 256 (0x172e710) onwards corrupted
+ * Block last checked OK at allocation 1457. Now 1458.
+ *
+ * * This means that the block became corrupted between allocation 1457
+ * and 1458 - so if we rerun and stop the program at 1457, we can then
+ * step through, possibly with a data breakpoint at 0x172e710 and see
+ * when it occurs.
+ * * So restart the program from the beginning. When we stop after
+ * initialisation execute Memento_breakAt(1457); (and maybe
+ * Memento_setParanoia(1), or Memento_paranoidAt(1457))
+ * * Continue execution until we hit Memento_breakpoint.
+ * * Now you can step through and watch the memory corruption happen.
+ *
+ * Note #1: Using Memento_setParanoia(1) can cause your program to run
+ * very slowly. You may instead choose to use Memento_setParanoia(100)
+ * (or some other figure). This will only exhaustively check memory on
+ * every 100th allocation event. This trades speed for the size of the
+ * average allocation event range in which detection of memory corruption
+ * occurs. You may (for example) choose to run once checking every 100
+ * allocations and discover that the corruption happens between events
+ * X and X+100. You can then rerun using Memento_paranoidAt(X), and
+ * it'll only start exhaustively checking when it reaches X.
+ *
+ * More than one memory allocator?
+ *
+ * If you have more than one memory allocator in the system (like for
+ * instance the ghostscript chunk allocator, that builds on top of the
+ * standard malloc and returns chunks itself), then there are some things
+ * to note:
+ *
+ * * If the secondary allocator gets its underlying blocks from calling
+ * malloc, then those will be checked by Memento, but 'subblocks' that
+ * are returned to the secondary allocator will not. There is currently
+ * no way to fix this other than trying to bypass the secondary
+ * allocator. One way I have found to do this with the chunk allocator
+ * is to tweak its idea of a 'large block' so that it puts every
+ * allocation in its own chunk. Clearly this negates the point of having
+ * a secondary allocator, and is therefore not recommended for general
+ * use.
+ *
+ * * Again, if the secondary allocator gets its underlying blocks from
+ * calling malloc (and hence Memento) leak detection should still work
+ * (but whole blocks will be detected rather than subblocks).
+ *
+ * * If on every allocation attempt the secondary allocator calls into
+ * Memento_failThisEvent(), and fails the allocation if it returns true
+ * then more useful features can be used; firstly memory squeezing will
+ * work, and secondly, Memento will have a "finer grained" paranoia
+ * available to it.
+ *
+ * Usage with C++:
+ *
+ * Memento has some experimental code in it to trap new/delete (and
+ * new[]/delete[] if required) calls.
+ *
+ * In order for this to work, either:
+ *
+ * 1) Build memento.c with the c++ compiler.
+ *
+ * or
+ *
+ * 2) Build memento.c as normal with the C compiler, then from any
+ * one of your .cpp files, do:
+ *
+ * #define MEMENTO_CPP_EXTRAS_ONLY
+ * #include "memento.c"
+ *
+ * In the case where MEMENTO is not defined, this will not do anything.
+ *
+ * Both Windows and GCC provide separate new[] and delete[] operators
+ * for arrays. Apparently some systems do not. If this is the case for
+ * your system, define MEMENTO_CPP_NO_ARRAY_CONSTRUCTORS.
+ *
+ * "libbacktrace.so failed to load"
+ *
+ * In order to give nice backtraces on unix, Memento will try to use
+ * a libbacktrace dynamic library. If it can't find it, you'll see
+ * that warning, and your backtraces won't include file/line information.
+ *
+ * To fix this you'll need to build your own libbacktrace. Don't worry
+ * it's really easy:
+ * git clone git://github.com/ianlancetaylor/libbacktrace
+ * cd libbacktrace
+ * ./configure
+ * make
+ *
+ * This leaves the build .so as .libs/libbacktrace.so
+ *
+ * Memento will look for this on LD_LIBRARY_PATH, or in /opt/lib/,
+ * or in /lib/, or in /usr/lib/, or in /usr/local/lib/. I recommend
+ * using /opt/lib/ as this won't conflict with anything that you
+ * get via a package manager like apt.
+ *
+ * sudo mkdir /opt
+ * sudo mkdir /opt/lib
+ * sudo cp .libs/libbacktrace.so /opt/lib/
+ */
+
+#ifndef MEMENTO_H
+
+#include <stdlib.h>
+#include <stdarg.h>
+
+#define MEMENTO_H
+
+#ifndef MEMENTO_UNDERLYING_MALLOC
+#define MEMENTO_UNDERLYING_MALLOC malloc
+#endif
+#ifndef MEMENTO_UNDERLYING_FREE
+#define MEMENTO_UNDERLYING_FREE free
+#endif
+#ifndef MEMENTO_UNDERLYING_REALLOC
+#define MEMENTO_UNDERLYING_REALLOC realloc
+#endif
+#ifndef MEMENTO_UNDERLYING_CALLOC
+#define MEMENTO_UNDERLYING_CALLOC calloc
+#endif
+
+#ifndef MEMENTO_MAXALIGN
+#define MEMENTO_MAXALIGN (sizeof(int))
+#endif
+
+#define MEMENTO_PREFILL 0xa6
+#define MEMENTO_POSTFILL 0xa7
+#define MEMENTO_ALLOCFILL 0xa8
+#define MEMENTO_FREEFILL 0xa9
+
+#define MEMENTO_FREELIST_MAX 0x2000000
+
+int Memento_checkBlock(void *);
+int Memento_checkAllMemory(void);
+int Memento_check(void);
+
+int Memento_setParanoia(int);
+int Memento_paranoidAt(int);
+int Memento_breakAt(int);
+void Memento_breakOnFree(void *a);
+void Memento_breakOnRealloc(void *a);
+int Memento_getBlockNum(void *);
+int Memento_find(void *a);
+void Memento_breakpoint(void);
+int Memento_failAt(int);
+int Memento_failThisEvent(void);
+void Memento_listBlocks(void);
+void Memento_listNewBlocks(void);
+size_t Memento_setMax(size_t);
+void Memento_stats(void);
+void *Memento_label(void *, const char *);
+void Memento_tick(void);
+
+void *Memento_malloc(size_t s);
+void *Memento_realloc(void *, size_t s);
+void Memento_free(void *);
+void *Memento_calloc(size_t, size_t);
+char *Memento_strdup(const char*);
+int Memento_asprintf(char **ret, const char *format, ...);
+int Memento_vasprintf(char **ret, const char *format, va_list ap);
+
+void Memento_info(void *addr);
+void Memento_listBlockInfo(void);
+void *Memento_takeByteRef(void *blk);
+void *Memento_dropByteRef(void *blk);
+void *Memento_takeShortRef(void *blk);
+void *Memento_dropShortRef(void *blk);
+void *Memento_takeIntRef(void *blk);
+void *Memento_dropIntRef(void *blk);
+void *Memento_takeRef(void *blk);
+void *Memento_dropRef(void *blk);
+void *Memento_adjustRef(void *blk, int adjust);
+void *Memento_reference(void *blk);
+
+int Memento_checkPointerOrNull(void *blk);
+int Memento_checkBytePointerOrNull(void *blk);
+int Memento_checkShortPointerOrNull(void *blk);
+int Memento_checkIntPointerOrNull(void *blk);
+
+void Memento_startLeaking(void);
+void Memento_stopLeaking(void);
+
+/* Returns number of allocation events so far. */
+int Memento_sequence(void);
+
+/* Returns non-zero if our process was forked by Memento squeeze. */
+int Memento_squeezing(void);
+
+void Memento_fin(void);
+
+void Memento_bt(void);
+
+#ifdef MEMENTO
+
+#ifndef COMPILING_MEMENTO_C
+#define malloc Memento_malloc
+#define free Memento_free
+#define realloc Memento_realloc
+#define calloc Memento_calloc
+#define strdup Memento_strdup
+#define asprintf Memento_asprintf
+#define vasprintf Memento_vasprintf
+#endif
+
+#else
+
+#define Memento_malloc MEMENTO_UNDERLYING_MALLOC
+#define Memento_free MEMENTO_UNDERLYING_FREE
+#define Memento_realloc MEMENTO_UNDERLYING_REALLOC
+#define Memento_calloc MEMENTO_UNDERLYING_CALLOC
+#define Memento_strdup strdup
+#define Memento_asprintf asprintf
+#define Memento_vasprintf vasprintf
+
+#define Memento_checkBlock(A) 0
+#define Memento_checkAllMemory() 0
+#define Memento_check() 0
+#define Memento_setParanoia(A) 0
+#define Memento_paranoidAt(A) 0
+#define Memento_breakAt(A) 0
+#define Memento_breakOnFree(A) 0
+#define Memento_breakOnRealloc(A) 0
+#define Memento_getBlockNum(A) 0
+#define Memento_find(A) 0
+#define Memento_breakpoint() do {} while (0)
+#define Memento_failAt(A) 0
+#define Memento_failThisEvent() 0
+#define Memento_listBlocks() do {} while (0)
+#define Memento_listNewBlocks() do {} while (0)
+#define Memento_setMax(A) 0
+#define Memento_stats() do {} while (0)
+#define Memento_label(A,B) (A)
+#define Memento_info(A) do {} while (0)
+#define Memento_listBlockInfo() do {} while (0)
+#define Memento_takeByteRef(A) (A)
+#define Memento_dropByteRef(A) (A)
+#define Memento_takeShortRef(A) (A)
+#define Memento_dropShortRef(A) (A)
+#define Memento_takeIntRef(A) (A)
+#define Memento_dropIntRef(A) (A)
+#define Memento_takeRef(A) (A)
+#define Memento_dropRef(A) (A)
+#define Memento_adjustRef(A,V) (A)
+#define Memento_reference(A) (A)
+#define Memento_checkPointerOrNull(A) 0
+#define Memento_checkBytePointerOrNull(A) 0
+#define Memento_checkShortPointerOrNull(A) 0
+#define Memento_checkIntPointerOrNull(A) 0
+
+#define Memento_tick() do {} while (0)
+#define Memento_startLeaking() do {} while (0)
+#define Memento_stopLeaking() do {} while (0)
+#define Memento_fin() do {} while (0)
+#define Memento_bt() do {} while (0)
+#define Memento_sequence() (0)
+#define Memento_squeezing() (0)
+
+#endif /* MEMENTO */
+
+#endif /* MEMENTO_H */
diff --git a/extract/src/memento.py b/extract/src/memento.py
new file mode 100755
index 00000000..987cd4fd
--- /dev/null
+++ b/extract/src/memento.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+
+'''
+Post-processor for Memento.
+
+Args:
+    -h
+        Show this help.
+    -o <path>
+        Also write every input line, unmodified, to <path>.
+    -q <quiet>
+        Controls how often we output 'Memory squeezing @ ...' lines. E.g. '-q
+        10' outputs for multiples of 10.
+'''
+
+import os
+import re
+import sys
+
+
+def main():
+    '''
+    Filters Memento squeeze output read from stdin.
+
+    'Memory squeezing @ <N>' start lines are echoed to stdout when <N> is a
+    multiple of the -q value. Lines seen between a start line and the
+    matching 'Memory squeezing @ <N> complete' line are buffered, and are
+    printed as a failure report if any of them starts with 'SEGV at:' or
+    'Allocated blocks'.
+    '''
+    quiet = 1
+    out_raw = None
+    args = iter(sys.argv[1:])
+    try:
+        while 1:
+            try:
+                arg = next(args)
+            except StopIteration:
+                break
+            if arg == '-h':
+                # Help only: return rather than falling through and blocking
+                # on stdin.
+                print(__doc__)
+                return
+            elif arg == '-o':
+                out_raw = open(next(args), 'w')
+            elif arg == '-q':
+                quiet = int(next(args))
+            else:
+                raise Exception(f'unrecognised arg: {arg}')
+
+        openbsd = os.uname()[0] == 'OpenBSD'
+        n = None
+        segv = 0
+        leaks = 0
+        lines = []
+        for line in sys.stdin:
+            if out_raw:
+                out_raw.write(line)
+            m = re.match(r'^Memory squeezing @ ([0-9]+)( complete)?', line)
+            if m:
+                if not m.group(2):
+                    # Start of squeeze.
+
+                    if not openbsd:
+                        # Looks like memento's forked processes might terminate
+                        # before they get to output the 'Memory squeezing @ <N>
+                        # complete' line.
+                        #
+                        assert n is None, f'n={n} line={line!r}'
+
+                    n = int(m.group(1))
+                    if n % quiet == 0:
+                        sys.stdout.write(line)
+                        sys.stdout.flush()
+                else:
+                    # End of squeeze.
+                    assert n == int(m.group(1))
+                    # Output info about any failure:
+                    if segv or leaks:
+                        print(f'Failure at squeeze {n}: segv={segv} leaks={leaks}:')
+                        for text in lines:
+                            if text.endswith('\n'):
+                                text = text[:-1]
+                            print(f'    {text}')
+                    lines = []
+                    segv = 0
+                    leaks = 0
+                    n = None
+            else:
+                if n is not None:
+                    lines.append(line)
+                if line.startswith('SEGV at:'):
+                    segv = 1
+                if line.startswith('Allocated blocks'):
+                    leaks = 1
+    finally:
+        # Make sure the raw copy is flushed and closed on every exit path.
+        if out_raw:
+            out_raw.close()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/extract/src/misc-test.c b/extract/src/misc-test.c
new file mode 100644
index 00000000..58b098ff
--- /dev/null
+++ b/extract/src/misc-test.c
@@ -0,0 +1,86 @@
+#include "memento.h"
+#include "xml.h"
+
+#include <errno.h>
+#include <stdio.h>
+
+
+static int s_num_fails = 0;
+
+/* Prints one result line for a conversion test and counts failures.
+
+When <errno_expected> is non-zero the call must have returned -1 with that
+errno; otherwise it must have returned 0 and <values_equal> must be true.
+Each failure adds one to s_num_fails. */
+static void s_check(
+        int values_equal,
+        const char* text,
+        int ret,
+        const char* value_s,
+        int errno_,
+        const char* value_expected_s,
+        int errno_expected
+        )
+{
+    int ok = errno_expected
+            ? (ret == -1 && errno_ == errno_expected)
+            : (ret == 0 && values_equal);
+
+    fputs(ok ? " ok: " : " fail:", stdout);
+    printf(" text=%16s", text);
+    if (errno_expected) {
+        printf(" errno_expected=%6i", errno_expected);
+    }
+    else {
+        printf(" value_expected=%6s", value_expected_s);
+    }
+    printf(". result: ret=%2i value=%6s errno=%3i", ret, value_s, errno_);
+    printf(".\n");
+    if (!ok) {
+        s_num_fails += 1;
+    }
+}
+
+/* Runs extract_xml_str_to_int() on <text> and reports the outcome through
+s_check().
+
+<value> starts at 0 so that nothing indeterminate is formatted or compared
+when the conversion fails and leaves it untouched. errno is cleared before
+the call and captured straight after it, because the snprintf() calls that
+follow are allowed to change it. */
+static void s_check_int(const char* text, int value_expected, int expected_errno)
+{
+    int value = 0;
+    int ret;
+    int errno_actual;
+    char value_s[32];
+    char value_expected_s[32];
+
+    errno = 0;
+    ret = extract_xml_str_to_int(text, &value);
+    errno_actual = errno;
+
+    snprintf(value_s, sizeof(value_s), "%i", value);
+    snprintf(value_expected_s, sizeof(value_expected_s), "%i", value_expected);
+    s_check(value == value_expected, text, ret, value_s, errno_actual, value_expected_s, expected_errno);
+}
+
+/* Runs extract_xml_str_to_uint() on <text> and reports the outcome through
+s_check().
+
+<value> starts at 0 so that nothing indeterminate is formatted or compared
+when the conversion fails and leaves it untouched. errno is cleared before
+the call and captured straight after it, because the snprintf() calls that
+follow are allowed to change it. The expected-value column is formatted
+from <expected_value>, not from the parsed value. */
+static void s_check_uint(const char* text, unsigned expected_value, int expected_errno)
+{
+    unsigned value = 0;
+    int ret;
+    int errno_actual;
+    char value_s[32];
+    char value_expected_s[32];
+
+    errno = 0;
+    ret = extract_xml_str_to_uint(text, &value);
+    errno_actual = errno;
+
+    snprintf(value_s, sizeof(value_s), "%u", value);
+    snprintf(value_expected_s, sizeof(value_expected_s), "%u", expected_value);
+    s_check(value == expected_value, text, ret, value_s, errno_actual, value_expected_s, expected_errno);
+}
+
+/* Exercises the signed and unsigned string-to-integer converters and exits
+with 1 if any check failed, 0 otherwise. */
+int main(void)
+{
+    int failed;
+
+    printf("testing extract_xml_str_to_int():\n");
+    s_check_int("2", 2, 0);
+    s_check_int("-20", -20, 0);
+    s_check_int("-20b", 0, EINVAL);
+    s_check_int("123456789123", 0, ERANGE);
+
+    printf("testing extract_xml_str_to_uint():\n");
+    s_check_uint("2", 2, 0);
+    s_check_uint("-20", 0, ERANGE);
+    s_check_uint("-20b", 0, EINVAL);
+    s_check_uint("123456789123", 0, ERANGE);
+
+    printf("s_num_fails=%i\n", s_num_fails);
+
+    failed = (s_num_fails != 0);
+    printf("%s\n", failed ? "Failed" : "Succeeded");
+    return failed;
+}
diff --git a/extract/src/outf.c b/extract/src/outf.c
new file mode 100644
index 00000000..95575c16
--- /dev/null
+++ b/extract/src/outf.c
@@ -0,0 +1,42 @@
+#include "memento.h"
+#include "outf.h"
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+static int s_verbose = 0;
+
+/* Stores <verbose> as the threshold that (outf)() compares levels against. */
+void outf_verbose_set(int verbose)
+{
+    int threshold = verbose;
+
+    s_verbose = threshold;
+}
+
+/* Writes a printf-style message to stderr unless <level> is above the
+current verbose setting.
+
+When <ln> is non-zero the message is prefixed with "<file>:<line>:<fn>: "
+and a newline is appended if <format> does not already end with one. */
+void (outf)(
+        int level,
+        const char* file,
+        int line,
+        const char* fn,
+        int ln,
+        const char* format,
+        ...
+        )
+{
+    va_list args;
+
+    if (level <= s_verbose) {
+        if (ln) {
+            fprintf(stderr, "%s:%i:%s: ", file, line, fn);
+        }
+
+        va_start(args, format);
+        vfprintf(stderr, format, args);
+        va_end(args);
+
+        if (ln) {
+            size_t n = strlen(format);
+            int ends_with_newline = (n != 0 && format[n-1] == '\n');
+            if (!ends_with_newline) {
+                fprintf(stderr, "\n");
+            }
+        }
+    }
+}
diff --git a/extract/src/outf.h b/extract/src/outf.h
new file mode 100644
index 00000000..a2b6c078
--- /dev/null
+++ b/extract/src/outf.h
@@ -0,0 +1,32 @@
+#ifndef ARTIFEX_EXTRACT_OUTF_H
+#define ARTIFEX_EXTRACT_OUTF_H
+
+/* Only for internal use by extract code. */
+
+void (outf)(
+ int level,
+ const char* file, int line,
+ const char* fn,
+ int ln,
+ const char* format,
+ ...
+ );
+/* Outputs text if <level> is less than or equal to verbose value set by
+outf_verbose_set(). */
+
+/* Simple printf-style debug output. outf() logs at level 1 and outf0() at
+level 0; both pass the call site's file, line and function, plus ln=1, to
+(outf)(). */
+
+#define outf(format, ...) \
+    (outf)(1, __FILE__, __LINE__, __FUNCTION__, 1 /*ln*/, format, ##__VA_ARGS__)
+
+#define outf0(format, ...) \
+    (outf)(0, __FILE__, __LINE__, __FUNCTION__, 1 /*ln*/, format, ##__VA_ARGS__)
+
+/* Expands to nothing, so the call and its arguments are compiled out. */
+#define outfx(format, ...)
+
+void outf_verbose_set(int verbose);
+/* Set verbose value. Higher values are more verbose. Initial value is 0. */
+
+#endif
diff --git a/extract/src/template.docx b/extract/src/template.docx
new file mode 100644
index 00000000..8ad94155
--- /dev/null
+++ b/extract/src/template.docx
Binary files differ
diff --git a/extract/src/xml.c b/extract/src/xml.c
new file mode 100644
index 00000000..8dab511b
--- /dev/null
+++ b/extract/src/xml.c
@@ -0,0 +1,505 @@
+#include "../include/extract_alloc.h"
+
+#include "mem.h"
+#include "memento.h"
+#include "outf.h"
+#include "xml.h"
+
+#include <assert.h>
+#include <errno.h>
+#include <float.h>
+#include <limits.h>
+
+#ifdef _MSC_VER
+ #include "compat_stdint.h"
+ #include "compat_strtoll.h"
+#else
+ #include <stdint.h>
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+
+
+/* These str_*() functions realloc buffer as required. All return 0 or -1 with
+errno set. */
+
+/* Appends the first <s_len> chars of <s> to the string *p, growing the
+buffer with extract_realloc2(). *p may be NULL, which is treated as an empty
+string. The result is always zero-terminated. */
+static int str_catl(extract_alloc_t* alloc, char** p, const char* s, int s_len)
+{
+    size_t old_len = 0;
+    size_t new_len;
+
+    if (*p) {
+        old_len = strlen(*p);
+    }
+    new_len = old_len + s_len;
+
+    if (extract_realloc2(alloc, p, old_len + 1, new_len + 1)) {
+        return -1;
+    }
+    memcpy(*p + old_len, s, s_len);
+    (*p)[new_len] = 0;
+    return 0;
+}
+
+/* Appends the single char <c> to the string *p. */
+static int str_catc(extract_alloc_t* alloc, char** p, char c)
+{
+    char one = c;
+
+    return str_catl(alloc, p, &one, 1);
+}
+
+/* Unused but useful to keep code here. */
+#if 0
+/* Appends a string. */
+static int str_cat(extract_alloc_t* alloc, char** p, const char* s)
+{
+ return str_catl(alloc, p, s, strlen(s));
+}
+#endif
+
+/* Returns the value of the first attribute of <tag> whose name equals
+<name>. If there is none, logs a message via outf() and returns NULL. */
+char* extract_xml_tag_attributes_find(extract_xml_tag_t* tag, const char* name)
+{
+    int n = 0;
+
+    while (n < tag->attributes_num) {
+        if (strcmp(tag->attributes[n].name, name) == 0) {
+            return tag->attributes[n].value;
+        }
+        n += 1;
+    }
+    outf("Failed to find attribute '%s'",name);
+    return NULL;
+}
+
+/* Looks up attribute <name> in <tag> and converts it to a float in *o_out.
+Returns 0 on success. Returns -1 with errno=ESRCH if the attribute is
+missing, or -1 if extract_xml_str_to_float() rejects the text. */
+int extract_xml_tag_attributes_find_float(
+        extract_xml_tag_t* tag,
+        const char* name,
+        float* o_out
+        )
+{
+    const char* text = extract_xml_tag_attributes_find(tag, name);
+
+    if (text) {
+        return extract_xml_str_to_float(text, o_out) ? -1 : 0;
+    }
+    errno = ESRCH;
+    return -1;
+}
+
+/* Looks up attribute <name> in <tag> and converts it to a double in *o_out.
+Returns 0 on success. Returns -1 with errno=ESRCH if the attribute is
+missing, or -1 if extract_xml_str_to_double() rejects the text. */
+int extract_xml_tag_attributes_find_double(
+        extract_xml_tag_t* tag,
+        const char* name,
+        double* o_out
+        )
+{
+    const char* text = extract_xml_tag_attributes_find(tag, name);
+
+    if (text) {
+        return extract_xml_str_to_double(text, o_out) ? -1 : 0;
+    }
+    errno = ESRCH;
+    return -1;
+}
+
+/* Looks up attribute <name> in <tag> and converts it to an int in *o_out.
+A missing attribute is passed on as NULL text, which
+extract_xml_str_to_int() reports itself. */
+int extract_xml_tag_attributes_find_int(
+        extract_xml_tag_t* tag,
+        const char* name,
+        int* o_out
+        )
+{
+    return extract_xml_str_to_int(
+            extract_xml_tag_attributes_find(tag, name),
+            o_out
+            );
+}
+
+/* Looks up attribute <name> in <tag> and converts it to an unsigned in
+*o_out. A missing attribute is passed on as NULL text, which
+extract_xml_str_to_uint() reports itself. */
+int extract_xml_tag_attributes_find_uint(
+        extract_xml_tag_t* tag,
+        const char* name,
+        unsigned* o_out
+        )
+{
+    return extract_xml_str_to_uint(
+            extract_xml_tag_attributes_find(tag, name),
+            o_out
+            );
+}
+
+/* Looks up attribute <name> in <tag> and converts it to a size_t in *o_out.
+A missing attribute is passed on as NULL text, which
+extract_xml_str_to_size() reports itself. */
+int extract_xml_tag_attributes_find_size(
+        extract_xml_tag_t* tag,
+        const char* name,
+        size_t* o_out
+        )
+{
+    return extract_xml_str_to_size(
+            extract_xml_tag_attributes_find(tag, name),
+            o_out
+            );
+}
+
+/* Converts decimal <text> to a long long in *o_out.
+
+Returns 0 on success. Otherwise returns -1 with errno set: ESRCH if <text>
+is NULL, EINVAL if it is empty or has characters left over after the number,
+or whatever strtoll() set (e.g. ERANGE). *o_out is only written on success. */
+int extract_xml_str_to_llint(const char* text, long long* o_out)
+{
+    char* end = NULL;
+    long long parsed = 0;
+    int err = 0;
+
+    if (!text) {
+        err = ESRCH;
+    }
+    else if (text[0] == 0) {
+        err = EINVAL;
+    }
+    else {
+        errno = 0;
+        parsed = strtoll(text, &end, 10 /*base*/);
+        if (errno) {
+            err = errno;
+        }
+        else if (*end) {
+            err = EINVAL;
+        }
+    }
+
+    if (err) {
+        errno = err;
+        return -1;
+    }
+    *o_out = parsed;
+    return 0;
+}
+
+/* Converts decimal <text> to an unsigned long long in *o_out.
+
+Returns 0 on success. Otherwise returns -1 with errno set: ESRCH if <text>
+is NULL, EINVAL if it is empty or has characters left over after the number,
+ERANGE if the number is negative, or whatever strtoull() set (e.g. ERANGE on
+overflow). *o_out is only written on success.
+
+strtoull() accepts a leading '-' and returns the negated value converted to
+unsigned, so "-20" would otherwise come back as a huge positive number. The
+sign is checked after the EINVAL test, so malformed text such as "-20b" is
+still reported as EINVAL. "-0" is accepted as 0. */
+int extract_xml_str_to_ullint(const char* text, unsigned long long* o_out)
+{
+    char* endptr;
+    unsigned long long x;
+    const char* first;
+    if (!text) {
+        errno = ESRCH;
+        return -1;
+    }
+    if (text[0] == 0) {
+        errno = EINVAL;
+        return -1;
+    }
+    errno = 0;
+    x = strtoull(text, &endptr, 10 /*base*/);
+    if (errno) {
+        return -1;
+    }
+    if (*endptr) {
+        errno = EINVAL;
+        return -1;
+    }
+    /* Skip the same leading white space strtoull() skips to find the sign. */
+    first = text + strspn(text, " \t\n\v\f\r");
+    if (*first == '-' && x != 0) {
+        errno = ERANGE;
+        return -1;
+    }
+    *o_out = x;
+    return 0;
+}
+
+/* Converts decimal <text> to an int in *o_out via
+extract_xml_str_to_llint(). Returns 0 on success; -1 with errno=ERANGE if
+the value does not fit in an int; -1 with the helper's errno if parsing
+fails. */
+int extract_xml_str_to_int(const char* text, int* o_out)
+{
+    long long wide;
+
+    if (extract_xml_str_to_llint(text, &wide) != 0) {
+        return -1;
+    }
+    if (wide < INT_MIN || wide > INT_MAX) {
+        errno = ERANGE;
+        return -1;
+    }
+    *o_out = (int) wide;
+    return 0;
+}
+
+/* Converts decimal <text> to an unsigned in *o_out via
+extract_xml_str_to_ullint(). Returns 0 on success; -1 with errno=ERANGE if
+the value exceeds UINT_MAX; -1 with the helper's errno if parsing fails. */
+int extract_xml_str_to_uint(const char* text, unsigned* o_out)
+{
+    unsigned long long wide;
+
+    if (extract_xml_str_to_ullint(text, &wide) != 0) {
+        return -1;
+    }
+    if (wide > UINT_MAX) {
+        errno = ERANGE;
+        return -1;
+    }
+    *o_out = (unsigned) wide;
+    return 0;
+}
+
+/* Converts decimal <text> to a size_t in *o_out via
+extract_xml_str_to_ullint(). Returns 0 on success; -1 with errno=ERANGE if
+the value exceeds SIZE_MAX; -1 with the helper's errno if parsing fails. */
+int extract_xml_str_to_size(const char* text, size_t* o_out)
+{
+    unsigned long long wide;
+
+    if (extract_xml_str_to_ullint(text, &wide) != 0) {
+        return -1;
+    }
+    if (wide > SIZE_MAX) {
+        errno = ERANGE;
+        return -1;
+    }
+    *o_out = (size_t) wide;
+    return 0;
+}
+
+/* Converts <text> to a double in *o_out using strtod().
+
+Returns 0 on success. Otherwise returns -1 with errno set: ESRCH if <text>
+is NULL, EINVAL if it is empty or has characters left over after the number,
+or whatever strtod() set (e.g. ERANGE). *o_out is only written on success. */
+int extract_xml_str_to_double(const char* text, double* o_out)
+{
+    char* end = NULL;
+    double parsed = 0;
+    int err = 0;
+
+    if (!text) {
+        err = ESRCH;
+    }
+    else if (text[0] == 0) {
+        err = EINVAL;
+    }
+    else {
+        errno = 0;
+        parsed = strtod(text, &end);
+        if (errno) {
+            err = errno;
+        }
+        else if (*end) {
+            err = EINVAL;
+        }
+    }
+
+    if (err) {
+        errno = err;
+        return -1;
+    }
+    *o_out = parsed;
+    return 0;
+}
+
+/* Converts <text> to a float in *o_out via extract_xml_str_to_double().
+Returns 0 on success; -1 with errno=ERANGE if the magnitude exceeds
+FLT_MAX; -1 with the helper's errno if parsing fails. */
+int extract_xml_str_to_float(const char* text, float* o_out)
+{
+    double wide;
+
+    if (extract_xml_str_to_double(text, &wide) != 0) {
+        return -1;
+    }
+    if (wide < -FLT_MAX || wide > FLT_MAX) {
+        errno = ERANGE;
+        return -1;
+    }
+    *o_out = (float) wide;
+    return 0;
+}
+
+/* Grows tag->attributes by one entry and stores the <name> and <value>
+pointers in it (the strings are not copied). Returns 0 on success, or -1 if
+extract_realloc2() fails, in which case the attribute count is unchanged. */
+static int extract_xml_tag_attributes_append(
+        extract_alloc_t* alloc,
+        extract_xml_tag_t* tag,
+        char* name,
+        char* value
+        )
+{
+    extract_xml_attribute_t* slot;
+
+    if (extract_realloc2(
+            alloc,
+            &tag->attributes,
+            sizeof(extract_xml_attribute_t) * tag->attributes_num,
+            sizeof(extract_xml_attribute_t) * (tag->attributes_num+1)
+            )) {
+        return -1;
+    }
+    slot = &tag->attributes[tag->attributes_num];
+    slot->name = name;
+    slot->value = value;
+    tag->attributes_num += 1;
+    return 0;
+}
+
+/* Puts <tag> into its empty state: no name, no attributes, and text
+initialised with extract_astring_init(). Nothing is freed. */
+void extract_xml_tag_init(extract_xml_tag_t* tag)
+{
+    tag->attributes_num = 0;
+    tag->attributes = NULL;
+    tag->name = NULL;
+    extract_astring_init(&tag->text);
+}
+
+/* Frees everything <tag> owns (its name, each attribute's name and value,
+the attribute array and the text) and then re-initialises it with
+extract_xml_tag_init(). */
+void extract_xml_tag_free(extract_alloc_t* alloc, extract_xml_tag_t* tag)
+{
+    int n = 0;
+
+    extract_free(alloc, &tag->name);
+    while (n < tag->attributes_num) {
+        extract_free(alloc, &tag->attributes[n].name);
+        extract_free(alloc, &tag->attributes[n].value);
+        n += 1;
+    }
+    extract_free(alloc, &tag->attributes);
+    extract_astring_free(alloc, &tag->text);
+    extract_xml_tag_init(tag);
+}
+
+/* Unused but useful to keep code here. */
+#if 0
+/* NULL-tolerant strcmp(): two NULLs compare equal, and NULL sorts before any
+non-NULL string. */
+static int extract_xml_strcmp_null(const char* a, const char* b)
+{
+    if (a && b) return strcmp(a, b);
+    if (a) return 1;    /* Only b is NULL. */
+    if (b) return -1;   /* Only a is NULL. */
+    return 0;           /* Both are NULL. */
+}
+#endif
+
+/* Unused but useful to keep code here. */
+#if 0
+/* Orders two tags: first by name, then attribute-by-attribute (name, then
+value) over the attributes that both tags have, then by number of attributes.
+Returns zero if equal, otherwise a negative or positive value. Does not
+compare extract_xml_tag_t::text members. */
+int extract_xml_compare_tags(const extract_xml_tag_t* lhs, const extract_xml_tag_t* rhs)
+{
+    int common = (lhs->attributes_num < rhs->attributes_num)
+            ? lhs->attributes_num
+            : rhs->attributes_num;
+    int a;
+    int d = extract_xml_strcmp_null(lhs->name, rhs->name);
+    if (d) return d;
+    for (a = 0; a < common; ++a) {
+        const extract_xml_attribute_t* l = &lhs->attributes[a];
+        const extract_xml_attribute_t* r = &rhs->attributes[a];
+        d = extract_xml_strcmp_null(l->name, r->name);
+        if (d) return d;
+        d = extract_xml_strcmp_null(l->value, r->value);
+        if (d) return d;
+    }
+    /* All shared attributes match, so the tag with more attributes is greater. */
+    if (lhs->attributes_num > rhs->attributes_num) return +1;
+    if (lhs->attributes_num < rhs->attributes_num) return -1;
+    return 0;
+}
+#endif
+
+
+/* Prepares <buffer> for use with extract_xml_pparse_next().
+
+If <first_line> is not NULL, reads strlen(first_line) bytes from <buffer> and
+requires them to match <first_line> exactly. Then skips spaces and newlines up
+to and including the first '<'.
+
+Returns 0 on success, else -1. errno is set to ESRCH if the prefix does not
+match or if EOF is reached before '<'. */
+int extract_xml_pparse_init(extract_alloc_t* alloc, extract_buffer_t* buffer, const char* first_line)
+{
+    char* first_line_buffer = NULL;
+    int e = -1;
+
+    if (first_line) {
+        size_t first_line_len = strlen(first_line);
+        size_t actual;
+        if (extract_malloc(alloc, &first_line_buffer, first_line_len + 1)) goto end;
+
+        if (extract_buffer_read(buffer, first_line_buffer, first_line_len, &actual)) {
+            outf("error: failed to read first line.");
+            goto end;
+        }
+        /* Terminate what we read so that it can be compared and printed. */
+        first_line_buffer[actual] = 0;
+        if (strcmp(first_line, first_line_buffer)) {
+            /* Needs %s, otherwise the text we read is never shown. */
+            outf("Unrecognised prefix: %s", first_line_buffer);
+            errno = ESRCH;
+            goto end;
+        }
+    }
+
+    /* Skip whitespace until we have consumed the first '<'. */
+    for(;;) {
+        char c;
+        int ee = extract_buffer_read(buffer, &c, 1, NULL);
+        if (ee) {
+            if (ee==1) errno = ESRCH;   /* EOF. */
+            goto end;
+        }
+        if (c == '<') {
+            break;
+        }
+        else if (c == ' ' || c == '\n') {}
+        else {
+            outf("Expected '<' but found c=%i", c);
+            goto end;
+        }
+    }
+    e = 0;
+
+    end:
+    extract_free(alloc, &first_line_buffer);
+    return e;
+}
+
+/* Reads the next char from <buffer> into *o_c and returns the result of
+extract_buffer_read(). If that result is +1 (EOF), also sets *ret=+1 and
+errno=ESRCH. */
+static int s_next(extract_buffer_t* buffer, int* ret, char* o_c)
+{
+    int e = extract_buffer_read(buffer, o_c, 1, NULL);
+    if (e != +1) {
+        return e;
+    }
+    errno = ESRCH;
+    *ret = +1;
+    return +1;
+}
+
+/* Debugging helper: formats <tag>'s name as "<name=...>". The result lives in
+a single function-static buffer that is freed and rebuilt on every call, so it
+is only valid until the next call. The return value of extract_asprintf() is
+not checked. */
+static const char* extract_xml_tag_string(extract_alloc_t* alloc, extract_xml_tag_t* tag)
+{
+    static char* buffer = NULL;
+    const char* name = "";
+    if (tag->name) {
+        name = tag->name;
+    }
+    extract_free(alloc, &buffer);
+    extract_asprintf(alloc, &buffer, "<name=%s>", name);
+    return buffer;
+}
+
+/* Reads the next tag from <buffer> into *out: the tag name, any attributes,
+and the plain text that follows, up to and consuming the next '<'.
+
+*out is first passed to extract_xml_tag_free(), and is freed again if we do
+not return 0.
+
+Returns 0 on success, +1 on EOF, or -1 on error. */
+int extract_xml_pparse_next(extract_buffer_t* buffer, extract_xml_tag_t* out)
+{
+    int ret = -1;
+    char* attribute_name = NULL;
+    char* attribute_value = NULL;
+    char c;
+    int i;
+    extract_alloc_t* alloc;
+
+    /* Check <buffer> before we use it to look up the allocator. */
+    assert(buffer);
+    alloc = extract_buffer_alloc(buffer);
+
+    if (0) outf("out is: %s", extract_xml_tag_string(extract_buffer_alloc(buffer), out));
+    extract_xml_tag_free(alloc, out);
+
+    /* Read tag name. */
+    for( i=0;; ++i) {
+        int e = extract_buffer_read(buffer, &c, 1, NULL);
+        if (e) {
+            if (e == +1) ret = 1;   /* EOF is not an error here. */
+            goto end;
+        }
+        if (c == '>' || c == ' ') break;
+        if (str_catc(alloc, &out->name, c)) goto end;
+    }
+    if (c == ' ') {
+
+        /* Read attributes. */
+        for(;;) {
+
+            /* Read attribute name. */
+            for(;;) {
+                if (s_next(buffer, &ret, &c)) goto end;
+                if (c == '=' || c == '>' || c == ' ') break;
+                if (str_catc(alloc, &attribute_name, c)) goto end;
+            }
+            if (c == '>') break;
+
+            if (c == '=') {
+                /* Read attribute value. */
+                int quote_single = 0;
+                int quote_double = 0;
+                size_t l;
+                for(;;) {
+                    if (s_next(buffer, &ret, &c)) goto end;
+                    if (c == '\'') quote_single = !quote_single;
+                    else if (c == '"') quote_double = !quote_double;
+                    else if (!quote_single && !quote_double
+                            && (c == ' ' || c == '/' || c == '>')
+                            ) {
+                        /* We are at end of attribute value. */
+                        break;
+                    }
+                    else if (c == '\\') {
+                        /* Escape next character. */
+                        if (s_next(buffer, &ret, &c)) goto end;
+                    }
+                    if (str_catc(alloc, &attribute_value, c)) goto end;
+                }
+
+                /* Remove any enclosing quotes. attribute_value is still NULL
+                if the value was empty (e.g. 'name=>'), in which case it must
+                not be passed to strlen(). */
+                l = (attribute_value) ? strlen(attribute_value) : 0;
+                if (l >= 2) {
+                    if (
+                            (attribute_value[0] == '"' && attribute_value[l-1] == '"')
+                            ||
+                            (attribute_value[0] == '\'' && attribute_value[l-1] == '\'')
+                            ) {
+                        memmove(attribute_value, attribute_value+1, l-2);
+                        attribute_value[l-2] = 0;
+                    }
+                }
+            }
+
+            /* On success the tag owns the two strings, so forget them here to
+            stop the cleanup code from freeing them. */
+            if (extract_xml_tag_attributes_append(alloc, out, attribute_name, attribute_value)) goto end;
+            attribute_name = NULL;
+            attribute_value = NULL;
+            if (c == '/') {
+                if (s_next(buffer, &ret, &c)) goto end;
+            }
+            if (c == '>') break;
+        }
+    }
+
+    /* Read plain text until next '<'. */
+    for(;;) {
+        /* We don't use s_next() here because EOF is not an error. */
+        int e = extract_buffer_read(buffer, &c, 1, NULL);
+        if (e == +1) {
+            break;  /* EOF is not an error here. */
+        }
+        if (e) goto end;
+        if (c == '<') break;
+        if (extract_astring_catc(alloc, &out->text, c)) goto end;
+    }
+
+    ret = 0;
+
+    end:
+
+    extract_free(alloc, &attribute_name);
+    extract_free(alloc, &attribute_value);
+    if (ret) {
+        extract_xml_tag_free(alloc, out);
+    }
+    return ret;
+}
+
diff --git a/extract/src/xml.h b/extract/src/xml.h
new file mode 100644
index 00000000..d11fd886
--- /dev/null
+++ b/extract/src/xml.h
@@ -0,0 +1,123 @@
+#ifndef ARTIFEX_EXTRACT_XML
+#define ARTIFEX_EXTRACT_XML
+
+/* Only for internal use by extract code. */
+
+#include "../include/extract_buffer.h"
+
+#include "astring.h"
+
+
+/* Things for representing XML. */
+
+/* A single name/value attribute of an XML tag. Both strings are freed by
+extract_xml_tag_free(). */
+typedef struct {
+ char* name;
+ char* value; /* NULL if the parser saw no '=' after the name. */
+} extract_xml_attribute_t;
+
+/* Represents a single <...> XML tag plus trailing text. Initialise with
+extract_xml_tag_init() and release with extract_xml_tag_free(). */
+typedef struct {
+ char* name; /* Tag name. */
+ extract_xml_attribute_t* attributes; /* Array of attributes_num items. */
+ int attributes_num;
+ extract_astring_t text; /* Plain text between this tag and the next '<'. */
+} extract_xml_tag_t;
+
+
+void extract_xml_tag_init(extract_xml_tag_t* tag);
+/* Initialises tag. Will cause leak if tag contains data - in this case call
+extract_xml_tag_free(). */
+
+void extract_xml_tag_free(extract_alloc_t* alloc, extract_xml_tag_t* tag);
+/* Frees tag and then calls extract_xml_tag_init(). */
+
+
+int extract_xml_pparse_init(extract_alloc_t* alloc, extract_buffer_t* buffer, const char* first_line);
+/* extract_xml_pparse_*(): simple XML 'pull' parser.
+
+extract_xml_pparse_init() merely consumes the initial '<'. Thereafter
+extract_xml_pparse_next() consumes the next '<' before returning the previous
+tag. */
+
+/* Starts parsing XML read from the specified buffer.
+
+If first_line is not NULL, we check that it matches the first line in the file.
+
+Returns -1 with errno=ESRCH if we fail to read the first '<' due to EOF.
+*/
+
+
+int extract_xml_pparse_next(extract_buffer_t* buffer, extract_xml_tag_t* out);
+/* Returns the next XML tag.
+
+Returns 0 with *out containing next tag; or -1 with errno set if error; or +1
+with errno=ESRCH if EOF.
+
+*out is initially passed to extract_xml_tag_free(), so *out must have been
+initialised, e.g. by extract_xml_tag_init(). */
+
+
+char* extract_xml_tag_attributes_find(extract_xml_tag_t* tag, const char* name);
+/* Returns pointer to value of specified attribute, or NULL if not found. */
+
+int extract_xml_tag_attributes_find_float(
+ extract_xml_tag_t* tag,
+ const char* name,
+ float* o_out
+ );
+/* Finds float value of specified attribute, returning error if not found or
+there is trailing text. */
+
+int extract_xml_tag_attributes_find_double(
+ extract_xml_tag_t* tag,
+ const char* name,
+ double* o_out
+ );
+/* Finds double value of specified attribute, returning error if not found or there is
+trailing text. */
+
+
+/* Next few functions write to out-param and return zero on success, else
+return -1 with errno set.
+
+An error is returned if value is out of range or there is any trailing text. */
+
+int extract_xml_str_to_llint(const char* text, long long* o_out);
+
+int extract_xml_str_to_ullint(const char* text, unsigned long long* o_out);
+
+int extract_xml_str_to_int(const char* text, int* o_out);
+
+int extract_xml_str_to_uint(const char* text, unsigned* o_out);
+
+int extract_xml_str_to_size(const char* text, size_t* o_out);
+
+int extract_xml_str_to_double(const char* text, double* o_out);
+
+int extract_xml_str_to_float(const char* text, float* o_out);
+
+
+int extract_xml_tag_attributes_find_int(
+ extract_xml_tag_t* tag,
+ const char* name,
+ int* o_out
+ );
+/* Finds int value of specified attribute, returning error if not found. */
+
+int extract_xml_tag_attributes_find_uint(
+ extract_xml_tag_t* tag,
+ const char* name,
+ unsigned* o_out
+ );
+/* Finds unsigned int value of specified attribute, returning error if not
+found. */
+
+int extract_xml_tag_attributes_find_size(
+ extract_xml_tag_t* tag,
+ const char* name,
+ size_t* o_out
+ );
+/* Finds size_t value of specified attribute, returning error if not
+found. */
+
+#endif
diff --git a/extract/src/zip-test.c b/extract/src/zip-test.c
new file mode 100644
index 00000000..67082342
--- /dev/null
+++ b/extract/src/zip-test.c
@@ -0,0 +1,224 @@
+/* Crude programme to show detailed information about a zip file. */
+
+#include "memento.h"
+#include "outf.h"
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+
+/* Returns 1 if the host stores integers little-endian, 0 if big-endian, and
+calls abort() for any other byte order. */
+static int s_native_little_endinesss(void)
+{
+    /* Use a union rather than casting a char array to uint16_t*, which would
+    break C's aliasing rules and could be a misaligned read. */
+    union {
+        unsigned char bytes[2];
+        uint16_t value;
+    } probe = { { 1, 2 } };
+    if (probe.value == 1 + 2*256) {
+        /* Native little-endiness. */
+        return 1;
+    }
+    else if (probe.value == 2 + 1*256) {
+        /* Native big-endiness. */
+        return 0;
+    }
+    abort();
+}
+
+
+/* Prints, via outf() and stderr, the End of central directory (EOCD) record
+of zip file <filename>, then each central directory file header together with
+the local file header that it points to.
+
+Multi-byte fields are read directly from memory, so this only works on a
+little-endian host. Problems are reported with assert(). Returns 0. */
+static int s_show(const char* filename)
+{
+    outf("Looking at filename=%s", filename);
+    assert(s_native_little_endinesss());
+    /* Zip files are binary, so avoid any text-mode translation. */
+    FILE* f = fopen(filename, "rb");
+    assert(f);
+    size_t datasize = 10*1000*1000;
+    char* data = extract_malloc(datasize);
+    assert(data);
+    size_t n = fread(data, 1, datasize, f);
+    assert(n < datasize);
+    datasize = n;
+    outf("datasize=%zi", datasize);
+    fclose(f);
+
+    /* look for End of central directory (EOCD) record. The fixed part of the
+    record is 22 bytes, so a smaller file cannot contain one and would make
+    <pos> point before <data>. */
+    assert(datasize >= 22);
+    uint32_t magic = 0x06054b50;
+    char* pos = data + datasize - 22;
+    for(;;) {
+        if (!memcmp(pos, &magic, sizeof(magic))) break;
+        assert(pos > data);
+        pos -= 1;
+    }
+    outf("found EOCD at offset=%li", (long) (pos-data));
+    uint16_t disk_number = *(uint16_t*)(pos+4);
+    uint16_t disk_cd = *(uint16_t*)(pos+6);
+    uint16_t num_records_on_disk = *(uint16_t*)(pos+8);
+    uint16_t num_records = *(uint16_t*)(pos+10);
+    uint32_t size_cd = *(uint32_t*)(pos+12);
+    uint32_t offset_cd = *(uint32_t*)(pos+16);
+    uint16_t comment_length = *(uint16_t*)(pos+20);
+    char* comment = extract_malloc(comment_length + 1);
+    assert(comment);
+    memcpy(comment, pos+22, comment_length);
+    comment[comment_length] = 0;
+    assert(strlen(comment) == comment_length);
+    outf(" EOCD:");
+    outf(" disk_number=%i", disk_number);
+    outf(" disk_cd=%i", disk_cd);
+    outf(" num_records_on_disk=%i", num_records_on_disk);
+    outf(" num_records=%i", num_records);
+    outf(" size_cd=%i", size_cd);
+    outf(" offset_cd=%i", offset_cd);
+    outf(" comment_length=%i", comment_length);
+    outf(" comment=%s", comment);
+
+    if (pos != data + datasize - 22 - comment_length) {
+        outf("file does not end with EOCD. datasize=%zi pos-data=%li datasize-22-comment_length=%zi",
+                datasize,
+                (long) (pos-data),
+                datasize-22-comment_length
+                );
+        /* I think this isn't actually an error according to the Zip standard,
+        but zip files created by us should always pass this test. Note that
+        Word doesn't like trailing data after the EOCD record, but will repair
+        the file. */
+        assert(0);
+    }
+
+    pos = data + offset_cd;
+    int i;
+    for (i=0; i<num_records_on_disk; ++i) {
+        /* Pointer differences are ptrdiff_t, so convert to match %i. */
+        outf(" file %i: offset=%i", i, (int) (pos - data));
+        magic = 0x02014b50;
+        assert(!memcmp(pos, &magic, sizeof(magic)));
+        uint16_t version_made_by = *(uint16_t*)(pos+4);
+        uint16_t version_needed = *(uint16_t*)(pos+6);
+        uint16_t general_bit_flag = *(uint16_t*)(pos+8);
+        uint16_t compression_method = *(uint16_t*)(pos+10);
+        uint16_t mtime = *(uint16_t*)(pos+12);
+        uint16_t mdate = *(uint16_t*)(pos+14);
+        uint32_t crc = *(uint32_t*)(pos+16);
+        uint32_t size_compressed = *(uint32_t*)(pos+20);
+        uint32_t size_uncompressed = *(uint32_t*)(pos+24);
+        uint16_t filename_length = *(uint16_t*)(pos+28);
+        uint16_t extrafield_length = *(uint16_t*)(pos+30);
+        uint16_t filecomment_length = *(uint16_t*)(pos+32);
+        uint16_t disk_number = *(uint16_t*)(pos+34);
+        uint16_t internal_attributes = *(uint16_t*)(pos+36);
+        uint32_t external_attributes = *(uint32_t*)(pos+38);
+        uint32_t offset = *(uint32_t*)(pos+42);
+        char* filename = extract_malloc(filename_length + 1);
+        assert(filename);
+        memcpy(filename, pos+46, filename_length);
+        filename[filename_length] = 0;
+
+        char* comment = extract_malloc(filecomment_length + 1);
+        assert(comment);
+        memcpy(comment, pos+46+filename_length+extrafield_length, filecomment_length);
+        comment[filecomment_length] = 0;
+        assert(strlen(comment) == filecomment_length);
+        outf(" version_made_by=0x%x", version_made_by);
+        outf(" version_needed=0x%x", version_needed);
+        outf(" general_bit_flag=0x%x", general_bit_flag);
+        outf(" compression_method=%i", compression_method);
+        outf(" mtime=%i", mtime);
+        outf(" mdate=%i", mdate);
+        outf(" crc=%i", crc);
+        outf(" size_compressed=%i", size_compressed);
+        outf(" size_uncompressed=%i", size_uncompressed);
+        outf(" filename_length=%i", filename_length);
+        outf(" extrafield_length=%i", extrafield_length);
+        outf(" filecomment_length=%i", filecomment_length);
+        outf(" disk_number=%i", disk_number);
+        outf(" internal_attributes=0x%x", internal_attributes);
+        outf(" external_attributes=0x%x", external_attributes);
+        outf(" offset=%i", offset);
+        outf(" filename=%s", filename);
+
+        if (extrafield_length) {
+            outf( " extra:");
+            fprintf(stderr, " ");
+            char* extra = pos + 46+filename_length;
+            int j;
+            for (j=0; j<extrafield_length; ++j) {
+                unsigned char c = extra[j];
+                if (isprint(c) && c != '\\') fputc(c, stderr);
+                else fprintf(stderr, "\\x%02x", c);
+            }
+            fputc('\n', stderr);
+        }
+
+        /* show local file header. */
+        {
+            char* local_pos = data + offset;
+            /* Pass only the offset; an extra leading argument would be
+            printed in its place. */
+            outf(" local header offset=%i", (int) (local_pos - data));
+            magic = 0x04034b50;
+            assert(!memcmp(local_pos, &magic, sizeof(magic)));
+
+            uint16_t version_needed = *(uint16_t*)(local_pos+4);
+            uint16_t general_bit_flag = *(uint16_t*)(local_pos+6);
+            uint16_t compression_method = *(uint16_t*)(local_pos+8);
+            uint16_t mtime = *(uint16_t*)(local_pos+10);
+            uint16_t mdate = *(uint16_t*)(local_pos+12);
+            uint32_t crc = *(uint32_t*)(local_pos+14);
+            uint32_t size_compressed = *(uint32_t*)(local_pos+18);
+            uint32_t size_uncompressed = *(uint32_t*)(local_pos+22);
+            uint16_t filename_length = *(uint16_t*)(local_pos+26);
+            uint16_t extrafield_length = *(uint16_t*)(local_pos+28);
+
+            char* filename = extract_malloc(filename_length + 1);
+            assert(filename);
+            memcpy(filename, local_pos+30, filename_length);
+            filename[filename_length] = 0;
+
+            /* The local header has no comment length, disk number, attributes
+            or offset fields, so the last few values shown below are those of
+            the central directory header. */
+            outf(" version_needed=0x%x", version_needed);
+            outf(" general_bit_flag=0x%x", general_bit_flag);
+            outf(" compression_method=%i", compression_method);
+            outf(" mtime=%i", mtime);
+            outf(" mdate=%i", mdate);
+            outf(" crc=%i", crc);
+            outf(" size_compressed=%i", size_compressed);
+            outf(" size_uncompressed=%i", size_uncompressed);
+            outf(" filename_length=%i", filename_length);
+            outf(" extrafield_length=%i", extrafield_length);
+            outf(" filecomment_length=%i", filecomment_length);
+            outf(" disk_number=%i", disk_number);
+            outf(" internal_attributes=0x%x", internal_attributes);
+            outf(" external_attributes=0x%x", external_attributes);
+            outf(" offset=%i", offset);
+            outf(" filename=%s", filename);
+
+            if (extrafield_length) {
+                outf( " extra:");
+                fprintf(stderr, " ");
+                char* extra = local_pos + 30 + filename_length;
+                int j;
+                for (j=0; j<extrafield_length; ++j) {
+                    unsigned char c = extra[j];
+                    if (isprint(c) && c != '\\') fputc(c, stderr);
+                    else fprintf(stderr, "\\x%02x", c);
+                }
+                fputc('\n', stderr);
+            }
+
+        }
+
+        outf(" comment=%s", comment);
+
+        /* Step over this header's fixed part and its three variable fields. */
+        pos += 46 + filename_length + extrafield_length + filecomment_length;
+    }
+
+    outf("finished");
+    extract_free(&data);
+
+    return 0;
+}
+
+/* Sets the outf() level to 1, then shows information about each zip file
+named on the command line. Always returns 0. */
+int main(int argc, char** argv)
+{
+    int arg = 1;
+    outf_level_set(1);
+    while (arg < argc) {
+        s_show(argv[arg]);
+        arg += 1;
+    }
+    return 0;
+}
diff --git a/extract/src/zip.c b/extract/src/zip.c
new file mode 100644
index 00000000..013cd578
--- /dev/null
+++ b/extract/src/zip.c
@@ -0,0 +1,307 @@
+#include "../include/extract_alloc.h"
+
+#include "mem.h"
+#include "memento.h"
+#include "outf.h"
+#include "zip.h"
+
+#include <zlib.h>
+/* For crc32(). */
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+
+#ifdef _MSC_VER
+ #include "compat_stdint.h"
+#else
+ #include <stdint.h>
+#endif
+
+
+/* Information about one file that has been written to the zip stream, kept so
+that extract_zip_close() can write the matching central directory file header. */
+typedef struct
+{
+ int16_t mtime; /* File last modification time. */
+ int16_t mdate; /* File last modification date. */
+ int32_t crc_sum; /* CRC-32 of uncompressed data. */
+ int32_t size_compressed;
+ int32_t size_uncompressed;
+ char* name; /* File name within the zip file; allocated copy. */
+ uint32_t offset; /* Offset of local file header. */
+ uint16_t attr_internal;
+ uint32_t attr_external;
+
+} extract_zip_cd_file_t;
+
+/* State for a zip file that is being written to a buffer. */
+struct extract_zip_t
+{
+ extract_buffer_t* buffer; /* Destination for zip file content. */
+ extract_zip_cd_file_t* cd_files; /* Array of cd_files_num items, one per file written so far. */
+ int cd_files_num;
+
+ /* errno_ is set to non-zero if any operation fails; avoids need to check
+ after every small output operation. */
+ int errno_;
+ int eof; /* Set to 1 if a write to <buffer> reported EOF. */
+
+ /* Defaults for various values in zip file headers etc. */
+ uint16_t mtime;
+ uint16_t mdate;
+ uint16_t version_creator;
+ uint16_t version_extract;
+ uint16_t general_purpose_bit_flag;
+ uint16_t file_attr_internal;
+ uint32_t file_attr_external;
+ char* archive_comment; /* Written as the comment of the End of central directory record. */
+};
+
+/* Creates an extract_zip_t that will write to <buffer>, using the buffer's
+allocator for all memory.
+
+On success returns 0 with *o_zip set to the new handle. On failure returns -1
+with *o_zip set to NULL. */
+int extract_zip_open(extract_buffer_t* buffer, extract_zip_t** o_zip)
+{
+    int e = -1;
+    /* Start as NULL so that the cleanup code below never looks at an
+    indeterminate pointer if extract_malloc() fails. */
+    extract_zip_t* zip = NULL;
+    extract_alloc_t* alloc = extract_buffer_alloc(buffer);
+
+    if (extract_malloc(alloc, &zip, sizeof(*zip))) goto end;
+
+    zip->cd_files = NULL;
+    zip->cd_files_num = 0;
+    zip->buffer = buffer;
+    zip->errno_ = 0;
+    zip->eof = 0;
+    /* Give the cleanup code a defined value to free. */
+    zip->archive_comment = NULL;
+
+    /* We could maybe convert current date/time to the ms-dos format required
+    here, but using zeros doesn't seem to make a difference to Word etc. */
+    zip->mtime = 0;
+    zip->mdate = 0;
+
+    /* These are all copied from command-line zip on unix. */
+    zip->version_creator = (0x3 << 8) + 30; /* 0x3 is unix, 30 means 3.0. */
+    zip->version_extract = 10;              /* 10 means 1.0. */
+    zip->general_purpose_bit_flag = 0;
+    zip->file_attr_internal = 0;
+
+    /* We follow command-line zip which uses 0x81a40000 which is octal
+    0100644:0. (0100644 is S_IFREG (regular file) plus rw-r-r. See stat(2) for
+    details.) The shift is done on an unsigned value because the result does
+    not fit in a signed 32-bit int. */
+    zip->file_attr_external = ((uint32_t) 0100644 << 16) + 0;
+    if (extract_strdup(alloc, "Artifex", &zip->archive_comment)) goto end;
+
+    e = 0;
+
+    end:
+    if (e) {
+        if (zip) extract_free(alloc, &zip->archive_comment);
+        extract_free(alloc, &zip);
+        *o_zip = NULL;
+    }
+    else {
+        *o_zip = zip;
+    }
+    return e;
+}
+
+/* Returns 1 if the host stores integers little-endian, 0 if big-endian, and
+calls abort() for any other byte order. */
+static int s_native_little_endinesss(void)
+{
+    /* Use a union rather than casting a char array to uint16_t*, which would
+    break C's aliasing rules and could be a misaligned read. */
+    union {
+        unsigned char bytes[2];
+        uint16_t value;
+    } probe = { { 1, 2 } };
+    if (probe.value == 1 + 2*256) {
+        /* Native little-endiness. */
+        return 1;
+    }
+    else if (probe.value == 2 + 1*256) {
+        /* Native big-endiness. */
+        return 0;
+    }
+    abort();
+}
+
+/* Writes <data_length> bytes to zip->buffer unless an earlier write has
+already failed. Returns the result of extract_buffer_write(): on -1 copies
+errno into zip->errno_, on +1 sets zip->eof. If zip->errno_ or zip->eof is
+already set, returns -1 or +1 respectively without writing anything. */
+static int s_write(extract_zip_t* zip, const void* data, size_t data_length)
+{
+    size_t written;
+    int e;
+
+    if (zip->errno_ != 0) {
+        return -1;
+    }
+    if (zip->eof != 0) {
+        return +1;
+    }
+    e = extract_buffer_write(zip->buffer, data, data_length, &written);
+    switch (e) {
+        case -1:
+            zip->errno_ = errno;
+            break;
+        case +1:
+            zip->eof = 1;
+            break;
+        default:
+            break;
+    }
+    return e;
+}
+
+/* Writes <value> as four bytes, least significant byte first. Returns as
+s_write(). */
+static int s_write_uint32(extract_zip_t* zip, uint32_t value)
+{
+    unsigned char le[4];
+
+    if (s_native_little_endinesss()) {
+        /* The in-memory layout is already the one we want. */
+        return s_write(zip, &value, sizeof(value));
+    }
+    le[0] = (unsigned char) (value >> 0);
+    le[1] = (unsigned char) (value >> 8);
+    le[2] = (unsigned char) (value >> 16);
+    le[3] = (unsigned char) (value >> 24);
+    return s_write(zip, le, sizeof(le));
+}
+
+/* Writes <value> as two bytes, least significant byte first. Returns as
+s_write(). */
+static int s_write_uint16(extract_zip_t* zip, uint16_t value)
+{
+    unsigned char le[2];
+
+    if (s_native_little_endinesss()) {
+        /* The in-memory layout is already the one we want. */
+        return s_write(zip, &value, sizeof(value));
+    }
+    le[0] = (unsigned char) (value >> 0);
+    le[1] = (unsigned char) (value >> 8);
+    return s_write(zip, le, sizeof(le));
+}
+
+/* Writes the characters of <text>, without the terminating zero. Returns as
+s_write(). */
+static int s_write_string(extract_zip_t* zip, const char* text)
+{
+    size_t text_length = strlen(text);
+    return s_write(zip, text, text_length);
+}
+
+
+/* Writes a local file header followed by the uncompressed <data> to the zip
+stream, and remembers the details in zip->cd_files so that
+extract_zip_close() can write the central directory.
+
+Returns 0 on success, +1 if a write reported EOF, or -1 with errno set. If
+the return is non-zero, zip->cd_files_num is unchanged. */
+int extract_zip_write_file(
+        extract_zip_t* zip,
+        const void* data,
+        size_t data_length,
+        const char* name
+        )
+{
+    int e = -1;
+    extract_zip_cd_file_t* cd_file = NULL;
+    extract_alloc_t* alloc = extract_buffer_alloc(zip->buffer);
+
+    if (data_length > INT_MAX) {
+        assert(0);
+        errno = EINVAL;
+        return -1;
+    }
+    /* Create central directory file header for later. */
+    if (extract_realloc2(
+            alloc,
+            &zip->cd_files,
+            sizeof(extract_zip_cd_file_t) * zip->cd_files_num,
+            sizeof(extract_zip_cd_file_t) * (zip->cd_files_num+1)
+            )) goto end;
+    cd_file = &zip->cd_files[zip->cd_files_num];
+    cd_file->name = NULL;
+
+    cd_file->mtime = zip->mtime;
+    /* The date comes from zip->mdate, not zip->mtime. */
+    cd_file->mdate = zip->mdate;
+    cd_file->crc_sum = (int32_t) crc32(crc32(0, NULL, 0), data, (int) data_length);
+    cd_file->size_compressed = (int) data_length;
+    cd_file->size_uncompressed = (int) data_length;
+    if (extract_strdup(alloc, name, &cd_file->name)) goto end;
+    cd_file->offset = (int) extract_buffer_pos(zip->buffer);
+    cd_file->attr_internal = zip->file_attr_internal;
+    cd_file->attr_external = zip->file_attr_external;
+    if (!cd_file->name) goto end;
+
+    /* Write local file header. We do not check each write; s_write() records
+    any failure in <zip> and we look at that below. */
+    {
+        const char extra_local[] = ""; /* Modify for testing. */
+        s_write_uint32(zip, 0x04034b50);
+        s_write_uint16(zip, zip->version_extract);          /* Version needed to extract (minimum). */
+        s_write_uint16(zip, zip->general_purpose_bit_flag); /* General purpose bit flag */
+        s_write_uint16(zip, 0);                             /* Compression method */
+        s_write_uint16(zip, cd_file->mtime);                /* File last modification time */
+        s_write_uint16(zip, cd_file->mdate);                /* File last modification date */
+        s_write_uint32(zip, cd_file->crc_sum);              /* CRC-32 of uncompressed data */
+        s_write_uint32(zip, cd_file->size_compressed);      /* Compressed size */
+        s_write_uint32(zip, cd_file->size_uncompressed);    /* Uncompressed size */
+        s_write_uint16(zip, (uint16_t) strlen(name));       /* File name length (n) */
+        s_write_uint16(zip, sizeof(extra_local)-1);         /* Extra field length (m) */
+        s_write_string(zip, cd_file->name);                 /* File name */
+        s_write(zip, extra_local, sizeof(extra_local)-1);   /* Extra field */
+    }
+    /* Write the (uncompressed) data. */
+    s_write(zip, data, data_length);
+
+    if (zip->errno_) e = -1;
+    else if (zip->eof) e = +1;
+    else e = 0;
+
+
+    end:
+
+    if (e) {
+        /* Leave zip->cd_files_num unchanged, so calling extract_zip_close()
+        will write out any earlier files. Free cd_file->name to avoid leak. */
+        if (cd_file) extract_free(alloc, &cd_file->name);
+    }
+    else {
+        /* cd_files[zip->cd_files_num] is valid. */
+        zip->cd_files_num += 1;
+    }
+
+    return e;
+}
+
+/* Finishes the zip stream: writes one central directory file header per file
+in zip->cd_files, then the End of central directory record including the
+archive comment. Frees the per-file data, the comment and finally the handle
+itself via extract_free(alloc, pzip).
+
+Returns 0 immediately if *pzip is NULL. Otherwise returns -1 if any write
+failed, +1 if a write reported EOF, else 0. */
+int extract_zip_close(extract_zip_t** pzip)
+{
+    extract_zip_t* zip = *pzip;
+    extract_alloc_t* alloc;
+    size_t cd_start;
+    size_t cd_size = 0;
+    int e;
+    int f;
+
+    if (zip == NULL) {
+        return 0;
+    }
+    alloc = extract_buffer_alloc(zip->buffer);
+    cd_start = extract_buffer_pos(zip->buffer);
+
+    /* Write Central directory file headers, freeing data as we go. */
+    for (f = 0; f < zip->cd_files_num; ++f) {
+        const char extra[] = "";
+        extract_zip_cd_file_t* cd_file = &zip->cd_files[f];
+        size_t header_start = extract_buffer_pos(zip->buffer);
+
+        s_write_uint32(zip, 0x02014b50);
+        s_write_uint16(zip, zip->version_creator);              /* Version made by, copied from command-line zip. */
+        s_write_uint16(zip, zip->version_extract);              /* Version needed to extract (minimum). */
+        s_write_uint16(zip, zip->general_purpose_bit_flag);     /* General purpose bit flag */
+        s_write_uint16(zip, 0);                                 /* Compression method */
+        s_write_uint16(zip, cd_file->mtime);                    /* File last modification time */
+        s_write_uint16(zip, cd_file->mdate);                    /* File last modification date */
+        s_write_uint32(zip, cd_file->crc_sum);                  /* CRC-32 of uncompressed data */
+        s_write_uint32(zip, cd_file->size_compressed);          /* Compressed size */
+        s_write_uint32(zip, cd_file->size_uncompressed);        /* Uncompressed size */
+        s_write_uint16(zip, (uint16_t) strlen(cd_file->name));  /* File name length (n) */
+        s_write_uint16(zip, sizeof(extra)-1);                   /* Extra field length (m) */
+        s_write_uint16(zip, 0);                                 /* File comment length (k) */
+        s_write_uint16(zip, 0);                                 /* Disk number where file starts */
+        s_write_uint16(zip, cd_file->attr_internal);            /* Internal file attributes */
+        s_write_uint32(zip, cd_file->attr_external);            /* External file attributes. */
+        s_write_uint32(zip, cd_file->offset);                   /* Offset of local file header. */
+        s_write_string(zip, cd_file->name);                     /* File name */
+        s_write(zip, extra, sizeof(extra)-1);                   /* Extra field */
+
+        /* Accumulate the number of bytes that this header occupied. */
+        cd_size += extract_buffer_pos(zip->buffer) - header_start;
+        extract_free(alloc, &cd_file->name);
+    }
+    extract_free(alloc, &zip->cd_files);
+
+    /* Write End of central directory record. */
+    s_write_uint32(zip, 0x06054b50);
+    s_write_uint16(zip, 0);                             /* Number of this disk */
+    s_write_uint16(zip, 0);                             /* Disk where central directory starts */
+    s_write_uint16(zip, (uint16_t) zip->cd_files_num);  /* Number of central directory records on this disk */
+    s_write_uint16(zip, (uint16_t) zip->cd_files_num);  /* Total number of central directory records */
+    s_write_uint32(zip, (int) cd_size);                 /* Size of central directory (bytes) */
+    s_write_uint32(zip, (int) cd_start);                /* Offset of start of central directory, relative to start of archive */
+
+    s_write_uint16(zip, (uint16_t) strlen(zip->archive_comment));   /* Comment length (n) */
+    s_write_string(zip, zip->archive_comment);
+    extract_free(alloc, &zip->archive_comment);
+
+    /* Individual writes were not checked; any failure was recorded in <zip>. */
+    e = zip->errno_ ? -1 : (zip->eof ? +1 : 0);
+
+    extract_free(alloc, pzip);
+
+    return e;
+}
diff --git a/extract/src/zip.h b/extract/src/zip.h
new file mode 100644
index 00000000..570f475a
--- /dev/null
+++ b/extract/src/zip.h
@@ -0,0 +1,64 @@
+#ifndef ARTIFEX_EXTRACT_ZIP
+#define ARTIFEX_EXTRACT_ZIP
+
+/* Only for internal use by extract code. */
+
+#include "../include/extract_buffer.h"
+
+#include <stddef.h>
+
+
+/* Support for creating zip file content.
+
+Content is uncompressed.
+
+Unless otherwise stated, all functions return 0 on success or -1 with errno
+set.
+*/
+
+typedef struct extract_zip_t extract_zip_t;
+/* Abstract handle for zipfile state. */
+
+
+int extract_zip_open(extract_buffer_t* buffer, extract_zip_t** o_zip);
+/* Creates an extract_zip_t that writes to specified buffer.
+
+buffer:
+ Destination for zip file content.
+o_zip:
+ Out-param.
+*/
+
+int extract_zip_write_file(
+ extract_zip_t* zip,
+ const void* data,
+ size_t data_length,
+ const char* name
+ );
+/* Writes specified data into the zip file.
+
+Returns same as extract_buffer_write(): 0 on success, +1 if short write due to
+EOF or -1 with errno set.
+
+zip:
+ From extract_zip_open().
+data:
+ File contents.
+data_length:
+ Length in bytes of file contents.
+name:
+ Name of file within the zip file.
+*/
+
+
+int extract_zip_close(extract_zip_t** pzip);
+/* Finishes writing the zip file (e.g. appends Central directory file headers
+and End of central directory record).
+
+Does not call extract_buffer_close().
+
+pzip:
+ Points to handle from extract_zip_open().
+*/
+
+#endif