summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
Diffstat (limited to 'extract/src/document.h')
-rw-r--r--extract/src/document.h137
1 files changed, 128 insertions, 9 deletions
diff --git a/extract/src/document.h b/extract/src/document.h
index c59348f4..2dc4f1ee 100644
--- a/extract/src/document.h
+++ b/extract/src/document.h
@@ -1,6 +1,15 @@
#ifndef ARTIFEX_EXTRACT_DOCUMENT_H
#define ARTIFEX_EXTRACT_DOCUMENT_H
+#include "../include/extract.h"
+
+#ifdef _MSC_VER
+ #include "compat_stdint.h"
+#else
+ #include <stdint.h>
+#endif
+
+
static const double pi = 3.141592653589793;
typedef struct
@@ -9,6 +18,16 @@ typedef struct
double y;
} point_t;
+const char* extract_point_string(const point_t* point);
+
+typedef struct
+{
+ point_t min; /* NOTE(review): presumably the corner with the smallest x and y - confirm. */
+ point_t max; /* NOTE(review): presumably the opposite corner (largest x and y) - confirm. */
+} rect_t; /* A rectangle described by two points. */
+
+const char* extract_rect_string(const rect_t* rect);
+
typedef struct
{
double a;
@@ -19,9 +38,15 @@ typedef struct
double f;
} matrix_t;
-double matrix_expansion(matrix_t m);
+const char* extract_matrix_string(const matrix_t* matrix);
-int matrix_cmp4(const matrix_t* lhs, const matrix_t* rhs)
+double extract_matrix_expansion(matrix_t m);
+/* Returns a*d - b*c. */
+
+point_t extract_multiply_matrix_point(matrix_t m, point_t p);
+matrix_t extract_multiply_matrix_matrix(matrix_t m1, matrix_t m2);
+
+int extract_matrix_cmp4(const matrix_t* lhs, const matrix_t* rhs)
;
/* Returns zero if first four members of *lhs and *rhs are equal, otherwise
+/-1. */
@@ -48,7 +73,7 @@ typedef struct
matrix_t trm;
char* font_name;
- /* font size is matrix_expansion(trm). */
+ /* font size is extract_matrix_expansion(trm). */
struct {
unsigned font_bold : 1;
@@ -61,14 +86,21 @@ typedef struct
} span_t;
/* List of chars that have same font and are usually adjacent. */
-char_t* span_char_last(span_t* span);
+void extract_span_init(span_t* span);
+
+void extract_span_free(extract_alloc_t* alloc, span_t** pspan);
+/* Frees a span_t, returning with *pspan set to NULL. */
+
+void extract_spans_free(extract_alloc_t* alloc, span_t*** pspans, int spans_num);
+
+char_t* extract_span_char_last(span_t* span);
/* Returns last character in span. */
-int span_append_c(extract_alloc_t* alloc, span_t* span, int c);
+int extract_span_append_c(extract_alloc_t* alloc, span_t* span, int c);
/* Appends new char_t to a span_t with .ucs=c and all other
fields zeroed. */
-const char* span_string(extract_alloc_t* alloc, span_t* span);
+const char* extract_span_string(extract_alloc_t* alloc, span_t* span);
/* Returns static string containing info about span_t. */
typedef struct
@@ -78,10 +110,13 @@ typedef struct
} line_t;
/* List of spans that are aligned on same line. */
-span_t* line_span_first(line_t* line);
+void extract_line_free(extract_alloc_t* alloc, line_t** pline);
+void extract_lines_free(extract_alloc_t* alloc, line_t*** plines, int lines_num);
+
+span_t* extract_line_span_first(line_t* line);
/* Returns first span in a line. */
-span_t* line_span_last(line_t* line);
+span_t* extract_line_span_last(line_t* line);
/* Returns last span in a line. */
typedef struct
@@ -112,6 +147,61 @@ typedef struct
<name> and <id> are created to be unique identifiers for use in generated docx
file. */
+void extract_image_clear(extract_alloc_t* alloc, image_t* image);
+
+typedef struct
+{
+ float color;
+ rect_t rect;
+} tableline_t;
+/* A line that is part of a table. */
+
+typedef struct
+{
+ tableline_t* tablelines; /* Pointer to tableline_t items. */
+ int tablelines_num; /* NOTE(review): presumably the number of items in .tablelines - confirm. */
+} tablelines_t; /* A collection of tableline_t items. */
+
+
+typedef struct
+{
+ rect_t rect;
+
+ /* If left/above is true, this cell is not obscured by cell to its
+ left/above. */
+ uint8_t left;
+ uint8_t above;
+
+ /* extend_right and extend_down are 1 for normal cells, 2 for cells which
+ extend right/down to cover an additional column/row, 3 to cover two
+ additional columns/rows etc. */
+ int extend_right;
+ int extend_down;
+
+ /* Contents of this cell. */
+ line_t** lines;
+ int lines_num;
+ paragraph_t** paragraphs;
+ int paragraphs_num;
+} cell_t;
+/* A cell within a table. */
+
+void extract_cell_init(cell_t* cell);
+void extract_cell_free(extract_alloc_t* alloc, cell_t** pcell);
+
+typedef struct
+{
+ point_t pos; /* top-left. */
+
+ /* Array of cells_num_x*cells_num_y cells; cell (x, y) is at index:
+ cells_num_x * y + x.
+ */
+ cell_t** cells;
+ int cells_num_x;
+ int cells_num_y;
+} table_t; /* A table: cells_num_x by cells_num_y cells. */
+
+
typedef struct
{
span_t** spans;
@@ -129,10 +219,17 @@ typedef struct
int paragraphs_num;
/* These refer to items in .lines. Initially empty, then set
by extract_join(). */
+
+ tablelines_t tablelines_horizontal;
+ tablelines_t tablelines_vertical;
+
+ table_t** tables;
+ int tables_num;
} extract_page_t;
/* A page. Contains different representations of the list of spans. NB not
-called page_t because this clashes with a system type on hpux. */
+called page_t because this clashes with a system type on hpux. */
+
typedef struct
{
@@ -150,9 +247,31 @@ typedef struct
int imagetypes_num;
} images_t;
+
int extract_document_join(extract_alloc_t* alloc, document_t* document);
+/* This does all the work of finding paragraphs and tables. */
double extract_matrices_to_font_size(matrix_t* ctm, matrix_t* trm);
+/* Things below here are used when generating output. */
+
+typedef struct
+{
+ char* name;
+ double size;
+ int bold;
+ int italic;
+} font_t;
+/* Basic information about current font. */
+
+typedef struct
+{
+ font_t font;
+ matrix_t* ctm_prev;
+} content_state_t;
+/* Used to keep track of font information when writing paragraphs of odt
+content, e.g. so we know whether the font has changed and we need to start a
+new odt span. */
+
#endif