+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
+#ifndef MUPDF_FITZ_STORY_H
+#define MUPDF_FITZ_STORY_H
+
+#include "mupdf/fitz/system.h"
+#include "mupdf/fitz/context.h"
+#include "mupdf/fitz/buffer.h"
+#include "mupdf/fitz/device.h"
+#include "mupdf/fitz/xml.h"
+#include "mupdf/fitz/archive.h"
+
+/*
+ This header file provides an API for laying out and placing styled
+ text on a page, or pages.
+
+ First a text story is created from some styled HTML.
+
+ Next, this story can be laid out into a given rectangle (possibly
+ retrying several times with updated rectangles as required).
+
+ Next, the laid out story can be drawn to a given device.
+
+ In the case where the text story cannot be fitted into the given
+ areas all at once, these two steps can be repeated multiple
+ times until the text story is completely consumed.
+
+ Finally, the text story can be dropped in the usual fashion.
+*/
+
+
+typedef struct fz_story fz_story;
+
+/*
+ Create a text story using styled html.
+
+ Passing a NULL buffer will be treated as an empty document.
+ Passing a NULL user_css will be treated as an empty CSS string.
+ A non-NULL dir will allow images etc to be loaded. The
+ story keeps its own reference, so the caller can drop its
+ reference after this call.
+*/
+fz_story *fz_new_story(fz_context *ctx, fz_buffer *buf, const char *user_css, float em, fz_archive *dir);
+
+/*
+ Retrieve the warnings given from parsing this story.
+
+ If there are warnings, this will be returned as a NULL terminated
+ C string. If there are no warnings, this will return NULL.
+
+ These warnings will not be complete until AFTER any DOM manipulations
+ have been completed.
+
+ This function does not need to be called, but once it has been
+ the DOM is no longer accessible, and any fz_xml pointer
+ retrieved from fz_story_document is no longer valid.
+*/
+const char *fz_story_warnings(fz_context *ctx, fz_story *story);
+
+/*
+ Equivalent to fz_place_story_flags with flags being 0.
+*/
+int fz_place_story(fz_context *ctx, fz_story *story, fz_rect where, fz_rect *filled);
+
+/*
+ Place (or continue placing) a story into the supplied rectangle
+ 'where', updating 'filled' with the actual area that was used.
+ Returns zero (FZ_PLACE_STORY_RETURN_ALL_FITTED) if all the
+ content fitted, non-zero if there is more to fit.
+
+ If the FZ_PLACE_STORY_FLAG_NO_OVERFLOW flag is set, then a
+ return code of FZ_PLACE_STORY_RETURN_OVERFLOW_WIDTH will be
+ returned when the next item (word) to be placed would not fit
+ in a rectangle of that given width.
+
+ Note, that filled may not be returned as a strict subset of
+ where, due to padding/margins at the bottom of pages, and
+ non-wrapping content extending to the right.
+
+ Subsequent calls will attempt to place the same section of story
+ again and again, until the placed story is drawn using fz_draw_story,
+ whereupon subsequent calls to fz_place_story will attempt to place
+ the unused remainder of the story.
+
+ After this function is called, the DOM is no longer accessible,
+ and any fz_xml pointer retrieved from fz_story_document is no
+ longer valid.
+
+ flags: Additional flags controlling layout. Pass 0 if none
+ required.
+*/
+int fz_place_story_flags(fz_context *ctx, fz_story *story, fz_rect where, fz_rect *filled, int flags);
+
+enum
+{
+ /* Avoid the usual HTML behaviour of overflowing the box horizontally
+ * in some circumstances. We now abort the place in such cases and
+ * return with FZ_PLACE_STORY_RETURN_OVERFLOW_WIDTH. */
+ FZ_PLACE_STORY_FLAG_NO_OVERFLOW = 1,
+
+ /* Specific return codes from fz_place_story_flags. Any
+ * non-zero return also means there is 'more to fit'. */
+ FZ_PLACE_STORY_RETURN_ALL_FITTED = 0,
+ FZ_PLACE_STORY_RETURN_OVERFLOW_WIDTH = 2
+};
+
+/*
+ Draw the placed story to the given device.
+
+ This moves the point at which subsequent calls to fz_place_story
+ will restart placing to the end of what has just been output.
+*/
+void fz_draw_story(fz_context *ctx, fz_story *story, fz_device *dev, fz_matrix ctm);
+
+/*
+ Reset the position within the story at which the next layout call
+ will continue to the start of the story.
+*/
+void fz_reset_story(fz_context *ctx, fz_story *story);
+
+/*
+ Drop the html story.
+*/
+void fz_drop_story(fz_context *ctx, fz_story *story);
+
+/*
+ Get a borrowed reference to the DOM document pointer for this
+ story. Do not destroy this reference, it will be destroyed
+ when the story is laid out.
+
+ This only makes sense before the first placement of the story
+ or retrieval of the warnings. Once either of those things happen
+ the DOM representation is destroyed.
+*/
+fz_xml *fz_story_document(fz_context *ctx, fz_story *story);
+
+
+typedef struct
+{
+ /* The overall depth of this element in the box structure.
+ * This can be used to compare the relative depths of different
+ * elements, but shouldn't be relied upon not to change between
+ * different versions of MuPDF. */
+ int depth;
+
+ /* The heading level of this element. 0 if not a header, or 1-6 for h1-h6. */
+ int heading;
+
+ /* The id for this element. */
+ const char *id;
+
+ /* The href for this element. */
+ const char *href;
+
+ /* The rectangle for this element. */
+ fz_rect rect;
+
+ /* The immediate text for this element. */
+ const char *text;
+
+ /* This indicates whether this opens and/or closes this element.
+ *
+ * As we traverse the tree we do a depth first search. In order for
+ * the caller of fz_story_positions to know whether a given element
+ * is inside another element, we therefore announce 'start' and 'stop'
+ * for each element. For instance, with:
+ *
+ * <div id="part1">
+ *  <h1>Chapter 1</h1>...
+ *  <h1>Chapter 2</h1>...
+ *  ...
+ * </div>
+ * <div id="part2">
+ *  <h1>Chapter 10</h1>...
+ *  <h1>Chapter 11</h1>...
+ *  ...
+ * </div>
+ *
+ * We would announce:
+ * + id='part1' (open)
+ * + heading=1 "Chapter 1" (open/close)
+ * + heading=1 "Chapter 2" (open/close)
+ * ...
+ * + id='part1' (close)
+ * + id='part2' (open)
+ * + heading=1 "Chapter 10" (open/close)
+ * + heading=1 "Chapter 11" (open/close)
+ * ...
+ * + id='part2' (close)
+ *
+ * If bit 0 is set, then this 'opens' the element.
+ * If bit 1 is set, then this 'closes' the element.
+ */
+ int open_close;
+
+ /* A count of the number of rectangles that the layout code has split the
+ * story into so far. After the first layout, this will be 1. If a
+ * layout is repeated, this number is not incremented. */
+ int rectangle_num;
+} fz_story_element_position;
+
+typedef void (fz_story_position_callback)(fz_context *ctx, void *arg, const fz_story_element_position *);
+
+/*
+ Enumerate the positions for key blocks in the story.
+
+ This will cause the supplied function to be called with details of each
+ element in the story that is either a header, or has an id.
+*/
+void fz_story_positions(fz_context *ctx, fz_story *story, fz_story_position_callback *cb, void *arg);
+
+#endif
diff --git a/include/mupdf/fitz/stream.h b/include/mupdf/fitz/stream.h
index b0571e0..13d1c77 100644
--- a/include/mupdf/fitz/stream.h
+++ b/include/mupdf/fitz/stream.h
@@ -1,3 +1,25 @@
+// Copyright (C) 2004-2021 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
#ifndef MUPDF_FITZ_STREAM_H
#define MUPDF_FITZ_STREAM_H
@@ -5,12 +27,12 @@
#include "mupdf/fitz/context.h"
#include "mupdf/fitz/buffer.h"
-/*
- fz_file_exists: Return true if the named file exists and is readable.
+/**
+ Return true if the named file exists and is readable.
*/
int fz_file_exists(fz_context *ctx, const char *path);
-/*
+/**
fz_stream is a buffered reader capable of seeking in both
directions.
@@ -19,24 +41,37 @@ int fz_file_exists(fz_context *ctx, const char *path);
Only the data between rp and wp is valid.
*/
-typedef struct fz_stream_s fz_stream;
+typedef struct fz_stream fz_stream;
-/*
- fz_open_file: Open the named file and wrap it in a stream.
+/**
+ Open the named file and wrap it in a stream.
- filename: Path to a file. On non-Windows machines the filename should
- be exactly as it would be passed to fopen(2). On Windows machines, the
- path should be UTF-8 encoded so that non-ASCII characters can be
- represented. Other platforms do the encoding as standard anyway (and
- in most cases, particularly for MacOS and Linux, the encoding they
- use is UTF-8 anyway).
+ filename: Path to a file. On non-Windows machines the filename
+ should be exactly as it would be passed to fopen(2). On Windows
+ machines, the path should be UTF-8 encoded so that non-ASCII
+ characters can be represented. Other platforms do the encoding
+ as standard anyway (and in most cases, particularly for MacOS
+ and Linux, the encoding they use is UTF-8 anyway).
*/
fz_stream *fz_open_file(fz_context *ctx, const char *filename);
-fz_stream *fz_open_file_progressive(fz_context *ctx, const char *filename, int bps);
+/**
+ Do the same as fz_open_file, but delete the file upon close.
+*/
+fz_stream *fz_open_file_autodelete(fz_context *ctx, const char *filename);
-/*
- fz_open_file_w: Open the named file and wrap it in a stream.
+/**
+ Open the named file and wrap it in a stream.
+
+ Does the same as fz_open_file, but in the event the file
+ does not open, it will return NULL rather than throw an
+ exception.
+*/
+fz_stream *fz_try_open_file(fz_context *ctx, const char *name);
+
+#ifdef _WIN32
+/**
+ Open the named file and wrap it in a stream.
This function is only available when compiling for Win32.
@@ -44,12 +79,21 @@ fz_stream *fz_open_file_progressive(fz_context *ctx, const char *filename, int b
to _wfopen().
*/
fz_stream *fz_open_file_w(fz_context *ctx, const wchar_t *filename);
+#endif /* _WIN32 */
-/*
- fz_open_memory: Open a block of memory as a stream.
+/**
+ Return the filename (UTF-8 encoded) from which a stream was opened.
- data: Pointer to start of data block. Ownership of the data block is
- NOT passed in.
+ Returns NULL if the filename is not available (or the stream was
+ opened from a source other than a file).
+*/
+const char *fz_stream_filename(fz_context *ctx, fz_stream *stm);
+
+/**
+ Open a block of memory as a stream.
+
+ data: Pointer to start of data block. Ownership of the data
+ block is NOT passed in.
len: Number of bytes in data block.
@@ -58,19 +102,19 @@ fz_stream *fz_open_file_w(fz_context *ctx, const wchar_t *filename);
*/
fz_stream *fz_open_memory(fz_context *ctx, const unsigned char *data, size_t len);
-/*
- fz_open_buffer: Open a buffer as a stream.
+/**
+ Open a buffer as a stream.
- buf: The buffer to open. Ownership of the buffer is NOT passed in
- (this function takes its own reference).
+ buf: The buffer to open. Ownership of the buffer is NOT passed
+ in (this function takes its own reference).
Returns pointer to newly created stream. May throw exceptions on
failure to allocate.
*/
fz_stream *fz_open_buffer(fz_context *ctx, fz_buffer *buf);
-/*
- fz_open_leecher: Attach a filter to a stream that will store any
+/**
+ Attach a filter to a stream that will store any
characters read from the stream into the supplied buffer.
chain: The underlying stream to leech from.
@@ -83,33 +127,46 @@ fz_stream *fz_open_buffer(fz_context *ctx, fz_buffer *buf);
*/
fz_stream *fz_open_leecher(fz_context *ctx, fz_stream *chain, fz_buffer *buf);
-/*
- fz_drop_stream: Close an open stream.
+/**
+ Increments the reference count for a stream. Returns the same
+ pointer.
- Drops a reference for the stream. Once no references remain
- the stream will be closed, as will any file descriptor the
- stream is using.
+ Never throws exceptions.
+*/
+fz_stream *fz_keep_stream(fz_context *ctx, fz_stream *stm);
+
+/**
+ Decrements the reference count for a stream.
+
+ When the reference count for the stream hits zero, frees the
+ storage used for the fz_stream itself, and (usually)
+ releases the underlying resources that the stream is based upon
+ (depends on the method used to open the stream initially).
*/
void fz_drop_stream(fz_context *ctx, fz_stream *stm);
-/*
- fz_tell: return the current reading position within a stream
+/**
+ Return the current reading position within a stream.
*/
int64_t fz_tell(fz_context *ctx, fz_stream *stm);
-/*
- fz_seek: Seek within a stream.
+/**
+ Seek within a stream.
stm: The stream to seek within.
offset: The offset to seek to.
whence: From where the offset is measured (see fseek).
+ SEEK_SET - start of stream.
+ SEEK_CUR - current position.
+ SEEK_END - end of stream.
+
*/
void fz_seek(fz_context *ctx, fz_stream *stm, int64_t offset, int whence);
-/*
- fz_read: Read from a stream into a given data block.
+/**
+ Read from a stream into a given data block.
stm: The stream to read from.
@@ -121,8 +178,8 @@ void fz_seek(fz_context *ctx, fz_stream *stm, int64_t offset, int whence);
*/
size_t fz_read(fz_context *ctx, fz_stream *stm, unsigned char *data, size_t len);
-/*
- fz_skip: Read from a stream discarding data.
+/**
+ Read from a stream discarding data.
stm: The stream to read from.
@@ -132,8 +189,8 @@ size_t fz_read(fz_context *ctx, fz_stream *stm, unsigned char *data, size_t len)
*/
size_t fz_skip(fz_context *ctx, fz_stream *stm, size_t len);
-/*
- fz_read_all: Read all of a stream into a buffer.
+/**
+ Read all of a stream into a buffer.
stm: The stream to read from
@@ -144,12 +201,20 @@ size_t fz_skip(fz_context *ctx, fz_stream *stm, size_t len);
*/
fz_buffer *fz_read_all(fz_context *ctx, fz_stream *stm, size_t initial);
-/*
- fz_read_file: Read all the contents of a file into a buffer.
+/**
+ Read all the contents of a file into a buffer.
*/
fz_buffer *fz_read_file(fz_context *ctx, const char *filename);
-/*
+/**
+ Read all the contents of a file into a buffer.
+
+ Returns NULL if the file does not exist, otherwise
+ behaves exactly as fz_read_file.
+*/
+fz_buffer *fz_try_read_file(fz_context *ctx, const char *filename);
+
+/**
fz_read_[u]int(16|24|32|64)(_le)?
Read a 16/32/64 bit signed/unsigned integer from stream,
@@ -175,41 +240,40 @@ int16_t fz_read_int16_le(fz_context *ctx, fz_stream *stm);
int32_t fz_read_int32_le(fz_context *ctx, fz_stream *stm);
int64_t fz_read_int64_le(fz_context *ctx, fz_stream *stm);
-/*
- fz_read_string: Read a null terminated string from the stream into
+float fz_read_float_le(fz_context *ctx, fz_stream *stm);
+float fz_read_float(fz_context *ctx, fz_stream *stm);
+
+/**
+ Read a null terminated string from the stream into
a buffer of a given length. The buffer will be null terminated.
- Throws on failure (including the failure to fit the entire string
- including the terminator into the buffer).
+ Throws on failure (including the failure to fit the entire
+ string including the terminator into the buffer).
*/
void fz_read_string(fz_context *ctx, fz_stream *stm, char *buffer, int len);
-enum
-{
- FZ_STREAM_META_PROGRESSIVE = 1,
- FZ_STREAM_META_LENGTH = 2
-};
+/**
+ Read a utf-8 rune from a stream.
-/*
- fz_stream_meta: Perform a meta call on a stream (typically to
- request meta information about a stream).
-
- stm: The stream to query.
-
- key: The meta request identifier.
-
- size: Meta request specific parameter - typically the size of
- the data block pointed to by ptr.
-
- ptr: Meta request specific parameter - typically a pointer to
- a block of data to be filled in.
-
- Returns -1 if this stream does not support this meta operation,
- or a meta operation specific return value.
+ In the event of encountering badly formatted utf-8 codes
+ (such as a leading code with an unexpected number of following
+ codes) no error/exception is given, but undefined values may be
+ returned.
*/
-int fz_stream_meta(fz_context *ctx, fz_stream *stm, int key, int size, void *ptr);
+int fz_read_rune(fz_context *ctx, fz_stream *in);
-/*
- fz_stream_next_fn: A function type for use when implementing
+/**
+ Read a utf-16 rune from a stream (little endian and
+ big endian respectively).
+
+ In the event of encountering badly formatted utf-16 codes
+ (mismatched surrogates) no error/exception is given, but
+ undefined values may be returned.
+*/
+int fz_read_utf16_le(fz_context *ctx, fz_stream *stm);
+int fz_read_utf16_be(fz_context *ctx, fz_stream *stm);
+
+/**
+ A function type for use when implementing
fz_streams. The supplied function of this type is called
whenever data is required, and the current buffer is empty.
@@ -226,8 +290,8 @@ int fz_stream_meta(fz_context *ctx, fz_stream *stm, int key, int size, void *ptr
*/
typedef int (fz_stream_next_fn)(fz_context *ctx, fz_stream *stm, size_t max);
-/*
- fz_stream_drop_fn: A function type for use when implementing
+/**
+ A function type for use when implementing
fz_streams. The supplied function of this type is called
when the stream is dropped, to release the stream specific
state information.
@@ -236,8 +300,8 @@ typedef int (fz_stream_next_fn)(fz_context *ctx, fz_stream *stm, size_t max);
*/
typedef void (fz_stream_drop_fn)(fz_context *ctx, void *state);
-/*
- fz_stream_seek_fn: A function type for use when implementing
+/**
+ A function type for use when implementing
fz_streams. The supplied function of this type is called when
fz_seek is requested, and the arguments are as defined for
fz_seek.
@@ -246,21 +310,12 @@ typedef void (fz_stream_drop_fn)(fz_context *ctx, void *state);
*/
typedef void (fz_stream_seek_fn)(fz_context *ctx, fz_stream *stm, int64_t offset, int whence);
-/*
- fz_stream_meta_fn: A function type for use when implementing
- fz_streams. The supplied function of this type is called when
- fz_meta is requested, and the arguments are as defined for
- fz_meta.
-
- The stream can find it's private state in stm->state.
-*/
-typedef int (fz_stream_meta_fn)(fz_context *ctx, fz_stream *stm, int key, int size, void *ptr);
-
-struct fz_stream_s
+struct fz_stream
{
int refs;
int error;
int eof;
+ int progressive;
int64_t pos;
int avail;
int bits;
@@ -269,11 +324,10 @@ struct fz_stream_s
fz_stream_next_fn *next;
fz_stream_drop_fn *drop;
fz_stream_seek_fn *seek;
- fz_stream_meta_fn *meta;
};
-/*
- fz_new_stream: Create a new stream object with the given
+/**
+ Create a new stream object with the given
internal state and function pointers.
state: Internal state (opaque to everything but implementation).
@@ -287,10 +341,8 @@ struct fz_stream_s
*/
fz_stream *fz_new_stream(fz_context *ctx, void *state, fz_stream_next_fn *next, fz_stream_drop_fn *drop);
-fz_stream *fz_keep_stream(fz_context *ctx, fz_stream *stm);
-
-/*
- fz_read_best: Attempt to read a stream into a buffer. If truncated
+/**
+ Attempt to read a stream into a buffer. If truncated
is NULL behaves as fz_read_all, sets a truncated flag in case of
error.
@@ -300,21 +352,37 @@ fz_stream *fz_keep_stream(fz_context *ctx, fz_stream *stm);
truncated: Flag to store success/failure indication in.
+ worst_case: 0 for unknown, otherwise an upper bound for the
+ size of the stream.
+
Returns a buffer created from reading from the stream.
*/
-fz_buffer *fz_read_best(fz_context *ctx, fz_stream *stm, size_t initial, int *truncated);
+fz_buffer *fz_read_best(fz_context *ctx, fz_stream *stm, size_t initial, int *truncated, size_t worst_case);
-/*
- fz_read_line: Read a line from stream into the buffer until either a
- terminating newline or EOF, which it replaces with a null byte ('\0').
+/**
+ Read a line from stream into the buffer until either a
+ terminating newline or EOF, which it replaces with a null byte
+ ('\0').
- Returns buf on success, and NULL when end of file occurs while no characters
- have been read.
+ Returns buf on success, and NULL when end of file occurs while
+ no characters have been read.
*/
char *fz_read_line(fz_context *ctx, fz_stream *stm, char *buf, size_t max);
-/*
- fz_available: Ask how many bytes are available immediately from
+/**
+ Skip over a given string in a stream. Return 0 if successfully
+ skipped, non-zero otherwise. As many characters will be skipped
+ over as matched in the string.
+*/
+int fz_skip_string(fz_context *ctx, fz_stream *stm, const char *str);
+
+/**
+ Skip over whitespace (bytes <= 32) in a stream.
+*/
+void fz_skip_space(fz_context *ctx, fz_stream *stm);
+
+/**
+ Ask how many bytes are available immediately from
a given stream.
stm: The stream to read from.
@@ -343,6 +411,7 @@ static inline size_t fz_available(fz_context *ctx, fz_stream *stm, size_t max)
fz_catch(ctx)
{
fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+ fz_report_error(ctx);
fz_warn(ctx, "read error; treating as end of file");
stm->error = 1;
c = EOF;
@@ -356,8 +425,8 @@ static inline size_t fz_available(fz_context *ctx, fz_stream *stm, size_t max)
return stm->wp - stm->rp;
}
-/*
- fz_read_byte: Read the next byte from a stream.
+/**
+ Read the next byte from a stream.
stm: The stream t read from.
@@ -377,6 +446,7 @@ static inline int fz_read_byte(fz_context *ctx, fz_stream *stm)
fz_catch(ctx)
{
fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+ fz_report_error(ctx);
fz_warn(ctx, "read error; treating as end of file");
stm->error = 1;
c = EOF;
@@ -386,8 +456,8 @@ static inline int fz_read_byte(fz_context *ctx, fz_stream *stm)
return c;
}
-/*
- fz_peek_byte: Peek at the next byte in a stream.
+/**
+ Peek at the next byte in a stream.
stm: The stream to peek at.
@@ -411,6 +481,7 @@ static inline int fz_peek_byte(fz_context *ctx, fz_stream *stm)
fz_catch(ctx)
{
fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
+ fz_report_error(ctx);
fz_warn(ctx, "read error; treating as end of file");
stm->error = 1;
c = EOF;
@@ -420,8 +491,8 @@ static inline int fz_peek_byte(fz_context *ctx, fz_stream *stm)
return c;
}
-/*
- fz_unread_byte: Unread the single last byte successfully
+/**
+ Unread the single last byte successfully
read from a stream. Do not call this without having
successfully read a byte.
@@ -432,6 +503,13 @@ static inline void fz_unread_byte(fz_context *ctx FZ_UNUSED, fz_stream *stm)
stm->rp--;
}
+/**
+ Query if the stream has reached EOF (during normal bytewise
+ reading).
+
+ See fz_is_eof_bits for the equivalent function for bitwise
+ reading.
+*/
static inline int fz_is_eof(fz_context *ctx, fz_stream *stm)
{
if (stm->rp == stm->wp)
@@ -443,8 +521,8 @@ static inline int fz_is_eof(fz_context *ctx, fz_stream *stm)
return 0;
}
-/*
- fz_read_bits: Read the next n bits from a stream (assumed to
+/**
+ Read the next n bits from a stream (assumed to
be packed most significant bit first).
stm: The stream to read from.
@@ -486,8 +564,8 @@ static inline unsigned int fz_read_bits(fz_context *ctx, fz_stream *stm, int n)
return x;
}
-/*
- fz_read_rbits: Read the next n bits from a stream (assumed to
+/**
+ Read the next n bits from a stream (assumed to
be packed least significant bit first).
stm: The stream to read from.
@@ -535,8 +613,8 @@ static inline unsigned int fz_read_rbits(fz_context *ctx, fz_stream *stm, int n)
return x;
}
-/*
- fz_sync_bits: Called after reading bits to tell the stream
+/**
+ Called after reading bits to tell the stream
that we are about to return to reading bytewise. Resyncs
the stream to whole byte boundaries.
*/
@@ -545,9 +623,24 @@ static inline void fz_sync_bits(fz_context *ctx FZ_UNUSED, fz_stream *stm)
stm->avail = 0;
}
+/**
+ Query if the stream has reached EOF (during bitwise
+ reading).
+
+ See fz_is_eof for the equivalent function for bytewise
+ reading.
+*/
static inline int fz_is_eof_bits(fz_context *ctx, fz_stream *stm)
{
return fz_is_eof(ctx, stm) && (stm->avail == 0 || stm->bits == EOF);
}
+/* Implementation details: subject to change. */
+
+/**
+ Create a stream from a FILE * that will not be closed
+ when the stream is dropped.
+*/
+fz_stream *fz_open_file_ptr_no_close(fz_context *ctx, FILE *file);
+
#endif
diff --git a/include/mupdf/fitz/string-util.h b/include/mupdf/fitz/string-util.h
index 13941d6..4acc644 100644
--- a/include/mupdf/fitz/string-util.h
+++ b/include/mupdf/fitz/string-util.h
@@ -1,33 +1,66 @@
+// Copyright (C) 2004-2022 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
#ifndef MUPDF_FITZ_STRING_H
#define MUPDF_FITZ_STRING_H
#include "mupdf/fitz/system.h"
+#include "mupdf/fitz/context.h"
-/* The Unicode character used to incoming character whose value is unknown or unrepresentable. */
+/* The Unicode character used to replace an incoming character whose
+ * value is unknown or unrepresentable. */
#define FZ_REPLACEMENT_CHARACTER 0xFFFD
-/*
+/**
Safe string functions
*/
-/*
- fz_strsep: Given a pointer to a C string (or a pointer to NULL) break
- it at the first occurrence of a delimiter char (from a given set).
+/**
+ Return strlen(s), if that is less than maxlen, or maxlen if
+ there is no null byte ('\0') among the first maxlen bytes.
+*/
+size_t fz_strnlen(const char *s, size_t maxlen);
- stringp: Pointer to a C string pointer (or NULL). Updated on exit to
- point to the first char of the string after the delimiter that was
- found. The string pointed to by stringp will be corrupted by this
- call (as the found delimiter will be overwritten by 0).
+/**
+ Given a pointer to a C string (or a pointer to NULL) break
+ it at the first occurrence of a delimiter char (from a given
+ set).
+
+ stringp: Pointer to a C string pointer (or NULL). Updated on
+ exit to point to the first char of the string after the
+ delimiter that was found. The string pointed to by stringp will
+ be corrupted by this call (as the found delimiter will be
+ overwritten by 0).
delim: A C string of acceptable delimiter characters.
- Returns a pointer to a C string containing the chars of stringp up
- to the first delimiter char (or the end of the string), or NULL.
+ Returns a pointer to a C string containing the chars of stringp
+ up to the first delimiter char (or the end of the string), or
+ NULL.
*/
char *fz_strsep(char **stringp, const char *delim);
-/*
- fz_strlcpy: Copy at most n-1 chars of a string into a destination
+/**
+ Copy at most n-1 chars of a string into a destination
buffer with null termination, returning the real length of the
initial string (excluding terminator).
@@ -41,8 +74,8 @@ char *fz_strsep(char **stringp, const char *delim);
*/
size_t fz_strlcpy(char *dst, const char *src, size_t n);
-/*
- fz_strlcat: Concatenate 2 strings, with a maximum length.
+/**
+ Concatenate 2 strings, with a maximum length.
dst: pointer to first string in a buffer of n bytes.
@@ -50,50 +83,108 @@ size_t fz_strlcpy(char *dst, const char *src, size_t n);
n: Size (in bytes) of buffer that dst is in.
- Returns the real length that a concatenated dst + src would have been
- (not including terminator).
+ Returns the real length that a concatenated dst + src would have
+ been (not including terminator).
*/
size_t fz_strlcat(char *dst, const char *src, size_t n);
-/*
- fz_dirname: extract the directory component from a path.
+/**
+ Find the start of the first occurrence of the substring needle in haystack.
+*/
+void *fz_memmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen);
+
+/**
+ Extract the directory component from a path.
*/
void fz_dirname(char *dir, const char *path, size_t dirsize);
-/*
- fz_urldecode: decode url escapes.
+/**
+ Find the filename component in a path.
+*/
+const char *fz_basename(const char *path);
+
+/**
+ Like fz_decode_uri_component but in-place.
*/
char *fz_urldecode(char *url);
-/*
- fz_format_output_path: create output file name using a template.
- If the path contains %[0-9]*d, the first such pattern will be replaced
- with the page number. If the template does not contain such a pattern, the page
- number will be inserted before the file suffix. If the template does not have
- a file suffix, the page number will be added to the end.
+/**
+ * Return a new string representing the unencoded version of the given URI.
+ * This decodes all escape sequences except those that would result in one of
+ * the reserved characters that are part of the URI syntax (; / ? : @ & = + $ , #).
+ */
+char *fz_decode_uri(fz_context *ctx, const char *s);
+
+/**
+ * Return a new string representing the unencoded version of the given URI component.
+ * This decodes all escape sequences!
+ */
+char *fz_decode_uri_component(fz_context *ctx, const char *s);
+
+/**
+ * Return a new string representing the provided string encoded as a URI.
+ */
+char *fz_encode_uri(fz_context *ctx, const char *s);
+
+/**
+ * Return a new string representing the provided string encoded as a URI component.
+ * This also encodes the special reserved characters (; / ? : @ & = + $ , #).
+ */
+char *fz_encode_uri_component(fz_context *ctx, const char *s);
+
+/**
+ * Return a new string representing the provided string encoded as a URI path name.
+ * This also encodes the special reserved characters except /.
+ */
+char *fz_encode_uri_pathname(fz_context *ctx, const char *s);
+
+/**
+ Create output file name using a template.
+
+ If the path contains %[0-9]*d, the first such pattern will be
+ replaced with the page number. If the template does not contain
+ such a pattern, the page number will be inserted before the
+ filename extension. If the template does not have a filename
+ extension, the page number will be added to the end.
*/
void fz_format_output_path(fz_context *ctx, char *path, size_t size, const char *fmt, int page);
-/*
- fz_cleanname: rewrite path to the shortest string that names the same path.
+/**
+ Rewrite path to the shortest string that names the same path.
- Eliminates multiple and trailing slashes, interprets "." and "..".
- Overwrites the string in place.
+ Eliminates multiple and trailing slashes, interprets "." and
+ "..". Overwrites the string in place.
*/
char *fz_cleanname(char *name);
-/*
+/**
+ Rewrite path to the shortest string that names the same path.
+
+ Eliminates multiple and trailing slashes, interprets "." and
+ "..". Allocates a new string that the caller must free.
+*/
+char *fz_cleanname_strdup(fz_context *ctx, const char *name);
+
+/**
+ Resolve a path to an absolute file name.
+ The resolved path buffer must be of at least PATH_MAX size.
+*/
+char *fz_realpath(const char *path, char *resolved_path);
+
+/**
Case insensitive (ASCII only) string comparison.
*/
int fz_strcasecmp(const char *a, const char *b);
+int fz_strncasecmp(const char *a, const char *b, size_t n);
-/*
- FZ_UTFMAX: Maximum number of bytes in a decoded rune (maximum length returned by fz_chartorune).
+/**
+ FZ_UTFMAX: Maximum number of bytes in a decoded rune (maximum
+ length returned by fz_chartorune).
*/
enum { FZ_UTFMAX = 4 };
-/*
- fz_chartorune: UTF8 decode a single rune from a sequence of chars.
+/**
+ UTF8 decode a single rune from a sequence of chars.
rune: Pointer to an int to assign the decoded 'rune' to.
@@ -103,8 +194,8 @@ enum { FZ_UTFMAX = 4 };
*/
int fz_chartorune(int *rune, const char *str);
-/*
- fz_runetochar: UTF8 encode a rune to a sequence of chars.
+/**
+ UTF8 encode a rune to a sequence of chars.
str: Pointer to a place to put the UTF8 encoded character.
@@ -114,17 +205,42 @@ int fz_chartorune(int *rune, const char *str);
*/
int fz_runetochar(char *str, int rune);
-/*
- fz_runelen: Count how many chars are required to represent a rune.
+/**
+ Count how many chars are required to represent a rune.
rune: The rune to encode.
- Returns the number of bytes required to represent this run in UTF8.
+ Returns the number of bytes required to represent this rune in
+ UTF8.
*/
int fz_runelen(int rune);
-/*
- fz_utflen: Count how many runes the UTF-8 encoded string
+/**
+ Compute the index of a rune in a string.
+
+ str: Pointer to beginning of a string.
+
+ p: Pointer to a char in str.
+
+ Returns the index of the rune pointed to by p in str.
+*/
+int fz_runeidx(const char *str, const char *p);
+
+/**
+ Obtain a pointer to the char representing the rune
+ at a given index.
+
+ str: Pointer to beginning of a string.
+
+ idx: Index of a rune to return a char pointer to.
+
+ Returns a pointer to the char where the desired rune starts,
+ or NULL if the string ends before the index is reached.
+*/
+const char *fz_runeptr(const char *str, int idx);
+
+/**
+ Count how many runes the UTF-8 encoded string
consists of.
s: The UTF-8 encoded, NUL-terminated text string.
@@ -134,33 +250,37 @@ int fz_runelen(int rune);
int fz_utflen(const char *s);
/*
- fz_strtof: Locale-independent decimal to binary
- conversion. On overflow return (-)INFINITY and set errno to ERANGE. On
- underflow return 0 and set errno to ERANGE. Special inputs (case
- insensitive): "NAN", "INF" or "INFINITY".
+ Convert a wchar string into a new heap allocated utf8 one.
+*/
+char *fz_utf8_from_wchar(fz_context *ctx, const wchar_t *s);
+
+/*
+ Convert a utf8 string into a new heap allocated wchar one.
+*/
+wchar_t *fz_wchar_from_utf8(fz_context *ctx, const char *path);
+
+
+/**
+ Locale-independent decimal to binary conversion. On overflow
+ return (-)INFINITY and set errno to ERANGE. On underflow return
+ 0 and set errno to ERANGE. Special inputs (case insensitive):
+ "NAN", "INF" or "INFINITY".
*/
float fz_strtof(const char *s, char **es);
-/*
- fz_strtof_no_exp: Like fz_strtof, but does not recognize exponent
- format. So fz_strtof_no_exp("1.5e20", &tail) will return 1.5 and tail
- will point to "e20".
-*/
-
-float fz_strtof_no_exp(const char *string, char **tailptr);
-/*
- fz_grisu: Compute decimal integer m, exp such that:
- f = m * 10^exp
- m is as short as possible without losing exactness
- Assumes special cases (0, NaN, +Inf, -Inf) have been handled.
-*/
int fz_grisu(float f, char *s, int *exp);
-/*
+/**
Check and parse string into page ranges:
- ( ','? ([0-9]+|'N') ( '-' ([0-9]+|N) )? )+
+ /,?(-?\d+|N)(-(-?\d+|N))?/
*/
int fz_is_page_range(fz_context *ctx, const char *s);
const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n);
+/**
+ Unicode aware tolower and toupper functions.
+*/
+int fz_tolower(int c);
+int fz_toupper(int c);
+
#endif
diff --git a/include/mupdf/fitz/structured-text.h b/include/mupdf/fitz/structured-text.h
index b062813..ae108b4 100644
--- a/include/mupdf/fitz/structured-text.h
+++ b/include/mupdf/fitz/structured-text.h
@@ -1,59 +1,151 @@
+// Copyright (C) 2004-2021 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
#ifndef MUPDF_FITZ_STRUCTURED_TEXT_H
#define MUPDF_FITZ_STRUCTURED_TEXT_H
#include "mupdf/fitz/system.h"
+#include "mupdf/fitz/types.h"
#include "mupdf/fitz/context.h"
#include "mupdf/fitz/geometry.h"
#include "mupdf/fitz/font.h"
-#include "mupdf/fitz/colorspace.h"
#include "mupdf/fitz/image.h"
#include "mupdf/fitz/output.h"
#include "mupdf/fitz/device.h"
+#include "mupdf/fitz/pool.h"
-/*
+/**
+ Simple text layout (for use with annotation editing primarily).
+*/
+typedef struct fz_layout_char
+{
+ float x, advance;
+ const char *p; /* location in source text of character */
+ struct fz_layout_char *next;
+} fz_layout_char;
+
+typedef struct fz_layout_line
+{
+ float x, y, font_size;
+ const char *p; /* location in source text of start of line */
+ fz_layout_char *text;
+ struct fz_layout_line *next;
+} fz_layout_line;
+
+typedef struct
+{
+ fz_pool *pool;
+ fz_matrix matrix;
+ fz_matrix inv_matrix;
+ fz_layout_line *head, **tailp;
+ fz_layout_char **text_tailp;
+} fz_layout_block;
+
+/**
+ Create a new layout block, with new allocation pool, zero
+ matrices, and initialise linked pointers.
+*/
+fz_layout_block *fz_new_layout(fz_context *ctx);
+
+/**
+ Drop layout block. Free the pool, and linked blocks.
+
+ Never throws exceptions.
+*/
+void fz_drop_layout(fz_context *ctx, fz_layout_block *block);
+
+/**
+ Add a new line to the end of the layout block.
+*/
+void fz_add_layout_line(fz_context *ctx, fz_layout_block *block, float x, float y, float h, const char *p);
+
+/**
+ Add a new char to the line at the end of the layout block.
+*/
+void fz_add_layout_char(fz_context *ctx, fz_layout_block *block, float x, float w, const char *p);
+
+/**
Text extraction device: Used for searching, format conversion etc.
(In development - Subject to change in future versions)
*/
-typedef struct fz_stext_char_s fz_stext_char;
-typedef struct fz_stext_line_s fz_stext_line;
-typedef struct fz_stext_block_s fz_stext_block;
-typedef struct fz_stext_page_s fz_stext_page;
+typedef struct fz_stext_char fz_stext_char;
+typedef struct fz_stext_line fz_stext_line;
+typedef struct fz_stext_block fz_stext_block;
-/*
- FZ_STEXT_PRESERVE_LIGATURES: If this option is activated ligatures
- are passed through to the application in their original form. If
- this option is deactivated ligatures are expanded into their
- constituent parts, e.g. the ligature ffi is expanded into three
- separate characters f, f and i.
+/**
+ FZ_STEXT_PRESERVE_LIGATURES: If this option is activated
+ ligatures are passed through to the application in their
+ original form. If this option is deactivated ligatures are
+ expanded into their constituent parts, e.g. the ligature ffi is
+ expanded into three separate characters f, f and i.
- FZ_STEXT_PRESERVE_WHITESPACE: If this option is activated whitespace
- is passed through to the application in its original form. If this
- option is deactivated any type of horizontal whitespace (including
- horizontal tabs) will be replaced with space characters of variable
- width.
+ FZ_STEXT_PRESERVE_WHITESPACE: If this option is activated
+ whitespace is passed through to the application in its original
+ form. If this option is deactivated any type of horizontal
+ whitespace (including horizontal tabs) will be replaced with
+ space characters of variable width.
- FZ_STEXT_PRESERVE_IMAGES: If this option is set, then images will
- be stored in the structured text structure. The default is to ignore
- all images.
+ FZ_STEXT_PRESERVE_IMAGES: If this option is set, then images
+ will be stored in the structured text structure. The default is
+ to ignore all images.
+
+ FZ_STEXT_INHIBIT_SPACES: If this option is set, we will not try
+ to add missing space characters where there are large gaps
+ between characters.
+
+ FZ_STEXT_DEHYPHENATE: If this option is set, hyphens at the
+ end of a line will be removed and the lines will be merged.
+
+ FZ_STEXT_PRESERVE_SPANS: If this option is set, spans on the same line
+ will not be merged. Each line will thus be a span of text with the same
+ font, colour, and size.
+
+ FZ_STEXT_MEDIABOX_CLIP: If this option is set, characters entirely
+ outside each page's mediabox will be ignored.
*/
enum
{
FZ_STEXT_PRESERVE_LIGATURES = 1,
FZ_STEXT_PRESERVE_WHITESPACE = 2,
FZ_STEXT_PRESERVE_IMAGES = 4,
+ FZ_STEXT_INHIBIT_SPACES = 8,
+ FZ_STEXT_DEHYPHENATE = 16,
+ FZ_STEXT_PRESERVE_SPANS = 32,
+ FZ_STEXT_MEDIABOX_CLIP = 64,
+ FZ_STEXT_USE_CID_FOR_UNKNOWN_UNICODE = 128,
};
-/*
- A text page is a list of blocks, together with an overall bounding box.
+/**
+ A text page is a list of blocks, together with an overall
+ bounding box.
*/
-struct fz_stext_page_s
+typedef struct
{
fz_pool *pool;
fz_rect mediabox;
fz_stext_block *first_block, *last_block;
-};
+} fz_stext_page;
enum
{
@@ -61,10 +153,11 @@ enum
FZ_STEXT_BLOCK_IMAGE = 1
};
-/*
- A text block is a list of lines of text (typically a paragraph), or an image.
+/**
+ A text block is a list of lines of text (typically a paragraph),
+ or an image.
*/
-struct fz_stext_block_s
+struct fz_stext_block
{
int type;
fz_rect bbox;
@@ -75,10 +168,10 @@ struct fz_stext_block_s
fz_stext_block *prev, *next;
};
-/*
+/**
A text line is a list of characters that share a common baseline.
*/
-struct fz_stext_line_s
+struct fz_stext_line
{
int wmode; /* 0 for horizontal, 1 for vertical */
fz_point dir; /* normalized direction of baseline */
@@ -87,111 +180,186 @@ struct fz_stext_line_s
fz_stext_line *prev, *next;
};
-/*
- A text char is a unicode character, the style in which is appears, and
- the point at which it is positioned.
+/**
+ A text char is a unicode character, the style in which it
+ appears, and the point at which it is positioned.
*/
-struct fz_stext_char_s
+struct fz_stext_char
{
- int c;
+ int c; /* unicode character value */
+ int bidi; /* even for LTR, odd for RTL */
+ int color; /* sRGB hex color */
fz_point origin;
- fz_rect bbox;
+ fz_quad quad;
float size;
fz_font *font;
fz_stext_char *next;
};
-extern const char *fz_stext_options_usage;
+FZ_DATA extern const char *fz_stext_options_usage;
-int fz_stext_char_count(fz_context *ctx, fz_stext_page *page);
-const fz_stext_char *fz_stext_char_at(fz_context *ctx, fz_stext_page *page, int idx);
+/**
+ Create an empty text page.
-/*
- fz_new_stext_page: Create an empty text page.
-
- The text page is filled out by the text device to contain the blocks
- and lines of text on the page.
+ The text page is filled out by the text device to contain the
+ blocks and lines of text on the page.
mediabox: optional mediabox information.
*/
-fz_stext_page *fz_new_stext_page(fz_context *ctx, const fz_rect *mediabox);
+fz_stext_page *fz_new_stext_page(fz_context *ctx, fz_rect mediabox);
void fz_drop_stext_page(fz_context *ctx, fz_stext_page *page);
-/*
- fz_print_stext_page_as_html: Output a page to a file in HTML (visual) format.
+/**
+ Output structured text to a file in HTML (visual) format.
*/
-void fz_print_stext_page_as_html(fz_context *ctx, fz_output *out, fz_stext_page *page);
+void fz_print_stext_page_as_html(fz_context *ctx, fz_output *out, fz_stext_page *page, int id);
void fz_print_stext_header_as_html(fz_context *ctx, fz_output *out);
void fz_print_stext_trailer_as_html(fz_context *ctx, fz_output *out);
-/*
- fz_print_stext_page_as_xhtml: Output a page to a file in XHTML (semantic) format.
+/**
+ Output structured text to a file in XHTML (semantic) format.
*/
-void fz_print_stext_page_as_xhtml(fz_context *ctx, fz_output *out, fz_stext_page *page);
+void fz_print_stext_page_as_xhtml(fz_context *ctx, fz_output *out, fz_stext_page *page, int id);
void fz_print_stext_header_as_xhtml(fz_context *ctx, fz_output *out);
void fz_print_stext_trailer_as_xhtml(fz_context *ctx, fz_output *out);
-/*
- fz_print_stext_page_as_xml: Output a page to a file in XML format.
+/**
+ Output structured text to a file in XML format.
*/
-void fz_print_stext_page_as_xml(fz_context *ctx, fz_output *out, fz_stext_page *page);
+void fz_print_stext_page_as_xml(fz_context *ctx, fz_output *out, fz_stext_page *page, int id);
-/*
- fz_print_stext_page_as_text: Output a page to a file in UTF-8 format.
+/**
+ Output structured text to a file in JSON format.
+*/
+void fz_print_stext_page_as_json(fz_context *ctx, fz_output *out, fz_stext_page *page, float scale);
+
+/**
+ Output structured text to a file in plain-text UTF-8 format.
*/
void fz_print_stext_page_as_text(fz_context *ctx, fz_output *out, fz_stext_page *page);
-/*
- fz_search_stext_page: Search for occurrence of 'needle' in text page.
+/**
+ Search for occurrence of 'needle' in text page.
- Return the number of hits and store hit bboxes in the passed in array.
+ Return the number of hits and store hit quads in the passed in
+ array.
- NOTE: This is an experimental interface and subject to change without notice.
+ NOTE: This is an experimental interface and subject to change
+ without notice.
*/
-int fz_search_stext_page(fz_context *ctx, fz_stext_page *text, const char *needle, fz_rect *hit_bbox, int hit_max);
+int fz_search_stext_page(fz_context *ctx, fz_stext_page *text, const char *needle, int *hit_mark, fz_quad *hit_bbox, int hit_max);
-/*
- fz_highlight_selection: Return a list of rectangles to highlight lines inside the selection points.
+/**
+ Return a list of quads to highlight lines inside the selection
+ points.
*/
-int fz_highlight_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, fz_rect *hit_bbox, int hit_max);
+int fz_highlight_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, fz_quad *quads, int max_quads);
-/*
- fz_copy_selection: Return a newly allocated UTF-8 string with the text for a given selection.
+enum
+{
+ FZ_SELECT_CHARS,
+ FZ_SELECT_WORDS,
+ FZ_SELECT_LINES,
+};
- crlf: If true, write "\r\n" style line endings (otherwise "\n" only).
+fz_quad fz_snap_selection(fz_context *ctx, fz_stext_page *page, fz_point *ap, fz_point *bp, int mode);
+
+/**
+ Return a newly allocated UTF-8 string with the text for a given
+ selection.
+
+ crlf: If true, write "\r\n" style line endings (otherwise "\n"
+ only).
*/
char *fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_point a, fz_point b, int crlf);
-/*
- struct fz_stext_options: Options for creating a pixmap and draw device.
-*/
-typedef struct fz_stext_options_s fz_stext_options;
+/**
+ Return a newly allocated UTF-8 string with the text for a given
+ selection rectangle.
-struct fz_stext_options_s
+ crlf: If true, write "\r\n" style line endings (otherwise "\n"
+ only).
+*/
+char *fz_copy_rectangle(fz_context *ctx, fz_stext_page *page, fz_rect area, int crlf);
+
+/**
+ Options for creating structured text.
+*/
+typedef struct
{
int flags;
-};
+ float scale;
+} fz_stext_options;
-/*
- fz_parse_stext_options: Parse stext device options from a comma separated key-value string.
+/**
+ Parse stext device options from a comma separated key-value
+ string.
*/
fz_stext_options *fz_parse_stext_options(fz_context *ctx, fz_stext_options *opts, const char *string);
-/*
- fz_new_stext_device: Create a device to extract the text on a page.
+/**
+ Create a device to extract the text on a page.
Gather the text on a page into blocks and lines.
- The reading order is taken from the order the text is drawn in the
- source file, so may not be accurate.
+ The reading order is taken from the order the text is drawn in
+ the source file, so may not be accurate.
page: The text page to which content should be added. This will
usually be a newly created (empty) text page, but it can be one
- containing data already (for example when merging multiple pages,
- or watermarking).
+ containing data already (for example when merging multiple
+ pages, or watermarking).
options: Options to configure the stext device.
*/
fz_device *fz_new_stext_device(fz_context *ctx, fz_stext_page *page, const fz_stext_options *options);
+/**
+ Create a device to OCR the text on the page.
+
+ Renders the page internally to a bitmap that is then OCRd. Text
+ is then forwarded onto the target device.
+
+ target: The target device to receive the OCRd text.
+
+ ctm: The transform to apply to the mediabox to get the size for
+ the rendered page image. Also used to calculate the resolution
+ for the page image. In general, this will be the same as the CTM
+ that you pass to fz_run_page (or fz_run_display_list) to feed
+ this device.
+
+ mediabox: The mediabox (in points). Combined with the CTM to get
+ the bounds of the pixmap used internally for the rendered page
+ image.
+
+ with_list: If with_list is false, then all non-text operations
+ are forwarded instantly to the target device. This results in
+ the target device seeing all NON-text operations, followed by
+ all the text operations (derived from OCR).
+
+ If with_list is true, then all the marking operations are
+ collated into a display list which is then replayed to the
+ target device at the end.
+
+ language: NULL (for "eng"), or a pointer to a string to describe
+ the languages/scripts that should be used for OCR (e.g.
+ "eng,ara").
+
+ datadir: NULL (for ""), or a pointer to a path string otherwise
+ provided to Tesseract in the TESSDATA_PREFIX environment variable.
+
+ progress: NULL, or function to be called periodically to indicate
+ progress. Return 0 to continue, or 1 to cancel. progress_arg is
+ returned as the void *. The int is a value between 0 and 100 to
+ indicate progress.
+
+ progress_arg: A void * value to be parroted back to the progress
+ function.
+*/
+fz_device *fz_new_ocr_device(fz_context *ctx, fz_device *target, fz_matrix ctm, fz_rect mediabox, int with_list, const char *language,
+ const char *datadir, int (*progress)(fz_context *, void *, int), void *progress_arg);
+
+fz_document *fz_open_reflowed_document(fz_context *ctx, fz_document *underdoc, const fz_stext_options *opts);
+
+
#endif
diff --git a/include/mupdf/fitz/system.h b/include/mupdf/fitz/system.h
index c480bd7..6ca13ac 100644
--- a/include/mupdf/fitz/system.h
+++ b/include/mupdf/fitz/system.h
@@ -1,3 +1,25 @@
+// Copyright (C) 2004-2022 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
#ifndef MUPDF_FITZ_SYSTEM_H
#define MUPDF_FITZ_SYSTEM_H
@@ -8,13 +30,16 @@
#endif
#endif
-/*
+/**
Include the standard libc headers.
*/
#include /* needed for size_t */
#include /* needed for va_list vararg functions */
#include /* needed for the try/catch macros */
+#include /* useful for debug printfs */
+
+#include "export.h"
#if defined(_MSC_VER) && (_MSC_VER < 1700) /* MSVC older than VS2012 */
typedef signed char int8_t;
@@ -43,7 +68,7 @@ typedef unsigned __int64 uint64_t;
#define FZ_SQRT2 1.41421356f
#define FZ_LN2 0.69314718f
-/*
+/**
Spot architectures where we have optimisations.
*/
@@ -53,32 +78,43 @@ typedef unsigned __int64 uint64_t;
#endif
#endif
-/*
+/**
Some differences in libc can be smoothed over
*/
-#ifdef __APPLE__
-#define HAVE_SIGSETJMP
-#elif defined(__unix) && !defined(__NACL__)
-#define HAVE_SIGSETJMP
+#ifndef __STRICT_ANSI__
+#if defined(__APPLE__)
+#ifndef HAVE_SIGSETJMP
+#define HAVE_SIGSETJMP 1
+#endif
+#elif defined(__unix)
+#ifndef __EMSCRIPTEN__
+#ifndef HAVE_SIGSETJMP
+#define HAVE_SIGSETJMP 1
+#endif
+#endif
+#endif
+#endif
+#ifndef HAVE_SIGSETJMP
+#define HAVE_SIGSETJMP 0
#endif
-/*
- Where possible (i.e. on platforms on which they are provided), use
- sigsetjmp/siglongjmp in preference to setjmp/longjmp. We don't alter
- signal handlers within mupdf, so there is no need for us to
- store/restore them - hence we use the non-restoring variants. This
- makes a large speed difference on MacOSX (and probably other
- platforms too.
+/**
+ Where possible (i.e. on platforms on which they are provided),
+ use sigsetjmp/siglongjmp in preference to setjmp/longjmp. We
+ don't alter signal handlers within mupdf, so there is no need
+ for us to store/restore them - hence we use the non-restoring
+ variants. This makes a large speed difference on MacOSX (and
+ probably other platforms too).
*/
-#ifdef HAVE_SIGSETJMP
+#if HAVE_SIGSETJMP
#define fz_setjmp(BUF) sigsetjmp(BUF, 0)
#define fz_longjmp(BUF,VAL) siglongjmp(BUF, VAL)
-#define fz_jmp_buf sigjmp_buf
+typedef sigjmp_buf fz_jmp_buf;
#else
#define fz_setjmp(BUF) setjmp(BUF)
#define fz_longjmp(BUF,VAL) longjmp(BUF,VAL)
-#define fz_jmp_buf jmp_buf
+typedef jmp_buf fz_jmp_buf;
#endif
/* these constants mirror the corresponding macros in stdio.h */
@@ -120,16 +156,15 @@ static __inline int signbit(double x)
#define isinf(x) (!_finite(x))
#endif
+#if _MSC_VER <= 1920 /* MSVC 2019 */
#define hypotf _hypotf
+#endif
#define atoll _atoi64
#endif
#ifdef _WIN32
-char *fz_utf8_from_wchar(const wchar_t *s);
-wchar_t *fz_wchar_from_utf8(const char *s);
-
/* really a FILE* but we don't want to include stdio.h here */
void *fz_fopen_utf8(const char *name, const char *mode);
int fz_remove_utf8(const char *name);
@@ -144,27 +179,41 @@ void fz_free_argv(int argc, char **argv);
#define S_ISDIR(mode) ((mode) & S_IFDIR)
#endif
-/* inline is standard in C++. For some compilers we can enable it within C too. */
+int64_t fz_stat_ctime(const char *path);
+int64_t fz_stat_mtime(const char *path);
+int fz_mkdir(char *path);
+
+
+/* inline is standard in C++. For some compilers we can enable it within
+ * C too. Some compilers think they know better than we do about when
+ * to actually honour inline (particularly for large functions); use
+ * fz_forceinline to kick them into really inlining. */
#ifndef __cplusplus
-#if __STDC_VERSION__ == 199901L /* C99 */
-#elif _MSC_VER >= 1500 /* MSVC 9 or newer */
+#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 or newer: 'inline' is a keyword */
+#elif defined(_MSC_VER) && (_MSC_VER >= 1500) /* MSVC 9 or newer */
#define inline __inline
-#elif __GNUC__ >= 3 /* GCC 3 or newer */
+#define fz_forceinline __forceinline
+#elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC 3 or newer */
#define inline __inline
#else /* Unknown or ancient */
#define inline
#endif
#endif
+#ifndef fz_forceinline
+#define fz_forceinline inline
+#endif
+
/* restrict is standard in C99, but not in all C++ compilers. */
-#if __STDC_VERSION__ == 199901L /* C99 */
-#elif _MSC_VER >= 1600 /* MSVC 10 or newer */
-#define restrict __restrict
-#elif __GNUC__ >= 3 /* GCC 3 or newer */
-#define restrict __restrict
+#if defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 or newer: 'restrict' is a keyword */
+#define FZ_RESTRICT restrict
+#elif defined(_MSC_VER) && (_MSC_VER >= 1600) /* MSVC 10 or newer */
+#define FZ_RESTRICT __restrict
+#elif defined(__GNUC__) && (__GNUC__ >= 3) /* GCC 3 or newer */
+#define FZ_RESTRICT __restrict
#else /* Unknown or ancient */
-#define restrict
+#define FZ_RESTRICT
#endif
/* noreturn is a GCC extension */
@@ -178,8 +227,9 @@ void fz_free_argv(int argc, char **argv);
#endif
#endif
-/* Flag unused parameters, for use with 'static inline' functions in headers. */
-#if __GNUC__ > 2 || __GNUC__ == 2 && __GNUC_MINOR__ >= 7
+/* Flag unused parameters, for use with 'static inline' functions in
+ * headers. */
+#if defined(__GNUC__) && (__GNUC__ > 2 || __GNUC__ == 2 && __GNUC_MINOR__ >= 7)
#define FZ_UNUSED __attribute__((__unused__))
#else
#define FZ_UNUSED
@@ -189,7 +239,7 @@ void fz_free_argv(int argc, char **argv);
#ifdef __printflike
#define FZ_PRINTFLIKE(F,V) __printflike(F,V)
#else
-#if __GNUC__ > 2 || __GNUC__ == 2 && __GNUC_MINOR__ >= 7
+#if defined(__GNUC__) && (__GNUC__ > 2 || __GNUC__ == 2 && __GNUC_MINOR__ >= 7)
#define FZ_PRINTFLIKE(F,V) __attribute__((__format__ (__printf__, F, V)))
#else
#define FZ_PRINTFLIKE(F,V)
@@ -202,8 +252,8 @@ void fz_free_argv(int argc, char **argv);
/* If we're compiling as thumb code, then we need to tell the compiler
* to enter and exit ARM mode around our assembly sections. If we move
- * the ARM functions to a separate file and arrange for it to be compiled
- * without thumb mode, we can save some time on entry.
+ * the ARM functions to a separate file and arrange for it to be
+ * compiled without thumb mode, we can save some time on entry.
*/
/* This is slightly suboptimal; __thumb__ and __thumb2__ become defined
* and undefined by #pragma arm/#pragma thumb - but we can't define a
@@ -218,10 +268,47 @@ void fz_free_argv(int argc, char **argv);
#endif
+/* Memory block alignment */
+
+/* Most architectures are happy with blocks being aligned to the size
+ * of void *'s. Some (notably sparc) are not.
+ *
+ * Some architectures (notably amd64) are happy for pointers to be 32bit
+ * aligned even on 64bit systems. By making use of this we can save lots
+ * of memory in data structures (notably the display list).
+ *
+ * We attempt to cope with these vagaries via the following definitions.
+ */
+
+/* All blocks allocated by mupdf's allocators are expected to be
+ * returned aligned to FZ_MEMORY_BLOCK_ALIGN_MOD. This is sizeof(void *)
+ * unless overwritten by a predefinition, or by a specific architecture
+ * being detected. */
+#ifndef FZ_MEMORY_BLOCK_ALIGN_MOD
+#if defined(sparc) || defined(__sparc) || defined(__sparc__)
+#define FZ_MEMORY_BLOCK_ALIGN_MOD 8
+#else
+#define FZ_MEMORY_BLOCK_ALIGN_MOD sizeof(void *)
+#endif
+#endif
+
+/* MuPDF will ensure that its use of pointers in packed structures
+ * (such as the display list) will be aligned to FZ_POINTER_ALIGN_MOD.
+ * This is the same as FZ_MEMORY_BLOCK_ALIGN_MOD unless overridden by
+ * a predefinition, or by a specific architecture being detected. */
+#ifndef FZ_POINTER_ALIGN_MOD
+#if defined(__amd64) || defined(__amd64__) || defined(__x86_64) || defined(__x86_64__)
+#define FZ_POINTER_ALIGN_MOD 4
+#else
+#define FZ_POINTER_ALIGN_MOD FZ_MEMORY_BLOCK_ALIGN_MOD
+#endif
+#endif
+
#ifdef CLUSTER
-/* Include this first so our defines don't clash with the system definitions */
+/* Include this first so our defines don't clash with the system
+ * definitions */
#include
-/*
+/**
* Trig functions
*/
static float
@@ -321,6 +408,10 @@ static inline float my_sinf(float x)
x -= xn;
xn *= x2 / 72.0f;
x += xn;
+ if (x > 1)
+ x = 1;
+ else if (x < -1)
+ x = -1;
return x;
}
@@ -360,4 +451,9 @@ static inline float my_atan2f(float o, float a)
#define atan2f(x,y) my_atan2f((x),(y))
#endif
+static inline int fz_is_pow2(int a)
+{
+ return (a != 0) && (a & (a-1)) == 0;
+}
+
#endif
diff --git a/include/mupdf/fitz/text.h b/include/mupdf/fitz/text.h
index d7d7ffb..d7562f9 100644
--- a/include/mupdf/fitz/text.h
+++ b/include/mupdf/fitz/text.h
@@ -1,3 +1,25 @@
+// Copyright (C) 2004-2021 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
#ifndef MUPDF_FITZ_TEXT_H
#define MUPDF_FITZ_TEXT_H
@@ -7,7 +29,7 @@
#include "mupdf/fitz/path.h"
#include "mupdf/fitz/bidi.h"
-/*
+/**
Text buffer.
The trm field contains the a, b, c and d coefficients.
@@ -19,21 +41,18 @@
with indexes into the glyph array.
*/
-typedef struct fz_text_s fz_text;
-typedef struct fz_text_span_s fz_text_span;
-typedef struct fz_text_item_s fz_text_item;
-
-struct fz_text_item_s
+typedef struct
{
float x, y;
int gid; /* -1 for one gid to many ucs mappings */
int ucs; /* -1 for one ucs to many gid mappings */
-};
+ int cid; /* CID for CJK fonts, raw character code for other fonts; or unicode for non-PDF formats. */
+} fz_text_item;
#define FZ_LANG_TAG2(c1,c2) ((c1-'a'+1) + ((c2-'a'+1)*27))
#define FZ_LANG_TAG3(c1,c2,c3) ((c1-'a'+1) + ((c2-'a'+1)*27) + ((c3-'a'+1)*27*27))
-typedef enum fz_text_language_e
+typedef enum
{
FZ_LANG_UNSET = 0,
FZ_LANG_ur = FZ_LANG_TAG2('u','r'),
@@ -45,7 +64,7 @@ typedef enum fz_text_language_e
FZ_LANG_zh_Hant = FZ_LANG_TAG3('z','h','t'),
} fz_text_language;
-struct fz_text_span_s
+typedef struct fz_text_span
{
fz_font *font;
fz_matrix trm;
@@ -55,41 +74,40 @@ struct fz_text_span_s
unsigned language : 15; /* The language as marked in the original document */
int len, cap;
fz_text_item *items;
- fz_text_span *next;
-};
+ struct fz_text_span *next;
+} fz_text_span;
-struct fz_text_s
+typedef struct
{
int refs;
fz_text_span *head, *tail;
-};
+} fz_text;
-/*
- fz_new_text: Create a new empty fz_text object.
+/**
+ Create a new empty fz_text object.
Throws exception on failure to allocate.
*/
fz_text *fz_new_text(fz_context *ctx);
-/*
- fz_keep_text: Add a reference to a fz_text.
+/**
+ Increment the reference count for the text object. The same
+ pointer is returned.
- text: text object to keep a reference to.
-
- Return the same text pointer.
+ Never throws exceptions.
*/
fz_text *fz_keep_text(fz_context *ctx, const fz_text *text);
-/*
- fz_drop_text: Drop a reference to the object, freeing
- if it is the last one.
+/**
+ Decrement the reference count for the text object. When the
+ reference count hits zero, the text object is freed.
- text: Object to drop the reference to.
+ Never throws exceptions.
*/
void fz_drop_text(fz_context *ctx, const fz_text *text);
-/*
- fz_show_glyph: Add a glyph/unicode value to a text object.
+/**
+ Add a glyph/unicode value to a text object.
text: Text object to add to.
@@ -101,6 +119,8 @@ void fz_drop_text(fz_context *ctx, const fz_text *text);
unicode: The unicode character for the glyph.
+ cid: The CJK CID value or raw character code.
+
wmode: 1 for vertical mode, 0 for horizontal.
bidi_level: The bidirectional level for this glyph.
@@ -113,17 +133,17 @@ void fz_drop_text(fz_context *ctx, const fz_text *text);
Throws exception on failure to allocate.
*/
-void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matrix *trm, int glyph, int unicode, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language);
+void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int glyph, int unicode, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language);
+void fz_show_glyph_aux(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, int glyph, int unicode, int cid, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language lang);
-/*
- fz_show_string: Add a UTF8 string to a text object.
+/**
+ Add a UTF8 string to a text object.
text: Text object to add to.
font: The font the string should be added in.
- trm: The transform to use. Will be updated according
- to the advance of the string on exit.
+ trm: The transform to use.
s: The utf-8 string to add.
@@ -131,18 +151,27 @@ void fz_show_glyph(fz_context *ctx, fz_text *text, fz_font *font, const fz_matri
bidi_level: The bidirectional level for this glyph.
- markup_dir: The direction of the text as specified in the
- markup.
+ markup_dir: The direction of the text as specified in the markup.
language: The language in use (if known, 0 otherwise)
- (e.g. FZ_LANG_zh_Hans).
+ (e.g. FZ_LANG_zh_Hans).
- Throws exception on failure to allocate.
+ Returns the transform updated with the advance width of the
+ string.
*/
-void fz_show_string(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix *trm, const char *s, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language);
+fz_matrix fz_show_string(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix trm, const char *s, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language);
-/*
- fz_bound_text: Find the bounds of a given text object.
+/**
+ Measure the advance width of a UTF8 string should it be added to a text object.
+
+ This uses the same layout algorithms as fz_show_string, and can be used
+ to calculate text alignment adjustments.
+*/
+fz_matrix
+fz_measure_string(fz_context *ctx, fz_font *user_font, fz_matrix trm, const char *s, int wmode, int bidi_level, fz_bidi_direction markup_dir, fz_text_language language);
+
+/**
+ Find the bounds of a given text object.
text: The text object to find the bounds of.
@@ -156,18 +185,9 @@ void fz_show_string(fz_context *ctx, fz_text *text, fz_font *font, fz_matrix *tr
Returns a pointer to r, which is updated to contain the
bounding box for the text object.
*/
-fz_rect *fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, const fz_matrix *ctm, fz_rect *r);
+fz_rect fz_bound_text(fz_context *ctx, const fz_text *text, const fz_stroke_state *stroke, fz_matrix ctm);
-/*
- fz_clone_text: Clone a text object.
-
- text: The text object to clone.
-
- Throws an exception on allocation failure.
-*/
-fz_text *fz_clone_text(fz_context *ctx, const fz_text *text);
-
-/*
+/**
Convert ISO 639 (639-{1,2,3,5}) language specification
strings losslessly to a 15 bit fz_text_language code.
@@ -178,7 +198,7 @@ fz_text *fz_clone_text(fz_context *ctx, const fz_text *text);
*/
fz_text_language fz_text_language_from_string(const char *str);
-/*
+/**
Recover ISO 639 (639-{1,2,3,5}) language specification
strings losslessly from a 15 bit fz_text_language code.
diff --git a/include/mupdf/fitz/track-usage.h b/include/mupdf/fitz/track-usage.h
index 6c4409f..69e8425 100644
--- a/include/mupdf/fitz/track-usage.h
+++ b/include/mupdf/fitz/track-usage.h
@@ -1,29 +1,51 @@
+// Copyright (C) 2004-2021 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see https://www.artifex.com/ or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
#ifndef TRACK_USAGE_H
#define TRACK_USAGE_H
#ifdef TRACK_USAGE
-typedef struct track_usage_data_s {
+typedef struct track_usage_data {
int count;
const char *function;
int line;
const char *desc;
- struct track_usage_data_s *next;
-} track_usage_data_t;
+ struct track_usage_data *next;
+} track_usage_data;
#define TRACK_LABEL(A) \
do { \
- static track_usage_data_t USAGE_DATA = { 0 };\
+ static track_usage_data USAGE_DATA = { 0 };\
track_usage(&USAGE_DATA, __FILE__, __LINE__, A);\
} while (0)
#define TRACK_FN() \
do { \
- static track_usage_data_t USAGE_DATA = { 0 };\
+ static track_usage_data USAGE_DATA = { 0 };\
track_usage(&USAGE_DATA, __FILE__, __LINE__, __FUNCTION__);\
} while (0)
-void track_usage(track_usage_data_t *data, const char *function, int line, const char *desc);
+void track_usage(track_usage_data *data, const char *function, int line, const char *desc);
#else
diff --git a/include/mupdf/fitz/transition.h b/include/mupdf/fitz/transition.h
index 65d170d..89a8087 100644
--- a/include/mupdf/fitz/transition.h
+++ b/include/mupdf/fitz/transition.h
@@ -1,3 +1,25 @@
+// Copyright (C) 2004-2021 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see https://www.artifex.com/ or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
#ifndef MUPDF_FITZ_TRANSITION_H
#define MUPDF_FITZ_TRANSITION_H
@@ -5,8 +27,6 @@
#include "mupdf/fitz/pixmap.h"
/* Transition support */
-typedef struct fz_transition_s fz_transition;
-
enum {
FZ_TRANSITION_NONE = 0, /* aka 'R' or 'REPLACE' */
FZ_TRANSITION_SPLIT,
@@ -22,7 +42,7 @@ enum {
FZ_TRANSITION_FADE
};
-struct fz_transition_s
+typedef struct
{
int type;
float duration; /* Effect duration (seconds) */
@@ -36,10 +56,10 @@ struct fz_transition_s
/* State variables for use of the transition code */
int state0;
int state1;
-};
+} fz_transition;
-/*
- fz_generate_transition: Generate a frame of a transition.
+/**
+ Generate a frame of a transition.
tpix: Target pixmap
opix: Old pixmap
@@ -48,6 +68,8 @@ struct fz_transition_s
trans: Transition details
Returns 1 if successfully generated a frame.
+
+ Note: Pixmaps must include alpha.
*/
int fz_generate_transition(fz_context *ctx, fz_pixmap *tpix, fz_pixmap *opix, fz_pixmap *npix, int time, fz_transition *trans);
diff --git a/include/mupdf/fitz/tree.h b/include/mupdf/fitz/tree.h
index 92f45d3..b4d7ac6 100644
--- a/include/mupdf/fitz/tree.h
+++ b/include/mupdf/fitz/tree.h
@@ -1,24 +1,62 @@
+// Copyright (C) 2004-2021 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see https://www.artifex.com/ or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
#ifndef MUPDF_FITZ_TREE_H
#define MUPDF_FITZ_TREE_H
#include "mupdf/fitz/system.h"
#include "mupdf/fitz/context.h"
-/*
+/**
AA-tree to look up things by strings.
*/
-typedef struct fz_tree_s fz_tree;
+typedef struct fz_tree fz_tree;
+/**
+ Look for the value of a node in the tree with the given key.
+
+ Simple pointer equivalence is used for key.
+
+ Returns NULL for no match.
+*/
void *fz_tree_lookup(fz_context *ctx, fz_tree *node, const char *key);
-/*
+/**
Insert a new key/value pair and rebalance the tree.
Return the new root of the tree after inserting and rebalancing.
May be called with a NULL root to create a new tree.
+
+ No data is copied into the tree structure; key and value are
+ merely kept as pointers.
*/
fz_tree *fz_tree_insert(fz_context *ctx, fz_tree *root, const char *key, void *value);
+/**
+ Drop the tree.
+
+ The storage used by the tree is freed, and each value has
+ dropfunc called on it.
+*/
void fz_drop_tree(fz_context *ctx, fz_tree *node, void (*dropfunc)(fz_context *ctx, void *value));
#endif
diff --git a/include/mupdf/fitz/types.h b/include/mupdf/fitz/types.h
new file mode 100644
index 0000000..1299d2a
--- /dev/null
+++ b/include/mupdf/fitz/types.h
@@ -0,0 +1,41 @@
+// Copyright (C) 2021 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see https://www.artifex.com/ or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
+#ifndef MUPDF_FITZ_TYPES_H
+#define MUPDF_FITZ_TYPES_H
+
+typedef struct fz_document fz_document;
+
+/**
+ Locations within the document are referred to in terms of
+ chapter and page, rather than just a page number. For some
+ documents (such as epub documents with large numbers of pages
+ broken into many chapters) this can make navigation much faster
+ as only the required chapter needs to be decoded at a time.
+*/
+typedef struct
+{
+ int chapter;
+ int page;
+} fz_location;
+
+#endif
diff --git a/include/mupdf/fitz/util.h b/include/mupdf/fitz/util.h
index 49409cb..0048508 100644
--- a/include/mupdf/fitz/util.h
+++ b/include/mupdf/fitz/util.h
@@ -1,3 +1,25 @@
+// Copyright (C) 2004-2022 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see https://www.artifex.com/ or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
#ifndef MUPDF_FITZ_UTIL_H
#define MUPDF_FITZ_UTIL_H
@@ -8,70 +30,122 @@
#include "mupdf/fitz/pixmap.h"
#include "mupdf/fitz/structured-text.h"
#include "mupdf/fitz/buffer.h"
+#include "mupdf/fitz/xml.h"
+#include "mupdf/fitz/archive.h"
+#include "mupdf/fitz/display-list.h"
-/*
- fz_new_display_list_from_page: Create a display list with the contents of a page.
+/**
+ Create a display list.
+
+ Ownership of the display list is returned to the caller.
*/
fz_display_list *fz_new_display_list_from_page(fz_context *ctx, fz_page *page);
fz_display_list *fz_new_display_list_from_page_number(fz_context *ctx, fz_document *doc, int number);
+
+/**
+ Create a display list from page contents (no annotations).
+
+ Ownership of the display list is returned to the caller.
+*/
fz_display_list *fz_new_display_list_from_page_contents(fz_context *ctx, fz_page *page);
-fz_display_list *fz_new_display_list_from_annot(fz_context *ctx, fz_annot *annot);
-/*
- fz_new_pixmap_from_page: Render the page to a pixmap using the transform and colorspace.
+/**
+ Render the page to a pixmap using the transform and colorspace.
+
+ Ownership of the pixmap is returned to the caller.
*/
-fz_pixmap *fz_new_pixmap_from_display_list(fz_context *ctx, fz_display_list *list, const fz_matrix *ctm, fz_colorspace *cs, int alpha);
-fz_pixmap *fz_new_pixmap_from_page(fz_context *ctx, fz_page *page, const fz_matrix *ctm, fz_colorspace *cs, int alpha);
-fz_pixmap *fz_new_pixmap_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_matrix *ctm, fz_colorspace *cs, int alpha);
+fz_pixmap *fz_new_pixmap_from_display_list(fz_context *ctx, fz_display_list *list, fz_matrix ctm, fz_colorspace *cs, int alpha);
+fz_pixmap *fz_new_pixmap_from_page(fz_context *ctx, fz_page *page, fz_matrix ctm, fz_colorspace *cs, int alpha);
+fz_pixmap *fz_new_pixmap_from_page_number(fz_context *ctx, fz_document *doc, int number, fz_matrix ctm, fz_colorspace *cs, int alpha);
-/*
- fz_new_pixmap_from_page_contents: Render the page contents without annotations.
+/**
+ Render the page contents without annotations.
+
+ Ownership of the pixmap is returned to the caller.
*/
-fz_pixmap *fz_new_pixmap_from_page_contents(fz_context *ctx, fz_page *page, const fz_matrix *ctm, fz_colorspace *cs, int alpha);
+fz_pixmap *fz_new_pixmap_from_page_contents(fz_context *ctx, fz_page *page, fz_matrix ctm, fz_colorspace *cs, int alpha);
-/*
- fz_new_pixmap_from_annot: Render an annotation suitable for blending on top of the opaque
- pixmap returned by fz_new_pixmap_from_page_contents.
+/**
+ Render the page contents with control over spot colors.
+
+ Ownership of the pixmap is returned to the caller.
*/
-fz_pixmap *fz_new_pixmap_from_annot(fz_context *ctx, fz_annot *annot, const fz_matrix *ctm, fz_colorspace *cs, int alpha);
+fz_pixmap *fz_new_pixmap_from_display_list_with_separations(fz_context *ctx, fz_display_list *list, fz_matrix ctm, fz_colorspace *cs, fz_separations *seps, int alpha);
+fz_pixmap *fz_new_pixmap_from_page_with_separations(fz_context *ctx, fz_page *page, fz_matrix ctm, fz_colorspace *cs, fz_separations *seps, int alpha);
+fz_pixmap *fz_new_pixmap_from_page_number_with_separations(fz_context *ctx, fz_document *doc, int number, fz_matrix ctm, fz_colorspace *cs, fz_separations *seps, int alpha);
+fz_pixmap *fz_new_pixmap_from_page_contents_with_separations(fz_context *ctx, fz_page *page, fz_matrix ctm, fz_colorspace *cs, fz_separations *seps, int alpha);
-/*
- fz_new_stext_page_from_page: Extract structured text from a page.
+fz_pixmap *fz_fill_pixmap_from_display_list(fz_context *ctx, fz_display_list *list, fz_matrix ctm, fz_pixmap *pix);
+
+/**
+ Extract text from page.
+
+ Ownership of the fz_stext_page is returned to the caller.
*/
fz_stext_page *fz_new_stext_page_from_page(fz_context *ctx, fz_page *page, const fz_stext_options *options);
fz_stext_page *fz_new_stext_page_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_stext_options *options);
+fz_stext_page *fz_new_stext_page_from_chapter_page_number(fz_context *ctx, fz_document *doc, int chapter, int number, const fz_stext_options *options);
fz_stext_page *fz_new_stext_page_from_display_list(fz_context *ctx, fz_display_list *list, const fz_stext_options *options);
-/*
- fz_new_buffer_from_stext_page: Convert structured text into plain text.
+/**
+ Convert structured text into plain text.
*/
fz_buffer *fz_new_buffer_from_stext_page(fz_context *ctx, fz_stext_page *text);
fz_buffer *fz_new_buffer_from_page(fz_context *ctx, fz_page *page, const fz_stext_options *options);
fz_buffer *fz_new_buffer_from_page_number(fz_context *ctx, fz_document *doc, int number, const fz_stext_options *options);
fz_buffer *fz_new_buffer_from_display_list(fz_context *ctx, fz_display_list *list, const fz_stext_options *options);
-/*
- fz_search_page: Search for the 'needle' text on the page.
- Record the hits in the hit_bbox array and return the number of hits.
- Will stop looking once it has filled hit_max rectangles.
+/**
+ Search for the 'needle' text on the page.
+ Record the hits in the hit_bbox array and return the number of
+ hits. Will stop looking once it has filled hit_max rectangles.
*/
-int fz_search_page(fz_context *ctx, fz_page *page, const char *needle, fz_rect *hit_bbox, int hit_max);
-int fz_search_page_number(fz_context *ctx, fz_document *doc, int number, const char *needle, fz_rect *hit_bbox, int hit_max);
-int fz_search_display_list(fz_context *ctx, fz_display_list *list, const char *needle, fz_rect *hit_bbox, int hit_max);
+int fz_search_page(fz_context *ctx, fz_page *page, const char *needle, int *hit_mark, fz_quad *hit_bbox, int hit_max);
+int fz_search_page_number(fz_context *ctx, fz_document *doc, int number, const char *needle, int *hit_mark, fz_quad *hit_bbox, int hit_max);
+int fz_search_chapter_page_number(fz_context *ctx, fz_document *doc, int chapter, int page, const char *needle, int *hit_mark, fz_quad *hit_bbox, int hit_max);
+int fz_search_display_list(fz_context *ctx, fz_display_list *list, const char *needle, int *hit_mark, fz_quad *hit_bbox, int hit_max);
-/*
+/**
Parse an SVG document into a display-list.
*/
-fz_display_list *fz_new_display_list_from_svg(fz_context *ctx, fz_buffer *buf, float *w, float *h);
+fz_display_list *fz_new_display_list_from_svg(fz_context *ctx, fz_buffer *buf, const char *base_uri, fz_archive *dir, float *w, float *h);
-/*
+/**
Create a scalable image from an SVG document.
*/
-fz_image *fz_new_image_from_svg(fz_context *ctx, fz_buffer *buf);
+fz_image *fz_new_image_from_svg(fz_context *ctx, fz_buffer *buf, const char *base_uri, fz_archive *dir);
-/*
+/**
+ Parse an already-loaded SVG XML tree into a display-list.
+*/
+fz_display_list *fz_new_display_list_from_svg_xml(fz_context *ctx, fz_xml_doc *xmldoc, fz_xml *xml, const char *base_uri, fz_archive *dir, float *w, float *h);
+
+/**
+ Create a scalable image from an already-loaded SVG XML tree.
+*/
+fz_image *fz_new_image_from_svg_xml(fz_context *ctx, fz_xml_doc *xmldoc, fz_xml *xml, const char *base_uri, fz_archive *dir);
+
+/**
Write image as a data URI (for HTML and SVG output).
*/
void fz_write_image_as_data_uri(fz_context *ctx, fz_output *out, fz_image *image);
+void fz_write_pixmap_as_data_uri(fz_context *ctx, fz_output *out, fz_pixmap *pixmap);
+void fz_append_image_as_data_uri(fz_context *ctx, fz_buffer *out, fz_image *image);
+void fz_append_pixmap_as_data_uri(fz_context *ctx, fz_buffer *out, fz_pixmap *pixmap);
+
+/**
+ Use text extraction to convert the input document into XHTML,
+ then open the result as a new document that can be reflowed.
+*/
+fz_document *fz_new_xhtml_document_from_document(fz_context *ctx, fz_document *old_doc, const fz_stext_options *opts);
+
+/**
+ Returns an fz_buffer containing a page after conversion to specified format.
+
+ page: The page to convert.
+ format, options: Passed to fz_new_document_writer_with_output() internally.
+ transform, cookie: Passed to fz_run_page() internally.
+*/
+fz_buffer *fz_new_buffer_from_page_with_format(fz_context *ctx, fz_page *page, const char *format, const char *options, fz_matrix transform, fz_cookie *cookie);
#endif
diff --git a/include/mupdf/fitz/vendor.go b/include/mupdf/fitz/vendor.go
new file mode 100644
index 0000000..4a5fffb
--- /dev/null
+++ b/include/mupdf/fitz/vendor.go
@@ -0,0 +1,3 @@
+//go:build required
+
+package vendor
diff --git a/include/mupdf/fitz/version.h b/include/mupdf/fitz/version.h
index 20a8e85..8bf080b 100644
--- a/include/mupdf/fitz/version.h
+++ b/include/mupdf/fitz/version.h
@@ -1,9 +1,31 @@
+// Copyright (C) 2004-2024 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see https://www.artifex.com/ or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
#ifndef MUPDF_FITZ_VERSION_H
#define MUPDF_FITZ_VERSION_H
#ifndef FZ_VERSION
-#define FZ_VERSION "1.13.0"
+#define FZ_VERSION "1.24.9"
#define FZ_VERSION_MAJOR 1
-#define FZ_VERSION_MINOR 13
-#define FZ_VERSION_PATCH 0
+#define FZ_VERSION_MINOR 24
+#define FZ_VERSION_PATCH 9
#endif
#endif
diff --git a/include/mupdf/fitz/write-pixmap.h b/include/mupdf/fitz/write-pixmap.h
new file mode 100644
index 0000000..8ddb1ef
--- /dev/null
+++ b/include/mupdf/fitz/write-pixmap.h
@@ -0,0 +1,499 @@
+// Copyright (C) 2004-2023 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see https://www.gnu.org/licenses/agpl-3.0.en.html
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see https://www.artifex.com/ or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
+#ifndef MUPDF_FITZ_WRITE_PIXMAP_H
+#define MUPDF_FITZ_WRITE_PIXMAP_H
+
+#include "mupdf/fitz/system.h"
+#include "mupdf/fitz/context.h"
+#include "mupdf/fitz/output.h"
+#include "mupdf/fitz/band-writer.h"
+#include "mupdf/fitz/pixmap.h"
+#include "mupdf/fitz/bitmap.h"
+#include "mupdf/fitz/buffer.h"
+#include "mupdf/fitz/image.h"
+#include "mupdf/fitz/writer.h"
+
+/**
+ PCL output
+*/
+typedef struct
+{
+ /* Features of a particular printer */
+ int features;
+ const char *odd_page_init;
+ const char *even_page_init;
+
+ /* Options for this job */
+ int tumble;
+ int duplex_set;
+ int duplex;
+ int paper_size;
+ int manual_feed_set;
+ int manual_feed;
+ int media_position_set;
+ int media_position;
+ int orientation;
+
+ /* Updated as we move through the job */
+ int page_count;
+} fz_pcl_options;
+
+/**
+ Initialize PCL option struct for a given preset.
+
+ Currently defined presets include:
+
+ generic Generic PCL printer
+ ljet4 HP DeskJet
+ dj500 HP DeskJet 500
+ fs600 Kyocera FS-600
+ lj HP LaserJet, HP LaserJet Plus
+ lj2 HP LaserJet IIp, HP LaserJet IId
+ lj3 HP LaserJet III
+ lj3d HP LaserJet IIId
+ lj4 HP LaserJet 4
+ lj4pl HP LaserJet 4 PL
+ lj4d HP LaserJet 4d
+ lp2563b HP 2563B line printer
+ oce9050 Oce 9050 Line printer
+*/
+void fz_pcl_preset(fz_context *ctx, fz_pcl_options *opts, const char *preset);
+
+/**
+ Parse PCL options.
+
+ Currently defined options and values are as follows:
+
+ preset=X Either "generic" or one of the presets as for fz_pcl_preset.
+ spacing=0 No vertical spacing capability
+ spacing=1 PCL 3 spacing (*p+Y)
+ spacing=2 PCL 4 spacing (*bY)
+ spacing=3 PCL 5 spacing (*bY and clear seed row)
+ mode2 Disable/Enable mode 2 graphics compression
+ mode3 Disable/Enable mode 3 graphics compression
+ eog_reset End of graphics (*rB) resets all parameters
+ has_duplex Duplex supported (&lS)
+ has_papersize Papersize setting supported (&lA)
+ has_copies Number of copies supported (&lX)
+ is_ljet4pjl Disable/Enable HP 4PJL model-specific output
+ is_oce9050 Disable/Enable Oce 9050 model-specific output
+*/
+fz_pcl_options *fz_parse_pcl_options(fz_context *ctx, fz_pcl_options *opts, const char *args);
+
+/**
+ Create a new band writer, outputting monochrome pcl.
+*/
+fz_band_writer *fz_new_mono_pcl_band_writer(fz_context *ctx, fz_output *out, const fz_pcl_options *options);
+
+/**
+ Write a bitmap as mono PCL.
+*/
+void fz_write_bitmap_as_pcl(fz_context *ctx, fz_output *out, const fz_bitmap *bitmap, const fz_pcl_options *pcl);
+
+/**
+ Save a bitmap as mono PCL.
+*/
+void fz_save_bitmap_as_pcl(fz_context *ctx, fz_bitmap *bitmap, char *filename, int append, const fz_pcl_options *pcl);
+
+/**
+ Create a new band writer, outputting color pcl.
+*/
+fz_band_writer *fz_new_color_pcl_band_writer(fz_context *ctx, fz_output *out, const fz_pcl_options *options);
+
+/**
+ Write an (RGB) pixmap as color PCL.
+*/
+void fz_write_pixmap_as_pcl(fz_context *ctx, fz_output *out, const fz_pixmap *pixmap, const fz_pcl_options *pcl);
+
+/**
+ Save an (RGB) pixmap as color PCL.
+*/
+void fz_save_pixmap_as_pcl(fz_context *ctx, fz_pixmap *pixmap, char *filename, int append, const fz_pcl_options *pcl);
+
+/**
+ PCLm output
+*/
+typedef struct
+{
+ int compress;
+ int strip_height;
+
+ /* Updated as we move through the job */
+ int page_count;
+} fz_pclm_options;
+
+/**
+ Parse PCLm options.
+
+ Currently defined options and values are as follows:
+
+ compression=none: No compression
+ compression=flate: Flate compression
+ strip-height=n: Strip height (default 16)
+*/
+fz_pclm_options *fz_parse_pclm_options(fz_context *ctx, fz_pclm_options *opts, const char *args);
+
+/**
+ Create a new band writer, outputting pclm.
+*/
+fz_band_writer *fz_new_pclm_band_writer(fz_context *ctx, fz_output *out, const fz_pclm_options *options);
+
+/**
+ Write a (Greyscale or RGB) pixmap as pclm.
+*/
+void fz_write_pixmap_as_pclm(fz_context *ctx, fz_output *out, const fz_pixmap *pixmap, const fz_pclm_options *options);
+
+/**
+ Save a (Greyscale or RGB) pixmap as pclm.
+*/
+void fz_save_pixmap_as_pclm(fz_context *ctx, fz_pixmap *pixmap, char *filename, int append, const fz_pclm_options *options);
+
+/**
+ PDFOCR output
+*/
+typedef struct
+{
+ int compress;
+ int strip_height;
+ char language[256];
+ char datadir[1024];
+
+ /* Updated as we move through the job */
+ int page_count;
+} fz_pdfocr_options;
+
+/**
+ Parse PDFOCR options.
+
+ Currently defined options and values are as follows:
+
+ compression=none: No compression
+ compression=flate: Flate compression
+ strip-height=n: Strip height (default 16)
+ ocr-language=: OCR Language (default eng)
+ ocr-datadir=: OCR data path (default rely on TESSDATA_PREFIX)
+*/
+fz_pdfocr_options *fz_parse_pdfocr_options(fz_context *ctx, fz_pdfocr_options *opts, const char *args);
+
+/**
+ Create a new band writer, outputting pdfocr.
+
+ Ownership of output stays with the caller, the band writer
+ borrows the reference. The caller must keep the output around
+ for the duration of the band writer, and then close/drop as
+ appropriate.
+*/
+fz_band_writer *fz_new_pdfocr_band_writer(fz_context *ctx, fz_output *out, const fz_pdfocr_options *options);
+
+/**
+ Set the progress callback for a pdfocr bandwriter.
+*/
+void fz_pdfocr_band_writer_set_progress(fz_context *ctx, fz_band_writer *writer, fz_pdfocr_progress_fn *progress_fn, void *progress_arg);
+
+/**
+ Write a (Greyscale or RGB) pixmap as pdfocr.
+*/
+void fz_write_pixmap_as_pdfocr(fz_context *ctx, fz_output *out, const fz_pixmap *pixmap, const fz_pdfocr_options *options);
+
+/**
+ Save a (Greyscale or RGB) pixmap as pdfocr.
+*/
+void fz_save_pixmap_as_pdfocr(fz_context *ctx, fz_pixmap *pixmap, char *filename, int append, const fz_pdfocr_options *options);
+
+/**
+ Save a (Greyscale or RGB) pixmap as a png.
+*/
+void fz_save_pixmap_as_png(fz_context *ctx, fz_pixmap *pixmap, const char *filename);
+
+/**
+ Write a pixmap as a JPEG.
+*/
+void fz_write_pixmap_as_jpeg(fz_context *ctx, fz_output *out, fz_pixmap *pix, int quality, int invert_cmyk);
+
+/**
+ Save a pixmap as a JPEG.
+*/
+void fz_save_pixmap_as_jpeg(fz_context *ctx, fz_pixmap *pixmap, const char *filename, int quality);
+
+/**
+ Write a (Greyscale or RGB) pixmap as a png.
+*/
+void fz_write_pixmap_as_png(fz_context *ctx, fz_output *out, const fz_pixmap *pixmap);
+
+/**
+ Write pixmap data as JP2K with no subsampling.
+
+ quality = 100 = lossless
+ otherwise for a factor of x compression use 100-x. (so 80 is 1:20 compression)
+*/
+void fz_write_pixmap_as_jpx(fz_context *ctx, fz_output *out, fz_pixmap *pix, int quality);
+
+/**
+ Save pixmap data as JP2K with no subsampling.
+
+ quality = 100 = lossless
+ otherwise for a factor of x compression use 100-x. (so 80 is 1:20 compression)
+*/
+void fz_save_pixmap_as_jpx(fz_context *ctx, fz_pixmap *pixmap, const char *filename, int q);
+
+/**
+ Create a new png band writer (greyscale or RGB, with or without
+ alpha).
+*/
+fz_band_writer *fz_new_png_band_writer(fz_context *ctx, fz_output *out);
+
+/**
+ Reencode a given image as a PNG into a buffer.
+
+ Ownership of the buffer is returned.
+*/
+fz_buffer *fz_new_buffer_from_image_as_png(fz_context *ctx, fz_image *image, fz_color_params color_params);
+fz_buffer *fz_new_buffer_from_image_as_pnm(fz_context *ctx, fz_image *image, fz_color_params color_params);
+fz_buffer *fz_new_buffer_from_image_as_pam(fz_context *ctx, fz_image *image, fz_color_params color_params);
+fz_buffer *fz_new_buffer_from_image_as_psd(fz_context *ctx, fz_image *image, fz_color_params color_params);
+fz_buffer *fz_new_buffer_from_image_as_jpeg(fz_context *ctx, fz_image *image, fz_color_params color_params, int quality, int invert_cmyk);
+fz_buffer *fz_new_buffer_from_image_as_jpx(fz_context *ctx, fz_image *image, fz_color_params color_params, int quality);
+
+/**
+ Reencode a given pixmap as a PNG into a buffer.
+
+ Ownership of the buffer is returned.
+*/
+fz_buffer *fz_new_buffer_from_pixmap_as_png(fz_context *ctx, fz_pixmap *pixmap, fz_color_params color_params);
+fz_buffer *fz_new_buffer_from_pixmap_as_pnm(fz_context *ctx, fz_pixmap *pixmap, fz_color_params color_params);
+fz_buffer *fz_new_buffer_from_pixmap_as_pam(fz_context *ctx, fz_pixmap *pixmap, fz_color_params color_params);
+fz_buffer *fz_new_buffer_from_pixmap_as_psd(fz_context *ctx, fz_pixmap *pix, fz_color_params color_params);
+fz_buffer *fz_new_buffer_from_pixmap_as_jpeg(fz_context *ctx, fz_pixmap *pixmap, fz_color_params color_params, int quality, int invert_cmyk);
+fz_buffer *fz_new_buffer_from_pixmap_as_jpx(fz_context *ctx, fz_pixmap *pix, fz_color_params color_params, int quality);
+
+/**
+ Save a pixmap as a pnm (greyscale or rgb, no alpha).
+*/
+void fz_save_pixmap_as_pnm(fz_context *ctx, fz_pixmap *pixmap, const char *filename);
+
+/**
+ Write a pixmap as a pnm (greyscale or rgb, no alpha).
+*/
+void fz_write_pixmap_as_pnm(fz_context *ctx, fz_output *out, fz_pixmap *pixmap);
+
+/**
+ Create a band writer targeting pnm (greyscale or rgb, no
+ alpha).
+*/
+fz_band_writer *fz_new_pnm_band_writer(fz_context *ctx, fz_output *out);
+
+/**
+ Save a pixmap as a pam (greyscale, rgb or cmyk, with or without
+ alpha).
+*/
+void fz_save_pixmap_as_pam(fz_context *ctx, fz_pixmap *pixmap, const char *filename);
+
+/**
+ Write a pixmap as a pam (greyscale, rgb or cmyk, with or without
+ alpha).
+*/
+void fz_write_pixmap_as_pam(fz_context *ctx, fz_output *out, fz_pixmap *pixmap);
+
+/**
+ Create a band writer targeting pam (greyscale, rgb or cmyk,
+ with or without alpha).
+*/
+fz_band_writer *fz_new_pam_band_writer(fz_context *ctx, fz_output *out);
+
+/**
+ Save a bitmap as a pbm.
+*/
+void fz_save_bitmap_as_pbm(fz_context *ctx, fz_bitmap *bitmap, const char *filename);
+
+/**
+ Write a bitmap as a pbm.
+*/
+void fz_write_bitmap_as_pbm(fz_context *ctx, fz_output *out, fz_bitmap *bitmap);
+
+/**
+ Create a new band writer, targeting pbm.
+*/
+fz_band_writer *fz_new_pbm_band_writer(fz_context *ctx, fz_output *out);
+
+/**
+ Save a pixmap as a pbm. (Performing halftoning).
+*/
+void fz_save_pixmap_as_pbm(fz_context *ctx, fz_pixmap *pixmap, const char *filename);
+
+/**
+ Save a CMYK bitmap as a pkm.
+*/
+void fz_save_bitmap_as_pkm(fz_context *ctx, fz_bitmap *bitmap, const char *filename);
+
+/**
+ Write a CMYK bitmap as a pkm.
+*/
+void fz_write_bitmap_as_pkm(fz_context *ctx, fz_output *out, fz_bitmap *bitmap);
+
+/**
+ Create a new pkm band writer for CMYK pixmaps.
+*/
+fz_band_writer *fz_new_pkm_band_writer(fz_context *ctx, fz_output *out);
+
+/**
+ Save a CMYK pixmap as a pkm. (Performing halftoning).
+*/
+void fz_save_pixmap_as_pkm(fz_context *ctx, fz_pixmap *pixmap, const char *filename);
+
+/**
+ Write a (gray, rgb, or cmyk, no alpha) pixmap out as postscript.
+*/
+void fz_write_pixmap_as_ps(fz_context *ctx, fz_output *out, const fz_pixmap *pixmap);
+
+/**
+ Save a (gray, rgb, or cmyk, no alpha) pixmap out as postscript.
+*/
+void fz_save_pixmap_as_ps(fz_context *ctx, fz_pixmap *pixmap, char *filename, int append);
+
+/**
+ Create a postscript band writer for gray, rgb, or cmyk, no
+ alpha.
+*/
+fz_band_writer *fz_new_ps_band_writer(fz_context *ctx, fz_output *out);
+
+/**
+ Write the file level header for ps band writer output.
+*/
+void fz_write_ps_file_header(fz_context *ctx, fz_output *out);
+
+/**
+ Write the file level trailer for ps band writer output.
+*/
+void fz_write_ps_file_trailer(fz_context *ctx, fz_output *out, int pages);
+
+/**
+ Save a pixmap as a PSD file.
+*/
+void fz_save_pixmap_as_psd(fz_context *ctx, fz_pixmap *pixmap, const char *filename);
+
+/**
+ Write a pixmap as a PSD file.
+*/
+void fz_write_pixmap_as_psd(fz_context *ctx, fz_output *out, const fz_pixmap *pixmap);
+
+/**
+ Open a PSD band writer.
+*/
+fz_band_writer *fz_new_psd_band_writer(fz_context *ctx, fz_output *out);
+
+typedef struct
+{
+ /* These are not interpreted as CStrings by the writing code,
+ * but are rather copied directly out. */
+ char media_class[64];
+ char media_color[64];
+ char media_type[64];
+ char output_type[64];
+
+ unsigned int advance_distance;
+ int advance_media;
+ int collate;
+ int cut_media;
+ int duplex;
+ int insert_sheet;
+ int jog;
+ int leading_edge;
+ int manual_feed;
+ unsigned int media_position;
+ unsigned int media_weight;
+ int mirror_print;
+ int negative_print;
+ unsigned int num_copies;
+ int orientation;
+ int output_face_up;
+ unsigned int PageSize[2];
+ int separations;
+ int tray_switch;
+ int tumble;
+
+ int media_type_num;
+ int compression;
+ unsigned int row_count;
+ unsigned int row_feed;
+ unsigned int row_step;
+
+ /* These are not interpreted as CStrings by the writing code, but
+ * are rather copied directly out. */
+ char rendering_intent[64];
+ char page_size_name[64];
+} fz_pwg_options;
+
+/**
+ Save a pixmap as a PWG.
+*/
+void fz_save_pixmap_as_pwg(fz_context *ctx, fz_pixmap *pixmap, char *filename, int append, const fz_pwg_options *pwg);
+
+/**
+ Save a bitmap as a PWG.
+*/
+void fz_save_bitmap_as_pwg(fz_context *ctx, fz_bitmap *bitmap, char *filename, int append, const fz_pwg_options *pwg);
+
+/**
+ Write a pixmap as a PWG.
+*/
+void fz_write_pixmap_as_pwg(fz_context *ctx, fz_output *out, const fz_pixmap *pixmap, const fz_pwg_options *pwg);
+
+/**
+ Write a bitmap as a PWG.
+*/
+void fz_write_bitmap_as_pwg(fz_context *ctx, fz_output *out, const fz_bitmap *bitmap, const fz_pwg_options *pwg);
+
+/**
+ Write a pixmap as a PWG page.
+
+ Caller should provide a file header by calling
+ fz_write_pwg_file_header, but can then write several pages to
+ the same file.
+*/
+void fz_write_pixmap_as_pwg_page(fz_context *ctx, fz_output *out, const fz_pixmap *pixmap, const fz_pwg_options *pwg);
+
+/**
+ Write a bitmap as a PWG page.
+
+ Caller should provide a file header by calling
+ fz_write_pwg_file_header, but can then write several pages to
+ the same file.
+*/
+void fz_write_bitmap_as_pwg_page(fz_context *ctx, fz_output *out, const fz_bitmap *bitmap, const fz_pwg_options *pwg);
+
+/**
+ Create a new monochrome pwg band writer.
+*/
+fz_band_writer *fz_new_mono_pwg_band_writer(fz_context *ctx, fz_output *out, const fz_pwg_options *pwg);
+
+/**
+ Create a new color pwg band writer.
+*/
+fz_band_writer *fz_new_pwg_band_writer(fz_context *ctx, fz_output *out, const fz_pwg_options *pwg);
+
+/**
+ Output the file header to a pwg stream, ready for pages to follow it.
+*/
+void fz_write_pwg_file_header(fz_context *ctx, fz_output *out); /* for use by mudraw.c */
+
+#endif
diff --git a/include/mupdf/fitz/writer.h b/include/mupdf/fitz/writer.h
index 1ce5164..23b78fa 100644
--- a/include/mupdf/fitz/writer.h
+++ b/include/mupdf/fitz/writer.h
@@ -1,3 +1,25 @@
+// Copyright (C) 2004-2023 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
#ifndef MUPDF_FITZ_WRITER_H
#define MUPDF_FITZ_WRITER_H
@@ -7,28 +29,28 @@
#include "mupdf/fitz/document.h"
#include "mupdf/fitz/device.h"
-typedef struct fz_document_writer_s fz_document_writer;
+typedef struct fz_document_writer fz_document_writer;
-/*
- fz_document_writer_begin_page_fn: Function type to start
+/**
+ Function type to start
the process of writing a page to a document.
mediabox: page size rectangle in points.
Returns a fz_device to write page contents to.
*/
-typedef fz_device *(fz_document_writer_begin_page_fn)(fz_context *ctx, fz_document_writer *wri, const fz_rect *mediabox);
+typedef fz_device *(fz_document_writer_begin_page_fn)(fz_context *ctx, fz_document_writer *wri, fz_rect mediabox);
-/*
- fz_document_writer_end_page_fn: Function type to end the
+/**
+ Function type to end the
process of writing a page to a document.
dev: The device created by the begin_page function.
*/
typedef void (fz_document_writer_end_page_fn)(fz_context *ctx, fz_document_writer *wri, fz_device *dev);
-/*
- fz_document_writer_close_writer_fn: Function type to end
+/**
+ Function type to end
the process of writing pages to a document.
This writes any file level trailers required. After this
@@ -36,8 +58,8 @@ typedef void (fz_document_writer_end_page_fn)(fz_context *ctx, fz_document_write
*/
typedef void (fz_document_writer_close_writer_fn)(fz_context *ctx, fz_document_writer *wri);
-/*
- fz_document_writer_drop_writer_fn: Function type to discard
+/**
+ Function type to discard
an fz_document_writer. This may be called at any time during
the process to release all the resources owned by the writer.
@@ -47,63 +69,111 @@ typedef void (fz_document_writer_close_writer_fn)(fz_context *ctx, fz_document_w
*/
typedef void (fz_document_writer_drop_writer_fn)(fz_context *ctx, fz_document_writer *wri);
-/*
- Structure is public to allow other structures to
- be derived from it. Do not access members directly.
-*/
-struct fz_document_writer_s
-{
- fz_document_writer_begin_page_fn *begin_page;
- fz_document_writer_end_page_fn *end_page;
- fz_document_writer_close_writer_fn *close_writer;
- fz_document_writer_drop_writer_fn *drop_writer;
- fz_device *dev;
-};
-
-/*
- fz_new_document_writer_of_size: Internal function to allocate a
- block for a derived document_writer structure, with the base
- structure's function pointers populated correctly, and the extra
- space zero initialised.
-*/
-fz_document_writer *fz_new_document_writer_of_size(fz_context *ctx, size_t size,
- fz_document_writer_begin_page_fn *begin_page,
- fz_document_writer_end_page_fn *end_page,
- fz_document_writer_close_writer_fn *close,
- fz_document_writer_drop_writer_fn *drop);
-
#define fz_new_derived_document_writer(CTX,TYPE,BEGIN_PAGE,END_PAGE,CLOSE,DROP) \
((TYPE *)Memento_label(fz_new_document_writer_of_size(CTX,sizeof(TYPE),BEGIN_PAGE,END_PAGE,CLOSE,DROP),#TYPE))
+/**
+ Look for a given option (key) in the opts string. Return 1 if
+ it has it, and update *val to point to the value within opts.
+*/
int fz_has_option(fz_context *ctx, const char *opts, const char *key, const char **val);
+
+/**
+ Check to see if an option, a, from a string matches a reference
+ option, b.
+
+ (i.e. a could be 'foo' or 'foo,bar...' etc, but b can only be
+ 'foo'.)
+*/
int fz_option_eq(const char *a, const char *b);
-/*
- fz_new_document_writer: Create a new fz_document_writer, for a
+/**
+ Copy an option (val) into a destination buffer (dest), of maxlen
+ bytes.
+
+ Returns the number of bytes (including terminator) that did not
+ fit. If val is maxlen or greater bytes in size, it will be left
+ unterminated.
+*/
+size_t fz_copy_option(fz_context *ctx, const char *val, char *dest, size_t maxlen);
+
+/**
+ Create a new fz_document_writer, for a
file of the given type.
path: The document name to write (or NULL for default)
- format: Which format to write (currently cbz, html, pdf, pam, pbm,
- pgm, pkm, png, ppm, pnm, svg, text, tga, xhtml)
+ format: Which format to write (currently cbz, html, pdf, pam,
+ pbm, pgm, pkm, png, ppm, pnm, svg, text, xhtml, docx, odt)
options: NULL, or pointer to comma separated string to control
file generation.
*/
fz_document_writer *fz_new_document_writer(fz_context *ctx, const char *path, const char *format, const char *options);
+/**
+ Like fz_new_document_writer but takes a fz_output for writing
+ the result. Only works for multi-page formats.
+*/
+fz_document_writer *
+fz_new_document_writer_with_output(fz_context *ctx, fz_output *out, const char *format, const char *options);
+
+fz_document_writer *
+fz_new_document_writer_with_buffer(fz_context *ctx, fz_buffer *buf, const char *format, const char *options);
+
+/**
+ Document writers for various possible output formats.
+
+ All of the "_with_output" variants pass the ownership of out in
+ immediately upon calling. The writers are responsible for
+ dropping the fz_output when they are finished with it (even
+ if they throw an exception during creation).
+*/
fz_document_writer *fz_new_pdf_writer(fz_context *ctx, const char *path, const char *options);
+fz_document_writer *fz_new_pdf_writer_with_output(fz_context *ctx, fz_output *out, const char *options);
fz_document_writer *fz_new_svg_writer(fz_context *ctx, const char *path, const char *options);
+fz_document_writer *fz_new_svg_writer_with_output(fz_context *ctx, fz_output *out, const char *options);
fz_document_writer *fz_new_text_writer(fz_context *ctx, const char *format, const char *path, const char *options);
+fz_document_writer *fz_new_text_writer_with_output(fz_context *ctx, const char *format, fz_output *out, const char *options);
+
+fz_document_writer *fz_new_odt_writer(fz_context *ctx, const char *path, const char *options);
+fz_document_writer *fz_new_odt_writer_with_output(fz_context *ctx, fz_output *out, const char *options);
+fz_document_writer *fz_new_docx_writer(fz_context *ctx, const char *path, const char *options);
+fz_document_writer *fz_new_docx_writer_with_output(fz_context *ctx, fz_output *out, const char *options);
fz_document_writer *fz_new_ps_writer(fz_context *ctx, const char *path, const char *options);
+fz_document_writer *fz_new_ps_writer_with_output(fz_context *ctx, fz_output *out, const char *options);
fz_document_writer *fz_new_pcl_writer(fz_context *ctx, const char *path, const char *options);
+fz_document_writer *fz_new_pcl_writer_with_output(fz_context *ctx, fz_output *out, const char *options);
+fz_document_writer *fz_new_pclm_writer(fz_context *ctx, const char *path, const char *options);
+fz_document_writer *fz_new_pclm_writer_with_output(fz_context *ctx, fz_output *out, const char *options);
fz_document_writer *fz_new_pwg_writer(fz_context *ctx, const char *path, const char *options);
+fz_document_writer *fz_new_pwg_writer_with_output(fz_context *ctx, fz_output *out, const char *options);
fz_document_writer *fz_new_cbz_writer(fz_context *ctx, const char *path, const char *options);
+fz_document_writer *fz_new_cbz_writer_with_output(fz_context *ctx, fz_output *out, const char *options);
+
+/**
+ Used to report progress of the OCR operation.
+
+ page: Current page being processed.
+
+ percent: Progress of the OCR operation for the
+ current page in percent. Whether it reaches 100
+ once a page is finished, depends on the OCR engine.
+
+ Return 0 to continue progress, return 1 to cancel the
+ operation.
+*/
+typedef int (fz_pdfocr_progress_fn)(fz_context *ctx, void *progress_arg, int page, int percent);
+
+fz_document_writer *fz_new_pdfocr_writer(fz_context *ctx, const char *path, const char *options);
+fz_document_writer *fz_new_pdfocr_writer_with_output(fz_context *ctx, fz_output *out, const char *options);
+void fz_pdfocr_writer_set_progress(fz_context *ctx, fz_document_writer *writer, fz_pdfocr_progress_fn *progress, void *);
+
+fz_document_writer *fz_new_jpeg_pixmap_writer(fz_context *ctx, const char *path, const char *options);
fz_document_writer *fz_new_png_pixmap_writer(fz_context *ctx, const char *path, const char *options);
-fz_document_writer *fz_new_tga_pixmap_writer(fz_context *ctx, const char *path, const char *options);
fz_document_writer *fz_new_pam_pixmap_writer(fz_context *ctx, const char *path, const char *options);
fz_document_writer *fz_new_pnm_pixmap_writer(fz_context *ctx, const char *path, const char *options);
fz_document_writer *fz_new_pgm_pixmap_writer(fz_context *ctx, const char *path, const char *options);
@@ -111,24 +181,31 @@ fz_document_writer *fz_new_ppm_pixmap_writer(fz_context *ctx, const char *path,
fz_document_writer *fz_new_pbm_pixmap_writer(fz_context *ctx, const char *path, const char *options);
fz_document_writer *fz_new_pkm_pixmap_writer(fz_context *ctx, const char *path, const char *options);
-/*
- fz_begin_page: Called to start the process of writing a page to
+/**
+ Called to start the process of writing a page to
a document.
mediabox: page size rectangle in points.
- Returns a fz_device to write page contents to.
+ Returns a borrowed fz_device to write page contents to. This
+ should be kept if required, and only dropped if it was kept.
*/
-fz_device *fz_begin_page(fz_context *ctx, fz_document_writer *wri, const fz_rect *mediabox);
+fz_device *fz_begin_page(fz_context *ctx, fz_document_writer *wri, fz_rect mediabox);
-/*
- fz_end_page: Called to end the process of writing a page to a
+/**
+ Called to end the process of writing a page to a
document.
*/
void fz_end_page(fz_context *ctx, fz_document_writer *wri);
-/*
- fz_close_document_writer: Called to end the process of writing
+/**
+ Convenience function to feed all the pages of a document to
+ fz_begin_page/fz_run_page/fz_end_page.
+*/
+void fz_write_document(fz_context *ctx, fz_document_writer *wri, fz_document *doc);
+
+/**
+ Called to end the process of writing
pages to a document.
This writes any file level trailers required. After this
@@ -136,8 +213,8 @@ void fz_end_page(fz_context *ctx, fz_document_writer *wri);
*/
void fz_close_document_writer(fz_context *ctx, fz_document_writer *wri);
-/*
- fz_drop_document_writer: Called to discard a fz_document_writer.
+/**
+ Called to discard a fz_document_writer.
This may be called at any time during the process to release all
the resources owned by the writer.
@@ -149,11 +226,41 @@ void fz_drop_document_writer(fz_context *ctx, fz_document_writer *wri);
fz_document_writer *fz_new_pixmap_writer(fz_context *ctx, const char *path, const char *options, const char *default_path, int n,
void (*save)(fz_context *ctx, fz_pixmap *pix, const char *filename));
-extern const char *fz_pdf_write_options_usage;
-extern const char *fz_svg_write_options_usage;
+FZ_DATA extern const char *fz_pdf_write_options_usage;
+FZ_DATA extern const char *fz_svg_write_options_usage;
+
+FZ_DATA extern const char *fz_pcl_write_options_usage;
+FZ_DATA extern const char *fz_pclm_write_options_usage;
+FZ_DATA extern const char *fz_pwg_write_options_usage;
+FZ_DATA extern const char *fz_pdfocr_write_options_usage;
+
+/* Implementation details: subject to change. */
+
+/**
+ Structure is public to allow other structures to
+ be derived from it. Do not access members directly.
+*/
+struct fz_document_writer
+{
+ fz_document_writer_begin_page_fn *begin_page;
+ fz_document_writer_end_page_fn *end_page;
+ fz_document_writer_close_writer_fn *close_writer;
+ fz_document_writer_drop_writer_fn *drop_writer;
+ fz_device *dev;
+};
+
+/**
+ Internal function to allocate a
+ block for a derived document_writer structure, with the base
+ structure's function pointers populated correctly, and the extra
+ space zero initialised.
+*/
+fz_document_writer *fz_new_document_writer_of_size(fz_context *ctx, size_t size,
+ fz_document_writer_begin_page_fn *begin_page,
+ fz_document_writer_end_page_fn *end_page,
+ fz_document_writer_close_writer_fn *close,
+ fz_document_writer_drop_writer_fn *drop);
+
-extern const char *fz_pcl_write_options_usage;
-extern const char *fz_pclm_write_options_usage;
-extern const char *fz_pwg_write_options_usage;
#endif
diff --git a/include/mupdf/fitz/xml.h b/include/mupdf/fitz/xml.h
index d3cbd64..7792f4a 100644
--- a/include/mupdf/fitz/xml.h
+++ b/include/mupdf/fitz/xml.h
@@ -1,88 +1,397 @@
+// Copyright (C) 2004-2022 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
#ifndef MUPDF_FITZ_XML_H
#define MUPDF_FITZ_XML_H
#include "mupdf/fitz/system.h"
#include "mupdf/fitz/context.h"
+#include "mupdf/fitz/buffer.h"
+#include "mupdf/fitz/pool.h"
+#include "mupdf/fitz/archive.h"
-/*
+/**
XML document model
*/
-typedef struct fz_xml_doc_s fz_xml_doc;
-typedef struct fz_xml_s fz_xml;
+typedef struct fz_xml fz_xml;
-/*
- fz_parse_xml: Parse the contents of buffer into a tree of xml nodes.
+/* For backwards compatibility */
+typedef fz_xml fz_xml_doc;
+
+/**
+ Parse the contents of buffer into a tree of xml nodes.
preserve_white: whether to keep or delete all-whitespace nodes.
*/
-fz_xml_doc *fz_parse_xml(fz_context *ctx, fz_buffer *buf, int preserve_white);
+fz_xml *fz_parse_xml(fz_context *ctx, fz_buffer *buf, int preserve_white);
-/*
- fz_drop_xml: Free the XML node and all its children and siblings.
+/**
+ Parse the contents of a stream into a tree of xml nodes.
+
+ preserve_white: whether to keep or delete all-whitespace nodes.
*/
-void fz_drop_xml(fz_context *ctx, fz_xml_doc *xml);
+fz_xml *fz_parse_xml_stream(fz_context *ctx, fz_stream *stream, int preserve_white);
-/*
- fz_detach_xml: Detach a node from the tree, unlinking it from its parent,
+/**
+ Parse the contents of an archive entry into a tree of xml nodes.
+
+ preserve_white: whether to keep or delete all-whitespace nodes.
+*/
+fz_xml *fz_parse_xml_archive_entry(fz_context *ctx, fz_archive *dir, const char *filename, int preserve_white);
+
+/**
+ Try and parse the contents of an archive entry into a tree of xml nodes.
+
+ preserve_white: whether to keep or delete all-whitespace nodes.
+
+ Will return NULL if the archive entry can't be found. Otherwise behaves
+ the same as fz_parse_xml_archive_entry. May throw exceptions.
+*/
+fz_xml *fz_try_parse_xml_archive_entry(fz_context *ctx, fz_archive *dir, const char *filename, int preserve_white);
+
+/**
+ Parse the contents of a buffer into a tree of XML nodes,
+ using the HTML5 parsing algorithm.
+*/
+fz_xml *fz_parse_xml_from_html5(fz_context *ctx, fz_buffer *buf);
+
+/**
+ Add a reference to the XML.
+*/
+fz_xml *fz_keep_xml(fz_context *ctx, fz_xml *xml);
+
+/**
+ Drop a reference to the XML. When the last reference is
+ dropped, the node and all its children and siblings will
+ be freed.
+*/
+void fz_drop_xml(fz_context *ctx, fz_xml *xml);
+
+/**
+ Detach a node from the tree, unlinking it from its parent,
and setting the document root to the node.
*/
-void fz_detach_xml(fz_context *ctx, fz_xml_doc *xml, fz_xml *node);
+void fz_detach_xml(fz_context *ctx, fz_xml *node);
-/*
- fz_xml_root: Get the root node for the document.
+/**
+ Return the topmost XML node of a document.
*/
fz_xml *fz_xml_root(fz_xml_doc *xml);
-/*
- fz_xml_prev: Return previous sibling of XML node.
+/**
+ Return previous sibling of XML node.
*/
fz_xml *fz_xml_prev(fz_xml *item);
-/*
- fz_xml_next: Return next sibling of XML node.
+/**
+ Return next sibling of XML node.
*/
fz_xml *fz_xml_next(fz_xml *item);
-/*
- fz_xml_up: Return parent of XML node.
+/**
+ Return parent of XML node.
*/
fz_xml *fz_xml_up(fz_xml *item);
-/*
- fz_xml_down: Return first child of XML node.
+/**
+ Return first child of XML node.
*/
fz_xml *fz_xml_down(fz_xml *item);
-/*
- fz_xml_is_tag: Return true if the tag name matches.
+/**
+ Return true if the tag name matches.
*/
int fz_xml_is_tag(fz_xml *item, const char *name);
-/*
- fz_xml_tag: Return tag of XML node. Return NULL for text nodes.
+/**
+ Return tag of XML node. Return NULL for text nodes.
*/
char *fz_xml_tag(fz_xml *item);
-/*
- fz_xml_att: Return the value of an attribute of an XML node.
+/**
+ Return the value of an attribute of an XML node.
NULL if the attribute doesn't exist.
*/
char *fz_xml_att(fz_xml *item, const char *att);
-/*
- fz_xml_text: Return the text content of an XML node.
+/**
+ Return the value of an attribute of an XML node.
+ If the first attribute doesn't exist, try the second.
+ NULL if neither attribute exists.
+*/
+char *fz_xml_att_alt(fz_xml *item, const char *one, const char *two);
+
+/**
+ Check for a matching attribute on an XML node.
+
+ If the node has the requested attribute (name), and the value
+ matches (match) then return 1. Otherwise, 0.
+*/
+int fz_xml_att_eq(fz_xml *item, const char *name, const char *match);
+
+/**
+ Add an attribute to an XML node.
+*/
+void fz_xml_add_att(fz_context *ctx, fz_pool *pool, fz_xml *node, const char *key, const char *val);
+
+/**
+ Return the text content of an XML node.
Return NULL if the node is a tag.
*/
char *fz_xml_text(fz_xml *item);
-/*
- fz_debug_xml: Pretty-print an XML tree to stdout.
+/**
+ Pretty-print an XML tree to given output.
+*/
+void fz_output_xml(fz_context *ctx, fz_output *out, fz_xml *item, int level);
+
+/**
+ Pretty-print an XML tree to stdout. (Deprecated, use
+ fz_output_xml in preference).
*/
void fz_debug_xml(fz_xml *item, int level);
+/**
+ Search the siblings of XML nodes starting with item looking for
+ the first with the given tag.
+
+ Return NULL if none found.
+*/
fz_xml *fz_xml_find(fz_xml *item, const char *tag);
+
+/**
+ Search the siblings of XML nodes starting with the next sibling
+ of item looking for the first with the given tag.
+
+ Return NULL if none found.
+*/
fz_xml *fz_xml_find_next(fz_xml *item, const char *tag);
+
+/**
+ Search the siblings of XML nodes starting with the first child
+ of item looking for the first with the given tag.
+
+ Return NULL if none found.
+*/
fz_xml *fz_xml_find_down(fz_xml *item, const char *tag);
+/**
+ Search the siblings of XML nodes starting with item looking for
+ the first with the given tag (or any tag if tag is NULL), and
+ with a matching attribute.
+
+ Return NULL if none found.
+*/
+fz_xml *fz_xml_find_match(fz_xml *item, const char *tag, const char *att, const char *match);
+
+/**
+ Search the siblings of XML nodes starting with the next sibling
+ of item looking for the first with the given tag (or any tag if tag
+ is NULL), and with a matching attribute.
+
+ Return NULL if none found.
+*/
+fz_xml *fz_xml_find_next_match(fz_xml *item, const char *tag, const char *att, const char *match);
+
+/**
+ Search the siblings of XML nodes starting with the first child
+ of item looking for the first with the given tag (or any tag if
+ tag is NULL), and with a matching attribute.
+
+ Return NULL if none found.
+*/
+fz_xml *fz_xml_find_down_match(fz_xml *item, const char *tag, const char *att, const char *match);
+
+/**
+ Perform a depth first search from item, returning the first
+ child that matches the given tag (or any tag if tag is NULL),
+ with the given attribute (if att is non NULL), that matches
+ match (if match is non NULL).
+*/
+fz_xml *fz_xml_find_dfs(fz_xml *item, const char *tag, const char *att, const char *match);
+
+/**
+ Perform a depth first search from item, returning the first
+ child that matches the given tag (or any tag if tag is NULL),
+ with the given attribute (if att is non NULL), that matches
+ match (if match is non NULL). The search stops if it ever
+ reaches the top of the tree, or the declared 'top' item.
+*/
+fz_xml *fz_xml_find_dfs_top(fz_xml *item, const char *tag, const char *att, const char *match, fz_xml *top);
+
+/**
+ Perform a depth first search onwards from item, returning the first
+ child that matches the given tag (or any tag if tag is NULL),
+ with the given attribute (if att is non NULL), that matches
+ match (if match is non NULL).
+*/
+fz_xml *fz_xml_find_next_dfs(fz_xml *item, const char *tag, const char *att, const char *match);
+
+/**
+ Perform a depth first search onwards from item, returning the first
+ child that matches the given tag (or any tag if tag is NULL),
+ with the given attribute (if att is non NULL), that matches
+ match (if match is non NULL). The search stops if it ever reaches
+ the top of the tree, or the declared 'top' item.
+*/
+fz_xml *fz_xml_find_next_dfs_top(fz_xml *item, const char *tag, const char *att, const char *match, fz_xml *top);
+
+/**
+ DOM-like functions for html in xml.
+*/
+
+/**
+ Return a borrowed reference for the 'body' element of
+ the given DOM.
+*/
+fz_xml *fz_dom_body(fz_context *ctx, fz_xml *dom);
+
+/**
+ Return a borrowed reference for the document (the top
+ level element) of the DOM.
+*/
+fz_xml *fz_dom_document_element(fz_context *ctx, fz_xml *dom);
+
+/**
+ Create an element of a given tag type for the given DOM.
+
+ The element is not linked into the DOM yet.
+*/
+fz_xml *fz_dom_create_element(fz_context *ctx, fz_xml *dom, const char *tag);
+
+/**
+ Create a text node for the given DOM.
+
+ The text node is not linked into the DOM yet.
+*/
+fz_xml *fz_dom_create_text_node(fz_context *ctx, fz_xml *dom, const char *text);
+
+/**
+ Find the first element matching the requirements in a depth first traversal from elt.
+
+ The tagname must match tag, unless tag is NULL, when all tag names are considered to match.
+
+ If att is NULL, then all tags match.
+ Otherwise:
+ If match is NULL, then only nodes that have an att attribute match.
+ If match is non-NULL, then only nodes that have an att attribute whose value matches match are considered.
+
+ Returns NULL (if no match found), or a borrowed reference to the first matching element.
+*/
+fz_xml *fz_dom_find(fz_context *ctx, fz_xml *elt, const char *tag, const char *att, const char *match);
+
+/**
+ Find the next element matching the requirements.
+*/
+fz_xml *fz_dom_find_next(fz_context *ctx, fz_xml *elt, const char *tag, const char *att, const char *match);
+
+/**
+ Insert an element as the last child of a parent, unlinking the
+ child from its current position if required.
+*/
+void fz_dom_append_child(fz_context *ctx, fz_xml *parent, fz_xml *child);
+
+/**
+ Insert an element (new_elt), before another element (node),
+ unlinking the new_elt from its current position if required.
+*/
+void fz_dom_insert_before(fz_context *ctx, fz_xml *node, fz_xml *new_elt);
+
+/**
+ Insert an element (new_elt), after another element (node),
+ unlinking the new_elt from its current position if required.
+*/
+void fz_dom_insert_after(fz_context *ctx, fz_xml *node, fz_xml *new_elt);
+
+/**
+ Remove an element from the DOM. The element can be added back elsewhere
+ if required.
+
+ No reference counting changes for the element.
+*/
+void fz_dom_remove(fz_context *ctx, fz_xml *elt);
+
+/**
+ Clone an element (and its children).
+
+ A borrowed reference to the clone is returned. The clone is not
+ yet linked into the DOM.
+*/
+fz_xml *fz_dom_clone(fz_context *ctx, fz_xml *elt);
+
+/**
+ Return a borrowed reference to the first child of a node,
+ or NULL if there isn't one.
+*/
+fz_xml *fz_dom_first_child(fz_context *ctx, fz_xml *elt);
+
+/**
+ Return a borrowed reference to the parent of a node,
+ or NULL if there isn't one.
+*/
+fz_xml *fz_dom_parent(fz_context *ctx, fz_xml *elt);
+
+/**
+ Return a borrowed reference to the next sibling of a node,
+ or NULL if there isn't one.
+*/
+fz_xml *fz_dom_next(fz_context *ctx, fz_xml *elt);
+
+/**
+ Return a borrowed reference to the previous sibling of a node,
+ or NULL if there isn't one.
+*/
+fz_xml *fz_dom_previous(fz_context *ctx, fz_xml *elt);
+
+/**
+ Add an attribute to an element.
+
+ Ownership of att and value remain with the caller.
+*/
+void fz_dom_add_attribute(fz_context *ctx, fz_xml *elt, const char *att, const char *value);
+
+/**
+ Remove an attribute from an element.
+*/
+void fz_dom_remove_attribute(fz_context *ctx, fz_xml *elt, const char *att);
+
+/**
+ Retrieve the value of a given attribute from a given element.
+
+ Returns a borrowed pointer to the value or NULL if not found.
+*/
+const char *fz_dom_attribute(fz_context *ctx, fz_xml *elt, const char *att);
+
+/**
+ Enumerate through the attributes of an element.
+
+ Call with i=0,1,2,3... to enumerate attributes.
+
+ On return *att and the return value will be NULL if there are not
+ that many attributes to read. Otherwise, *att will be filled in
+ with a borrowed pointer to the attribute name, and the return
+ value will be a borrowed pointer to the value.
+*/
+const char *fz_dom_get_attribute(fz_context *ctx, fz_xml *elt, int i, const char **att);
+
#endif
diff --git a/include/mupdf/memento.h b/include/mupdf/memento.h
index 0776860..b2d01b9 100644
--- a/include/mupdf/memento.h
+++ b/include/mupdf/memento.h
@@ -1,14 +1,16 @@
-/* Copyright (C) 2009-2017 Artifex Software, Inc.
+/* Copyright (C) 2009-2022 Artifex Software, Inc.
All Rights Reserved.
This software is provided AS-IS with no warranty, either express or
implied.
- This software is distributed under license and may not be copied, modified
- or distributed except as expressly authorized under the terms of that
- license. Refer to licensing information at http://www.artifex.com
- or contact Artifex Software, Inc., 7 Mt. Lassen Drive - Suite A-134,
- San Rafael, CA 94903, U.S.A., +1(415)492-9861, for further information.
+ This software is distributed under license and may not be copied,
+ modified or distributed except as expressly authorized under the terms
+ of the license contained in the file COPYING in this distribution.
+
+ Refer to licensing information at http://www.artifex.com or contact
+ Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+ CA 94129, USA, for further information.
*/
/* Memento: A library to aid debugging of memory leaks/heap corruption.
@@ -75,8 +77,7 @@
* An example:
* Suppose we have a gs invocation that crashes with memory corruption.
* * Build with -DMEMENTO.
- * * In your debugger put breakpoints on Memento_inited and
- * Memento_Breakpoint.
+ * * In your debugger put a breakpoint on Memento_breakpoint.
* * Run the program. It will stop in Memento_inited.
* * Execute Memento_setParanoia(1); (In VS use Ctrl-Alt-Q). (Note #1)
* * Continue execution.
@@ -92,9 +93,9 @@
* and 1458 - so if we rerun and stop the program at 1457, we can then
* step through, possibly with a data breakpoint at 0x172e710 and see
* when it occurs.
- * * So restart the program from the beginning. When we hit Memento_inited
- * execute Memento_breakAt(1457); (and maybe Memento_setParanoia(1), or
- * Memento_setParanoidAt(1457))
+ * * So restart the program from the beginning. When we stop after
+ * initialisation execute Memento_breakAt(1457); (and maybe
+ * Memento_setParanoia(1), or Memento_setParanoidAt(1457))
* * Continue execution until we hit Memento_breakpoint.
* * Now you can step through and watch the memory corruption happen.
*
@@ -140,30 +141,99 @@
* Memento has some experimental code in it to trap new/delete (and
* new[]/delete[] if required) calls.
*
- * In order for this to work, either:
+ * In all cases, Memento will provide a C API that new/delete
+ * operators can be built upon:
+ * void *Memento_cpp_new(size_t size);
+ * void Memento_cpp_delete(void *pointer);
+ * void *Memento_cpp_new_array(size_t size);
+ * void Memento_cpp_delete_array(void *pointer);
*
- * 1) Build memento.c with the c++ compiler.
+ * There are various ways that actual operator definitions can be
+ * provided:
+ *
+ * 1) If memento.c is built with the c++ compiler, then global new
+ * and delete operators will be built in to memento by default.
+ *
+ * 2) If memento.c is built as normal with the C compiler, then
+ * no such veneers will be built in. The caller must provide them
+ * themselves. This can be done either by:
+ *
+ * a) Copying the lines between:
+ * // C++ Operator Veneers - START
+ * and
+ * // C++ Operator Veneers - END
+ * from memento.c into a C++ file within their own project.
*
* or
*
- * 2) Build memento.c as normal with the C compiler, then from any
- * one of your .cpp files, do:
+ * b) Add the following lines to a C++ file in the project:
+ * #define MEMENTO_CPP_EXTRAS_ONLY
+ * #include "memento.c"
*
- * #define MEMENTO_CPP_EXTRAS_ONLY
- * #include "memento.c"
+ * 3) For those people that would like to be able to compile memento.c
+ * with a C compiler, and provide new/delete veneers globally
+ * within their own C++ code (so avoiding the need for memento.h to
+ * be included from every file), define MEMENTO_NO_CPLUSPLUS as you
+ * build, and Memento will not provide any veneers itself, instead
+ * relying on the library user to provide them.
*
- * In the case where MEMENTO is not defined, this will not do anything.
+ * For convenience the lines to implement such veneers can be found
+ * at the end of memento.c between:
+ * // C++ Operator Veneers - START
+ * and
+ * // C++ Operator Veneers - END
+ *
+ * Memento's interception of new/delete can be disabled at runtime
+ * by using Memento_setIgnoreNewDelete(1). Alternatively the
+ * MEMENTO_IGNORENEWDELETE environment variable can be set to 1 to
+ * achieve the same result.
*
* Both Windows and GCC provide separate new[] and delete[] operators
* for arrays. Apparently some systems do not. If this is the case for
* your system, define MEMENTO_CPP_NO_ARRAY_CONSTRUCTORS.
+ *
+ * "libbacktrace.so failed to load"
+ *
+ * In order to give nice backtraces on unix, Memento will try to use
+ * a libbacktrace dynamic library. If it can't find it, you'll see
+ * that warning, and your backtraces won't include file/line information.
+ *
+ * To fix this you'll need to build your own libbacktrace. Don't worry,
+ * it's really easy:
+ * git clone git://github.com/ianlancetaylor/libbacktrace
+ * cd libbacktrace
+ * ./configure --enable-shared
+ * make
+ *
+ * This leaves the built .so as .libs/libbacktrace.so
+ *
+ * Memento will look for this on LD_LIBRARY_PATH, or in /opt/lib/,
+ * or in /lib/, or in /usr/lib/, or in /usr/local/lib/. I recommend
+ * using /opt/lib/ as this won't conflict with anything that you
+ * get via a package manager like apt.
+ *
+ * sudo mkdir /opt
+ * sudo mkdir /opt/lib
+ * sudo cp .libs/libbacktrace.so /opt/lib/
*/
+#ifdef __cplusplus
+
+// Avoids problems with strdup()'s throw() attribute on Linux.
+#include <string.h>
+
+extern "C" {
+#endif
+
#ifndef MEMENTO_H
-#define MEMENTO_H
+/* Include all these first, so our definitions below do
+ * not conflict with them. */
+#include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
-#include /* for size_t */
+#define MEMENTO_H
#ifndef MEMENTO_UNDERLYING_MALLOC
#define MEMENTO_UNDERLYING_MALLOC malloc
@@ -187,8 +257,6 @@
#define MEMENTO_ALLOCFILL 0xa8
#define MEMENTO_FREEFILL 0xa9
-#define MEMENTO_FREELIST_MAX 0x2000000
-
int Memento_checkBlock(void *);
int Memento_checkAllMemory(void);
int Memento_check(void);
@@ -205,18 +273,36 @@ int Memento_failAt(int);
int Memento_failThisEvent(void);
void Memento_listBlocks(void);
void Memento_listNewBlocks(void);
+void Memento_listPhasedBlocks(void);
size_t Memento_setMax(size_t);
void Memento_stats(void);
void *Memento_label(void *, const char *);
void Memento_tick(void);
+int Memento_setVerbose(int);
+
+/* Terminate backtraces if we see the specified function name. E.g.
+'cfunction_call' will exclude Python interpreter functions when Python calls C
+code. Returns 0 on success, -1 on failure (out of memory). */
+int Memento_addBacktraceLimitFnname(const char *fnname);
+
+/* If atexitfin is 0, we do not call Memento_fin() in an atexit() handler. */
+int Memento_setAtexitFin(int atexitfin);
+
+int Memento_setIgnoreNewDelete(int ignore);
void *Memento_malloc(size_t s);
void *Memento_realloc(void *, size_t s);
void Memento_free(void *);
void *Memento_calloc(size_t, size_t);
+char *Memento_strdup(const char*);
+#if !defined(MEMENTO_GS_HACKS) && !defined(MEMENTO_MUPDF_HACKS)
+int Memento_asprintf(char **ret, const char *format, ...);
+int Memento_vasprintf(char **ret, const char *format, va_list ap);
+#endif
void Memento_info(void *addr);
void Memento_listBlockInfo(void);
+void Memento_blockInfo(void *blk);
void *Memento_takeByteRef(void *blk);
void *Memento_dropByteRef(void *blk);
void *Memento_takeShortRef(void *blk);
@@ -236,23 +322,48 @@ int Memento_checkIntPointerOrNull(void *blk);
void Memento_startLeaking(void);
void Memento_stopLeaking(void);
+/* Returns number of allocation events so far. */
+int Memento_sequence(void);
+
+/* Returns non-zero if our process was forked by Memento squeeze. */
+int Memento_squeezing(void);
+
void Memento_fin(void);
+void Memento_bt(void);
+
+void *Memento_cpp_new(size_t size);
+void Memento_cpp_delete(void *pointer);
+void *Memento_cpp_new_array(size_t size);
+void Memento_cpp_delete_array(void *pointer);
+
+void Memento_showHash(unsigned int hash);
+
#ifdef MEMENTO
#ifndef COMPILING_MEMENTO_C
-#define malloc Memento_malloc
-#define free Memento_free
-#define realloc Memento_realloc
-#define calloc Memento_calloc
+#define malloc Memento_malloc
+#define free Memento_free
+#define realloc Memento_realloc
+#define calloc Memento_calloc
+#define strdup Memento_strdup
+#if !defined(MEMENTO_GS_HACKS) && !defined(MEMENTO_MUPDF_HACKS)
+#define asprintf Memento_asprintf
+#define vasprintf Memento_vasprintf
+#endif
#endif
#else
-#define Memento_malloc MEMENTO_UNDERLYING_MALLOC
-#define Memento_free MEMENTO_UNDERLYING_FREE
-#define Memento_realloc MEMENTO_UNDERLYING_REALLOC
-#define Memento_calloc MEMENTO_UNDERLYING_CALLOC
+#define Memento_malloc MEMENTO_UNDERLYING_MALLOC
+#define Memento_free MEMENTO_UNDERLYING_FREE
+#define Memento_realloc MEMENTO_UNDERLYING_REALLOC
+#define Memento_calloc MEMENTO_UNDERLYING_CALLOC
+#define Memento_strdup strdup
+#if !defined(MEMENTO_GS_HACKS) && !defined(MEMENTO_MUPDF_HACKS)
+#define Memento_asprintf asprintf
+#define Memento_vasprintf vasprintf
+#endif
#define Memento_checkBlock(A) 0
#define Memento_checkAllMemory() 0
@@ -269,11 +380,13 @@ void Memento_fin(void);
#define Memento_failThisEvent() 0
#define Memento_listBlocks() do {} while (0)
#define Memento_listNewBlocks() do {} while (0)
+#define Memento_listPhasedBlocks() do {} while (0)
#define Memento_setMax(A) 0
#define Memento_stats() do {} while (0)
#define Memento_label(A,B) (A)
#define Memento_info(A) do {} while (0)
#define Memento_listBlockInfo() do {} while (0)
+#define Memento_blockInfo(A) do {} while (0)
#define Memento_takeByteRef(A) (A)
#define Memento_dropByteRef(A) (A)
#define Memento_takeShortRef(A) (A)
@@ -288,12 +401,23 @@ void Memento_fin(void);
#define Memento_checkBytePointerOrNull(A) 0
#define Memento_checkShortPointerOrNull(A) 0
#define Memento_checkIntPointerOrNull(A) 0
+#define Memento_setIgnoreNewDelete(v) 0
#define Memento_tick() do {} while (0)
#define Memento_startLeaking() do {} while (0)
#define Memento_stopLeaking() do {} while (0)
#define Memento_fin() do {} while (0)
+#define Memento_bt() do {} while (0)
+#define Memento_sequence() (0)
+#define Memento_squeezing() (0)
+#define Memento_setVerbose(A) (A)
+#define Memento_addBacktraceLimitFnname(A) (0)
+#define Memento_setAtexitFin(atexitfin) (0)
#endif /* MEMENTO */
+#ifdef __cplusplus
+}
+#endif
+
#endif /* MEMENTO_H */
diff --git a/include/mupdf/vendor.go b/include/mupdf/vendor.go
new file mode 100644
index 0000000..4a5fffb
--- /dev/null
+++ b/include/mupdf/vendor.go
@@ -0,0 +1,3 @@
+//go:build required
+
+package vendor
diff --git a/include/vendor.go b/include/vendor.go
new file mode 100644
index 0000000..4a5fffb
--- /dev/null
+++ b/include/vendor.go
@@ -0,0 +1,3 @@
+//go:build required
+
+package vendor
diff --git a/libs/libmupdf_android_arm.a b/libs/libmupdf_android_arm.a
deleted file mode 100644
index ae1fc31..0000000
Binary files a/libs/libmupdf_android_arm.a and /dev/null differ
diff --git a/libs/libmupdf_android_arm64.a b/libs/libmupdf_android_arm64.a
index 834d0ff..634ba8a 100644
Binary files a/libs/libmupdf_android_arm64.a and b/libs/libmupdf_android_arm64.a differ
diff --git a/libs/libmupdf_darwin_amd64.a b/libs/libmupdf_darwin_amd64.a
index 4cc736a..de8b642 100644
Binary files a/libs/libmupdf_darwin_amd64.a and b/libs/libmupdf_darwin_amd64.a differ
diff --git a/libs/libmupdf_darwin_arm64.a b/libs/libmupdf_darwin_arm64.a
new file mode 100644
index 0000000..8dfab9b
Binary files /dev/null and b/libs/libmupdf_darwin_arm64.a differ
diff --git a/libs/libmupdf_linux_386.a b/libs/libmupdf_linux_386.a
deleted file mode 100644
index 425e438..0000000
Binary files a/libs/libmupdf_linux_386.a and /dev/null differ
diff --git a/libs/libmupdf_linux_amd64.a b/libs/libmupdf_linux_amd64.a
index 9272ac0..1a639a5 100644
Binary files a/libs/libmupdf_linux_amd64.a and b/libs/libmupdf_linux_amd64.a differ
diff --git a/libs/libmupdf_linux_amd64_musl.a b/libs/libmupdf_linux_amd64_musl.a
new file mode 100644
index 0000000..7aa9373
Binary files /dev/null and b/libs/libmupdf_linux_amd64_musl.a differ
diff --git a/libs/libmupdf_linux_amd64_nopie.a b/libs/libmupdf_linux_amd64_nopie.a
deleted file mode 100644
index 56daddd..0000000
Binary files a/libs/libmupdf_linux_amd64_nopie.a and /dev/null differ
diff --git a/libs/libmupdf_linux_arm.a b/libs/libmupdf_linux_arm.a
deleted file mode 100644
index 0bb614f..0000000
Binary files a/libs/libmupdf_linux_arm.a and /dev/null differ
diff --git a/libs/libmupdf_linux_arm64.a b/libs/libmupdf_linux_arm64.a
index 8984667..c542191 100644
Binary files a/libs/libmupdf_linux_arm64.a and b/libs/libmupdf_linux_arm64.a differ
diff --git a/libs/libmupdf_linux_arm64_musl.a b/libs/libmupdf_linux_arm64_musl.a
new file mode 100644
index 0000000..5180435
Binary files /dev/null and b/libs/libmupdf_linux_arm64_musl.a differ
diff --git a/libs/libmupdf_windows_386.a b/libs/libmupdf_windows_386.a
deleted file mode 100644
index 5ed6a5e..0000000
Binary files a/libs/libmupdf_windows_386.a and /dev/null differ
diff --git a/libs/libmupdf_windows_amd64.a b/libs/libmupdf_windows_amd64.a
index c495944..2070f9f 100644
Binary files a/libs/libmupdf_windows_amd64.a and b/libs/libmupdf_windows_amd64.a differ
diff --git a/libs/libmupdf_windows_arm64.a b/libs/libmupdf_windows_arm64.a
new file mode 100644
index 0000000..4244e07
Binary files /dev/null and b/libs/libmupdf_windows_arm64.a differ
diff --git a/libs/libmupdfthird_android_arm.a b/libs/libmupdfthird_android_arm.a
deleted file mode 100644
index b9df955..0000000
Binary files a/libs/libmupdfthird_android_arm.a and /dev/null differ
diff --git a/libs/libmupdfthird_android_arm64.a b/libs/libmupdfthird_android_arm64.a
index 408bc12..b3e58e6 100644
Binary files a/libs/libmupdfthird_android_arm64.a and b/libs/libmupdfthird_android_arm64.a differ
diff --git a/libs/libmupdfthird_darwin_amd64.a b/libs/libmupdfthird_darwin_amd64.a
index 563d7b4..6d75616 100644
Binary files a/libs/libmupdfthird_darwin_amd64.a and b/libs/libmupdfthird_darwin_amd64.a differ
diff --git a/libs/libmupdfthird_darwin_arm64.a b/libs/libmupdfthird_darwin_arm64.a
new file mode 100644
index 0000000..571215a
Binary files /dev/null and b/libs/libmupdfthird_darwin_arm64.a differ
diff --git a/libs/libmupdfthird_linux_386.a b/libs/libmupdfthird_linux_386.a
deleted file mode 100644
index 9f064d4..0000000
Binary files a/libs/libmupdfthird_linux_386.a and /dev/null differ
diff --git a/libs/libmupdfthird_linux_amd64.a b/libs/libmupdfthird_linux_amd64.a
index de3f6f6..e4d29dc 100644
Binary files a/libs/libmupdfthird_linux_amd64.a and b/libs/libmupdfthird_linux_amd64.a differ
diff --git a/libs/libmupdfthird_linux_amd64_musl.a b/libs/libmupdfthird_linux_amd64_musl.a
new file mode 100644
index 0000000..95d0244
Binary files /dev/null and b/libs/libmupdfthird_linux_amd64_musl.a differ
diff --git a/libs/libmupdfthird_linux_amd64_nopie.a b/libs/libmupdfthird_linux_amd64_nopie.a
deleted file mode 100644
index ecc8773..0000000
Binary files a/libs/libmupdfthird_linux_amd64_nopie.a and /dev/null differ
diff --git a/libs/libmupdfthird_linux_arm.a b/libs/libmupdfthird_linux_arm.a
deleted file mode 100644
index 6bea90b..0000000
Binary files a/libs/libmupdfthird_linux_arm.a and /dev/null differ
diff --git a/libs/libmupdfthird_linux_arm64.a b/libs/libmupdfthird_linux_arm64.a
index 2efd5ab..9e5ea94 100644
Binary files a/libs/libmupdfthird_linux_arm64.a and b/libs/libmupdfthird_linux_arm64.a differ
diff --git a/libs/libmupdfthird_linux_arm64_musl.a b/libs/libmupdfthird_linux_arm64_musl.a
new file mode 100644
index 0000000..5f0cf17
Binary files /dev/null and b/libs/libmupdfthird_linux_arm64_musl.a differ
diff --git a/libs/libmupdfthird_windows_386.a b/libs/libmupdfthird_windows_386.a
deleted file mode 100644
index be8d3c7..0000000
Binary files a/libs/libmupdfthird_windows_386.a and /dev/null differ
diff --git a/libs/libmupdfthird_windows_amd64.a b/libs/libmupdfthird_windows_amd64.a
index 96cd3d6..cf33623 100644
Binary files a/libs/libmupdfthird_windows_amd64.a and b/libs/libmupdfthird_windows_amd64.a differ
diff --git a/libs/libmupdfthird_windows_arm64.a b/libs/libmupdfthird_windows_arm64.a
new file mode 100644
index 0000000..52a8cb7
Binary files /dev/null and b/libs/libmupdfthird_windows_arm64.a differ
diff --git a/libs/vendor.go b/libs/vendor.go
new file mode 100644
index 0000000..4a5fffb
--- /dev/null
+++ b/libs/vendor.go
@@ -0,0 +1,3 @@
+//go:build required
+
+package vendor
diff --git a/purego_darwin.go b/purego_darwin.go
new file mode 100644
index 0000000..419cd86
--- /dev/null
+++ b/purego_darwin.go
@@ -0,0 +1,33 @@
+//go:build (!cgo || nocgo) && darwin
+
+package fitz
+
+import (
+ "fmt"
+
+ "github.com/ebitengine/purego"
+)
+
+const (
+ libname = "libmupdf.dylib"
+)
+
+// loadLibrary opens the dylib named by libname and returns its handle; it panics if loading fails.
+func loadLibrary() uintptr {
+	lib, openErr := purego.Dlopen(libname, purego.RTLD_NOW|purego.RTLD_GLOBAL)
+	if openErr == nil {
+		return lib
+	}
+
+	panic(fmt.Errorf("cannot load library: %w", openErr))
+}
+
+// procAddress looks up procName in the library referenced by handle and returns its address; it panics if the lookup fails.
+func procAddress(handle uintptr, procName string) uintptr {
+	sym, lookupErr := purego.Dlsym(handle, procName)
+	if lookupErr == nil {
+		return sym
+	}
+
+	panic(fmt.Errorf("cannot get proc address for %s: %w", procName, lookupErr))
+}
diff --git a/purego_linux.go b/purego_linux.go
new file mode 100644
index 0000000..d69686f
--- /dev/null
+++ b/purego_linux.go
@@ -0,0 +1,32 @@
+//go:build (!cgo || nocgo) && unix && !darwin
+
+package fitz
+
+import (
+ "fmt"
+ "github.com/ebitengine/purego"
+)
+
+const (
+ libname = "libmupdf.so"
+)
+
+// loadLibrary opens the shared object named by libname and returns its handle; it panics if loading fails.
+func loadLibrary() uintptr {
+	lib, openErr := purego.Dlopen(libname, purego.RTLD_NOW|purego.RTLD_GLOBAL)
+	if openErr == nil {
+		return lib
+	}
+
+	panic(fmt.Errorf("cannot load library: %w", openErr))
+}
+
+// procAddress looks up procName in the library referenced by handle and returns its address; it panics if the lookup fails.
+func procAddress(handle uintptr, procName string) uintptr {
+	sym, lookupErr := purego.Dlsym(handle, procName)
+	if lookupErr == nil {
+		return sym
+	}
+
+	panic(fmt.Errorf("cannot get proc address for %s: %w", procName, lookupErr))
+}
diff --git a/purego_windows.go b/purego_windows.go
new file mode 100644
index 0000000..7585968
--- /dev/null
+++ b/purego_windows.go
@@ -0,0 +1,34 @@
+//go:build (!cgo || nocgo) && windows
+
+package fitz
+
+import (
+ "fmt"
+ "syscall"
+
+ "golang.org/x/sys/windows"
+)
+
+const (
+ libname = "libmupdf.dll"
+)
+
+// loadLibrary opens the dll named by libname via syscall.LoadLibrary and returns its handle; it panics if loading fails.
+func loadLibrary() uintptr {
+	module, loadErr := syscall.LoadLibrary(libname)
+	if loadErr == nil {
+		return uintptr(module)
+	}
+
+	panic(fmt.Errorf("cannot load library %s: %w", libname, loadErr))
+}
+
+// procAddress looks up procName in the module referenced by handle and returns its address; it panics if the lookup fails.
+func procAddress(handle uintptr, procName string) uintptr {
+	sym, lookupErr := windows.GetProcAddress(windows.Handle(handle), procName)
+	if lookupErr == nil {
+		return sym
+	}
+
+	panic(fmt.Errorf("cannot get proc address for %s: %w", procName, lookupErr))
+}
diff --git a/testdata/test.bmp b/testdata/test.bmp
new file mode 100644
index 0000000..02dc6d6
Binary files /dev/null and b/testdata/test.bmp differ
diff --git a/testdata/test.cbz b/testdata/test.cbz
new file mode 100644
index 0000000..168f6f5
Binary files /dev/null and b/testdata/test.cbz differ
diff --git a/testdata/test.docx b/testdata/test.docx
new file mode 100644
index 0000000..1bac61d
Binary files /dev/null and b/testdata/test.docx differ
diff --git a/testdata/test.epub b/testdata/test.epub
new file mode 100644
index 0000000..9ccf430
Binary files /dev/null and b/testdata/test.epub differ
diff --git a/testdata/test.fb2 b/testdata/test.fb2
new file mode 100644
index 0000000..f1292bf
--- /dev/null
+++ b/testdata/test.fb2
@@ -0,0 +1,11 @@
+
+
+
+
+ Hello World
+
+
+
+
+
+
diff --git a/testdata/test.gif b/testdata/test.gif
new file mode 100644
index 0000000..d5e923d
Binary files /dev/null and b/testdata/test.gif differ
diff --git a/testdata/test.jb2 b/testdata/test.jb2
new file mode 100644
index 0000000..ae48d91
Binary files /dev/null and b/testdata/test.jb2 differ
diff --git a/testdata/test.jp2 b/testdata/test.jp2
new file mode 100644
index 0000000..63714f9
Binary files /dev/null and b/testdata/test.jp2 differ
diff --git a/testdata/test.jpg b/testdata/test.jpg
new file mode 100644
index 0000000..3219d31
Binary files /dev/null and b/testdata/test.jpg differ
diff --git a/testdata/test.jxr b/testdata/test.jxr
new file mode 100644
index 0000000..3e4fc35
Binary files /dev/null and b/testdata/test.jxr differ
diff --git a/testdata/test.mobi b/testdata/test.mobi
new file mode 100644
index 0000000..2ac34e9
Binary files /dev/null and b/testdata/test.mobi differ
diff --git a/testdata/test.pam b/testdata/test.pam
new file mode 100644
index 0000000..1653f1f
--- /dev/null
+++ b/testdata/test.pam
@@ -0,0 +1,10 @@
+P7
+WIDTH 3
+HEIGHT 3
+DEPTH 1
+MAXVAL 1
+TUPLETYPE blackandwhite
+ENDHDR
+1 0 1
+0 1 0
+1 0 1
\ No newline at end of file
diff --git a/testdata/test.pbm b/testdata/test.pbm
new file mode 100644
index 0000000..fbcb39d
--- /dev/null
+++ b/testdata/test.pbm
@@ -0,0 +1,5 @@
+P1
+3 3
+1 0 1
+0 1 0
+1 0 1
\ No newline at end of file
diff --git a/testdata/test.pfm b/testdata/test.pfm
new file mode 100644
index 0000000..4b51ce6
Binary files /dev/null and b/testdata/test.pfm differ
diff --git a/testdata/test.pgm b/testdata/test.pgm
new file mode 100644
index 0000000..d2728b9
--- /dev/null
+++ b/testdata/test.pgm
@@ -0,0 +1,6 @@
+P2
+3 3
+2
+0 1 2
+0 1 2
+0 1 2
\ No newline at end of file
diff --git a/testdata/test.png b/testdata/test.png
new file mode 100644
index 0000000..a7045ed
Binary files /dev/null and b/testdata/test.png differ
diff --git a/testdata/test.ppm b/testdata/test.ppm
new file mode 100644
index 0000000..63b65ad
--- /dev/null
+++ b/testdata/test.ppm
@@ -0,0 +1,12 @@
+P3
+3 3
+255
+255 0 0
+ 0 255 0
+ 0 0 255
+255 255 0
+ 0 255 0
+255 0 255
+255 255 255
+ 0 255 0
+ 0 255 255
\ No newline at end of file
diff --git a/testdata/test.pptx b/testdata/test.pptx
new file mode 100644
index 0000000..ea72794
Binary files /dev/null and b/testdata/test.pptx differ
diff --git a/testdata/test.psd b/testdata/test.psd
new file mode 100644
index 0000000..908f0e0
Binary files /dev/null and b/testdata/test.psd differ
diff --git a/testdata/test.svg b/testdata/test.svg
new file mode 100644
index 0000000..0555bfe
--- /dev/null
+++ b/testdata/test.svg
@@ -0,0 +1,6 @@
+
+
+
+
\ No newline at end of file
diff --git a/testdata/test.tif b/testdata/test.tif
new file mode 100644
index 0000000..c131b8f
Binary files /dev/null and b/testdata/test.tif differ
diff --git a/testdata/test.xlsx b/testdata/test.xlsx
new file mode 100644
index 0000000..04ac8ab
Binary files /dev/null and b/testdata/test.xlsx differ
diff --git a/testdata/test.xps b/testdata/test.xps
new file mode 100644
index 0000000..d82f61e
Binary files /dev/null and b/testdata/test.xps differ