init commit

2026-02-10 14:45:18 +08:00
parent a530a79566
commit 5ce88674da
142 changed files with 12394 additions and 4280 deletions
--- a/include/mupdf/fitz/string-util.h
+++ b/include/mupdf/fitz/string-util.h
@@ -1,33 +1,66 @@
+// Copyright (C) 2004-2022 Artifex Software, Inc.
+//
+// This file is part of MuPDF.
+//
+// MuPDF is free software: you can redistribute it and/or modify it under the
+// terms of the GNU Affero General Public License as published by the Free
+// Software Foundation, either version 3 of the License, or (at your option)
+// any later version.
+//
+// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
+// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
+// details.
+//
+// You should have received a copy of the GNU Affero General Public License
+// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
+//
+// Alternative licensing terms are available from the licensor.
+// For commercial licensing, see <https://www.artifex.com/> or contact
+// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
+// CA 94129, USA, for further information.
+
 #ifndef MUPDF_FITZ_STRING_H
 #define MUPDF_FITZ_STRING_H

 #include "mupdf/fitz/system.h"
+#include "mupdf/fitz/context.h"

-/* The Unicode character used to incoming character whose value is unknown or unrepresentable. */
+/* The Unicode character used to replace an incoming character whose
+ * value is unknown or unrepresentable. */
 #define FZ_REPLACEMENT_CHARACTER 0xFFFD

-/*
+/**
 	Safe string functions
 */

-/*
-	fz_strsep: Given a pointer to a C string (or a pointer to NULL) break
-	it at the first occurrence of a delimiter char (from a given set).
+/**
+	Return strlen(s), if that is less than maxlen, or maxlen if
+	there is no null byte ('\0') among the first maxlen bytes.
+*/
+size_t fz_strnlen(const char *s, size_t maxlen);

-	stringp: Pointer to a C string pointer (or NULL). Updated on exit to
-	point to the first char of the string after the delimiter that was
-	found. The string pointed to by stringp will be corrupted by this
-	call (as the found delimiter will be overwritten by 0).
+/**
+	Given a pointer to a C string (or a pointer to NULL) break
+	it at the first occurrence of a delimiter char (from a given
+	set).
+
+	stringp: Pointer to a C string pointer (or NULL). Updated on
+	exit to point to the first char of the string after the
+	delimiter that was found. The string pointed to by stringp will
+	be corrupted by this call (as the found delimiter will be
+	overwritten by 0).

 	delim: A C string of acceptable delimiter characters.

-	Returns a pointer to a C string containing the chars of stringp up
-	to the first delimiter char (or the end of the string), or NULL.
+	Returns a pointer to a C string containing the chars of stringp
+	up to the first delimiter char (or the end of the string), or
+	NULL.
 */
 char *fz_strsep(char **stringp, const char *delim);

-/*
-	fz_strlcpy: Copy at most n-1 chars of a string into a destination
+/**
+	Copy at most n-1 chars of a string into a destination
 	buffer with null termination, returning the real length of the
 	initial string (excluding terminator).

@@ -41,8 +74,8 @@ char *fz_strsep(char **stringp, const char *delim);
 */
 size_t fz_strlcpy(char *dst, const char *src, size_t n);

-/*
-	fz_strlcat: Concatenate 2 strings, with a maximum length.
+/**
+	Concatenate 2 strings, with a maximum length.

 	dst: pointer to first string in a buffer of n bytes.

@@ -50,50 +83,108 @@ size_t fz_strlcpy(char *dst, const char *src, size_t n);

 	n: Size (in bytes) of buffer that dst is in.

-	Returns the real length that a concatenated dst + src would have been
-	(not including terminator).
+	Returns the real length that a concatenated dst + src would have
+	been (not including terminator).
 */
 size_t fz_strlcat(char *dst, const char *src, size_t n);

-/*
-	fz_dirname: extract the directory component from a path.
+/**
+	Find the start of the first occurrence of the substring needle in haystack.
+*/
+void *fz_memmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen);
+
+/**
+	extract the directory component from a path.
 */
 void fz_dirname(char *dir, const char *path, size_t dirsize);

-/*
-	fz_urldecode: decode url escapes.
+/**
+	Find the filename component in a path.
+*/
+const char *fz_basename(const char *path);
+
+/**
+	Like fz_decode_uri_component but in-place.
 */
 char *fz_urldecode(char *url);

-/*
-	fz_format_output_path: create output file name using a template.
-		If the path contains %[0-9]*d, the first such pattern will be replaced
-		with the page number. If the template does not contain such a pattern, the page
-		number will be inserted before the file suffix. If the template does not have
-		a file suffix, the page number will be added to the end.
+/**
+ * Return a new string representing the unencoded version of the given URI.
+ * This decodes all escape sequences except those that would result in one of the
+ * reserved characters that are part of the URI syntax (; / ? : @ & = + $ , #).
+ */
+char *fz_decode_uri(fz_context *ctx, const char *s);
+
+/**
+ * Return a new string representing the unencoded version of the given URI component.
+ * This decodes all escape sequences!
+ */
+char *fz_decode_uri_component(fz_context *ctx, const char *s);
+
+/**
+ * Return a new string representing the provided string encoded as a URI.
+ */
+char *fz_encode_uri(fz_context *ctx, const char *s);
+
+/**
+ * Return a new string representing the provided string encoded as a URI component.
+ * This also encodes the special reserved characters (; / ? : @ & = + $ , #).
+ */
+char *fz_encode_uri_component(fz_context *ctx, const char *s);
+
+/**
+ * Return a new string representing the provided string encoded as a URI path name.
+ * This also encodes the special reserved characters except /.
+ */
+char *fz_encode_uri_pathname(fz_context *ctx, const char *s);
+
+/**
+	create output file name using a template.
+
+	If the path contains %[0-9]*d, the first such pattern will be
+	replaced with the page number. If the template does not contain
+	such a pattern, the page number will be inserted before the
+	filename extension. If the template does not have a filename
+	extension, the page number will be added to the end.
 */
 void fz_format_output_path(fz_context *ctx, char *path, size_t size, const char *fmt, int page);

-/*
-	fz_cleanname: rewrite path to the shortest string that names the same path.
+/**
+	rewrite path to the shortest string that names the same path.

-	Eliminates multiple and trailing slashes, interprets "." and "..".
-	Overwrites the string in place.
+	Eliminates multiple and trailing slashes, interprets "." and
+	"..". Overwrites the string in place.
 */
 char *fz_cleanname(char *name);

-/*
+/**
+	rewrite path to the shortest string that names the same path.
+
+	Eliminates multiple and trailing slashes, interprets "." and
+	"..". Allocates a new string that the caller must free.
+*/
+char *fz_cleanname_strdup(fz_context *ctx, const char *name);
+
+/**
+	Resolve a path to an absolute file name.
+	The resolved path buffer must be of at least PATH_MAX size.
+*/
+char *fz_realpath(const char *path, char *resolved_path);
+
+/**
 	Case insensitive (ASCII only) string comparison.
 */
 int fz_strcasecmp(const char *a, const char *b);
+int fz_strncasecmp(const char *a, const char *b, size_t n);

-/*
-	FZ_UTFMAX: Maximum number of bytes in a decoded rune (maximum length returned by fz_chartorune).
+/**
+	FZ_UTFMAX: Maximum number of bytes in a decoded rune (maximum
+	length returned by fz_chartorune).
 */
 enum { FZ_UTFMAX = 4 };

-/*
-	fz_chartorune: UTF8 decode a single rune from a sequence of chars.
+/**
+	UTF8 decode a single rune from a sequence of chars.

 	rune: Pointer to an int to assign the decoded 'rune' to.

@@ -103,8 +194,8 @@ enum { FZ_UTFMAX = 4 };
 */
 int fz_chartorune(int *rune, const char *str);

-/*
-	fz_runetochar: UTF8 encode a rune to a sequence of chars.
+/**
+	UTF8 encode a rune to a sequence of chars.

 	str: Pointer to a place to put the UTF8 encoded character.

@@ -114,17 +205,42 @@ int fz_chartorune(int *rune, const char *str);
 */
 int fz_runetochar(char *str, int rune);

-/*
-	fz_runelen: Count how many chars are required to represent a rune.
+/**
+	Count how many chars are required to represent a rune.

 	rune: The rune to encode.

-	Returns the number of bytes required to represent this run in UTF8.
+	Returns the number of bytes required to represent this rune in
+	UTF8.
 */
 int fz_runelen(int rune);

-/*
-	fz_utflen: Count how many runes the UTF-8 encoded string
+/**
+	Compute the index of a rune in a string.
+
+	str: Pointer to beginning of a string.
+
+	p: Pointer to a char in str.
+
+	Returns the index of the rune pointed to by p in str.
+*/
+int fz_runeidx(const char *str, const char *p);
+
+/**
+	Obtain a pointer to the char representing the rune
+	at a given index.
+
+	str: Pointer to beginning of a string.
+
+	idx: Index of a rune to return a char pointer to.
+
+	Returns a pointer to the char where the desired rune starts,
+	or NULL if the string ends before the index is reached.
+*/
+const char *fz_runeptr(const char *str, int idx);
+
+/**
+	Count how many runes the UTF-8 encoded string
 	consists of.

 	s: The UTF-8 encoded, NUL-terminated text string.
@@ -134,33 +250,37 @@ int fz_runelen(int rune);
 int fz_utflen(const char *s);

 /*
-	fz_strtof: Locale-independent decimal to binary
-	conversion. On overflow return (-)INFINITY and set errno to ERANGE. On
-	underflow return 0 and set errno to ERANGE. Special inputs (case
-	insensitive): "NAN", "INF" or "INFINITY".
+	Convert a wchar string into a new heap allocated utf8 one.
+*/
+char *fz_utf8_from_wchar(fz_context *ctx, const wchar_t *s);
+
+/*
+	Convert a utf8 string into a new heap allocated wchar one.
+*/
+wchar_t *fz_wchar_from_utf8(fz_context *ctx, const char *path);
+
+
+/**
+	Locale-independent decimal to binary conversion. On overflow
+	return (-)INFINITY and set errno to ERANGE. On underflow return
+	0 and set errno to ERANGE. Special inputs (case insensitive):
+	"NAN", "INF" or "INFINITY".
 */
 float fz_strtof(const char *s, char **es);

-/*
-	fz_strtof_no_exp: Like fz_strtof, but does not recognize exponent
-	format. So fz_strtof_no_exp("1.5e20", &tail) will return 1.5 and tail
-	will point to "e20".
-*/
-
-float fz_strtof_no_exp(const char *string, char **tailptr);
-/*
-	fz_grisu: Compute decimal integer m, exp such that:
-		f = m * 10^exp
-		m is as short as possible without losing exactness
-	Assumes special cases (0, NaN, +Inf, -Inf) have been handled.
-*/
 int fz_grisu(float f, char *s, int *exp);

-/*
+/**
 	Check and parse string into page ranges:
-		( ','? ([0-9]+|'N') ( '-' ([0-9]+|N) )? )+
+		/,?(-?\d+|N)(-(-?\d+|N))?/
 */
 int fz_is_page_range(fz_context *ctx, const char *s);
 const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n);

+/**
+	Unicode aware tolower and toupper functions.
+*/
+int fz_tolower(int c);
+int fz_toupper(int c);
+
 #endif