init commit
Some checks failed
Test / test (1.22.x, macos-latest) (push) Has been cancelled
Test / test (1.22.x, ubuntu-latest) (push) Has been cancelled

This commit is contained in:
landaiqing
2026-02-10 14:45:18 +08:00
parent a530a79566
commit 5ce88674da
142 changed files with 12394 additions and 4280 deletions

View File

@@ -1,33 +1,66 @@
// Copyright (C) 2004-2022 Artifex Software, Inc.
//
// This file is part of MuPDF.
//
// MuPDF is free software: you can redistribute it and/or modify it under the
// terms of the GNU Affero General Public License as published by the Free
// Software Foundation, either version 3 of the License, or (at your option)
// any later version.
//
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
// details.
//
// You should have received a copy of the GNU Affero General Public License
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
//
// Alternative licensing terms are available from the licensor.
// For commercial licensing, see <https://www.artifex.com/> or contact
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
// CA 94129, USA, for further information.
#ifndef MUPDF_FITZ_STRING_H
#define MUPDF_FITZ_STRING_H
#include "mupdf/fitz/system.h"
#include "mupdf/fitz/context.h"
/* The Unicode character used to replace an incoming character whose value is unknown or unrepresentable. */
/* The Unicode character (U+FFFD) used to replace an incoming character
 * whose value is unknown or unrepresentable. */
#define FZ_REPLACEMENT_CHARACTER 0xFFFD
/*
/**
Safe string functions
*/
/*
fz_strsep: Given a pointer to a C string (or a pointer to NULL) break
it at the first occurrence of a delimiter char (from a given set).
/**
Return strlen(s), if that is less than maxlen, or maxlen if
there is no null byte ('\0') among the first maxlen bytes.
*/
size_t fz_strnlen(const char *s, size_t maxlen);
stringp: Pointer to a C string pointer (or NULL). Updated on exit to
point to the first char of the string after the delimiter that was
found. The string pointed to by stringp will be corrupted by this
call (as the found delimiter will be overwritten by 0).
/**
Given a pointer to a C string (or a pointer to NULL) break
it at the first occurrence of a delimiter char (from a given
set).
stringp: Pointer to a C string pointer (or NULL). Updated on
exit to point to the first char of the string after the
delimiter that was found. The string pointed to by stringp will
be corrupted by this call (as the found delimiter will be
overwritten by 0).
delim: A C string of acceptable delimiter characters.
Returns a pointer to a C string containing the chars of stringp up
to the first delimiter char (or the end of the string), or NULL.
Returns a pointer to a C string containing the chars of stringp
up to the first delimiter char (or the end of the string), or
NULL.
*/
char *fz_strsep(char **stringp, const char *delim);
/*
fz_strlcpy: Copy at most n-1 chars of a string into a destination
/**
Copy at most n-1 chars of a string into a destination
buffer with null termination, returning the real length of the
initial string (excluding terminator).
@@ -41,8 +74,8 @@ char *fz_strsep(char **stringp, const char *delim);
*/
size_t fz_strlcpy(char *dst, const char *src, size_t n);
/*
fz_strlcat: Concatenate 2 strings, with a maximum length.
/**
Concatenate 2 strings, with a maximum length.
dst: pointer to first string in a buffer of n bytes.
@@ -50,50 +83,108 @@ size_t fz_strlcpy(char *dst, const char *src, size_t n);
n: Size (in bytes) of buffer that dst is in.
Returns the real length that a concatenated dst + src would have been
(not including terminator).
Returns the real length that a concatenated dst + src would have
been (not including terminator).
*/
size_t fz_strlcat(char *dst, const char *src, size_t n);
/*
fz_dirname: extract the directory component from a path.
/**
Find the start of the first occurrence of the substring needle in haystack.
*/
void *fz_memmem(const void *haystack, size_t haystacklen, const void *needle, size_t needlelen);
/**
extract the directory component from a path.
*/
void fz_dirname(char *dir, const char *path, size_t dirsize);
/*
fz_urldecode: decode url escapes.
/**
Find the filename component in a path.
*/
const char *fz_basename(const char *path);
/**
Like fz_decode_uri_component but in-place.
*/
char *fz_urldecode(char *url);
/*
fz_format_output_path: create output file name using a template.
If the path contains %[0-9]*d, the first such pattern will be replaced
with the page number. If the template does not contain such a pattern, the page
number will be inserted before the file suffix. If the template does not have
a file suffix, the page number will be added to the end.
/**
* Return a new string representing the unencoded version of the given URI.
* This decodes all escape sequences except those that would result in a reserved
* character that are part of the URI syntax (; / ? : @ & = + $ , #).
*/
char *fz_decode_uri(fz_context *ctx, const char *s);
/**
* Return a new string representing the unencoded version of the given URI component.
* This decodes all escape sequences!
*/
char *fz_decode_uri_component(fz_context *ctx, const char *s);
/**
* Return a new string representing the provided string encoded as a URI.
*/
char *fz_encode_uri(fz_context *ctx, const char *s);
/**
* Return a new string representing the provided string encoded as a URI component.
* This also encodes the special reserved characters (; / ? : @ & = + $ , #).
*/
char *fz_encode_uri_component(fz_context *ctx, const char *s);
/**
* Return a new string representing the provided string encoded as a URI path name.
* This also encodes the special reserved characters except /.
*/
char *fz_encode_uri_pathname(fz_context *ctx, const char *s);
/**
create output file name using a template.
If the path contains %[0-9]*d, the first such pattern will be
replaced with the page number. If the template does not contain
such a pattern, the page number will be inserted before the
filename extension. If the template does not have a filename
extension, the page number will be added to the end.
*/
void fz_format_output_path(fz_context *ctx, char *path, size_t size, const char *fmt, int page);
/*
fz_cleanname: rewrite path to the shortest string that names the same path.
/**
rewrite path to the shortest string that names the same path.
Eliminates multiple and trailing slashes, interprets "." and "..".
Overwrites the string in place.
Eliminates multiple and trailing slashes, interprets "." and
"..". Overwrites the string in place.
*/
char *fz_cleanname(char *name);
/*
/**
rewrite path to the shortest string that names the same path.
Eliminates multiple and trailing slashes, interprets "." and
"..". Allocates a new string that the caller must free.
*/
char *fz_cleanname_strdup(fz_context *ctx, const char *name);
/**
Resolve a path to an absolute file name.
The resolved path buffer must be of at least PATH_MAX size.
*/
char *fz_realpath(const char *path, char *resolved_path);
/**
Case insensitive (ASCII only) string comparison.
*/
int fz_strcasecmp(const char *a, const char *b);
int fz_strncasecmp(const char *a, const char *b, size_t n);
/*
FZ_UTFMAX: Maximum number of bytes in a decoded rune (maximum length returned by fz_chartorune).
/**
FZ_UTFMAX: Maximum number of bytes in a decoded rune (maximum
length returned by fz_chartorune).

Declared as a member of an anonymous enum, so it is an integer
constant expression rather than a preprocessor macro.
*/
enum { FZ_UTFMAX = 4 };
/*
fz_chartorune: UTF8 decode a single rune from a sequence of chars.
/**
UTF8 decode a single rune from a sequence of chars.
rune: Pointer to an int to assign the decoded 'rune' to.
@@ -103,8 +194,8 @@ enum { FZ_UTFMAX = 4 };
*/
int fz_chartorune(int *rune, const char *str);
/*
fz_runetochar: UTF8 encode a rune to a sequence of chars.
/**
UTF8 encode a rune to a sequence of chars.
str: Pointer to a place to put the UTF8 encoded character.
@@ -114,17 +205,42 @@ int fz_chartorune(int *rune, const char *str);
*/
int fz_runetochar(char *str, int rune);
/*
fz_runelen: Count how many chars are required to represent a rune.
/**
Count how many chars are required to represent a rune.
rune: The rune to encode.
Returns the number of bytes required to represent this rune in UTF8.
Returns the number of bytes required to represent this rune in
UTF8.
*/
int fz_runelen(int rune);
/*
fz_utflen: Count how many runes the UTF-8 encoded string
/**
Compute the index of a rune in a string.
str: Pointer to beginning of a string.
p: Pointer to a char in str.
Returns the index of the rune pointed to by p in str.
*/
int fz_runeidx(const char *str, const char *p);
/**
Obtain a pointer to the char representing the rune
at a given index.
str: Pointer to beginning of a string.
idx: Index of a rune to return a char pointer to.
Returns a pointer to the char where the desired rune starts,
or NULL if the string ends before the index is reached.
*/
const char *fz_runeptr(const char *str, int idx);
/**
Count how many runes the UTF-8 encoded string
consists of.
s: The UTF-8 encoded, NUL-terminated text string.
@@ -134,33 +250,37 @@ int fz_runelen(int rune);
int fz_utflen(const char *s);
/*
fz_strtof: Locale-independent decimal to binary
conversion. On overflow return (-)INFINITY and set errno to ERANGE. On
underflow return 0 and set errno to ERANGE. Special inputs (case
insensitive): "NAN", "INF" or "INFINITY".
Convert a wchar string into a new heap allocated utf8 one.
*/
char *fz_utf8_from_wchar(fz_context *ctx, const wchar_t *s);
/*
Convert a utf8 string into a new heap allocated wchar one.
*/
wchar_t *fz_wchar_from_utf8(fz_context *ctx, const char *path);
/**
Locale-independent decimal to binary conversion. On overflow
return (-)INFINITY and set errno to ERANGE. On underflow return
0 and set errno to ERANGE. Special inputs (case insensitive):
"NAN", "INF" or "INFINITY".
*/
float fz_strtof(const char *s, char **es);
/*
fz_strtof_no_exp: Like fz_strtof, but does not recognize exponent
format. So fz_strtof_no_exp("1.5e20", &tail) will return 1.5 and tail
will point to "e20".
*/
float fz_strtof_no_exp(const char *string, char **tailptr);
/*
fz_grisu: Compute decimal integer m, exp such that:
f = m * 10^exp
m is as short as possible without losing exactness
Assumes special cases (0, NaN, +Inf, -Inf) have been handled.
*/
int fz_grisu(float f, char *s, int *exp);
/*
/**
Check and parse string into page ranges:
( ','? ([0-9]+|'N') ( '-' ([0-9]+|N) )? )+
/,?(-?\d+|N)(-(-?\d+|N))?/
*/
int fz_is_page_range(fz_context *ctx, const char *s);
const char *fz_parse_page_range(fz_context *ctx, const char *s, int *a, int *b, int n);
/**
Unicode aware tolower and toupper functions.
*/
int fz_tolower(int c);
int fz_toupper(int c);
#endif