init commit
This commit is contained in:
@@ -1,88 +1,397 @@
|
||||
// Copyright (C) 2004-2022 Artifex Software, Inc.
|
||||
//
|
||||
// This file is part of MuPDF.
|
||||
//
|
||||
// MuPDF is free software: you can redistribute it and/or modify it under the
|
||||
// terms of the GNU Affero General Public License as published by the Free
|
||||
// Software Foundation, either version 3 of the License, or (at your option)
|
||||
// any later version.
|
||||
//
|
||||
// MuPDF is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
// WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
||||
// FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for more
|
||||
// details.
|
||||
//
|
||||
// You should have received a copy of the GNU Affero General Public License
|
||||
// along with MuPDF. If not, see <https://www.gnu.org/licenses/agpl-3.0.en.html>
|
||||
//
|
||||
// Alternative licensing terms are available from the licensor.
|
||||
// For commercial licensing, see <https://www.artifex.com/> or contact
|
||||
// Artifex Software, Inc., 39 Mesa Street, Suite 108A, San Francisco,
|
||||
// CA 94129, USA, for further information.
|
||||
|
||||
#ifndef MUPDF_FITZ_XML_H
|
||||
#define MUPDF_FITZ_XML_H
|
||||
|
||||
#include "mupdf/fitz/system.h"
|
||||
#include "mupdf/fitz/context.h"
|
||||
#include "mupdf/fitz/buffer.h"
|
||||
#include "mupdf/fitz/pool.h"
|
||||
#include "mupdf/fitz/archive.h"
|
||||
|
||||
/*
|
||||
/**
|
||||
XML document model
|
||||
*/
|
||||
|
||||
typedef struct fz_xml_doc_s fz_xml_doc;
|
||||
typedef struct fz_xml_s fz_xml;
|
||||
typedef struct fz_xml fz_xml;
|
||||
|
||||
/*
|
||||
fz_parse_xml: Parse the contents of buffer into a tree of xml nodes.
|
||||
/* For backwards compatibility */
|
||||
typedef fz_xml fz_xml_doc;
|
||||
|
||||
/**
|
||||
Parse the contents of buffer into a tree of xml nodes.
|
||||
|
||||
preserve_white: whether to keep or delete all-whitespace nodes.
|
||||
*/
|
||||
fz_xml_doc *fz_parse_xml(fz_context *ctx, fz_buffer *buf, int preserve_white);
|
||||
fz_xml *fz_parse_xml(fz_context *ctx, fz_buffer *buf, int preserve_white);
|
||||
|
||||
/*
|
||||
fz_drop_xml: Free the XML node and all its children and siblings.
|
||||
/**
|
||||
Parse the contents of buffer into a tree of xml nodes.
|
||||
|
||||
preserve_white: whether to keep or delete all-whitespace nodes.
|
||||
*/
|
||||
void fz_drop_xml(fz_context *ctx, fz_xml_doc *xml);
|
||||
fz_xml *fz_parse_xml_stream(fz_context *ctx, fz_stream *stream, int preserve_white);
|
||||
|
||||
/*
|
||||
fz_detach_xml: Detach a node from the tree, unlinking it from its parent,
|
||||
/**
|
||||
Parse the contents of an archive entry into a tree of xml nodes.
|
||||
|
||||
preserve_white: whether to keep or delete all-whitespace nodes.
|
||||
*/
|
||||
fz_xml *fz_parse_xml_archive_entry(fz_context *ctx, fz_archive *dir, const char *filename, int preserve_white);
|
||||
|
||||
/**
|
||||
Try and parse the contents of an archive entry into a tree of xml nodes.
|
||||
|
||||
preserve_white: whether to keep or delete all-whitespace nodes.
|
||||
|
||||
Will return NULL if the archive entry can't be found. Otherwise behaves
|
||||
the same as fz_parse_xml_archive_entry. May throw exceptions.
|
||||
*/
|
||||
fz_xml *fz_try_parse_xml_archive_entry(fz_context *ctx, fz_archive *dir, const char *filename, int preserve_white);
|
||||
|
||||
/**
|
||||
Parse the contents of a buffer into a tree of XML nodes,
|
||||
using the HTML5 parsing algorithm.
|
||||
*/
|
||||
fz_xml *fz_parse_xml_from_html5(fz_context *ctx, fz_buffer *buf);
|
||||
|
||||
/**
|
||||
Add a reference to the XML.
|
||||
*/
|
||||
fz_xml *fz_keep_xml(fz_context *ctx, fz_xml *xml);
|
||||
|
||||
/**
|
||||
Drop a reference to the XML. When the last reference is
|
||||
dropped, the node and all its children and siblings will
|
||||
be freed.
|
||||
*/
|
||||
void fz_drop_xml(fz_context *ctx, fz_xml *xml);
|
||||
|
||||
/**
|
||||
Detach a node from the tree, unlinking it from its parent,
|
||||
and setting the document root to the node.
|
||||
*/
|
||||
void fz_detach_xml(fz_context *ctx, fz_xml_doc *xml, fz_xml *node);
|
||||
void fz_detach_xml(fz_context *ctx, fz_xml *node);
|
||||
|
||||
/*
|
||||
fz_xml_root: Get the root node for the document.
|
||||
/**
|
||||
Return the topmost XML node of a document.
|
||||
*/
|
||||
fz_xml *fz_xml_root(fz_xml_doc *xml);
|
||||
|
||||
/*
|
||||
fz_xml_prev: Return previous sibling of XML node.
|
||||
/**
|
||||
Return previous sibling of XML node.
|
||||
*/
|
||||
fz_xml *fz_xml_prev(fz_xml *item);
|
||||
|
||||
/*
|
||||
fz_xml_next: Return next sibling of XML node.
|
||||
/**
|
||||
Return next sibling of XML node.
|
||||
*/
|
||||
fz_xml *fz_xml_next(fz_xml *item);
|
||||
|
||||
/*
|
||||
fz_xml_up: Return parent of XML node.
|
||||
/**
|
||||
Return parent of XML node.
|
||||
*/
|
||||
fz_xml *fz_xml_up(fz_xml *item);
|
||||
|
||||
/*
|
||||
fz_xml_down: Return first child of XML node.
|
||||
/**
|
||||
Return first child of XML node.
|
||||
*/
|
||||
fz_xml *fz_xml_down(fz_xml *item);
|
||||
|
||||
/*
|
||||
fz_xml_is_tag: Return true if the tag name matches.
|
||||
/**
|
||||
Return true if the tag name matches.
|
||||
*/
|
||||
int fz_xml_is_tag(fz_xml *item, const char *name);
|
||||
|
||||
/*
|
||||
fz_xml_tag: Return tag of XML node. Return NULL for text nodes.
|
||||
/**
|
||||
Return tag of XML node. Return NULL for text nodes.
|
||||
*/
|
||||
char *fz_xml_tag(fz_xml *item);
|
||||
|
||||
/*
|
||||
fz_xml_att: Return the value of an attribute of an XML node.
|
||||
/**
|
||||
Return the value of an attribute of an XML node.
|
||||
NULL if the attribute doesn't exist.
|
||||
*/
|
||||
char *fz_xml_att(fz_xml *item, const char *att);
|
||||
|
||||
/*
|
||||
fz_xml_text: Return the text content of an XML node.
|
||||
/**
|
||||
Return the value of an attribute of an XML node.
|
||||
If the first attribute doesn't exist, try the second.
|
||||
NULL if neither attribute exists.
|
||||
*/
|
||||
char *fz_xml_att_alt(fz_xml *item, const char *one, const char *two);
|
||||
|
||||
/**
|
||||
Check for a matching attribute on an XML node.
|
||||
|
||||
If the node has the requested attribute (name), and the value
|
||||
matches (match) then return 1. Otherwise, 0.
|
||||
*/
|
||||
int fz_xml_att_eq(fz_xml *item, const char *name, const char *match);
|
||||
|
||||
/**
|
||||
Add an attribute to an XML node.
|
||||
*/
|
||||
void fz_xml_add_att(fz_context *ctx, fz_pool *pool, fz_xml *node, const char *key, const char *val);
|
||||
|
||||
/**
|
||||
Return the text content of an XML node.
|
||||
Return NULL if the node is a tag.
|
||||
*/
|
||||
char *fz_xml_text(fz_xml *item);
|
||||
|
||||
/*
|
||||
fz_debug_xml: Pretty-print an XML tree to stdout.
|
||||
/**
|
||||
Pretty-print an XML tree to given output.
|
||||
*/
|
||||
void fz_output_xml(fz_context *ctx, fz_output *out, fz_xml *item, int level);
|
||||
|
||||
/**
|
||||
Pretty-print an XML tree to stdout. (Deprecated, use
|
||||
fz_output_xml in preference).
|
||||
*/
|
||||
void fz_debug_xml(fz_xml *item, int level);
|
||||
|
||||
/**
|
||||
Search the siblings of XML nodes starting with item looking for
|
||||
the first with the given tag.
|
||||
|
||||
Return NULL if none found.
|
||||
*/
|
||||
fz_xml *fz_xml_find(fz_xml *item, const char *tag);
|
||||
|
||||
/**
|
||||
Search the siblings of XML nodes starting with the first sibling
|
||||
of item looking for the first with the given tag.
|
||||
|
||||
Return NULL if none found.
|
||||
*/
|
||||
fz_xml *fz_xml_find_next(fz_xml *item, const char *tag);
|
||||
|
||||
/**
|
||||
Search the siblings of XML nodes starting with the first child
|
||||
of item looking for the first with the given tag.
|
||||
|
||||
Return NULL if none found.
|
||||
*/
|
||||
fz_xml *fz_xml_find_down(fz_xml *item, const char *tag);
|
||||
|
||||
/**
|
||||
Search the siblings of XML nodes starting with item looking for
|
||||
the first with the given tag (or any tag if tag is NULL), and
|
||||
with a matching attribute.
|
||||
|
||||
Return NULL if none found.
|
||||
*/
|
||||
fz_xml *fz_xml_find_match(fz_xml *item, const char *tag, const char *att, const char *match);
|
||||
|
||||
/**
|
||||
Search the siblings of XML nodes starting with the first sibling
|
||||
of item looking for the first with the given tag (or any tag if tag
|
||||
is NULL), and with a matching attribute.
|
||||
|
||||
Return NULL if none found.
|
||||
*/
|
||||
fz_xml *fz_xml_find_next_match(fz_xml *item, const char *tag, const char *att, const char *match);
|
||||
|
||||
/**
|
||||
Search the siblings of XML nodes starting with the first child
|
||||
of item looking for the first with the given tag (or any tag if
|
||||
tag is NULL), and with a matching attribute.
|
||||
|
||||
Return NULL if none found.
|
||||
*/
|
||||
fz_xml *fz_xml_find_down_match(fz_xml *item, const char *tag, const char *att, const char *match);
|
||||
|
||||
/**
|
||||
Perform a depth first search from item, returning the first
|
||||
child that matches the given tag (or any tag if tag is NULL),
|
||||
with the given attribute (if att is non NULL), that matches
|
||||
match (if match is non NULL).
|
||||
*/
|
||||
fz_xml *fz_xml_find_dfs(fz_xml *item, const char *tag, const char *att, const char *match);
|
||||
|
||||
/**
|
||||
Perform a depth first search from item, returning the first
|
||||
child that matches the given tag (or any tag if tag is NULL),
|
||||
with the given attribute (if att is non NULL), that matches
|
||||
match (if match is non NULL). The search stops if it ever
|
||||
reaches the top of the tree, or the declared 'top' item.
|
||||
*/
|
||||
fz_xml *fz_xml_find_dfs_top(fz_xml *item, const char *tag, const char *att, const char *match, fz_xml *top);
|
||||
|
||||
/**
|
||||
Perform a depth first search onwards from item, returning the first
|
||||
child that matches the given tag (or any tag if tag is NULL),
|
||||
with the given attribute (if att is non NULL), that matches
|
||||
match (if match is non NULL).
|
||||
*/
|
||||
fz_xml *fz_xml_find_next_dfs(fz_xml *item, const char *tag, const char *att, const char *match);
|
||||
|
||||
/**
|
||||
Perform a depth first search onwards from item, returning the first
|
||||
child that matches the given tag (or any tag if tag is NULL),
|
||||
with the given attribute (if att is non NULL), that matches
|
||||
match (if match is non NULL). The search stops if it ever reaches
|
||||
the top of the tree, or the declared 'top' item.
|
||||
*/
|
||||
fz_xml *fz_xml_find_next_dfs_top(fz_xml *item, const char *tag, const char *att, const char *match, fz_xml *top);
|
||||
|
||||
/**
|
||||
DOM-like functions for html in xml.
|
||||
*/
|
||||
|
||||
/**
|
||||
Return a borrowed reference for the 'body' element of
|
||||
the given DOM.
|
||||
*/
|
||||
fz_xml *fz_dom_body(fz_context *ctx, fz_xml *dom);
|
||||
|
||||
/**
|
||||
Return a borrowed reference for the document (the top
|
||||
level element) of the DOM.
|
||||
*/
|
||||
fz_xml *fz_dom_document_element(fz_context *ctx, fz_xml *dom);
|
||||
|
||||
/**
|
||||
Create an element of a given tag type for the given DOM.
|
||||
|
||||
The element is not linked into the DOM yet.
|
||||
*/
|
||||
fz_xml *fz_dom_create_element(fz_context *ctx, fz_xml *dom, const char *tag);
|
||||
|
||||
/**
|
||||
Create a text node for the given DOM.
|
||||
|
||||
The element is not linked into the DOM yet.
|
||||
*/
|
||||
fz_xml *fz_dom_create_text_node(fz_context *ctx, fz_xml *dom, const char *text);
|
||||
|
||||
/**
|
||||
Find the first element matching the requirements in a depth first traversal from elt.
|
||||
|
||||
The tagname must match tag, unless tag is NULL, when all tag names are considered to match.
|
||||
|
||||
If att is NULL, then all tags match.
|
||||
Otherwise:
|
||||
If match is NULL, then only nodes that have an att attribute match.
|
||||
If match is non-NULL, then only nodes that have an att attribute that matches match match.
|
||||
|
||||
Returns NULL (if no match found), or a borrowed reference to the first matching element.
|
||||
*/
|
||||
fz_xml *fz_dom_find(fz_context *ctx, fz_xml *elt, const char *tag, const char *att, const char *match);
|
||||
|
||||
/**
|
||||
Find the next element matching the requirements.
|
||||
*/
|
||||
fz_xml *fz_dom_find_next(fz_context *ctx, fz_xml *elt, const char *tag, const char *att, const char *match);
|
||||
|
||||
/**
|
||||
Insert an element as the last child of a parent, unlinking the
|
||||
child from its current position if required.
|
||||
*/
|
||||
void fz_dom_append_child(fz_context *ctx, fz_xml *parent, fz_xml *child);
|
||||
|
||||
/**
|
||||
Insert an element (new_elt), before another element (node),
|
||||
unlinking the new_elt from its current position if required.
|
||||
*/
|
||||
void fz_dom_insert_before(fz_context *ctx, fz_xml *node, fz_xml *new_elt);
|
||||
|
||||
/**
|
||||
Insert an element (new_elt), after another element (node),
|
||||
unlinking the new_elt from its current position if required.
|
||||
*/
|
||||
void fz_dom_insert_after(fz_context *ctx, fz_xml *node, fz_xml *new_elt);
|
||||
|
||||
/**
|
||||
Remove an element from the DOM. The element can be added back elsewhere
|
||||
if required.
|
||||
|
||||
No reference counting changes for the element.
|
||||
*/
|
||||
void fz_dom_remove(fz_context *ctx, fz_xml *elt);
|
||||
|
||||
/**
|
||||
Clone an element (and its children).
|
||||
|
||||
A borrowed reference to the clone is returned. The clone is not
|
||||
yet linked into the DOM.
|
||||
*/
|
||||
fz_xml *fz_dom_clone(fz_context *ctx, fz_xml *elt);
|
||||
|
||||
/**
|
||||
Return a borrowed reference to the first child of a node,
|
||||
or NULL if there isn't one.
|
||||
*/
|
||||
fz_xml *fz_dom_first_child(fz_context *ctx, fz_xml *elt);
|
||||
|
||||
/**
|
||||
Return a borrowed reference to the parent of a node,
|
||||
or NULL if there isn't one.
|
||||
*/
|
||||
fz_xml *fz_dom_parent(fz_context *ctx, fz_xml *elt);
|
||||
|
||||
/**
|
||||
Return a borrowed reference to the next sibling of a node,
|
||||
or NULL if there isn't one.
|
||||
*/
|
||||
fz_xml *fz_dom_next(fz_context *ctx, fz_xml *elt);
|
||||
|
||||
/**
|
||||
Return a borrowed reference to the previous sibling of a node,
|
||||
or NULL if there isn't one.
|
||||
*/
|
||||
fz_xml *fz_dom_previous(fz_context *ctx, fz_xml *elt);
|
||||
|
||||
/**
|
||||
Add an attribute to an element.
|
||||
|
||||
Ownership of att and value remain with the caller.
|
||||
*/
|
||||
void fz_dom_add_attribute(fz_context *ctx, fz_xml *elt, const char *att, const char *value);
|
||||
|
||||
/**
|
||||
Remove an attribute from an element.
|
||||
*/
|
||||
void fz_dom_remove_attribute(fz_context *ctx, fz_xml *elt, const char *att);
|
||||
|
||||
/**
|
||||
Retrieve the value of a given attribute from a given element.
|
||||
|
||||
Returns a borrowed pointer to the value or NULL if not found.
|
||||
*/
|
||||
const char *fz_dom_attribute(fz_context *ctx, fz_xml *elt, const char *att);
|
||||
|
||||
/**
|
||||
Enumerate through the attributes of an element.
|
||||
|
||||
Call with i=0,1,2,3... to enumerate attributes.
|
||||
|
||||
On return *att and the return value will be NULL if there are not
|
||||
that many attributes to read. Otherwise, *att will be filled in
|
||||
with a borrowed pointer to the attribute name, and the return
|
||||
value will be a borrowed pointer to the value.
|
||||
*/
|
||||
const char *fz_dom_get_attribute(fz_context *ctx, fz_xml *elt, int i, const char **att);
|
||||
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user