HTML Tidy
5.8.0
The HTACG Tidy HTML Project
parser.h
Go to the documentation of this file.
1
#ifndef __PARSER_H__
2
#define __PARSER_H__
3
4
/**************************************************************************/
/**
5
* @file
6
* HTML and XML Parsers.
7
*
8
* Tidy's HTML parser corrects many conditions and enforces certain user
9
* preferences during the parsing process. The XML parser produces a tree
10
* of nodes useful to Tidy but also suitable for use in other XML processing
11
* applications.
12
*
13
* @author HTACG, et al (consult git log)
14
*
15
* @copyright
16
* Copyright (c) 1998-2017 World Wide Web Consortium (Massachusetts
17
* Institute of Technology, European Research Consortium for Informatics
18
* and Mathematics, Keio University) and HTACG.
19
* @par
20
* All Rights Reserved.
21
* @par
22
* See `tidy.h` for the complete license.
23
*
24
* @date Additional updates: consult git log
25
*
26
******************************************************************************/
27
28
#include "
forward.h
"
29
30
/** @addtogroup internal_api */
31
/** @{ */
32
33
34
/***************************************************************************/
/**
35
** @defgroup parser_h HTML and XML Parsing
36
**
37
** These functions and structures form the internal API for document
38
** parsing.
39
**
40
** @{
41
******************************************************************************/
42
43
44
/**
45
* Is used to perform a node integrity check recursively after parsing
46
* an HTML or XML document.
47
* @note Actual performance of this check can be disabled by defining the
48
* macro NO_NODE_INTEGRITY_CHECK.
49
* @param node The root node for the integrity check.
50
* @returns Returns yes or no indicating integrity of the node structure.
51
*/
52
TY_PRIVATE
Bool
TY_
(CheckNodeIntegrity)(Node *node);
53
54
55
/**
56
* Indicates whether or not a text node ends with a space or newline.
57
* @note Implementation of this method is found in `pprint.c` for
58
* some reason.
59
* @param lexer A reference to the lexer used to lex the document.
60
* @param node The node to check.
61
* @returns The result of the check.
62
*/
63
TY_PRIVATE
Bool
TY_
(TextNodeEndWithSpace)( Lexer *lexer, Node *node );
64
65
66
/**
67
* Used to check if a node uses CM_NEW, which determines how attributes
68
* without values should be printed. This was introduced to deal with
69
* user-defined tags e.g. ColdFusion.
70
* @param node The node to check.
71
* @returns The result of the check.
72
*/
73
TY_PRIVATE
Bool
TY_
(IsNewNode)(Node *node);
74
75
76
/**
77
* Transforms a given node to another element, for example, from a `p`
78
* to a `br`.
79
* @param doc The document which the node belongs to.
80
* @param node The node to coerce.
81
* @param tid The tag type to coerce the node into.
82
* @param obsolete If the old node was obsolete, a report will be generated.
83
* @param expected If the old node was not expected to be found in this
84
* particular location, a report will be generated.
85
*/
86
TY_PRIVATE
void
TY_
(CoerceNode)(TidyDocImpl* doc, Node *node,
TidyTagId
tid,
Bool
obsolete,
Bool
expected);
87
88
89
/**
90
* Extract a node and its children from a markup tree.
91
* @param node The node to remove.
92
* @returns Returns the removed node.
93
*/
94
TY_PRIVATE
Node *
TY_
(RemoveNode)(Node *node);
95
96
97
/**
98
* Remove node from markup tree and discard it.
99
* @param doc The Tidy document from which to discard the node.
100
* @param element The node to discard.
101
* @returns Returns the next node.
102
*/
103
TY_PRIVATE
Node *
TY_
(DiscardElement)( TidyDocImpl* doc, Node *element);
104
105
106
/**
107
* Insert node into markup tree as the first element of content of element.
108
* @param element The new destination node.
109
* @param node The node to insert.
110
*/
111
TY_PRIVATE
void
TY_
(InsertNodeAtStart)(Node *element, Node *node);
112
113
114
/**
115
* Insert node into markup tree as the last element of content of element.
116
* @param element The new destination node.
117
* @param node The node to insert.
118
*/
119
TY_PRIVATE
void
TY_
(InsertNodeAtEnd)(Node *element, Node *node);
120
121
122
/**
123
* Insert node into markup tree before element.
124
* @param element The node before which the node is inserted.
125
* @param node The node to insert.
126
*/
127
TY_PRIVATE
void
TY_
(InsertNodeBeforeElement)(Node *element, Node *node);
128
129
130
/**
131
* Insert node into markup tree after element.
132
* @param element The node after which the node is inserted.
133
* @param node The node to insert.
134
*/
135
TY_PRIVATE
void
TY_
(InsertNodeAfterElement)(Node *element, Node *node);
136
137
138
/**
139
* Trims a single, empty element, returning the next node.
140
* @param doc The Tidy document.
141
* @param element The element to trim.
142
* @returns Returns the next node.
143
*/
144
TY_PRIVATE
Node *
TY_
(TrimEmptyElement)( TidyDocImpl* doc, Node *element );
145
146
147
/**
148
* Trims a tree of empty elements recursively, returning the next node.
149
* @param doc The Tidy document.
150
* @param node The element to trim.
151
* @returns Returns the next node.
152
*/
153
TY_PRIVATE
Node*
TY_
(DropEmptyElements)(TidyDocImpl* doc, Node* node);
154
155
156
/**
157
* Indicates whether or not a text node is blank, meaning that it consists
158
* of nothing, or a single space.
159
* @param lexer The lexer used to lex the document.
160
* @param node The node to test.
161
* @returns Returns the result of the test.
162
*/
163
TY_PRIVATE
Bool
TY_
(IsBlank)(Lexer *lexer, Node *node);
164
165
166
/**
167
* Indicates whether or not a node is declared as containing javascript
168
* code.
169
* @param node The node to test.
170
* @returns Returns the result of the test.
171
*/
172
TY_PRIVATE
Bool
TY_
(IsJavaScript)(Node *node);
173
174
175
/**
176
* Parses a document after lexing using the HTML parser. It begins by properly
177
* configuring the overall HTML structure, and subsequently processes all
178
* remaining nodes. HTML is the root node.
179
* @param doc The Tidy document.
180
*/
181
TY_PRIVATE
void
TY_
(ParseDocument)( TidyDocImpl* doc );
182
183
184
/**
185
* Indicates whether or not whitespace is to be preserved in XHTML/XML
186
* documents.
187
* @param doc The Tidy document.
188
* @param element The node to test.
189
* @returns Returns the result of the test.
190
*/
191
TY_PRIVATE
Bool
TY_
(XMLPreserveWhiteSpace)( TidyDocImpl* doc, Node *element );
192
193
194
/**
195
* Parses a document after lexing using the XML parser.
196
* @param doc The Tidy document.
197
*/
198
TY_PRIVATE
void
TY_
(ParseXMLDocument)( TidyDocImpl* doc );
199
200
201
/** @} end parser_h group */
202
/** @} end internal_api group */
203
204
#endif
/* __PARSER_H__ */
205
forward.h
TY_PRIVATE
#define TY_PRIVATE
Definition:
forward.h:29
TY_
#define TY_(str)
Definition:
forward.h:23
TidyTagId
TidyTagId
Known HTML element types.
Definition:
tidyenum.h:857
Bool
Bool
Definition:
tidyplatform.h:647
tidy-html5
src
parser.h
Generated on Sat Jul 10 2021 16:57:54 for HTML Tidy by
1.9.1