HTML Tidy  5.6.0
The HTACG Tidy HTML Project
parser.h
Go to the documentation of this file.
1 #ifndef __PARSER_H__
2 #define __PARSER_H__
3 
4 /**************************************************************************//**
5  * @file
6  * HTML and XML Parsers.
7  *
8  * Tidy's HTML parser corrects many conditions and enforces certain user
9  * preferences during the parsing process. The XML parser produces a tree
10  * of nodes useful to Tidy but also suitable for use in other XML processing
11  * applications.
12  *
13  * @author HTACG, et al (consult git log)
14  *
15  * @copyright
16  * Copyright (c) 1998-2017 World Wide Web Consortium (Massachusetts
17  * Institute of Technology, European Research Consortium for Informatics
18  * and Mathematics, Keio University) and HTACG.
19  * @par
20  * All Rights Reserved.
21  * @par
22  * See `tidy.h` for the complete license.
23  *
24  * @date Additional updates: consult git log
25  *
26  ******************************************************************************/
27 
28 #include "forward.h"
29 
30 /** @addtogroup internal_api */
31 /** @{ */
32 
33 
34 /***************************************************************************//**
35  ** @defgroup parser_h HTML and XML Parsing
36  **
37  ** These functions and structures form the internal API for document
38  ** parsing.
39  **
40  ** @{
41  ******************************************************************************/
42 
43 
44 /**
45  * Is used to perform a node integrity check recursively after parsing
46  * an HTML or XML document.
47  * @note Actual performance of this check can be disabled by defining the
48  * macro NO_NODE_INTEGRITY_CHECK.
49  * @param node The root node for the integrity check.
50  * @returns Returns yes or no indicating integrity of the node structure.
51  */
52 Bool TY_(CheckNodeIntegrity)(Node *node);
53 
54 
55 /**
56  * Indicates whether or not a text node ends with a space or newline.
57  * @note Implementation of this method is found in `pprint.c` for
58  * some reason.
59  * @param lexer A reference to the lexer used to lex the document.
60  * @param node The node to check.
61  * @returns The result of the check.
62  */
63 Bool TY_(TextNodeEndWithSpace)( Lexer *lexer, Node *node );
64 
65 
66 /**
67  * Used to check if a node uses CM_NEW, which determines how attributes
68  * without values should be printed. This was introduced to deal with
69  * user-defined tags e.g. ColdFusion.
70  * @param node The node to check.
71  * @returns The result of the check.
72  */
73 Bool TY_(IsNewNode)(Node *node);
74 
75 
76 /**
77  * Transforms a given node to another element, for example, from a `p`
78  * to a `br`.
79  * @param doc The document which the node belongs to.
80  * @param node The node to coerce.
81  * @param tid The tag type to coerce the node into.
82  * @param obsolete If the old node was obsolete, a report will be generated.
83  * @param expected If the old node was not expected to be found in this
84  * particular location, a report will be generated.
85  */
86 void TY_(CoerceNode)(TidyDocImpl* doc, Node *node, TidyTagId tid, Bool obsolete, Bool expected);
87 
88 
89 /**
90  * Extract a node and its children from a markup tree.
91  * @param node The node to remove.
92  * @returns Returns the removed node.
93  */
94 Node *TY_(RemoveNode)(Node *node);
95 
96 
97 /**
98  * Remove node from markup tree and discard it.
99  * @param doc The Tidy document from which to discard the node.
100  * @param element The node to discard.
101  * @returns Returns the next node.
102  */
103 Node *TY_(DiscardElement)( TidyDocImpl* doc, Node *element);
104 
105 
106 /**
107  * Insert node into markup tree as the first element of content of element.
108  * @param element The new destination node.
109  * @param node The node to insert.
110  */
111 void TY_(InsertNodeAtStart)(Node *element, Node *node);
112 
113 
114 /**
115  * Insert node into markup tree as the last element of content of element.
116  * @param element The new destination node.
117  * @param node The node to insert.
118  */
119 void TY_(InsertNodeAtEnd)(Node *element, Node *node);
120 
121 
122 /**
123  * Insert node into markup tree before element.
124  * @param element The node before which the node is inserted.
125  * @param node The node to insert.
126  */
127 void TY_(InsertNodeBeforeElement)(Node *element, Node *node);
128 
129 
130 /**
131  * Insert node into markup tree after element.
132  * @param element The node after which the node is inserted.
133  * @param node The node to insert.
134  */
135 void TY_(InsertNodeAfterElement)(Node *element, Node *node);
136 
137 
138 /**
139  * Trims a single, empty element, returning the next node.
140  * @param doc The Tidy document.
141  * @param element The element to trim.
142  * @returns Returns the next node.
143  */
144 Node *TY_(TrimEmptyElement)( TidyDocImpl* doc, Node *element );
145 
146 
147 /**
148  * Trims a tree of empty elements recursively, returning the next node.
149  * @param doc The Tidy document.
150  * @param node The element to trim.
151  * @returns Returns the next node.
152  */
153 Node* TY_(DropEmptyElements)(TidyDocImpl* doc, Node* node);
154 
155 
156 /**
157  * Indicates whether or not a text node is blank, meaning that it consists
158  * of nothing, or a single space.
159  * @param lexer The lexer used to lex the document.
160  * @param node The node to test.
161  * @returns Returns the result of the test.
162  */
163 Bool TY_(IsBlank)(Lexer *lexer, Node *node);
164 
165 
166 /**
167  * Indicates whether or not a node is declared as containing javascript
168  * code.
169  * @param node The node to test.
170  * @returns Returns the result of the test.
171  */
172 Bool TY_(IsJavaScript)(Node *node);
173 
174 
175 /**
176  * Parses a document after lexing using the HTML parser. It begins by properly
177  * configuring the overall HTML structure, and subsequently processes all
178  * remaining nodes. HTML is the root node.
179  * @param doc The Tidy document.
180  */
181 void TY_(ParseDocument)( TidyDocImpl* doc );
182 
183 
184 /**
185  * Indicates whether or not whitespace is to be preserved in XHTML/XML
186  * documents.
187  * @param doc The Tidy document.
188  * @param element The node to test.
189  * @returns Returns the result of the test.
190  */
191 Bool TY_(XMLPreserveWhiteSpace)( TidyDocImpl* doc, Node *element );
192 
193 
194 /**
195  * Parses a document after lexing using the XML parser.
196  * @param doc The Tidy document.
197  */
198 void TY_(ParseXMLDocument)( TidyDocImpl* doc );
199 
200 
201 /** @} end parser_h group */
202 /** @} end internal_api group */
203 
204 #endif /* __PARSER_H__ */
205 
TidyTagId
Known HTML element types.
Definition: tidyenum.h:852
Bool
Definition: tidyplatform.h:631
#define TY_(str)
Definition: forward.h:23