/*
 * Single-bit flag constants. Each CM_ macro below sets exactly one bit,
 * running from bit 0 (CM_EMPTY) to bit 22 (CM_VOID) with no gaps and no
 * duplicates, so any combination can be OR-ed together into one integer
 * mask and tested with a bitwise AND. The shifts are on a plain `1`, so
 * every constant has type int.
 * NOTE(review): the CM_ prefix and the individual names suggest element
 * content-model categories; confirm against the code that consumes them.
 */
139 #define CM_EMPTY (1 << 0)
140 #define CM_HTML (1 << 1)
141 #define CM_HEAD (1 << 2)
142 #define CM_BLOCK (1 << 3)
143 #define CM_INLINE (1 << 4)
144 #define CM_LIST (1 << 5)
145 #define CM_DEFLIST (1 << 6)
146 #define CM_TABLE (1 << 7)
147 #define CM_ROWGRP (1 << 8)
148 #define CM_ROW (1 << 9)
149 #define CM_FIELD (1 << 10)
150 #define CM_OBJECT (1 << 11)
151 #define CM_PARAM (1 << 12)
152 #define CM_FRAMES (1 << 13)
153 #define CM_HEADING (1 << 14)
154 #define CM_OPT (1 << 15)
155 #define CM_IMG (1 << 16)
156 #define CM_MIXED (1 << 17)
157 #define CM_NO_INDENT (1 << 18)
158 #define CM_OBSOLETE (1 << 19)
159 #define CM_NEW (1 << 20)
160 #define CM_OMITST (1 << 21)
161 #define CM_VOID (1 << 22)
/*
 * Unsigned single-bit values written in decimal: 8192u, 16384u, 32768u and
 * 65536u are bits 13, 14, 15 and 16 respectively. VERS_SUN, VERS_NETSCAPE
 * and VERS_MICROSOFT are the three bits that VERS_PROPRIETARY combines
 * further down in this file; VERS_XML is not part of that mask.
 */
192 #define VERS_SUN 8192u
193 #define VERS_NETSCAPE 16384u
194 #define VERS_MICROSOFT 32768u
197 #define VERS_XML 65536u
/*
 * Named version masks. Each macro is either an alias for one short
 * identifier (xxxx, HT20, HT32, XH11, XB10) or a bitwise OR of several
 * identifiers and/or previously defined VERS_ masks. Every expansion is
 * parenthesized, so the macros can be used safely inside larger
 * expressions.
 * NOTE(review): the short identifiers (xxxx, HT20, HT32, H40S, H41S, X10S,
 * H40T, H41T, X10T, H40F, H41F, X10F, XH11, XB10, HT50, XH50) are not
 * defined in the visible part of this file; presumably they are single-bit
 * per-version constants declared nearby. Confirm before relying on that.
 */
204 #define VERS_UNKNOWN (xxxx)
205 #define VERS_HTML20 (HT20)
206 #define VERS_HTML32 (HT32)
207 #define VERS_HTML40_STRICT (H40S|H41S|X10S)
208 #define VERS_HTML40_LOOSE (H40T|H41T|X10T)
209 #define VERS_FRAMESET (H40F|H41F|X10F)
210 #define VERS_XHTML11 (XH11)
211 #define VERS_BASIC (XB10)
213 #define VERS_HTML5 (HT50|XH50)
/* Unions of the masks above. */
216 #define VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMESET)
217 #define VERS_IFRAME (VERS_HTML40_LOOSE|VERS_FRAMESET)
218 #define VERS_LOOSE (VERS_HTML20|VERS_HTML32|VERS_IFRAME)
219 #define VERS_EVENTS (VERS_HTML40|VERS_XHTML11)
/*
 * VERS_FROM32 includes HT50 only, whereas VERS_FROM40 includes VERS_HTML5
 * (HT50|XH50). NOTE(review): confirm that leaving XH50 out of VERS_FROM32
 * is intentional.
 */
220 #define VERS_FROM32 (VERS_HTML32|VERS_HTML40|HT50)
221 #define VERS_FROM40 (VERS_HTML40|VERS_XHTML11|VERS_BASIC|VERS_HTML5)
222 #define VERS_XHTML (X10S|X10T|X10F|XH11|XB10|XH50)
225 #define VERS_STRICT (VERS_HTML5|VERS_HTML40_STRICT)
/*
 * XH50 and HT50 are already contributed by VERS_FROM40 (through
 * VERS_HTML5), so listing them again here does not change the value.
 */
228 #define VERS_ALL (VERS_HTML20|VERS_HTML32|VERS_FROM40|XH50|HT50)
/* Union of the three vendor bits VERS_NETSCAPE, VERS_MICROSOFT and VERS_SUN. */
231 #define VERS_PROPRIETARY (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN)
/*
 * Introduces TagStyle as the type name for struct _Style. Only the name is
 * declared on this line; the struct body is not part of this excerpt.
 */
238 typedef struct _Style TagStyle;
/*
 * Prototype for RemoveAttribute. The function name is wrapped in the TY_()
 * macro and the declaration is qualified with TY_PRIVATE. It takes the
 * document, a node and an attribute value, and returns nothing.
 * NOTE(review): presumably detaches `attr` from `node`'s attribute list;
 * the implementation and the ownership of `attr` afterwards are not
 * visible here, so confirm before relying on either.
 */
489 TY_PRIVATE void TY_(RemoveAttribute)( TidyDocImpl* doc, Node *node, AttVal *attr );
681 typedef struct _Stack {
#define TY_PRIVATE
Definition: forward.h:29
#define TY_(str)
Definition: forward.h:23
Node * next
Definition: lexer.h:309
tmbstr element
name (NULL for text nodes)
Definition: lexer.h:297
Node * parent
tree structure
Definition: lexer.h:307
uint istackbase
start of frame
Definition: lexer.h:381
Node * last
Definition: lexer.h:311
Bool seenEndBody
true if a </body> tag has been encountered
Definition: lexer.h:359
uint txtstart
start of current node
Definition: lexer.h:350
const Attribute * dict
Definition: lexer.h:269
LexerState state
state of lexer's finite state machine
Definition: lexer.h:352
IStack * next
Definition: lexer.h:295
AttVal * attributes
Definition: lexer.h:298
Node * root
remember root node of the document
Definition: lexer.h:356
tmbstr tag
Definition: lexer.h:242
uint istacksize
used
Definition: lexer.h:380
NodeType type
TextNode, StartTag, EndTag etc.
Definition: lexer.h:321
uint columns
at start of current token
Definition: lexer.h:339
TagStyle * styles
used for cleaning up presentation markup
Definition: lexer.h:383
AttVal * next
Definition: lexer.h:268
uint lexlength
allocated
Definition: lexer.h:372
Bool pushed
true after token has been pushed back
Definition: lexer.h:341
tmbstr tag_class
Definition: lexer.h:243
Bool insertspace
when space is moved after end tag
Definition: lexer.h:342
uint lines
lines seen
Definition: lexer.h:338
int delim
Definition: lexer.h:272
uint versions
bit vector of HTML versions
Definition: lexer.h:346
tmbstr attribute
Definition: lexer.h:273
Bool bad_doctype
e.g. if html or PUBLIC is missing
Definition: lexer.h:349
uint versionEmitted
version of doctype emitted
Definition: lexer.h:348
Bool seenEndHtml
true if a </html> tag has been encountered
Definition: lexer.h:360
Node * inode
for deferring text node
Definition: lexer.h:376
tmbstr value
Definition: lexer.h:258
IStack * insert
for inferring inline tags
Definition: lexer.h:377
uint txtend
end of current node
Definition: lexer.h:351
TagStyle * next
Definition: lexer.h:245
uint end
end of span onto text array
Definition: lexer.h:320
uint column
current column of document
Definition: lexer.h:324
TidyAllocator * allocator
allocator
Definition: lexer.h:385
const Dict * tag
tag's dictionary definition
Definition: lexer.h:296
Bool exiled
true if moved out of table
Definition: lexer.h:344
uint lexsize
used
Definition: lexer.h:373
Bool closed
true if closed by explicit end tag
Definition: lexer.h:326
uint istacklength
allocated
Definition: lexer.h:379
unsigned capacity
Current capacity.
Definition: lexer.h:683
tmbstr properties
Definition: lexer.h:244
Bool excludeBlocks
Netscape compatibility.
Definition: lexer.h:343
Node * prev
Definition: lexer.h:308
Node ** firstNode
Definition: lexer.h:684
Node * token
last token returned by GetToken()
Definition: lexer.h:354
Bool waswhite
used to collapse contiguous white space
Definition: lexer.h:340
StyleProp * next
Definition: lexer.h:259
Node * asp
Definition: lexer.h:270
uint doctype
version as given by doctype (if any)
Definition: lexer.h:347
Bool implicit
true if inferred
Definition: lexer.h:327
tmbstr lexbuf
MB character buffer.
Definition: lexer.h:371
Node * php
Definition: lexer.h:271
Bool isvoyager
true if xmlns attribute on html element (i.e., "Voyager" was the W3C codename for XHTML).
Definition: lexer.h:345
IStack * istack
Definition: lexer.h:378
const Dict * was
old tag when it was changed
Definition: lexer.h:314
Node * itoken
last duplicate inline returned by GetToken()
Definition: lexer.h:355
uint start
start of span onto text array
Definition: lexer.h:319
uint line
current line of document
Definition: lexer.h:323
Bool linebreak
true if followed by a line break
Definition: lexer.h:328
int top
Current top position.
Definition: lexer.h:682
tmbstr name
Definition: lexer.h:257
Node * content
Definition: lexer.h:310
LexerState
Lexer GetToken() states.
Definition: lexer.h:104
GetTokenMode
modes for GetToken()
Definition: lexer.h:397
ParseDocTypeDeclState
ParseDocTypeDecl state constants.
Definition: lexer.h:125
NodeType
node->type is one of these values
Definition: lexer.h:82
@ LEX_PROCINSTR
Definition: lexer.h:111
@ LEX_CDATA
Definition: lexer.h:112
@ LEX_DOCTYPE
Definition: lexer.h:110
@ LEX_CONTENT
Definition: lexer.h:105
@ LEX_STARTTAG
Definition: lexer.h:108
@ LEX_JSTE
Definition: lexer.h:115
@ LEX_SECTION
Definition: lexer.h:113
@ LEX_COMMENT
Definition: lexer.h:109
@ LEX_ASP
Definition: lexer.h:114
@ LEX_ENDTAG
Definition: lexer.h:107
@ LEX_XMLDECL
Definition: lexer.h:117
@ LEX_GT
Definition: lexer.h:106
@ LEX_PHP
Definition: lexer.h:116
@ OtherNamespace
Definition: lexer.h:402
@ Preformatted
Definition: lexer.h:400
@ IgnoreWhitespace
Definition: lexer.h:398
@ CdataContent
Definition: lexer.h:403
@ MixedContent
Definition: lexer.h:399
@ IgnoreMarkup
Definition: lexer.h:401
@ DT_QUOTEDSTRING
Definition: lexer.h:129
@ DT_DOCTYPENAME
Definition: lexer.h:127
@ DT_INTERMEDIATE
Definition: lexer.h:126
@ DT_INTSUBSET
Definition: lexer.h:130
@ DT_PUBLICSYSTEM
Definition: lexer.h:128
@ CommentTag
Definition: lexer.h:85
@ StartEndTag
Definition: lexer.h:90
@ XmlDecl
Definition: lexer.h:96
@ ProcInsTag
Definition: lexer.h:86
@ RootNode
Definition: lexer.h:83
@ SectionTag
Definition: lexer.h:92
@ AspTag
Definition: lexer.h:93
@ StartTag
Definition: lexer.h:88
@ PhpTag
Definition: lexer.h:95
@ CDATATag
Definition: lexer.h:91
@ TextNode
Definition: lexer.h:87
@ JsteTag
Definition: lexer.h:94
@ EndTag
Definition: lexer.h:89
@ DocTypeTag
Definition: lexer.h:84
This typedef represents a stack of addresses to nodes.
Definition: lexer.h:681
Attribute/Value linked list node.
Definition: lexer.h:267
Mosaic handles inlines via a separate stack from other elements. We duplicate this to recover from inl...
Definition: lexer.h:294
The following are private to the lexer.
Definition: lexer.h:337
HTML/XHTML/XML Element, Comment, PI, DOCTYPE, XML Decl, etc., etc.
Definition: lexer.h:306
TidyTagId
Known HTML element types.
Definition: tidyenum.h:857