These functions and structures form the internal API for document lexing.
Data Structures | |
struct | AttVal |
Attribute/Value linked list node. More... | |
struct | IStack |
Mosaic handles inlines via a separate stack from other elements. We duplicate this to recover from inline markup errors such as: More... | |
struct | Lexer |
The following are private to the lexer. More... | |
struct | Node |
HTML/XHTML/XML Element, Comment, PI, DOCTYPE, XML Decl, etc., etc. More... | |
struct | TagStyle |
struct | StyleProp |
struct | Stack |
This typedef represents a stack of addresses to nodes. More... | |
Macros | |
#define | CM_BLOCK (1 << 3) |
HTML "block" elements. More... | |
#define | CM_DEFLIST (1 << 6) |
Elements that mark definition list item ("DL", "DT"). More... | |
#define | CM_EMPTY (1 << 0) |
Elements with no content. More... | |
#define | CM_FIELD (1 << 10) |
Elements whose content must be protected against white space movement. More... | |
#define | CM_FRAMES (1 << 13) |
"FRAME", "FRAMESET", "NOFRAMES". More... | |
#define | CM_HEAD (1 << 2) |
Elements that can appear within HEAD. More... | |
#define | CM_HEADING (1 << 14) |
Heading elements (h1, h2, ...). More... | |
#define | CM_HTML (1 << 1) |
Elements that appear outside of "BODY". More... | |
#define | CM_IMG (1 << 16) |
Elements that use "align" attribute for vertical position. More... | |
#define | CM_INLINE (1 << 4) |
HTML "inline" elements. More... | |
#define | CM_LIST (1 << 5) |
Elements that mark list item ("LI"). More... | |
#define | CM_MIXED (1 << 17) |
Elements with inline and block model. More... | |
#define | CM_NEW (1 << 20) |
User defined elements. More... | |
#define | CM_NO_INDENT (1 << 18) |
Elements whose content needs to be indented only if containing one CM_BLOCK element. More... | |
#define | CM_OBJECT (1 << 11) |
Used to avoid propagating inline emphasis inside some elements such as OBJECT or APPLET. More... | |
#define | CM_OBSOLETE (1 << 19) |
Elements that are obsolete (such as "dir", "menu"). More... | |
#define | CM_OMITST (1 << 21) |
Elements that cannot be omitted. More... | |
#define | CM_OPT (1 << 15) |
Elements with an optional end tag. More... | |
#define | CM_PARAM (1 << 12) |
Elements that allow "PARAM". More... | |
#define | CM_ROW (1 << 9) |
Used for "TD", "TH". More... | |
#define | CM_ROWGRP (1 << 8) |
Used for "THEAD", "TFOOT" or "TBODY". More... | |
#define | CM_TABLE (1 << 7) |
Elements that can appear inside TABLE. More... | |
#define | CM_UNKNOWN 0 |
Content model shortcut encoding. More... | |
#define | CM_VOID (1 << 22) |
Elements that are void per https://www.w3.org/TR/2011/WD-html-markup-20110113/syntax.html#syntax-elements. More... | |
#define | digit 1u |
Lexer character types. More... | |
#define | digithex 128u |
#define | H40F 16u |
#define | H40S 4u |
#define | H40T 8u |
#define | H41F 128u |
#define | H41S 32u |
#define | H41T 64u |
#define | HT20 1u |
#define | HT32 2u |
#define | HT50 131072u |
#define | letter 2u |
#define | lowercase 32u |
#define | namechar 4u |
#define | newline 16u |
#define | uppercase 64u |
#define | VERS_ALL (VERS_HTML20|VERS_HTML32|VERS_FROM40|XH50|HT50) |
#define | VERS_BASIC (XB10) |
#define | VERS_EVENTS (VERS_HTML40|VERS_XHTML11) |
#define | VERS_FRAMESET (H40F|H41F|X10F) |
#define | VERS_FROM32 (VERS_HTML32|VERS_HTML40|HT50) |
#define | VERS_FROM40 (VERS_HTML40|VERS_XHTML11|VERS_BASIC|VERS_HTML5) |
#define | VERS_HTML20 (HT20) |
#define | VERS_HTML32 (HT32) |
#define | VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMESET) |
#define | VERS_HTML40_LOOSE (H40T|H41T|X10T) |
#define | VERS_HTML40_STRICT (H40S|H41S|X10S) |
#define | VERS_HTML5 (HT50|XH50) |
#define | VERS_IFRAME (VERS_HTML40_LOOSE|VERS_FRAMESET) |
#define | VERS_LOOSE (VERS_HTML20|VERS_HTML32|VERS_IFRAME) |
#define | VERS_MICROSOFT 32768u |
#define | VERS_NETSCAPE 16384u |
#define | VERS_PROPRIETARY (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN) |
#define | VERS_STRICT (VERS_HTML5|VERS_HTML40_STRICT) |
#define | VERS_SUN 8192u |
#define | VERS_UNKNOWN (xxxx) |
#define | VERS_XHTML (X10S|X10T|X10F|XH11|XB10|XH50) |
#define | VERS_XHTML11 (XH11) |
#define | VERS_XML 65536u |
#define | white 8u |
#define | X10F 1024u |
#define | X10S 256u |
#define | X10T 512u |
#define | XB10 4096u |
#define | XH11 2048u |
#define | XH50 262144u |
#define | xxxx 0u |
If the document uses just HTML 2.0 tags and attributes, describe it as HTML 2.0. More... | |
Enumerations | |
enum | GetTokenMode { IgnoreWhitespace , MixedContent , Preformatted , IgnoreMarkup , OtherNamespace , CdataContent } |
modes for GetToken() More... | |
enum | LexerState { LEX_CONTENT , LEX_GT , LEX_ENDTAG , LEX_STARTTAG , LEX_COMMENT , LEX_DOCTYPE , LEX_PROCINSTR , LEX_CDATA , LEX_SECTION , LEX_ASP , LEX_JSTE , LEX_PHP , LEX_XMLDECL } |
Lexer GetToken() states. More... | |
enum | NodeType { RootNode , DocTypeTag , CommentTag , ProcInsTag , TextNode , StartTag , EndTag , StartEndTag , CDATATag , SectionTag , AspTag , JsteTag , PhpTag , XmlDecl } |
node->type is one of these values More... | |
enum | ParseDocTypeDeclState { DT_INTERMEDIATE , DT_DOCTYPENAME , DT_PUBLICSYSTEM , DT_QUOTEDSTRING , DT_INTSUBSET } |
ParseDocTypeDecl state constants. More... | |
Lexer Functions | |
TY_PRIVATE int | TY_❪HTMLVersion❫ (TidyDocImpl *doc) |
Choose what version to use for new doctype. More... | |
TY_PRIVATE void | TY_❪ConstrainVersion❫ (TidyDocImpl *doc, uint vers) |
Everything is allowed in proprietary version of HTML. More... | |
TY_PRIVATE Bool | TY_❪IsWhite❫ (uint c) |
TY_PRIVATE Bool | TY_❪IsDigit❫ (uint c) |
TY_PRIVATE Bool | TY_❪IsLetter❫ (uint c) |
TY_PRIVATE Bool | TY_❪IsHTMLSpace❫ (uint c) |
TY_PRIVATE Bool | TY_❪IsNewline❫ (uint c) |
TY_PRIVATE Bool | TY_❪IsNamechar❫ (uint c) |
TY_PRIVATE Bool | TY_❪IsXMLLetter❫ (uint c) |
TY_PRIVATE Bool | TY_❪IsXMLNamechar❫ (uint c) |
TY_PRIVATE Bool | TY_❪IsUpper❫ (uint c) |
TY_PRIVATE uint | TY_❪ToLower❫ (uint c) |
TY_PRIVATE uint | TY_❪ToUpper❫ (uint c) |
TY_PRIVATE Lexer * | TY_❪NewLexer❫ (TidyDocImpl *doc) |
TY_PRIVATE void | TY_❪FreeLexer❫ (TidyDocImpl *doc) |
TY_PRIVATE void | TY_❪AddCharToLexer❫ (Lexer *lexer, uint c) |
Store character c as UTF-8 encoded byte stream. More... | |
TY_PRIVATE Node * | TY_❪NewNode❫ (TidyAllocator *allocator, Lexer *lexer) |
Used for elements and text nodes. More... | |
TY_PRIVATE Node * | TY_❪CloneNode❫ (TidyDocImpl *doc, Node *element) |
Used to clone heading nodes when split by an <HR> More... | |
TY_PRIVATE void | TY_❪FreeAttrs❫ (TidyDocImpl *doc, Node *node) |
Free node's attributes. More... | |
TY_PRIVATE void | TY_❪FreeAttribute❫ (TidyDocImpl *doc, AttVal *av) |
Doesn't repair attribute list linkage. More... | |
TY_PRIVATE void | TY_❪DetachAttribute❫ (Node *node, AttVal *attr) |
Detach attribute from node. More... | |
TY_PRIVATE void | TY_❪RemoveAttribute❫ (TidyDocImpl *doc, Node *node, AttVal *attr) |
Detach attribute from node then free it. More... | |
TY_PRIVATE void | TY_❪FreeNode❫ (TidyDocImpl *doc, Node *node) |
Free document nodes by iterating through peers and recursing through children. More... | |
TY_PRIVATE Node * | TY_❪TextToken❫ (Lexer *lexer) |
TY_PRIVATE Node * | TY_❪NewLineNode❫ (Lexer *lexer) |
Used for creating preformatted text from Word2000. More... | |
TY_PRIVATE Node * | TY_❪NewLiteralTextNode❫ (Lexer *lexer, ctmbstr txt) |
Used for adding a `&nbsp;` for Word2000. More... | |
TY_PRIVATE void | TY_❪AddStringLiteral❫ (Lexer *lexer, ctmbstr str) |
TY_PRIVATE Node * | TY_❪FindDocType❫ (TidyDocImpl *doc) |
TY_PRIVATE Node * | TY_❪FindHTML❫ (TidyDocImpl *doc) |
TY_PRIVATE Node * | TY_❪FindHEAD❫ (TidyDocImpl *doc) |
TY_PRIVATE Node * | TY_❪FindTITLE❫ (TidyDocImpl *doc) |
TY_PRIVATE Node * | TY_❪FindBody❫ (TidyDocImpl *doc) |
TY_PRIVATE Node * | TY_❪FindXmlDecl❫ (TidyDocImpl *doc) |
TY_PRIVATE Node * | TY_❪FindContainer❫ (Node *node) |
Returns containing block element, if any. More... | |
TY_PRIVATE Bool | TY_❪AddGenerator❫ (TidyDocImpl *doc) |
Add meta element for Tidy. More... | |
TY_PRIVATE uint | TY_❪ApparentVersion❫ (TidyDocImpl *doc) |
TY_PRIVATE ctmbstr | TY_❪HTMLVersionNameFromCode❫ (uint vers, Bool isXhtml) |
TY_PRIVATE uint | TY_❪HTMLVersionNumberFromCode❫ (uint vers) |
TY_PRIVATE Bool | TY_❪WarnMissingSIInEmittedDocType❫ (TidyDocImpl *doc) |
TY_PRIVATE Bool | TY_❪SetXHTMLDocType❫ (TidyDocImpl *doc) |
TY_PRIVATE Bool | TY_❪FixDocType❫ (TidyDocImpl *doc) |
Fixup doctype if missing. More... | |
TY_PRIVATE Bool | TY_❪FixXmlDecl❫ (TidyDocImpl *doc) |
Ensure XML document starts with <?xml version="1.0"?>, and add encoding attribute if not using ASCII or UTF-8 output. More... | |
TY_PRIVATE Node * | TY_❪InferredTag❫ (TidyDocImpl *doc, TidyTagId id) |
TY_PRIVATE void | TY_❪UngetToken❫ (TidyDocImpl *doc) |
TY_PRIVATE Node * | TY_❪GetToken❫ (TidyDocImpl *doc, GetTokenMode mode) |
TY_PRIVATE void | TY_❪InitMap❫ (void) |
TY_PRIVATE AttVal * | TY_❪NewAttribute❫ (TidyDocImpl *doc) |
Create a new attribute. More... | |
TY_PRIVATE AttVal * | TY_❪NewAttributeEx❫ (TidyDocImpl *doc, ctmbstr name, ctmbstr value, int delim) |
Create a new attribute with given name and value. More... | |
TY_PRIVATE void | TY_❪InsertAttributeAtEnd❫ (Node *node, AttVal *av) |
Insert attribute at the end of attribute list of a node. More... | |
TY_PRIVATE void | TY_❪InsertAttributeAtStart❫ (Node *node, AttVal *av) |
Insert attribute at the start of attribute list of a node. More... | |
Inline Stack Functions | |
TY_PRIVATE AttVal * | TY_❪DupAttrs❫ (TidyDocImpl *doc, AttVal *attrs) |
Duplicate attributes. More... | |
TY_PRIVATE void | TY_❪PushInline❫ (TidyDocImpl *doc, Node *node) |
Push a copy of an inline node onto stack, but don't push if implicit or OBJECT or APPLET (implicit tags are ones generated from the istack). More... | |
TY_PRIVATE void | TY_❪PopInline❫ (TidyDocImpl *doc, Node *node) |
Pop inline stack. More... | |
TY_PRIVATE Bool | TY_❪IsPushed❫ (TidyDocImpl *doc, Node *node) |
TY_PRIVATE Bool | TY_❪IsPushedLast❫ (TidyDocImpl *doc, Node *element, Node *node) |
TY_PRIVATE int | TY_❪InlineDup❫ (TidyDocImpl *doc, Node *node) |
This has the effect of inserting "missing" inline elements around the contents of block-level elements such as P, TD, TH, DIV, PRE etc. More... | |
TY_PRIVATE void | TY_❪DeferDup❫ (TidyDocImpl *doc) |
Defer duplicates when entering a table or other element where the inlines shouldn't be duplicated. More... | |
TY_PRIVATE Node * | TY_❪InsertedToken❫ (TidyDocImpl *doc) |
TY_PRIVATE Bool | TY_❪SwitchInline❫ (TidyDocImpl *doc, Node *element, Node *node) |
Stack manipulation for inline elements. More... | |
TY_PRIVATE Bool | TY_❪InlineDup1❫ (TidyDocImpl *doc, Node *node, Node *element) |
Generic stack of nodes. | |
TY_PRIVATE Stack * | TY_❪newStack❫ (TidyDocImpl *doc, uint capacity) |
Create a new stack with a given starting capacity. More... | |
TY_PRIVATE void | TY_❪growStack❫ (Stack *stack) |
Increase the stack size. More... | |
TY_PRIVATE Bool | TY_❪stackFull❫ (Stack *stack) |
Stack is full when top is equal to the last index. More... | |
TY_PRIVATE Bool | TY_❪stackEmpty❫ (Stack *stack) |
Stack is empty when top is equal to -1. More... | |
TY_PRIVATE void | TY_❪push❫ (Stack *stack, Node *node) |
Push an item to the stack. More... | |
TY_PRIVATE Node * | TY_❪pop❫ (Stack *stack) |
Pop an item from the stack. More... | |
TY_PRIVATE Node * | TY_❪peek❫ (Stack *stack) |
Peek at the stack. More... | |
TY_PRIVATE void | TY_❪freeStack❫ (Stack *stack) |
Frees the stack when done. More... | |
struct _AttVal |
struct _IStack |
Mosaic handles inlines via a separate stack from other elements. We duplicate this to recover from inline markup errors such as:
    <i>italic text
    <p>more italic text</b> normal text
which for compatibility with Mosaic is mapped to:
    <i>italic text</i>
    <p><i>more italic text</i> normal text
Note that any inline end tag pops the effect of the current inline start tag, so that </b> pops <i> in the above example.
Data Fields | ||
---|---|---|
AttVal * | attributes | |
tmbstr | element | name (NULL for text nodes) |
IStack * | next | |
const Dict * | tag | tag's dictionary definition |
struct _Lexer |
The following are private to the lexer.
Use NewLexer() to create a lexer, and FreeLexer() to free it.
Data Fields | ||
---|---|---|
TidyAllocator * | allocator | allocator |
Bool | bad_doctype |
e.g. if html or PUBLIC is missing |
uint | columns | at start of current token |
uint | doctype | version as given by doctype (if any) |
Bool | excludeBlocks | Netscape compatibility. |
Bool | exiled | true if moved out of table |
Node * | inode | for deferring text node |
IStack * | insert | for inferring inline tags |
Bool | insertspace | when space is moved after end tag |
IStack * | istack | |
uint | istackbase | start of frame |
uint | istacklength | allocated |
uint | istacksize | used |
Bool | isvoyager | true if xmlns attribute on html element (i.e., "Voyager" was the W3C codename for XHTML). |
Node * | itoken | last duplicate inline returned by GetToken() |
tmbstr | lexbuf | MB character buffer. |
uint | lexlength | allocated |
uint | lexsize | used |
uint | lines | lines seen |
Node * | parent | remember parent node for CDATA elements |
Bool | pushed | true after token has been pushed back |
Node * | root | remember root node of the document |
Bool | seenEndBody |
true if a </body> tag has been encountered |
Bool | seenEndHtml |
true if a </html> tag has been encountered |
LexerState | state | state of lexer's finite state machine |
TagStyle * | styles | used for cleaning up presentation markup |
Node * | token | last token returned by GetToken() |
uint | txtend | end of current node |
uint | txtstart | start of current node |
uint | versionEmitted | version of doctype emitted |
uint | versions | bit vector of HTML versions |
Bool | waswhite | used to collapse contiguous white space |
struct _Node |
HTML/XHTML/XML Element, Comment, PI, DOCTYPE, XML Decl, etc., etc.
Data Fields | ||
---|---|---|
AttVal * | attributes | |
Bool | closed | true if closed by explicit end tag |
uint | column | current column of document |
Node * | content | |
tmbstr | element | name (NULL for text nodes) |
uint | end | end of span onto text array |
Bool | implicit | true if inferred |
Node * | last | |
uint | line | current line of document |
Bool | linebreak | true if followed by a line break |
Node * | next | |
Node * | parent | tree structure |
Node * | prev | |
uint | start | start of span onto text array |
const Dict * | tag | tag's dictionary definition |
NodeType | type | TextNode, StartTag, EndTag etc. |
const Dict * | was | old tag when it was changed |
struct Stack |
This typedef represents a stack of addresses to nodes.
Tidy uses these to try to limit recursion by pushing nodes to a stack when possible instead of recursing.
#define CM_BLOCK (1 << 3) |
HTML "block" elements.
#define CM_DEFLIST (1 << 6) |
Elements that mark definition list item ("DL", "DT").
#define CM_EMPTY (1 << 0) |
Elements with no content.
Map to HTML specification.
#define CM_FIELD (1 << 10) |
Elements whose content must be protected against white space movement.
Includes some elements that can be found in forms.
#define CM_FRAMES (1 << 13) |
"FRAME", "FRAMESET", "NOFRAMES".
Used in ParseFrameSet.
#define CM_HEAD (1 << 2) |
Elements that can appear within HEAD.
#define CM_HEADING (1 << 14) |
Heading elements (h1, h2, ...).
#define CM_HTML (1 << 1) |
Elements that appear outside of "BODY".
#define CM_IMG (1 << 16) |
Elements that use "align" attribute for vertical position.
#define CM_INLINE (1 << 4) |
HTML "inline" elements.
#define CM_LIST (1 << 5) |
Elements that mark list item ("LI").
#define CM_MIXED (1 << 17) |
Elements with inline and block model.
Used to avoid calling InlineDup.
#define CM_NEW (1 << 20) |
User defined elements.
Used to determine how attributes without value should be printed.
#define CM_NO_INDENT (1 << 18) |
Elements whose content needs to be indented only if containing one CM_BLOCK element.
#define CM_OBJECT (1 << 11) |
Used to avoid propagating inline emphasis inside some elements such as OBJECT or APPLET.
#define CM_OBSOLETE (1 << 19) |
Elements that are obsolete (such as "dir", "menu").
#define CM_OMITST (1 << 21) |
Elements that cannot be omitted.
#define CM_OPT (1 << 15) |
Elements with an optional end tag.
#define CM_PARAM (1 << 12) |
Elements that allow "PARAM".
#define CM_ROW (1 << 9) |
Used for "TD", "TH".
#define CM_ROWGRP (1 << 8) |
Used for "THEAD", "TFOOT" or "TBODY".
#define CM_TABLE (1 << 7) |
Elements that can appear inside TABLE.
#define CM_UNKNOWN 0 |
Content model shortcut encoding.
Descriptions are tentative.
#define CM_VOID (1 << 22) |
Elements that are void per https://www.w3.org/TR/2011/WD-html-markup-20110113/syntax.html#syntax-elements.
#define digit 1u |
Lexer character types.
#define digithex 128u |
#define H40F 16u |
#define H40S 4u |
#define H40T 8u |
#define H41F 128u |
#define H41S 32u |
#define H41T 64u |
#define HT20 1u |
#define HT32 2u |
#define HT50 131072u |
#define letter 2u |
#define lowercase 32u |
#define namechar 4u |
#define newline 16u |
#define uppercase 64u |
#define VERS_ALL (VERS_HTML20|VERS_HTML32|VERS_FROM40|XH50|HT50) |
#define VERS_BASIC (XB10) |
#define VERS_EVENTS (VERS_HTML40|VERS_XHTML11) |
#define VERS_FROM32 (VERS_HTML32|VERS_HTML40|HT50) |
#define VERS_FROM40 (VERS_HTML40|VERS_XHTML11|VERS_BASIC|VERS_HTML5) |
#define VERS_HTML20 (HT20) |
#define VERS_HTML32 (HT32) |
#define VERS_HTML40 (VERS_HTML40_STRICT|VERS_HTML40_LOOSE|VERS_FRAMESET) |
#define VERS_IFRAME (VERS_HTML40_LOOSE|VERS_FRAMESET) |
#define VERS_LOOSE (VERS_HTML20|VERS_HTML32|VERS_IFRAME) |
#define VERS_MICROSOFT 32768u |
#define VERS_NETSCAPE 16384u |
#define VERS_PROPRIETARY (VERS_NETSCAPE|VERS_MICROSOFT|VERS_SUN) |
#define VERS_STRICT (VERS_HTML5|VERS_HTML40_STRICT) |
#define VERS_SUN 8192u |
#define VERS_UNKNOWN (xxxx) |
#define VERS_XHTML11 (XH11) |
#define VERS_XML 65536u |
#define white 8u |
#define X10F 1024u |
#define X10S 256u |
#define X10T 512u |
#define XB10 4096u |
#define XH11 2048u |
#define XH50 262144u |
#define xxxx 0u |
If the document uses just HTML 2.0 tags and attributes, describe it as HTML 2.0.
Similarly for HTML 3.2 and the 3 flavors of HTML 4.0. If there are proprietary tags and attributes, then describe it as HTML Proprietary. If it includes the xml-lang or xmlns attributes but is otherwise HTML 2.0, 3.2 or 4.0, then describe it as one of the flavors of Voyager (strict, loose or frameset).
enum GetTokenMode |
enum LexerState |
enum NodeType |
TY_PRIVATE void TY_❪AddCharToLexer❫ | ( | Lexer * | lexer, |
uint | c | ||
) |
Store character c as UTF-8 encoded byte stream.
TY_PRIVATE Bool TY_❪AddGenerator❫ | ( | TidyDocImpl * | doc | ) |
Add meta element for Tidy.
TY_PRIVATE void TY_❪AddStringLiteral❫ | ( | Lexer * | lexer, |
ctmbstr | str | ||
) |
TY_PRIVATE uint TY_❪ApparentVersion❫ | ( | TidyDocImpl * | doc | ) |
TY_PRIVATE Node* TY_❪CloneNode❫ | ( | TidyDocImpl * | doc, |
Node * | element | ||
) |
Used to clone heading nodes when split by an <HR>
TY_PRIVATE void TY_❪ConstrainVersion❫ | ( | TidyDocImpl * | doc, |
uint | vers | ||
) |
Everything is allowed in proprietary version of HTML.
This is handled here rather than in the tag/attr dicts
TY_PRIVATE void TY_❪DeferDup❫ | ( | TidyDocImpl * | doc | ) |
Defer duplicates when entering a table or other element where the inlines shouldn't be duplicated.
TY_PRIVATE void TY_❪DetachAttribute❫ | ( | Node * | node, |
AttVal * | attr | ||
) |
Detach attribute from node.
TY_PRIVATE AttVal* TY_❪DupAttrs❫ | ( | TidyDocImpl * | doc, |
AttVal * | attrs | ||
) |
Duplicate attributes.
TY_PRIVATE Node* TY_❪FindBody❫ | ( | TidyDocImpl * | doc | ) |
TY_PRIVATE Node* TY_❪FindContainer❫ | ( | Node * | node | ) |
Returns containing block element, if any.
TY_PRIVATE Node* TY_❪FindDocType❫ | ( | TidyDocImpl * | doc | ) |
TY_PRIVATE Node* TY_❪FindHEAD❫ | ( | TidyDocImpl * | doc | ) |
TY_PRIVATE Node* TY_❪FindHTML❫ | ( | TidyDocImpl * | doc | ) |
TY_PRIVATE Node* TY_❪FindTITLE❫ | ( | TidyDocImpl * | doc | ) |
TY_PRIVATE Node* TY_❪FindXmlDecl❫ | ( | TidyDocImpl * | doc | ) |
TY_PRIVATE Bool TY_❪FixDocType❫ | ( | TidyDocImpl * | doc | ) |
Fixup doctype if missing.
TY_PRIVATE Bool TY_❪FixXmlDecl❫ | ( | TidyDocImpl * | doc | ) |
Ensure XML document starts with <?xml version="1.0"?>, and add encoding attribute if not using ASCII or UTF-8 output.
TY_PRIVATE void TY_❪FreeAttribute❫ | ( | TidyDocImpl * | doc, |
AttVal * | av | ||
) |
Doesn't repair attribute list linkage.
TY_PRIVATE void TY_❪FreeAttrs❫ | ( | TidyDocImpl * | doc, |
Node * | node | ||
) |
Free node's attributes.
TY_PRIVATE void TY_❪FreeLexer❫ | ( | TidyDocImpl * | doc | ) |
TY_PRIVATE void TY_❪FreeNode❫ | ( | TidyDocImpl * | doc, |
Node * | node | ||
) |
Free document nodes by iterating through peers and recursing through children.
Set next to NULL before calling FreeNode() to avoid freeing peer nodes. Doesn't patch up prev/next links.
TY_PRIVATE void TY_❪freeStack❫ | ( | Stack * | stack | ) |
Frees the stack when done.
TY_PRIVATE Node* TY_❪GetToken❫ | ( | TidyDocImpl * | doc, |
GetTokenMode | mode | ||
) |
TY_PRIVATE void TY_❪growStack❫ | ( | Stack * | stack | ) |
Increase the stack size.
This will be called automatically when the current stack is full. If memory allocation fails, then the allocator will panic the program automatically.
TY_PRIVATE ctmbstr TY_❪HTMLVersionNameFromCode❫ | ( | uint | vers, |
Bool | isXhtml | ||
) |
TY_PRIVATE uint TY_❪HTMLVersionNumberFromCode❫ | ( | uint | vers | ) |
TY_PRIVATE int TY_❪HTMLVersion❫ | ( | TidyDocImpl * | doc | ) |
Choose what version to use for new doctype.
TY_PRIVATE Node* TY_❪InferredTag❫ | ( | TidyDocImpl * | doc, |
TidyTagId | id | ||
) |
TY_PRIVATE void TY_❪InitMap❫ | ( | void | ) |
TY_PRIVATE Bool TY_❪InlineDup1❫ | ( | TidyDocImpl * | doc, |
Node * | node, | ||
Node * | element | ||
) |
TY_PRIVATE int TY_❪InlineDup❫ | ( | TidyDocImpl * | doc, |
Node * | node | ||
) |
This has the effect of inserting "missing" inline elements around the contents of block-level elements such as P, TD, TH, DIV, PRE etc.
This procedure is called at the start of ParseBlock, when the inline stack is not empty, as will be the case in:
    <i><h1>italic heading</h1></i>
which is then treated as equivalent to
    <h1><i>italic heading</i></h1>
This is implemented by setting the lexer into a mode where it gets tokens from the inline stack rather than from the input stream.
TY_PRIVATE void TY_❪InsertAttributeAtEnd❫ | ( | Node * | node, |
AttVal * | av | ||
) |
Insert attribute at the end of attribute list of a node.
TY_PRIVATE void TY_❪InsertAttributeAtStart❫ | ( | Node * | node, |
AttVal * | av | ||
) |
Insert attribute at the start of attribute list of a node.
TY_PRIVATE Node* TY_❪InsertedToken❫ | ( | TidyDocImpl * | doc | ) |
TY_PRIVATE Bool TY_❪IsDigit❫ | ( | uint | c | ) |
TY_PRIVATE Bool TY_❪IsHTMLSpace❫ | ( | uint | c | ) |
TY_PRIVATE Bool TY_❪IsLetter❫ | ( | uint | c | ) |
TY_PRIVATE Bool TY_❪IsNamechar❫ | ( | uint | c | ) |
TY_PRIVATE Bool TY_❪IsNewline❫ | ( | uint | c | ) |
TY_PRIVATE Bool TY_❪IsPushedLast❫ | ( | TidyDocImpl * | doc, |
Node * | element, | ||
Node * | node | ||
) |
TY_PRIVATE Bool TY_❪IsPushed❫ | ( | TidyDocImpl * | doc, |
Node * | node | ||
) |
TY_PRIVATE Bool TY_❪IsUpper❫ | ( | uint | c | ) |
TY_PRIVATE Bool TY_❪IsWhite❫ | ( | uint | c | ) |
TY_PRIVATE Bool TY_❪IsXMLLetter❫ | ( | uint | c | ) |
TY_PRIVATE Bool TY_❪IsXMLNamechar❫ | ( | uint | c | ) |
TY_PRIVATE AttVal* TY_❪NewAttributeEx❫ | ( | TidyDocImpl * | doc, |
ctmbstr | name, | ||
ctmbstr | value, | ||
int | delim | ||
) |
Create a new attribute with given name and value.
TY_PRIVATE AttVal* TY_❪NewAttribute❫ | ( | TidyDocImpl * | doc | ) |
Create a new attribute.
TY_PRIVATE Lexer* TY_❪NewLexer❫ | ( | TidyDocImpl * | doc | ) |
TY_PRIVATE Node* TY_❪NewLineNode❫ | ( | Lexer * | lexer | ) |
Used for creating preformatted text from Word2000.
TY_PRIVATE Node* TY_❪NewLiteralTextNode❫ | ( | Lexer * | lexer, |
ctmbstr | txt | ||
) |
Used for adding a `&nbsp;` for Word2000.
TY_PRIVATE Node* TY_❪NewNode❫ | ( | TidyAllocator * | allocator, |
Lexer * | lexer | ||
) |
Used for elements and text nodes.
TY_PRIVATE Stack* TY_❪newStack❫ | ( | TidyDocImpl * | doc, |
uint | capacity | ||
) |
Create a new stack with a given starting capacity.
If memory allocation fails, then the allocator will panic the program automatically.
TY_PRIVATE Node* TY_❪peek❫ | ( | Stack * | stack | ) |
Peek at the stack.
TY_PRIVATE void TY_❪PopInline❫ | ( | TidyDocImpl * | doc, |
Node * | node | ||
) |
Pop inline stack.
TY_PRIVATE Node* TY_❪pop❫ | ( | Stack * | stack | ) |
Pop an item from the stack.
TY_PRIVATE void TY_❪PushInline❫ | ( | TidyDocImpl * | doc, |
Node * | node | ||
) |
Push a copy of an inline node onto stack, but don't push if implicit or OBJECT or APPLET (implicit tags are ones generated from the istack).
One issue arises with pushing inlines when the tag is already pushed. For instance:
    <p><em>text
    <p><em>more text
Shouldn't be mapped to
    <p><em>text</em></p>
    <p><em><em>more text</em></em>
TY_PRIVATE void TY_❪push❫ | ( | Stack * | stack, |
Node * | node | ||
) |
Push an item to the stack.
TY_PRIVATE void TY_❪RemoveAttribute❫ | ( | TidyDocImpl * | doc, |
Node * | node, | ||
AttVal * | attr | ||
) |
Detach attribute from node then free it.
TY_PRIVATE Bool TY_❪SetXHTMLDocType❫ | ( | TidyDocImpl * | doc | ) |
TY_PRIVATE Bool TY_❪stackEmpty❫ | ( | Stack * | stack | ) |
Stack is empty when top is equal to -1.
TY_PRIVATE Bool TY_❪stackFull❫ | ( | Stack * | stack | ) |
Stack is full when top is equal to the last index.
TY_PRIVATE Bool TY_❪SwitchInline❫ | ( | TidyDocImpl * | doc, |
Node * | element, | ||
Node * | node | ||
) |
Stack manipulation for inline elements.
TY_PRIVATE Node* TY_❪TextToken❫ | ( | Lexer * | lexer | ) |
TY_PRIVATE uint TY_❪ToLower❫ | ( | uint | c | ) |
TY_PRIVATE uint TY_❪ToUpper❫ | ( | uint | c | ) |
TY_PRIVATE void TY_❪UngetToken❫ | ( | TidyDocImpl * | doc | ) |
TY_PRIVATE Bool TY_❪WarnMissingSIInEmittedDocType❫ | ( | TidyDocImpl * | doc | ) |