HTML Tidy  5.6.0
The HTACG Tidy HTML Project
utf8.h
Go to the documentation of this file.
/* Include guard for utf8.h.
** NOTE(review): identifiers containing a double underscore are reserved for
** the C implementation; a name such as TIDY_UTF8_H would avoid that. Any
** rename must also update the matching #endif comment at the end of the file.
*/
1 #ifndef __UTF8_H__
2 #define __UTF8_H__
3 
4 /* utf8.h -- convert characters to/from UTF-8
5 
6  (c) 1998-2006 (W3C) MIT, ERCIM, Keio University
7  See tidy.h for the copyright notice.
8 
9 */
10 
11 #include "tidyplatform.h"
12 #include "tidybuffio.h"
13 
14 /* UTF-8 encoding/decoding support
15 ** Does not convert character "codepoints", i.e., to/from ISO/IEC 10646.
16 */
17 
/* Decode a UTF-8 byte sequence into *c (purpose inferred from the name; the
** implementation is not part of this header).
** NOTE(review): the signature accepts both an in-memory successorBytes string
** and a TidyInputSource -- presumably the bytes following firstByte are taken
** from one or the other; confirm in utf8.c.
** The meaning of the int return value and of *count is not visible here --
** TODO confirm before relying on either.
*/
18 int TY_(DecodeUTF8BytesToChar)( uint* c, uint firstByte, ctmbstr successorBytes,
19  TidyInputSource* inp, int* count );
20 
/* Encode the character value c as UTF-8 bytes (purpose inferred from the
** name; the implementation is not part of this header).
** NOTE(review): the signature accepts both an encodebuf string and a
** TidyOutputSink -- presumably the bytes are written to one or the other;
** confirm in utf8.c. The required size of encodebuf is not stated here.
** The meaning of the int return value and of *count is not visible here --
** TODO confirm before relying on either.
*/
21 int TY_(EncodeCharToUTF8Bytes)( uint c, tmbstr encodebuf,
22  TidyOutputSink* outp, int* count );
23 
24 
/* String-based UTF-8 helpers.
** NOTE(review): presumably GetUTF8 decodes the character starting at str into
** *ch and PutUTF8 stores c as UTF-8 starting at buf; what the returned uint
** and tmbstr represent is not visible in this header -- TODO confirm against
** utf8.c.
*/
25 uint TY_(GetUTF8)( ctmbstr str, uint *ch );
26 tmbstr TY_(PutUTF8)( tmbstr buf, uint c );
27 
/* Byte-order-mark (BOM) constants.
** UNICODE_BOM is an alias for the big-endian value UNICODE_BOM_BE.
** UNICODE_BOM_LE (0xFFFE) is the same 16-bit value with its two bytes swapped.
** UNICODE_BOM_UTF8 holds the three bytes EF BB BF packed into one integer
** constant, so it needs more than 16 bits to store.
*/
28 #define UNICODE_BOM_BE 0xFEFF /* big-endian (default) UNICODE BOM */
29 #define UNICODE_BOM UNICODE_BOM_BE
30 #define UNICODE_BOM_LE 0xFFFE /* little-endian UNICODE BOM */
31 #define UNICODE_BOM_UTF8 0xEFBBBF /* UTF-8 UNICODE BOM */
32 
33 
/* UTF-16 related predicates on tchar values, each returning Bool.
** NOTE(review): the exact ranges tested live in utf8.c. Presumably
** IsHighSurrogate / IsLowSurrogate test for U+D800..U+DBFF and
** U+DC00..U+DFFF respectively, and IsValidUTF16FromUCS4 reports whether a
** UCS-4 value can be represented in UTF-16 -- confirm against the
** implementation.
*/
34 Bool TY_(IsValidUTF16FromUCS4)( tchar ucs4 );
35 Bool TY_(IsHighSurrogate)( tchar ch );
36 Bool TY_(IsLowSurrogate)( tchar ch );
37 
/* Predicates about "combined" characters.
** NOTE(review): this header does not define "combined"; presumably it means a
** value produced by combining a surrogate pair (i.e. a character outside the
** 16-bit range), with IsValidCombinedChar adding a validity check -- confirm
** in utf8.c.
*/
38 Bool TY_(IsCombinedChar)( tchar ch );
39 Bool TY_(IsValidCombinedChar)( tchar ch );
40 
/* Conversions between a surrogate pair and a single tchar value.
** CombineSurrogatePair takes a high and a low value and returns one tchar;
** SplitSurrogatePair takes one tchar and writes through the high and low
** pointers, returning Bool.
** NOTE(review): presumably the Bool reports whether the split succeeded, and
** whether the inputs are validated is not visible here -- TODO confirm in
** utf8.c.
*/
41 tchar TY_(CombineSurrogatePair)( tchar high, tchar low );
42 Bool TY_(SplitSurrogatePair)( tchar utf16, tchar* high, tchar* low );
43 
44 
45 
46 #endif /* __UTF8_H__ */
tidybuffio.h
Treat buffer as a stream that Tidy can use for I/O operations.
const tmbchar * ctmbstr
Definition: tidyplatform.h:594
TidyInputSource
This type defines an input source capable of delivering raw bytes of input.
Definition: tidy.h:1078
Bool
Definition: tidyplatform.h:631
tidyplatform.h
Platform specific definitions, specifics, and headers.
tmbchar * tmbstr
Definition: tidyplatform.h:593
unsigned int uint
Definition: tidyplatform.h:554
TidyOutputSink
This type defines an output destination capable of accepting raw bytes of output. ...
Definition: tidy.h:1128
#define TY_(str)
Definition: forward.h:23
uint tchar
Definition: tidyplatform.h:590