HTML Tidy  5.7.0
The HTACG Tidy HTML Project
streamio.h
Go to the documentation of this file.
1 #ifndef __STREAMIO_H__
2 #define __STREAMIO_H__
3 
4 /* streamio.h -- handles character stream I/O
5 
6  (c) 1998-2007 (W3C) MIT, ERCIM, Keio University
7  See tidy.h for the copyright notice.
8 
9  Wrapper around Tidy input source and output sink
10  that calls appropriate interfaces, and applies
11  necessary char encoding transformations: to/from
12  ISO-10646 and/or UTF-8.
13 
14 */
15 
16 #include "forward.h"
17 #include "tidybuffio.h"
18 #include "fileio.h"
19 
20 #ifdef __cplusplus
21 extern "C"
22 {
23 #endif
24 typedef enum
25 {
29 } IOType;
30 
31 /* states for ISO 2022
32 
33  A document in an ISO-2022 based encoding uses ESC sequences called
34  "designators" to switch character sets. The designators defined and
35  used in ISO-2022-JP are:
36 
37  "ESC" + "(" + ? for ISO646 variants
38 
39  "ESC" + "$" + ? and
40  "ESC" + "$" + "(" + ? for multibyte character sets
41 */
42 typedef enum
43 {
50 } ISO2022State;
51 
52 /************************
53 ** Source
54 ************************/
55 
56 enum
57 {
60 };
61 
62 /* Input stream state. Non-raw input is cleaned up.
**
** NOTE(review): this listing skips source lines 66, 68-70, 72 and 80.
** The cross-reference index at the end of this page attributes those
** lines to the members pushed, charbuf, bufpos, bufsize, lastcols and
** source. Confirm against the real header before relying on the
** member list shown here.
*/
63 struct _StreamIn
64 {
65  ISO2022State state; /* FSM for ISO2022 */
67  TidyAllocator *allocator;
71  int tabs;
73  unsigned short curlastpos; /* current last position in lastcols */
74  unsigned short firstlastpos; /* first valid last position in lastcols */
75  int curcol;
76  int curline;
77  int encoding;
78  IOType iotype;
79 
81 
82  /* Pointer back to document for error reporting */
83  TidyDocImpl* doc;
84 };
85 
/* Stream lifecycle. initStreamIn takes the owning document and an
** encoding id and returns a StreamIn*; freeStreamIn takes a StreamIn*.
** NOTE(review): presumably freeStreamIn releases a stream obtained from
** initStreamIn or from one of the *Input functions below -- confirm in
** the implementation.
*/
86 StreamIn* TY_(initStreamIn)( TidyDocImpl* doc, int encoding );
87 void TY_(freeStreamIn)(StreamIn* in);
88 
/* One function per kind of input: a stdio FILE*, a TidyBuffer, or a
** caller-supplied TidyInputSource. Each also takes the document and an
** encoding id, and returns a StreamIn*.
*/
89 StreamIn* TY_(FileInput)( TidyDocImpl* doc, FILE* fp, int encoding );
90 StreamIn* TY_(BufferInput)( TidyDocImpl* doc, TidyBuffer* content, int encoding );
91 StreamIn* TY_(UserInput)( TidyDocImpl* doc, TidyInputSource* source, int encoding );
92 
/* Reading. Characters are passed and returned as uint.
** NOTE(review): ReadBOMEncoding returns an int; presumably an encoding
** id derived from a byte-order mark -- its exact return contract is not
** visible in this header.
*/
93 int TY_(ReadBOMEncoding)(StreamIn *in);
94 uint TY_(ReadChar)( StreamIn* in );
95 void TY_(UngetChar)( uint c, StreamIn* in );
96 Bool TY_(IsEOF)( StreamIn* in );
97 
98 
99 /************************
100 ** Sink
101 ************************/
102 
104 {
105  int encoding;
106  ISO2022State state; /* for ISO 2022 */
108  IOType iotype;
110 };
111 
/* One function per kind of destination: a stdio FILE*, a TidyBuffer, or
** a caller-supplied TidyOutputSink. Each also takes the document, an
** encoding id and a newline setting (newln), and returns a StreamOut*.
** NOTE(review): the accepted values of newln are not visible in this
** header.
*/
112 StreamOut* TY_(FileOutput)( TidyDocImpl *doc, FILE* fp, int encoding, uint newln );
113 StreamOut* TY_(BufferOutput)( TidyDocImpl *doc, TidyBuffer* buf, int encoding, uint newln );
114 StreamOut* TY_(UserOutput)( TidyDocImpl *doc, TidyOutputSink* sink, int encoding, uint newln );
115 
/* StdErrOutput takes no document, encoding or newline argument.
** ReleaseStreamOut takes both the document and the stream.
** NOTE(review): whether a stream returned by StdErrOutput may be passed
** to ReleaseStreamOut is not visible here -- confirm before doing so.
*/
116 StreamOut* TY_(StdErrOutput)(void);
117 /* StreamOut* StdOutOutput(void); */
118 void TY_(ReleaseStreamOut)( TidyDocImpl *doc, StreamOut* out );
119 
/* Writing. WriteChar takes the character as a uint. */
120 void TY_(WriteChar)( uint c, StreamOut* out );
121 void TY_(outBOM)( StreamOut *out );
122 
/* Lookups between a uint encoding id and a string name: two return a
** name for an id, one returns an int for an option name.
** NOTE(review): results for unknown ids or names are not visible here.
*/
123 ctmbstr TY_(GetEncodingNameFromTidyId)(uint id);
124 ctmbstr TY_(GetEncodingOptNameFromTidyId)(uint id);
125 int TY_(GetCharEncodingFromOptName)(ctmbstr charenc);
126 
127 /************************
128 ** Misc
129 ************************/
130 
131 /* character encodings
**
** Integer ids, numbered consecutively from RAW (0) to SHIFTJIS (13).
** NOTE(review): presumably these are the values carried by the
** `encoding` arguments and struct members declared in this header --
** confirm against the callers.
132 */
133 #define RAW 0
134 #define ASCII 1
135 #define LATIN0 2
136 #define LATIN1 3
137 #define UTF8 4
138 #define ISO2022 5
139 #define MACROMAN 6
140 #define WIN1252 7
141 #define IBM858 8
142 #define UTF16LE 9
143 #define UTF16BE 10
144 #define UTF16 11
145 #define BIG5 12
146 #define SHIFTJIS 13
147 
148 /* Function for conversion from Windows-1252 to Unicode.
** Takes the character code as a uint and returns a uint.
** NOTE(review): the result for codes with no mapping is not visible in
** this header.
*/
149 uint TY_(DecodeWin1252)(uint c);
150 
151 /* Function to convert from MacRoman to Unicode.
** Takes the character code as a uint and returns a uint.
** NOTE(review): the result for codes with no mapping is not visible in
** this header.
*/
152 uint TY_(DecodeMacRoman)(uint c);
153 
154 #ifdef __cplusplus
155 }
156 #endif
157 
158 
159 /* Use numeric constants rather than escape characters (\r, \n)
160 ** to avoid conflicts with Mac compilers that may redefine them.
161 */
162 #define CR 0xD
163 #define LF 0xA
164 
/* Pick the default newline configuration for the build platform:
** TidyCR when MAC_OS_CLASSIC is defined, TidyCRLF when _WIN32 or OS2_OS
** is defined, and TidyLF otherwise. The TidyCR/TidyCRLF/TidyLF names
** are not defined in this header.
*/
165 #if defined(MAC_OS_CLASSIC)
166 # define DEFAULT_NL_CONFIG TidyCR
167 #elif defined(_WIN32) || defined(OS2_OS)
168 # define DEFAULT_NL_CONFIG TidyCRLF
169 #else
170 # define DEFAULT_NL_CONFIG TidyLF
171 #endif
172 
173 
174 #endif /* __STREAMIO_H__ */
TidyDocImpl * doc
Definition: streamio.h:83
Definition: streamio.h:59
uint bufpos
Definition: streamio.h:69
Treat buffer as a stream that Tidy can use for I/O operations.
tchar * charbuf
Definition: streamio.h:68
ISO2022State state
Definition: streamio.h:65
const tmbchar * ctmbstr
Definition: tidyplatform.h:594
uint nl
Definition: streamio.h:107
unsigned short firstlastpos
Definition: streamio.h:74
int encoding
Definition: streamio.h:77
IOType
Definition: streamio.h:24
Definition: streamio.h:45
Definition: streamio.h:48
ISO2022State
Definition: streamio.h:42
Definition: streamio.h:49
TidyInputSource source
Definition: streamio.h:80
Definition: streamio.h:27
Bool pushed
Definition: streamio.h:66
This type defines an input source capable of delivering raw bytes of input.
Definition: tidy.h:1078
Definition: streamio.h:63
int curline
Definition: streamio.h:76
Bool
Definition: tidyplatform.h:631
Definition: streamio.h:46
unsigned short curlastpos
Definition: streamio.h:73
Definition: streamio.h:103
Definition: streamio.h:47
int lastcols[LASTPOS_SIZE]
Definition: streamio.h:72
int curcol
Definition: streamio.h:75
does standard C I/O
uint bufsize
Definition: streamio.h:70
Definition: streamio.h:44
unsigned int uint
Definition: tidyplatform.h:554
This type defines an output destination capable of accepting raw bytes of output. ...
Definition: tidy.h:1128
Definition: streamio.h:58
TidyOutputSink sink
Definition: streamio.h:109
Definition: streamio.h:26
Definition: streamio.h:28
TidyAllocator * allocator
Definition: streamio.h:67
int tabs
Definition: streamio.h:71
#define TY_(str)
Definition: forward.h:23
uint tchar
Definition: tidyplatform.h:590
IOType iotype
Definition: streamio.h:78