HTML Tidy  5.8.0
The HTACG Tidy HTML Project
streamio.h
Go to the documentation of this file.
1 #ifndef __STREAMIO_H__
2 #define __STREAMIO_H__
3 
4 /* streamio.h -- handles character stream I/O
5 
6  (c) 1998-2007 (W3C) MIT, ERCIM, Keio University
7  See tidy.h for the copyright notice.
8 
9  Wrapper around Tidy input source and output sink
10  that calls appropriate interfaces, and applies
11  necessary char encoding transformations: to/from
12  ISO-10646 and/or UTF-8.
13 
14 */
15 
16 #include "forward.h"
17 #include "tidybuffio.h"
18 #include "fileio.h"
19 
20 #ifdef __cplusplus
21 extern "C"
22 {
23 #endif
24 typedef enum
25 {
28  UserIO
30 
31 /* states for ISO 2022
32 
33  A document in an ISO-2022 based encoding uses ESC sequences called
34  "designators" to switch character sets. The designators defined and
35  used in ISO-2022-JP are:
36 
37  "ESC" + "(" + ? for ISO646 variants
38 
39  "ESC" + "$" + ? and
40  "ESC" + "$" + "(" + ? for multibyte character sets
41 */
42 typedef enum
43 {
51 
52 /************************
53 ** Source
54 ************************/
55 
56 enum
57 {
59  LASTPOS_SIZE=64
60 };
61 
62 /* non-raw input is cleaned up */
63 struct _StreamIn
64 {
65  ISO2022State state; /* FSM for ISO2022 */
67  TidyAllocator *allocator;
71  int tabs;
73  unsigned short curlastpos; /* current last position in lastcols */
74  unsigned short firstlastpos; /* first valid last position in lastcols */
75  int curcol;
76  int curline;
77  int encoding;
79 
81 
82  /* Pointer back to document for error reporting */
83  TidyDocImpl* doc;
84 };
85 
/* Input stream interface.
**
** initStreamIn and the FileInput / BufferInput / UserInput variants each
** return a StreamIn* for the given document and encoding; the three
** variants additionally take the byte source (a FILE*, a TidyBuffer* or a
** TidyInputSource*).  freeStreamIn takes a StreamIn* and is presumably the
** matching release call.
** NOTE(review): ownership of the returned pointer and failure behaviour
** are not visible in this header -- confirm in the implementation.
*/
86 TY_PRIVATE StreamIn* TY_(initStreamIn)( TidyDocImpl* doc, int encoding );
87 TY_PRIVATE void TY_(freeStreamIn)(StreamIn* in);
88 
89 TY_PRIVATE StreamIn* TY_(FileInput)( TidyDocImpl* doc, FILE* fp, int encoding );
90 TY_PRIVATE StreamIn* TY_(BufferInput)( TidyDocImpl* doc, TidyBuffer* content, int encoding );
91 TY_PRIVATE StreamIn* TY_(UserInput)( TidyDocImpl* doc, TidyInputSource* source, int encoding );
92 
/* Character-level access to a StreamIn.  ReadChar yields a uint, UngetChar
** takes a uint plus the stream, and IsEOF answers with a Bool; judging by
** the names these read one character, push one back, and test for end of
** input.  ReadBOMEncoding returns an int -- presumably an encoding id
** derived from a byte-order mark (TODO confirm, including the value used
** when no BOM is present).
*/
93 TY_PRIVATE int TY_(ReadBOMEncoding)(StreamIn *in);
94 TY_PRIVATE uint TY_(ReadChar)( StreamIn* in );
95 TY_PRIVATE void TY_(UngetChar)( uint c, StreamIn* in );
96 TY_PRIVATE Bool TY_(IsEOF)( StreamIn* in );
97 
98 
99 /************************
100 ** Sink
101 ************************/
102 
104 {
105  int encoding;
106  ISO2022State state; /* for ISO 2022 */
110 };
111 
/* Output stream interface.
**
** FileOutput / BufferOutput / UserOutput each return a StreamOut* for the
** given document, destination (a FILE*, a TidyBuffer* or a TidyOutputSink*),
** encoding and `newln` value.
** NOTE(review): `newln` is presumably a newline-style selector such as
** DEFAULT_NL_CONFIG -- confirm against the callers.
*/
112 TY_PRIVATE StreamOut* TY_(FileOutput)( TidyDocImpl *doc, FILE* fp, int encoding, uint newln );
113 TY_PRIVATE StreamOut* TY_(BufferOutput)( TidyDocImpl *doc, TidyBuffer* buf, int encoding, uint newln );
114 TY_PRIVATE StreamOut* TY_(UserOutput)( TidyDocImpl *doc, TidyOutputSink* sink, int encoding, uint newln );
115 
/* StdErrOutput takes no arguments and returns a StreamOut*; a StdOutOutput
** counterpart is declared only inside a comment, so it is not part of the
** interface.  ReleaseStreamOut takes the document and the stream and is
** presumably the release call for the constructors in this section.
*/
116 TY_PRIVATE StreamOut* TY_(StdErrOutput)(void);
117 /* StreamOut* StdOutOutput(void); */
118 TY_PRIVATE void TY_(ReleaseStreamOut)( TidyDocImpl *doc, StreamOut* out );
119 
/* WriteChar takes one uint character and the destination stream; outBOM
** takes only the stream (by its name, it emits a byte-order mark).
*/
120 TY_PRIVATE void TY_(WriteChar)( uint c, StreamOut* out );
121 TY_PRIVATE void TY_(outBOM)( StreamOut *out );
122 
/* Lookups between a uint id and encoding name strings, and from an option
** name string back to an int.
** NOTE(review): the return values for unknown ids or names are not visible
** in this header -- confirm before relying on them.
*/
123 TY_PRIVATE ctmbstr TY_(GetEncodingNameFromTidyId)(uint id);
124 TY_PRIVATE ctmbstr TY_(GetEncodingOptNameFromTidyId)(uint id);
125 TY_PRIVATE int TY_(GetCharEncodingFromOptName)(ctmbstr charenc);
126 
127 /************************
128 ** Misc
129 ************************/
130 
131 /* character encodings
**
** Fourteen distinct integer ids, numbered consecutively from 0 to 13.
** NOTE(review): presumably these are the values carried by the
** `int encoding` parameters and fields declared in this header -- confirm
** in the implementation.
132 */
133 #define RAW 0
134 #define ASCII 1
135 #define LATIN0 2
136 #define LATIN1 3
137 #define UTF8 4
138 #define ISO2022 5
139 #define MACROMAN 6
140 #define WIN1252 7
141 #define IBM858 8
142 #define UTF16LE 9
143 #define UTF16BE 10
144 #define UTF16 11
145 #define BIG5 12
146 #define SHIFTJIS 13
147 
148 /* Convert the Windows-1252 character code `c` to Unicode; the result is returned as a uint. */
149 TY_PRIVATE uint TY_(DecodeWin1252)(uint c);
150 
151 /* Convert the MacRoman character code `c` to Unicode; the result is returned as a uint. */
152 TY_PRIVATE uint TY_(DecodeMacRoman)(uint c);
153 
154 #ifdef __cplusplus
155 }
156 #endif
157 
158 
159 /* Use numeric constants as opposed to escape chars (\r, \n)
160 ** to avoid conflicts with Mac compilers that may re-define these.
161 */
162 #define CR 0xD
163 #define LF 0xA
164 
/* Default newline configuration, chosen at compile time:
**   MAC_OS_CLASSIC defined     -> TidyCR
**   _WIN32 or OS2_OS defined   -> TidyCRLF
**   anything else              -> TidyLF
*/
165 #if defined(MAC_OS_CLASSIC)
166 # define DEFAULT_NL_CONFIG TidyCR
167 #elif defined(_WIN32) || defined(OS2_OS)
168 # define DEFAULT_NL_CONFIG TidyCRLF
169 #else
170 # define DEFAULT_NL_CONFIG TidyLF
171 #endif
172 
173 
174 #endif /* __STREAMIO_H__ */
does standard C I/O
#define TY_PRIVATE
Definition: forward.h:29
#define TY_(str)
Definition: forward.h:23
This type defines an input source capable of delivering raw bytes of input.
Definition: tidy.h:1079
This type defines an output destination capable of accepting raw bytes of output.
Definition: tidy.h:1129
IOType
Definition: streamio.h:25
@ FileIO
Definition: streamio.h:26
@ UserIO
Definition: streamio.h:28
@ BufferIO
Definition: streamio.h:27
int curline
Definition: streamio.h:76
uint bufsize
Definition: streamio.h:70
int curcol
Definition: streamio.h:75
unsigned short firstlastpos
Definition: streamio.h:74
uint bufpos
Definition: streamio.h:69
uint nl
Definition: streamio.h:107
int lastcols[LASTPOS_SIZE]
Definition: streamio.h:72
Bool pushed
Definition: streamio.h:66
tchar * charbuf
Definition: streamio.h:68
IOType iotype
Definition: streamio.h:78
TidyInputSource source
Definition: streamio.h:80
unsigned short curlastpos
Definition: streamio.h:73
TidyOutputSink sink
Definition: streamio.h:109
TidyAllocator * allocator
Definition: streamio.h:67
@ CHARBUF_SIZE
Definition: streamio.h:58
@ LASTPOS_SIZE
Definition: streamio.h:59
ISO2022State state
Definition: streamio.h:65
int encoding
Definition: streamio.h:77
int tabs
Definition: streamio.h:71
TidyDocImpl * doc
Definition: streamio.h:83
ISO2022State
Definition: streamio.h:43
@ FSM_ESCD
Definition: streamio.h:46
@ FSM_ESC
Definition: streamio.h:45
@ FSM_ESCP
Definition: streamio.h:48
@ FSM_NONASCII
Definition: streamio.h:49
@ FSM_ESCDP
Definition: streamio.h:47
@ FSM_ASCII
Definition: streamio.h:44
Definition: streamio.h:64
Definition: streamio.h:104
Treat buffer as a stream that Tidy can use for I/O operations.
Bool
Definition: tidyplatform.h:647
uint tchar
Definition: tidyplatform.h:605
unsigned int uint
Definition: tidyplatform.h:569
const tmbchar * ctmbstr
Definition: tidyplatform.h:609