root/trunk/Utf8_16.h

Revision 5, 3.4 kB (checked in by qbert, 6 years ago)

Initial ( and last :( ) commit

Line 
1 // Utf8_16.h
2 // Copyright (C) 2002 Scott Kirkwood
3 //
4 // Permission to use, copy, modify, distribute and sell this code
5 // and its documentation for any purpose is hereby granted without fee,
6 // provided that the above copyright notice appear in all copies or
7 // any derived copies.  Scott Kirkwood makes no representations
8 // about the suitability of this software for any purpose.
9 // It is provided "as is" without express or implied warranty.
10 //
11 // Notes: Used the UTF information I found at:
12 //   http://www.cl.cam.ac.uk/~mgk25/unicode.html
13 ////////////////////////////////////////////////////////////////////////////////
14
15 #ifndef UTF8_16
16 #define UTF8_16
17
18 #include <stdio.h>
19 #include <assert.h>
20
21 #ifdef _MSC_VER
22 #pragma warning(disable: 4514) // Unreferenced inline function has been removed
23 #endif
24
// Common base of the converter classes in this header. It only supplies
// shared vocabulary: the code-unit typedefs, the list of encodings, and the
// k_Boms table (presumably the byte-order marks, one row per encoding --
// the definition lives outside this header).
class Utf8_16 {
public:
    // Raw byte and code-unit types.
    typedef unsigned char ubyte;
    typedef unsigned char utf8;   // 8 bits
    typedef unsigned short utf16; // 16 bits

    // Encodings handled by these classes. eLast is not an encoding; it is
    // the number of enumerators before it and sizes the first dimension of
    // k_Boms.
    enum encodingType {
        eUnknown = 0,
        eUtf16BigEndian = 1,
        eUtf16LittleEndian = 2,  // Default on Windows
        eUtf8 = 3,
        eLast = 4
    };

    // Up to three bytes per encodingType entry.
    static const utf8 k_Boms[eLast][3];
};
39
40 // Reads UTF-16 and outputs UTF-8
41 class Utf16_Iter : public Utf8_16 {
42 public:
43     Utf16_Iter();
44     void reset();
45     void set(const ubyte* pBuf, size_t nLen, encodingType eEncoding);
46     utf8 get() const {
47         return m_nCur;
48     }
49     void operator++();
50     operator bool() { return m_pRead <= m_pEnd; }
51
52 protected:
53     void toStart(); // Put to start state, swap bytes if necessary
54     enum eState {
55         eStart,
56         e2Bytes2,
57         e3Bytes2,
58         e3Bytes3
59     };
60 protected:
61     encodingType m_eEncoding;
62     eState m_eState;
63     utf8 m_nCur;
64     utf16 m_nCur16;
65     const ubyte* m_pBuf;
66     const ubyte* m_pRead;
67     const ubyte* m_pEnd;
68 };
69
70 // Reads UTF-8 and outputs UTF-16
71 class Utf8_Iter : public Utf8_16 {
72 public:
73     Utf8_Iter();
74     void reset();
75     void set(const ubyte* pBuf, size_t nLen, encodingType eEncoding);
76     utf16 get() const {
77 #ifdef _DEBUG
78         assert(m_eState == eStart);
79 #endif
80         return m_nCur;
81     }
82     bool canGet() const { return m_eState == eStart; }
83     void operator++();
84     operator bool() { return m_pRead <= m_pEnd; }
85
86 protected:
87     void swap();
88     void toStart(); // Put to start state, swap bytes if necessary
89     enum eState {
90         eStart,
91         e2Bytes_Byte2,
92         e3Bytes_Byte2,
93         e3Bytes_Byte3
94     };
95 protected:
96     encodingType m_eEncoding;
97     eState m_eState;
98     utf16 m_nCur;
99     const ubyte* m_pBuf;
100     const ubyte* m_pRead;
101     const ubyte* m_pEnd;
102 };
103
104 // Reads UTF16 and outputs UTF8
105 class Utf8_16_Read : public Utf8_16 {
106 public:
107     Utf8_16_Read();
108     ~Utf8_16_Read();
109
110     size_t convert(char* buf, size_t len);
111     char* getNewBuf() { return reinterpret_cast<char*>(m_pNewBuf); }
112
113     encodingType getEncoding() const { return m_eEncoding; }
114 protected:
115     int determineEncoding();
116 private:
117     encodingType m_eEncoding;
118     ubyte* m_pBuf;
119     ubyte* m_pNewBuf;
120     size_t m_nBufSize;
121     bool m_bFirstRead;
122     size_t m_nLen;
123     Utf16_Iter m_Iter16;
124 };
125
126 // Read in a UTF-8 buffer and write out to UTF-16 or UTF-8
127 class Utf8_16_Write : public Utf8_16 {
128 public:
129     Utf8_16_Write();
130     ~Utf8_16_Write();
131
132     void setEncoding(encodingType eType);
133
134     FILE * fopen(const char *_name, const char *_type);
135     size_t fwrite(const void* p, size_t _size);
136     void fclose();
137 protected:
138     encodingType m_eEncoding;
139     FILE* m_pFile;
140     utf16* m_pBuf;
141     size_t m_nBufSize;
142     bool m_bFirstWrite;
143 };
144
145
146 #endif
Note: See TracBrowser for help on using the browser.