root/trunk/src/semitwist/util/io.d

Revision 240, 6.6 kB (checked in by Abscissa, 2 years ago)

Fixed for 64bit

  • Property svn:eol-style set to native
Line 
1 // SemiTwist Library
2 // Written in the D programming language.
3
4 module semitwist.util.io;
5
6 import std.traits;
7 import std.path;
8 import std.conv;
9 import std.file;
10 import std.stdio;
11 import std.stream;
12 import std.string;
13 import std.system;
14
15 import semitwist.util.all;
16
17 version(Win32)
18     import std.c.windows.windows;
19 else version(OSX)
20     private extern(C) int _NSGetExecutablePath(char* buf, uint* bufsize);
21 else
22     import std.c.linux.linux;
23
/++
Loads a Unicode/UTF text file (UTF-8, UTF-16 or UTF-32, big or little endian)
and returns its contents as the string type given by TOut.

The file is read in one go as raw bytes, which are then passed to utfConvert
for BOM handling, endianness correction and conversion to TOut.

Params:
    filename = Path of the file to read (any string type).

Returns: The text of the file, converted to TOut.

Examples:
    string  utf8  = readUTFFile!string ( "ANY_unicode_file.txt" );
    wstring utf16 = readUTFFile!wstring( "ANY_unicode_file.txt" );
    dstring utf32 = readUTFFile!dstring( "ANY_unicode_file.txt" );
+/
TOut readUTFFile(TOut, TFilename)(TFilename filename)
    if(isSomeString!TOut && isSomeString!TFilename)
{
    // The buffer returned by read() is handed straight to utfConvert;
    // no other reference to it is kept here.
    return utfConvert!TOut( cast(immutable(ubyte)[])read(filename) );
}
40
/++
Converts any type of Unicode/UTF string with or without a BOM (UTF-8, UTF-16,
UTF-32, big or little endian), strips the BOM (if it exists), and automatically
converts it to native endianness and whatever string type is specified in TOut.

If there is no BOM, then UTF-8 is assumed.

The input is always examined as raw bytes, so the BOM is detected and removed
the same way no matter which code-unit type (TInChar) the caller supplies.

Params:
    data = The encoded text, optionally starting with a BOM.

Returns: The decoded text as TOut, without the BOM.

Throws: Exception if the detected encoding is not 8, 16 or 32 bit, or if the
        payload length is not a whole number of code units for that encoding.

Examples:
    string  utf8  = utfConvert!string ( anyUTFDataWithBOM );
    wstring utf16 = utfConvert!wstring( anyUTFDataWithBOM );
    dstring utf32 = utfConvert!dstring( anyUTFDataWithBOM );
+/
TOut utfConvert(TOut, TInChar)(immutable(TInChar)[] data)
    if( isSomeString!TOut && (isSomeString!(immutable(TInChar)[]) || is(TInChar==ubyte)) )
{
    // Work on a byte view: the BOM code and its length are expressed in bytes,
    // whereas 'data' may be made of 1, 2 or 4 byte code units.
    auto bytes = cast(immutable(ubyte)[])data;

    auto bom = bomOf(bytes);
    auto bomCode = bomCodeOf(bom);

    // Strip BOM if it exists
    if(bytes.length >= bomCode.length && bytes[0..bomCode.length] == bomCode)
        bytes = bytes[bomCode.length..$];

    immutable needsSwap = isNonNativeEndian(bom);

    // No references to 'data' are maintained
    if(is8Bit(bom))
        return to!TOut(cast(string)bytes);
    else if(is16Bit(bom))
    {
        if(bytes.length % wchar.sizeof != 0)
            throw new Exception("UTF-16 data length is not a multiple of 2 bytes");

        // byteSwap16 returns a swapped copy; it does not modify its argument.
        auto native = needsSwap? byteSwap16(bytes) : bytes;
        return to!TOut(cast(wstring)native);
    }
    else if(is32Bit(bom))
    {
        if(bytes.length % dchar.sizeof != 0)
            throw new Exception("UTF-32 data length is not a multiple of 4 bytes");

        // byteSwap32 returns a swapped copy; it does not modify its argument.
        auto native = needsSwap? byteSwap32(bytes) : bytes;
        return to!TOut(cast(dstring)native);
    }
    else
        throw new Exception("Unhandled BOM type '%s'".format(bom));
}
84
/// Returns value with its two bytes exchanged (0xAABB becomes 0xBBAA).
ushort byteSwapVal16(ushort value)
{
    immutable int highByte = value >> 8;
    immutable int lowByte  = value & 0x00FF;

    // The old low byte moves up, the old high byte moves down.
    return cast(ushort)( (lowByte << 8) | highByte );
}
89
/// Returns value with its four bytes in reverse order
/// (0xAABBCCDD becomes 0xDDCCBBAA).
uint byteSwapVal32(uint value)
{
    // Pull the four bytes out individually, lowest first.
    immutable uint byte0 =  value        & 0xFF;
    immutable uint byte1 = (value >>  8) & 0xFF;
    immutable uint byte2 = (value >> 16) & 0xFF;
    immutable uint byte3 =  value >> 24;

    // Reassemble them in the opposite order.
    return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
98
/// Reverses the byte order of a single ushort or uint by dispatching, at
/// compile time, to byteSwapVal16 or byteSwapVal32.
private T byteSwap(T)(T value) if(is(T==ushort) || is(T==uint))
{
    static if(is(T==ushort))
        return byteSwapVal16(value);
    else static if(is(T==uint))
        return byteSwapVal32(value);
    else
        // Guards against the template constraint being widened without a
        // matching branch being added here. Strings are joined with '~' in D.
        static assert(0, "T=='"~T.stringof~"' not handled");
}
108
/// Reverses the byte order of every element of data, overwriting the
/// caller's array.
void byteSwapInPlace(T)(T[] data) if(is(T==ushort) || is(T==uint))
{
    foreach(index; 0 .. data.length)
        data[index] = byteSwap(data[index]);
}
114
/// Returns a new immutable array holding the elements of data with their
/// byte order reversed. The input array is left untouched.
private immutable(T)[] byteSwap(T)(immutable(T)[] data) if(is(T==ushort) || is(T==uint))
{
    // Swap inside a private mutable copy.
    auto swappedCopy = data.dup;
    byteSwapInPlace(swappedCopy);

    // The copy never escapes except through this return value (neither this
    // function nor byteSwapInPlace keeps a reference), so it can be handed
    // out as immutable.
    return cast(immutable(T)[])swappedCopy;
}
123
/// Treats data as a sequence of 16-bit units, reverses the byte order of each
/// unit, and returns the result as a new array with the original element type.
/// The argument itself is not modified.
immutable(T)[] byteSwap16(T)(const(T)[] data)
{
    auto asUnits = cast(immutable(ushort)[])data;
    auto swapped = byteSwap(asUnits);
    return cast(immutable(T)[])swapped;
}
128
/// Treats data as a sequence of 32-bit units, reverses the byte order of each
/// unit, and returns the result as a new array with the original element type.
/// The argument itself is not modified.
immutable(T)[] byteSwap32(T)(const(T)[] data)
{
    auto asUnits = cast(immutable(uint)[])data;
    auto swapped = byteSwap(asUnits);
    return cast(immutable(T)[])swapped;
}
133
/++
Reads a zero-terminated string from reader, one code unit at a time, and
returns it without the terminator.

T selects both the result type and the code-unit width that is read:
string reads char, wstring reads wchar, dstring reads dchar.

Params:
    reader = Stream to read from; reading starts at its current position and
             stops just after the terminating zero code unit.

Returns: The code units read before the terminator, as T.
+/
T readStringz(T)(std.stream.File reader) if(isSomeString!T)
{
    static if(is(T==string))
        alias char TElem;
    else static if(is(T==wstring))
        alias wchar TElem;
    else static if(is(T==dstring))
        alias dchar TElem;
    else
        // The condition must be 0: a bare string literal as the condition is
        // always true, so the assert would never trigger.
        static assert(0, "'"~T.stringof~"' not allowed.");

    TElem[] buffer;
    TElem c;

    do
    {
        reader.read(c);
        buffer ~= c;
    } while(c != 0);

    // Drop the terminator that the loop appended.
    // No references saved, nothing can change it.
    return cast(T)(buffer[0..$-1]);
}
157
//TODO*: Unittest this
/// Converts data to the byte order en.
/// This assumes that data is already in native endianness: when en equals
/// `endian` the value is returned as-is, otherwise its bytes are reversed.
T toEndian(T)(T data, Endian en) if(is(T==ushort) || is(T==uint))
{
    return (en == endian)? data : byteSwap(data);
}
167
168 /// Gets the full path to the currently running executable,
169 /// regardless of working directory or PATH env var or anything else.
170 /// Note that this is far more accurate and reliable than using args[0].
171 /+FilePath getExecFilePath()
172 {
173     string file = new char[4*1024];
174     int filenameLength;
175     version (Win32)
176         filenameLength = GetModuleFileNameA(null, file.ptr, file.length-1);
177     else version(OSX)
178     {
179         filenameLength = file.length-1;
180         _NSGetExecutablePath(file.ptr, &filenameLength);
181     }
182     else
183         filenameLength = readlink(toStringz(selfExeLink), file.ptr, file.length-1);
184
185     auto fp = new FilePath(file[0..filenameLength]);
186     fp.native();
187     return fp;
188 }+/
/// Gets the full path to the currently running executable,
/// regardless of working directory or PATH env var or anything else.
/// Note that this is far more accurate and reliable than using args[0].
///
/// Throws: Exception if the operating system query for the path fails.
string getExec()
{
    auto file = new char[4*1024];
    size_t filenameLength = 0;
    version (Win32)
    {
        filenameLength = GetModuleFileNameA(null, file.ptr, file.length-1);

        // GetModuleFileNameA reports failure by returning 0.
        if(filenameLength == 0)
            throw new Exception("Unable to determine the executable path: GetModuleFileNameA failed");
    }
    else version(OSX)
    {
        // _NSGetExecutablePath is declared with a uint* size argument, so a
        // size_t cannot be passed by address on 64-bit targets.
        uint bufSize = cast(uint)(file.length-1);
        if(_NSGetExecutablePath(file.ptr, &bufSize) != 0)
            throw new Exception("Unable to determine the executable path: _NSGetExecutablePath failed");

        // On success the call does not report the path length; the path is
        // zero-terminated in the buffer, so measure it.
        while(filenameLength < bufSize && file[filenameLength] != '\0')
            ++filenameLength;
    }
    else
    {
        // readlink returns -1 on error; keep the result signed until checked
        // so it is never used as a slice bound.
        ptrdiff_t linkLength = readlink(toStringz(selfExeLink), file.ptr, file.length-1);
        if(linkLength < 0)
            throw new Exception("Unable to determine the executable path: readlink failed");

        filenameLength = linkLength;
    }

    //auto fp = new FilePath(file[0..filenameLength]);
    return to!string(file[0..filenameLength]);
//  return getExecFilePath().toString().trim();
}
208
/// Like getExec, but doesn't include the path:
/// returns only the file name of the running executable.
string getExecName()
{
    auto fullPath = getExec();
    return basename(fullPath);
//  Legacy FilePath-based version:
//  return getExecFilePath().file().trim();
}
215
/// Like getExec, but only returns the path (including trailing path separator):
/// the directory of the running executable with pathSep appended.
string getExecPath()
{
    auto directory = dirname(getExec());
    return directory ~ pathSep;
    // Legacy FilePath-based version:
    //return getExecFilePath().path().trim();
}
222
// Unit tests for the byte-swapping helpers (byteSwapVal16, byteSwapVal32,
// byteSwap16, byteSwap32) and for utfConvert.
// NOTE(review): unittestSemiTwistDLib and deferEnsure are project mixins that
// are not defined in this file. Presumably deferEnsure evaluates its first
// expression and checks the second one with `_` standing for the result --
// confirm against their definitions.
// The utfConvert cases cover UTF-8 input with a BOM and BOM-less input only;
// no UTF-16/UTF-32 or non-native-endian input is exercised here.
223 mixin(unittestSemiTwistDLib(q{
224     // byteSwap
225     mixin(deferEnsure!(q{ byteSwapVal16(0x1234     ) }, q{ _ == 0x3412      }));
226     mixin(deferEnsure!(q{ byteSwapVal32(0x1234_5678) }, q{ _ == 0x7856_3412 }));
227
228     mixin(deferEnsure!(q{ byteSwap16(cast(immutable(ushort)[])[0x1234, 0x5678, 0x9ABC, 0xDEF0]) }, q{ _ == cast(ushort[])[0x3412, 0x7856, 0xBC9A, 0xF0DE] }));
229     mixin(deferEnsure!(q{ byteSwap32(cast(immutable(uint)[]  )[0x1234____5678, 0x9ABC____DEF0]) }, q{ _ == cast(uint[]  )[0x7856_3412, 0xF0DE_BC9A]       }));
230    
231     // utfConvert
232     mixin(deferEnsure!(q{ utfConvert!string(cast(string)bomCodeOf(semitwist.util.text.BOM.UTF8)~("AB\nCD"~"\r"~"\nEF")) }, q{ _== ("AB\nCD"~"\r"~"\nEF") }));
233     mixin(deferEnsure!(q{ utfConvert!string ("ABCDEF") }, q{ _== ("ABCDEF" ) }));
234     mixin(deferEnsure!(q{ utfConvert!dstring("ABCDEF") }, q{ _== ("ABCDEF"d) }));
235     //TODO: Check into the weird disappearing \r:
236     //mixin(traceVal!(q{ ("AB\nCD"~"\r"~"\nEF").escapeDDQS() }));
237     //mixin(traceVal!(q{ ("AB\nCD"~"\r"~"\nEF").length }));
238     //mixin(traceVal!(q{ utfConvert!string(cast(string)bomCodeOf(semitwist.util.text.BOM.UTF8)~("AB\nCD"~"\r"~"\nEF")).escapeDDQS() }));
239 }));
Note: See TracBrowser for help on using the browser.