| 1 |
// SemiTwist Library |
|---|
| 2 |
// Written in the D programming language. |
|---|
| 3 |
|
|---|
| 4 |
module semitwist.util.io; |
|---|
| 5 |
|
|---|
| 6 |
import std.traits; |
|---|
| 7 |
import std.path; |
|---|
| 8 |
import std.conv; |
|---|
| 9 |
import std.file; |
|---|
| 10 |
import std.stdio; |
|---|
| 11 |
import std.stream; |
|---|
| 12 |
import std.string; |
|---|
| 13 |
import std.system; |
|---|
| 14 |
|
|---|
| 15 |
import semitwist.util.all; |
|---|
| 16 |
|
|---|
| 17 |
version(Win32) |
|---|
| 18 |
import std.c.windows.windows; |
|---|
| 19 |
else version(OSX) |
|---|
| 20 |
private extern(C) int _NSGetExecutablePath(char* buf, uint* bufsize); |
|---|
| 21 |
else |
|---|
| 22 |
import std.c.linux.linux; |
|---|
| 23 |
|
|---|
| 24 |
/++
Loads a Unicode text file and returns its contents as a string of type TOut.

The whole file is read as raw bytes and then handed to utfConvert, which is
responsible for BOM handling and for producing the requested string type.

Params:
	filename = Path of the file to read; any string type is accepted.

Returns: The file's contents, converted to TOut by utfConvert.

Examples:
	string  utf8  = readUTFFile!string ( "ANY_unicode_file.txt" );
	wstring utf16 = readUTFFile!wstring( "ANY_unicode_file.txt" );
	dstring utf32 = readUTFFile!dstring( "ANY_unicode_file.txt" );
+/
TOut readUTFFile(TOut, TFilename)(TFilename filename)
	if(isSomeString!TOut && isSomeString!TFilename)
{
	// Reinterpret the freshly read buffer as immutable bytes for utfConvert.
	immutable(ubyte)[] rawBytes = cast(immutable(ubyte)[])read(filename);
	return utfConvert!TOut(rawBytes);
}
|---|
| 40 |
|
|---|
| 41 |
/++
Converts any type of Unicode/UTF string with or without a BOM (UTF-8, UTF-16,
UTF-32, big or little endian), strips the BOM (if it exists), and automatically
converts it to native endianness and whatever string type is specified in TOut.

If there is no BOM, then UTF-8 is assumed.

The input is always examined as raw bytes, so the element type of `data`
only affects how the caller happens to hold the bytes, not how they are decoded.

Params:
	data = The encoded text, optionally starting with a BOM.

Returns: The decoded text as a TOut.

Throws: Exception if the detected BOM is not an 8-, 16- or 32-bit encoding.

Examples:
	string  utf8  = utfConvert!string ( anyUTFDataWithBOM );
	wstring utf16 = utfConvert!wstring( anyUTFDataWithBOM );
	dstring utf32 = utfConvert!dstring( anyUTFDataWithBOM );
+/
TOut utfConvert(TOut, TInChar)(immutable(TInChar)[] data)
	if( isSomeString!TOut && (isSomeString!(immutable(TInChar)[]) || is(TInChar==ubyte)) )
{
	// A BOM is a byte sequence, so detect and strip it on a byte view of the
	// input. Slicing `data` directly would use TInChar units and cut the wrong
	// amount whenever TInChar is wider than one byte.
	auto bytes = cast(immutable(ubyte)[])data;

	auto bom = bomOf(bytes);
	auto bomCode = bomCodeOf(bom);

	// Strip BOM if it exists
	if(bytes.length >= bomCode.length && bytes[0..bomCode.length] == bomCode)
		bytes = bytes[bomCode.length..$];

	// Reinterpret the payload with the code-unit width the BOM indicates,
	// byte-swapping first when the data is not in native endianness.
	// byteSwap16/byteSwap32 return a new array, so their result must be used.
	if(is8Bit(bom))
		return to!TOut(cast(string)bytes);
	else if(is16Bit(bom))
	{
		auto text = cast(wstring)bytes;
		if(isNonNativeEndian(bom))
			text = byteSwap16(text);
		return to!TOut(text);
	}
	else if(is32Bit(bom))
	{
		auto text = cast(dstring)bytes;
		if(isNonNativeEndian(bom))
			text = byteSwap32(text);
		return to!TOut(text);
	}
	else
		throw new Exception("Unhandled BOM type '%s'".format(bom));
}
|---|
| 84 |
|
|---|
| 85 |
/// Returns `value` with its two bytes exchanged (0x1234 becomes 0x3412).
ushort byteSwapVal16(ushort value)
{
	immutable lowMovedUp   = (value & 0x00FF) << 8;
	immutable highMovedDown = value >> 8;
	return cast(ushort)(lowMovedUp | highMovedDown);
}
|---|
| 89 |
|
|---|
| 90 |
/// Returns `value` with its four bytes in reverse order
/// (0x1234_5678 becomes 0x7856_3412).
uint byteSwapVal32(uint value)
{
	// Isolate each byte, numbered from least to most significant.
	immutable uint byte0 =  value        & 0xFF;
	immutable uint byte1 = (value >>  8) & 0xFF;
	immutable uint byte2 = (value >> 16) & 0xFF;
	immutable uint byte3 =  value >> 24;

	// Reassemble them in the opposite order.
	return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
|---|
| 98 |
|
|---|
| 99 |
/// Reverses the byte order of a single ushort or uint by dispatching to
/// byteSwapVal16 or byteSwapVal32 according to T.
private T byteSwap(T)(T value) if(is(T==ushort) || is(T==uint))
{
	static if(is(T==ushort))
		return byteSwapVal16(value);
	else static if(is(T==uint))
		return byteSwapVal32(value);
	else
		// Guard in case the template constraint is ever widened.
		// String concatenation in D is '~', not '+'.
		static assert(0, "T=='"~T.stringof~"' not handled");
}
|---|
| 108 |
|
|---|
| 109 |
/// Reverses the byte order of every element of `data`, overwriting the
/// elements of the array that was passed in.
void byteSwapInPlace(T)(T[] data) if(is(T==ushort) || is(T==uint))
{
	for(size_t i = 0; i < data.length; ++i)
		data[i] = byteSwap(data[i]);
}
|---|
| 114 |
|
|---|
| 115 |
/// Returns a new immutable array holding the elements of `data` with their
/// byte order reversed. `data` itself is left untouched.
private immutable(T)[] byteSwap(T)(immutable(T)[] data) if(is(T==ushort) || is(T==uint))
{
	// Swap on a private mutable copy...
	T[] swapped = data.dup;
	byteSwapInPlace(swapped);

	// ...which is safe to hand out as immutable: the copy was created here
	// and no other reference to it is kept.
	return cast(immutable(T)[])swapped;
}
|---|
| 123 |
|
|---|
| 124 |
/// Treats `data` as a sequence of 16-bit units, reverses the byte order of
/// each unit, and returns the result as a new immutable(T)[].
immutable(T)[] byteSwap16(T)(const(T)[] data)
{
	auto asUnits16 = cast(immutable(ushort)[])data;
	auto swappedUnits = byteSwap(asUnits16);
	return cast(immutable(T)[])swappedUnits;
}
|---|
| 128 |
|
|---|
| 129 |
/// Treats `data` as a sequence of 32-bit units, reverses the byte order of
/// each unit, and returns the result as a new immutable(T)[].
immutable(T)[] byteSwap32(T)(const(T)[] data)
{
	auto asUnits32 = cast(immutable(uint)[])data;
	auto swappedUnits = byteSwap(asUnits32);
	return cast(immutable(T)[])swappedUnits;
}
|---|
| 133 |
|
|---|
| 134 |
/++
Reads a zero-terminated string of type T from `reader`, one code unit at a time.

Code units are read until a zero value is encountered. The terminator is
consumed from the stream but is not part of the returned string.

Params:
	reader = The stream to read from.

Returns: The code units that preceded the terminator, as a T.
+/
T readStringz(T)(std.stream.File reader) if(isSomeString!T)
{
	Unqual!T str;
	static if(is(T==string))
		alias char TElem;
	else static if(is(T==wstring))
		alias wchar TElem;
	else static if(is(T==dstring))
		alias dchar TElem;
	else
		// The condition must be an explicit 0: a bare string literal is
		// truthy, so it would be taken as a passing condition and this
		// assert would never fire.
		static assert(0, "'"~T.stringof~"' not allowed.");

	TElem c;

	// The terminator is appended as well, then sliced off on return.
	do
	{
		reader.read(c);
		str ~= c;
	} while(c != 0);

	// No references saved, nothing can change it.
	return cast(T)(str[0..$-1]);
}
|---|
| 157 |
|
|---|
| 158 |
//TODO*: Unittest this
// Converts a value from native byte order to the byte order given by `en`.
// The input is assumed to already be in native endianness: it is returned
// unchanged when `en` matches the platform's `endian`, and byte-swapped otherwise.
T toEndian(T)(T data, Endian en) if(is(T==ushort) || is(T==uint))
{
	return (en == endian) ? data : byteSwap(data);
}
|---|
| 167 |
|
|---|
| 168 |
/// Gets the full path to the currently running executable, |
|---|
| 169 |
/// regardless of working directory or PATH env var or anything else. |
|---|
| 170 |
/// Note that this is far more accurate and reliable than using args[0]. |
|---|
| 171 |
/+FilePath getExecFilePath() |
|---|
| 172 |
{ |
|---|
| 173 |
string file = new char[4*1024]; |
|---|
| 174 |
int filenameLength; |
|---|
| 175 |
version (Win32) |
|---|
| 176 |
filenameLength = GetModuleFileNameA(null, file.ptr, file.length-1); |
|---|
| 177 |
else version(OSX) |
|---|
| 178 |
{ |
|---|
| 179 |
filenameLength = file.length-1; |
|---|
| 180 |
_NSGetExecutablePath(file.ptr, &filenameLength); |
|---|
| 181 |
} |
|---|
| 182 |
else |
|---|
| 183 |
filenameLength = readlink(toStringz(selfExeLink), file.ptr, file.length-1); |
|---|
| 184 |
|
|---|
| 185 |
auto fp = new FilePath(file[0..filenameLength]); |
|---|
| 186 |
fp.native(); |
|---|
| 187 |
return fp; |
|---|
| 188 |
}+/ |
|---|
| 189 |
/// Gets the full path to the currently running executable,
/// regardless of working directory or PATH env var or anything else.
/// Note that this is far more accurate and reliable than using args[0].
///
/// Throws: Exception if the path cannot be obtained on OSX (buffer too
/// small) or via readlink on other non-Windows systems.
string getExec()
{
	auto file = new char[4*1024];
	size_t filenameLength;
	version (Win32)
		// NOTE(review): a return of 0 (failure) yields an empty string here,
		// as before.
		filenameLength = GetModuleFileNameA(null, file.ptr, file.length-1);
	else version(OSX)
	{
		// _NSGetExecutablePath is declared with a uint* size argument, and it
		// only rewrites that size when the buffer is too small. On success
		// the size is NOT the path length, so the length has to be measured.
		uint bufSize = cast(uint)(file.length-1);
		if(_NSGetExecutablePath(file.ptr, &bufSize) != 0)
			throw new Exception("Unable to get executable path: buffer too small");

		// The path written into the buffer is NUL-terminated.
		while(filenameLength < file.length && file[filenameLength] != '\0')
			++filenameLength;
	}
	else
	{
		// readlink signals failure with a negative result; check it before
		// it is stored in an unsigned length and used as a slice bound.
		auto linkLength = readlink(toStringz(selfExeLink), file.ptr, file.length-1);
		if(linkLength < 0)
			throw new Exception("Unable to get executable path: readlink failed");
		filenameLength = cast(size_t)linkLength;
	}

	return to!string(file[0..filenameLength]);
}
|---|
| 208 |
|
|---|
| 209 |
/// Returns only the file-name portion of the path reported by getExec,
/// i.e. the running executable's name without its directory.
string getExecName()
{
	auto fullPath = getExec();
	return basename(fullPath);
}
|---|
| 215 |
|
|---|
| 216 |
/// Returns only the directory portion of the path reported by getExec,
/// with pathSep appended so the result ends in a trailing separator.
string getExecPath()
{
	auto execDir = dirname(getExec());
	return execDir ~ pathSep;
}
|---|
| 222 |
|
|---|
| 223 |
mixin(unittestSemiTwistDLib(q{
	// Single-value byte swapping
	mixin(deferEnsure!(
		q{ byteSwapVal16(0x1234 ) },
		q{ _ == 0x3412 }
	));
	mixin(deferEnsure!(
		q{ byteSwapVal32(0x1234_5678) },
		q{ _ == 0x7856_3412 }
	));

	// Array byte swapping, 16-bit and 32-bit units
	mixin(deferEnsure!(
		q{ byteSwap16(cast(immutable(ushort)[])[0x1234, 0x5678, 0x9ABC, 0xDEF0]) },
		q{ _ == cast(ushort[])[0x3412, 0x7856, 0xBC9A, 0xF0DE] }
	));
	mixin(deferEnsure!(
		q{ byteSwap32(cast(immutable(uint)[] )[0x1234____5678, 0x9ABC____DEF0]) },
		q{ _ == cast(uint[] )[0x7856_3412, 0xF0DE_BC9A] }
	));

	// utfConvert: UTF-8 input with a BOM, then BOM-less input to string and dstring
	mixin(deferEnsure!(
		q{ utfConvert!string(cast(string)bomCodeOf(semitwist.util.text.BOM.UTF8)~("AB\nCD"~"\r"~"\nEF")) },
		q{ _== ("AB\nCD"~"\r"~"\nEF") }
	));
	mixin(deferEnsure!(
		q{ utfConvert!string ("ABCDEF") },
		q{ _== ("ABCDEF" ) }
	));
	mixin(deferEnsure!(
		q{ utfConvert!dstring("ABCDEF") },
		q{ _== ("ABCDEF"d) }
	));

	//TODO: Investigate the "\r" that seems to disappear; the traces below
	//      were used to look into it:
	//mixin(traceVal!(q{ ("AB\nCD"~"\r"~"\nEF").escapeDDQS() }));
	//mixin(traceVal!(q{ ("AB\nCD"~"\r"~"\nEF").length }));
	//mixin(traceVal!(q{ utfConvert!string(cast(string)bomCodeOf(semitwist.util.text.BOM.UTF8)~("AB\nCD"~"\r"~"\nEF")).escapeDDQS() }));
}));
|---|