| 1 |
/** |
|---|
| 2 |
This module allows the parsing of CSV files and otherwise similar delimited |
|---|
| 3 |
files using alternative delimiters on-the-fly, with O(1) memory usage if the |
|---|
| 4 |
files are processed as they are read from disk. It attempts to handle issues |
|---|
| 5 |
like newlines and delimiters in quoted fields and escaped quotes correctly. |
|---|
| 6 |
|
|---|
| 7 |
There are two recognized ways of escaping a quote in this library: The |
|---|
| 8 |
backslash method (\") and the double-quote ("") method. According to Wikipedia |
|---|
| 9 |
the double-quote method is teh standard. On the other hand, in my observations |
|---|
| 10 |
the backslash method occurs more often in practice. |
|---|
| 11 |
|
|---|
| 12 |
References: |
|---|
| 13 |
|
|---|
| 14 |
http://en.wikipedia.org/wiki/Comma-separated_values |
|---|
| 15 |
|
|---|
| 16 |
Copyright (C) 2011 David Simcha |
|---|
| 17 |
|
|---|
| 18 |
License: |
|---|
| 19 |
|
|---|
| 20 |
Boost Software License - Version 1.0 - August 17th, 2003 |
|---|
| 21 |
|
|---|
| 22 |
Permission is hereby granted, free of charge, to any person or organization |
|---|
| 23 |
obtaining a copy of the software and accompanying documentation covered by |
|---|
| 24 |
this license (the "Software") to use, reproduce, display, distribute, |
|---|
| 25 |
execute, and transmit the Software, and to prepare derivative works of the |
|---|
| 26 |
Software, and to permit third-parties to whom the Software is furnished to |
|---|
| 27 |
do so, all subject to the following: |
|---|
| 28 |
|
|---|
| 29 |
The copyright notices in the Software and this entire statement, including |
|---|
| 30 |
the above license grant, this restriction and the following disclaimer, |
|---|
| 31 |
must be included in all copies of the Software, in whole or in part, and |
|---|
| 32 |
all derivative works of the Software, unless such copies or derivative |
|---|
| 33 |
works are solely in the form of machine-executable object code generated by |
|---|
| 34 |
a source language processor. |
|---|
| 35 |
|
|---|
| 36 |
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|---|
| 37 |
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|---|
| 38 |
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT |
|---|
| 39 |
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE |
|---|
| 40 |
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, |
|---|
| 41 |
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
|---|
| 42 |
DEALINGS IN THE SOFTWARE. |
|---|
| 43 |
*/ |
|---|
| 44 |
module csvrange; |
|---|
| 45 |
|
|---|
| 46 |
import std.stdio, std.array, std.range, std.exception, std.algorithm, std.conv, |
|---|
| 47 |
std.traits, std.string; |
|---|
| 48 |
|
|---|
| 49 |
// Gives a range that may have value semantics reference semantics. |
|---|
| 50 |
private final class RefRange(R) |
|---|
| 51 |
{ |
|---|
| 52 |
R _range; |
|---|
| 53 |
|
|---|
| 54 |
this(R range) |
|---|
| 55 |
{ |
|---|
| 56 |
this._range = range; |
|---|
| 57 |
} |
|---|
| 58 |
|
|---|
| 59 |
bool empty() @property |
|---|
| 60 |
{ |
|---|
| 61 |
return _range.empty; |
|---|
| 62 |
} |
|---|
| 63 |
|
|---|
| 64 |
void popFront() |
|---|
| 65 |
{ |
|---|
| 66 |
_range.popFront(); |
|---|
| 67 |
} |
|---|
| 68 |
|
|---|
| 69 |
ElementType!R front() @property |
|---|
| 70 |
{ |
|---|
| 71 |
return _range.front; |
|---|
| 72 |
} |
|---|
| 73 |
} |
|---|
| 74 |
|
|---|
| 75 |
/** |
|---|
| 76 |
Tests whether a type is a ranges of characters. |
|---|
| 77 |
*/ |
|---|
| 78 |
template isCharRange(R) |
|---|
| 79 |
{ |
|---|
| 80 |
enum bool isCharRange = isInputRange!R && is(ElementType!R : dchar); |
|---|
| 81 |
} |
|---|
| 82 |
|
|---|
| 83 |
/** |
|---|
| 84 |
Tests whether a range is a range of ranges of character ranges. |
|---|
| 85 |
*/ |
|---|
| 86 |
template isStringMatrixLike(R) |
|---|
| 87 |
{ |
|---|
| 88 |
enum bool isStringMatrixLike = isInputRange!R |
|---|
| 89 |
&& isInputRange!(ElementType!R) |
|---|
| 90 |
&& isCharRange!(ElementType!(ElementType!R)); |
|---|
| 91 |
} |
|---|
| 92 |
|
|---|
| 93 |
/** |
|---|
| 94 |
This class is an input range that iterates over the columns of a single line of |
|---|
| 95 |
a CSV file. It is meant to be instantiated only by the CsvFile object, |
|---|
| 96 |
and may be recycled when popFront() is called on the CsvFile object. Also, |
|---|
| 97 |
when popFront() is called on this object, the buffer is recycled. For |
|---|
| 98 |
usage details see csvFile(). |
|---|
| 99 |
*/ |
|---|
| 100 |
final class CsvLine(R) if(isCharRange!R) |
|---|
| 101 |
{ |
|---|
| 102 |
private typeof(appender!(char[])())_front; |
|---|
| 103 |
private R _rest; |
|---|
| 104 |
private dchar _delim; |
|---|
| 105 |
private bool _nextEmpty; |
|---|
| 106 |
private bool _empty; |
|---|
| 107 |
|
|---|
| 108 |
private this(R range, dchar delim) |
|---|
| 109 |
{ |
|---|
| 110 |
this._rest = range; |
|---|
| 111 |
this._delim = delim; |
|---|
| 112 |
_front = appender!(char[])(); |
|---|
| 113 |
prime(); |
|---|
| 114 |
} |
|---|
| 115 |
|
|---|
| 116 |
private void reset() |
|---|
| 117 |
{ |
|---|
| 118 |
_nextEmpty = false; |
|---|
| 119 |
_empty = false; |
|---|
| 120 |
prime(); |
|---|
| 121 |
} |
|---|
| 122 |
|
|---|
| 123 |
@property char[] front() |
|---|
| 124 |
{ |
|---|
| 125 |
return _front.data; |
|---|
| 126 |
} |
|---|
| 127 |
|
|---|
| 128 |
@property bool empty() |
|---|
| 129 |
{ |
|---|
| 130 |
return _empty; |
|---|
| 131 |
} |
|---|
| 132 |
|
|---|
| 133 |
void popFront() |
|---|
| 134 |
{ |
|---|
| 135 |
if(_nextEmpty) |
|---|
| 136 |
{ |
|---|
| 137 |
_empty = true; |
|---|
| 138 |
} |
|---|
| 139 |
else |
|---|
| 140 |
{ |
|---|
| 141 |
prime(); |
|---|
| 142 |
} |
|---|
| 143 |
} |
|---|
| 144 |
|
|---|
| 145 |
void prime() |
|---|
| 146 |
{ |
|---|
| 147 |
bool inQuotes = false; |
|---|
| 148 |
bool backslashEscape = false; |
|---|
| 149 |
bool firstChar = true; |
|---|
| 150 |
_front.shrinkTo(0); |
|---|
| 151 |
|
|---|
| 152 |
while(true) |
|---|
| 153 |
{ |
|---|
| 154 |
scope(exit) firstChar = false; |
|---|
| 155 |
|
|---|
| 156 |
if(_rest.empty) |
|---|
| 157 |
{ |
|---|
| 158 |
_nextEmpty = true; |
|---|
| 159 |
return; |
|---|
| 160 |
} |
|---|
| 161 |
|
|---|
| 162 |
dchar cur = _rest.front; |
|---|
| 163 |
if(cur == '"' && !backslashEscape) |
|---|
| 164 |
{ |
|---|
| 165 |
// To allow quotes in quoted fields, the CSV spec allows them |
|---|
| 166 |
// to be escaped with a second quote. |
|---|
| 167 |
if(inQuotes) |
|---|
| 168 |
{ |
|---|
| 169 |
// Handle double quoting. |
|---|
| 170 |
_rest.popFront(); |
|---|
| 171 |
if(_rest.empty) |
|---|
| 172 |
{ |
|---|
| 173 |
_nextEmpty = true; |
|---|
| 174 |
return; |
|---|
| 175 |
} |
|---|
| 176 |
|
|---|
| 177 |
if(_rest.front == '"') |
|---|
| 178 |
{ |
|---|
| 179 |
_front.put('"'); |
|---|
| 180 |
_rest.popFront(); |
|---|
| 181 |
continue; |
|---|
| 182 |
} |
|---|
| 183 |
|
|---|
| 184 |
inQuotes = false; |
|---|
| 185 |
|
|---|
| 186 |
if(_rest.front == _delim) |
|---|
| 187 |
{ |
|---|
| 188 |
_rest.popFront(); |
|---|
| 189 |
return; |
|---|
| 190 |
} |
|---|
| 191 |
else if(_rest.front == '\r' || _rest.front == '\n') |
|---|
| 192 |
{ |
|---|
| 193 |
continue; |
|---|
| 194 |
} |
|---|
| 195 |
else |
|---|
| 196 |
{ |
|---|
| 197 |
enforce(0, "Invalid single quote within quoted field."); |
|---|
| 198 |
} |
|---|
| 199 |
} |
|---|
| 200 |
else |
|---|
| 201 |
{ |
|---|
| 202 |
_rest.popFront(); |
|---|
| 203 |
if(_rest.empty || _rest.front == '\r' || _rest.front == '\n') |
|---|
| 204 |
{ |
|---|
| 205 |
// Assume the quote was meant literally. |
|---|
| 206 |
_front.put('"'); |
|---|
| 207 |
if(_rest.empty) |
|---|
| 208 |
{ |
|---|
| 209 |
_nextEmpty = true; |
|---|
| 210 |
return; |
|---|
| 211 |
} |
|---|
| 212 |
else |
|---|
| 213 |
{ |
|---|
| 214 |
continue; |
|---|
| 215 |
} |
|---|
| 216 |
} |
|---|
| 217 |
else if(_rest.front == '"') |
|---|
| 218 |
{ |
|---|
| 219 |
// Assume the first quote was an escape even though |
|---|
| 220 |
// we're not inside quotes. |
|---|
| 221 |
_front.put('"'); |
|---|
| 222 |
_rest.popFront(); |
|---|
| 223 |
continue; |
|---|
| 224 |
} |
|---|
| 225 |
else if(firstChar) |
|---|
| 226 |
{ |
|---|
| 227 |
inQuotes = true; |
|---|
| 228 |
continue; |
|---|
| 229 |
} |
|---|
| 230 |
else |
|---|
| 231 |
{ |
|---|
| 232 |
// Again, assume it's a literal b/c it's not at |
|---|
| 233 |
// the beginning of a field. |
|---|
| 234 |
_front.put('"'); |
|---|
| 235 |
_rest.popFront(); |
|---|
| 236 |
continue; |
|---|
| 237 |
} |
|---|
| 238 |
} |
|---|
| 239 |
} |
|---|
| 240 |
|
|---|
| 241 |
if(backslashEscape) |
|---|
| 242 |
{ |
|---|
| 243 |
if(cur == '"' || cur == '\\') |
|---|
| 244 |
{ |
|---|
| 245 |
// Then nust turn backslashEscape off and let the normal |
|---|
| 246 |
// processing handle it. |
|---|
| 247 |
backslashEscape = false; |
|---|
| 248 |
} |
|---|
| 249 |
else if(cur == 'n') |
|---|
| 250 |
{ |
|---|
| 251 |
// Then the file was output with newlines written as '\n'. |
|---|
| 252 |
// Substitute a newline. |
|---|
| 253 |
backslashEscape = false; |
|---|
| 254 |
cur = '\n'; |
|---|
| 255 |
// Let normal flow take over. |
|---|
| 256 |
} |
|---|
| 257 |
else |
|---|
| 258 |
{ |
|---|
| 259 |
// Assume it's intended to be taken literally. |
|---|
| 260 |
_front.put('\\'); |
|---|
| 261 |
// Let normal flow take over. |
|---|
| 262 |
} |
|---|
| 263 |
} |
|---|
| 264 |
|
|---|
| 265 |
if(cur == '\\') |
|---|
| 266 |
{ |
|---|
| 267 |
backslashEscape = true; |
|---|
| 268 |
_rest.popFront(); |
|---|
| 269 |
continue; |
|---|
| 270 |
} |
|---|
| 271 |
|
|---|
| 272 |
if(cur == '\n' && !inQuotes) |
|---|
| 273 |
{ |
|---|
| 274 |
_rest.popFront(); |
|---|
| 275 |
_nextEmpty = true; |
|---|
| 276 |
return; |
|---|
| 277 |
} |
|---|
| 278 |
|
|---|
| 279 |
if(cur == '\r' && !inQuotes) |
|---|
| 280 |
{ |
|---|
| 281 |
// Get rid of the \n, too, if there is one, then return. |
|---|
| 282 |
_rest.popFront(); |
|---|
| 283 |
if(!_rest.empty && _rest.front == '\n') |
|---|
| 284 |
{ |
|---|
| 285 |
_rest.popFront(); |
|---|
| 286 |
} |
|---|
| 287 |
|
|---|
| 288 |
_nextEmpty = true; |
|---|
| 289 |
return; |
|---|
| 290 |
} |
|---|
| 291 |
|
|---|
| 292 |
if(cur == _delim && !inQuotes) |
|---|
| 293 |
{ |
|---|
| 294 |
_rest.popFront(); |
|---|
| 295 |
return; |
|---|
| 296 |
} |
|---|
| 297 |
|
|---|
| 298 |
// Finally the typical case: Just append the thing. |
|---|
| 299 |
_front.put(cur); |
|---|
| 300 |
_rest.popFront(); |
|---|
| 301 |
} |
|---|
| 302 |
} |
|---|
| 303 |
} |
|---|
| 304 |
|
|---|
| 305 |
/// |
|---|
| 306 |
struct CsvFile(R) |
|---|
| 307 |
{ |
|---|
| 308 |
private RefRange!R _rest; |
|---|
| 309 |
private bool _empty; |
|---|
| 310 |
|
|---|
| 311 |
alias typeof(_rest) R2; |
|---|
| 312 |
alias CsvLine!(R2) Line; |
|---|
| 313 |
|
|---|
| 314 |
private dchar _delim; |
|---|
| 315 |
Line _front; |
|---|
| 316 |
|
|---|
| 317 |
this(R range, dchar delim) |
|---|
| 318 |
{ |
|---|
| 319 |
this._rest = new RefRange!R(range); |
|---|
| 320 |
this._delim = delim; |
|---|
| 321 |
_front = new Line(_rest, _delim); |
|---|
| 322 |
} |
|---|
| 323 |
|
|---|
| 324 |
bool empty() @property |
|---|
| 325 |
{ |
|---|
| 326 |
return _empty; |
|---|
| 327 |
} |
|---|
| 328 |
|
|---|
| 329 |
void popFront() |
|---|
| 330 |
{ |
|---|
| 331 |
if(_rest.empty) |
|---|
| 332 |
{ |
|---|
| 333 |
_empty = true; |
|---|
| 334 |
return; |
|---|
| 335 |
} |
|---|
| 336 |
|
|---|
| 337 |
// Have to iterate over _front to figure out where the thing ends. |
|---|
| 338 |
while(!_front.empty) |
|---|
| 339 |
{ |
|---|
| 340 |
_front.popFront(); |
|---|
| 341 |
} |
|---|
| 342 |
_front.reset; |
|---|
| 343 |
} |
|---|
| 344 |
|
|---|
| 345 |
Line front() @property |
|---|
| 346 |
{ |
|---|
| 347 |
return _front; |
|---|
| 348 |
} |
|---|
| 349 |
} |
|---|
| 350 |
|
|---|
| 351 |
/** |
|---|
| 352 |
Returns a struct that iterates over the rows of a CSV file (or more generally a |
|---|
| 353 |
similar file with any delimiter character). It is an input range of input |
|---|
| 354 |
ranges. Each call to front() produces a CsvLine object that can be used to |
|---|
| 355 |
iterate over the columns of the current row. Note that the CsvLine object is |
|---|
| 356 |
recycled with every call to popFront(). |
|---|
| 357 |
|
|---|
| 358 |
Examples: |
|---|
| 359 |
--- |
|---|
| 360 |
// charRange must be a range of characters representing a CSV file. |
|---|
| 361 |
auto charRange = getCharRange(); |
|---|
| 362 |
auto csvIter = csvFile(charRange, ','); |
|---|
| 363 |
|
|---|
| 364 |
// Iterate over the lines of the CSV file, excluding line breaks embedded in |
|---|
| 365 |
// quotes. |
|---|
| 366 |
string[] rowArrays; |
|---|
| 367 |
foreach(row; csvIter) |
|---|
| 368 |
{ |
|---|
| 369 |
// Convert the row to an array of strings. |
|---|
| 370 |
rowArrays ~= array( |
|---|
| 371 |
map!"a.idup"( // Necessary because CsvLine recycles buffers. |
|---|
| 372 |
row |
|---|
| 373 |
) |
|---|
| 374 |
); |
|---|
| 375 |
|
|---|
| 376 |
// row is now empty. The next call to csvIter.popFront() will recycle |
|---|
| 377 |
// the object. |
|---|
| 378 |
} |
|---|
| 379 |
--- |
|---|
| 380 |
*/ |
|---|
| 381 |
CsvFile!(R) csvFile(R)(R range, dchar delim) |
|---|
| 382 |
{ |
|---|
| 383 |
return typeof(return)(range, delim); |
|---|
| 384 |
} |
|---|
| 385 |
|
|---|
| 386 |
/// |
|---|
| 387 |
final class CsvStructRange(S, R) |
|---|
| 388 |
{ |
|---|
| 389 |
private: |
|---|
| 390 |
S _front; |
|---|
| 391 |
R _csvRange; |
|---|
| 392 |
size_t[] indices; |
|---|
| 393 |
Malformed _malformed; |
|---|
| 394 |
|
|---|
| 395 |
public: |
|---|
| 396 |
this(R csvRange, string[] colHeaders, Malformed malformed) |
|---|
| 397 |
{ |
|---|
| 398 |
enforce(colHeaders.length <= S.tupleof.length, text( |
|---|
| 399 |
"Can't have ", colHeaders.length, " fields in a ", |
|---|
| 400 |
S.tupleof.length, " field struct.")); |
|---|
| 401 |
|
|---|
| 402 |
this._malformed = malformed; |
|---|
| 403 |
indices.length = colHeaders.length; |
|---|
| 404 |
size_t[string] colToIndex; |
|---|
| 405 |
foreach(i, h; colHeaders) |
|---|
| 406 |
{ |
|---|
| 407 |
colToIndex[h] = size_t.max; |
|---|
| 408 |
} |
|---|
| 409 |
|
|---|
| 410 |
size_t colIndex; |
|---|
| 411 |
foreach(col; csvRange.front) |
|---|
| 412 |
{ |
|---|
| 413 |
auto ptr = col in colToIndex; |
|---|
| 414 |
if(ptr) |
|---|
| 415 |
{ |
|---|
| 416 |
*ptr = colIndex; |
|---|
| 417 |
} |
|---|
| 418 |
|
|---|
| 419 |
colIndex++; |
|---|
| 420 |
} |
|---|
| 421 |
|
|---|
| 422 |
indices.length = colHeaders.length; |
|---|
| 423 |
foreach(i, h; colHeaders) |
|---|
| 424 |
{ |
|---|
| 425 |
immutable index = colToIndex[h]; |
|---|
| 426 |
enforce(index < size_t.max, "Header not found: " ~ h); |
|---|
| 427 |
indices[i] = index; |
|---|
| 428 |
} |
|---|
| 429 |
|
|---|
| 430 |
csvRange.popFront(); |
|---|
| 431 |
this._csvRange = csvRange; |
|---|
| 432 |
prime(); |
|---|
| 433 |
} |
|---|
| 434 |
|
|---|
| 435 |
void prime() |
|---|
| 436 |
{ |
|---|
| 437 |
while(true) |
|---|
| 438 |
{ |
|---|
| 439 |
try |
|---|
| 440 |
{ |
|---|
| 441 |
primeImpl(); |
|---|
| 442 |
return; |
|---|
| 443 |
} |
|---|
| 444 |
catch(Exception e) |
|---|
| 445 |
{ |
|---|
| 446 |
if(_malformed == Malformed.throwException) |
|---|
| 447 |
{ |
|---|
| 448 |
throw e; |
|---|
| 449 |
} |
|---|
| 450 |
else |
|---|
| 451 |
{ |
|---|
| 452 |
_csvRange.popFront(); |
|---|
| 453 |
if(empty) |
|---|
| 454 |
{ |
|---|
| 455 |
return; |
|---|
| 456 |
} |
|---|
| 457 |
else |
|---|
| 458 |
{ |
|---|
| 459 |
continue; |
|---|
| 460 |
} |
|---|
| 461 |
} |
|---|
| 462 |
} |
|---|
| 463 |
} |
|---|
| 464 |
} |
|---|
| 465 |
|
|---|
| 466 |
|
|---|
| 467 |
void primeImpl() |
|---|
| 468 |
{ |
|---|
| 469 |
size_t colIndex; |
|---|
| 470 |
_front = S.init; |
|---|
| 471 |
|
|---|
| 472 |
foreach(colData; _csvRange.front) |
|---|
| 473 |
{ |
|---|
| 474 |
scope(exit) colIndex++; |
|---|
| 475 |
|
|---|
| 476 |
foreach(ti, ToType; typeof(_front.tupleof)) |
|---|
| 477 |
{ |
|---|
| 478 |
|
|---|
| 479 |
if(ti >= indices.length) |
|---|
| 480 |
{ |
|---|
| 481 |
continue; |
|---|
| 482 |
} |
|---|
| 483 |
|
|---|
| 484 |
if(indices[ti] == colIndex) |
|---|
| 485 |
{ |
|---|
| 486 |
static if(isNumeric!(ToType)) |
|---|
| 487 |
{ |
|---|
| 488 |
colData = colData.strip(); |
|---|
| 489 |
} |
|---|
| 490 |
else static if |
|---|
| 491 |
(is(ToType == char[]) || is(ToType == const(char)[]) |
|---|
| 492 |
|| is(ToType == const(char[]))) |
|---|
| 493 |
{ |
|---|
| 494 |
// colData gets overwritten and to!(char[])(char[]) |
|---|
| 495 |
// will not copy it. |
|---|
| 496 |
colData = colData.dup; |
|---|
| 497 |
} |
|---|
| 498 |
|
|---|
| 499 |
_front.tupleof[ti] = to!ToType(colData); |
|---|
| 500 |
} |
|---|
| 501 |
} |
|---|
| 502 |
} |
|---|
| 503 |
} |
|---|
| 504 |
|
|---|
| 505 |
void popFront() |
|---|
| 506 |
{ |
|---|
| 507 |
_csvRange.popFront(); |
|---|
| 508 |
if(!empty) |
|---|
| 509 |
{ |
|---|
| 510 |
prime(); |
|---|
| 511 |
} |
|---|
| 512 |
} |
|---|
| 513 |
|
|---|
| 514 |
@property S front() |
|---|
| 515 |
{ |
|---|
| 516 |
return _front; |
|---|
| 517 |
} |
|---|
| 518 |
|
|---|
| 519 |
@property bool empty() |
|---|
| 520 |
{ |
|---|
| 521 |
return _csvRange.empty; |
|---|
| 522 |
} |
|---|
| 523 |
} |
|---|
| 524 |
|
|---|
| 525 |
/** |
|---|
| 526 |
Determines whether CsvStructRange will ignore malformed lines or throw an |
|---|
| 527 |
exception. |
|---|
| 528 |
*/ |
|---|
| 529 |
enum Malformed |
|---|
| 530 |
{ |
|---|
| 531 |
/// |
|---|
| 532 |
ignore, |
|---|
| 533 |
|
|---|
| 534 |
/// |
|---|
| 535 |
throwException |
|---|
| 536 |
} |
|---|
| 537 |
|
|---|
| 538 |
|
|---|
| 539 |
/** |
|---|
| 540 |
Given a struct type S, a range of characters representing a CSV file, and |
|---|
| 541 |
an array of strings representing relevant column headers, read the CSV |
|---|
| 542 |
file into an array of structs, one for each row. The order of fields in |
|---|
| 543 |
colHeaders must correspond to the order of the fields in the struct. |
|---|
| 544 |
However, it is acceptable for colHeaders to include only a subset of the |
|---|
| 545 |
columns in the CSV file and to only contain information for the first |
|---|
| 546 |
colHeaders.length elements of the struct. |
|---|
| 547 |
|
|---|
| 548 |
If a specified column header is not found, an exception is thrown. |
|---|
| 549 |
|
|---|
| 550 |
All type conversions are done automatically, and if a type is to be converted |
|---|
| 551 |
to a numeric type (an integer or floating point number), leading and trailing |
|---|
| 552 |
whitespace are removed from the cell before this is attempted. The Malformed |
|---|
| 553 |
enum controls whether an exception is thrown when a type conversion fails, |
|---|
| 554 |
or whether the line is simply ignored. The default is to throw an exception. |
|---|
| 555 |
|
|---|
| 556 |
Examples: |
|---|
| 557 |
--- |
|---|
| 558 |
struct Person |
|---|
| 559 |
{ |
|---|
| 560 |
uint programmingSkill; |
|---|
| 561 |
float sleepHours; |
|---|
| 562 |
string favoriteLanguage; |
|---|
| 563 |
float reserved; |
|---|
| 564 |
} |
|---|
| 565 |
|
|---|
| 566 |
auto csvRange = csvFile(getCharRange(), '\t'); |
|---|
| 567 |
auto personRange = csvStructRange!Person(csvRange, |
|---|
| 568 |
["Programming Skill", "Sleep Hours", "Favorite Language"] |
|---|
| 569 |
); |
|---|
| 570 |
|
|---|
| 571 |
foreach(person; personRange) |
|---|
| 572 |
{ |
|---|
| 573 |
if(person.sleepHours > 7 && person.programmingSkill > 8) |
|---|
| 574 |
{ |
|---|
| 575 |
assert(person.favoriteLanguage == "D"); |
|---|
| 576 |
} |
|---|
| 577 |
|
|---|
| 578 |
assert(reserved == float.init); // No header for it, so it's not populated. |
|---|
| 579 |
} |
|---|
| 580 |
--- |
|---|
| 581 |
*/ |
|---|
| 582 |
CsvStructRange!(S, R) csvStructRange(S, R) |
|---|
| 583 |
(R csvRange, string[] colHeaders, Malformed malformed = Malformed.throwException) |
|---|
| 584 |
if(is(S == struct) && isStringMatrixLike!R) |
|---|
| 585 |
{ |
|---|
| 586 |
return new CsvStructRange!(S, R)(csvRange, colHeaders, malformed); |
|---|
| 587 |
} |
|---|
| 588 |
|
|---|
| 589 |
struct S |
|---|
| 590 |
{ |
|---|
| 591 |
uint a; |
|---|
| 592 |
string c; |
|---|
| 593 |
float b; |
|---|
| 594 |
uint dummy; |
|---|
| 595 |
|
|---|
| 596 |
string toString() |
|---|
| 597 |
{ |
|---|
| 598 |
return text(a, '\t', b, '\t', c); |
|---|
| 599 |
} |
|---|
| 600 |
|
|---|
| 601 |
bool opEquals(const ref typeof(this) rhs) const |
|---|
| 602 |
{ |
|---|
| 603 |
return a == rhs.a && b == rhs.b && c == rhs.c; |
|---|
| 604 |
} |
|---|
| 605 |
} |
|---|
| 606 |
|
|---|
| 607 |
unittest |
|---|
| 608 |
{ |
|---|
| 609 |
auto str1 = |
|---|
| 610 |
"a\tb\tc |
|---|
| 611 |
1\t2.5\t\"foo \t stuff \n \"\" bar \"\"\" |
|---|
| 612 |
2\t5\tbar |
|---|
| 613 |
3\t7.5\tbaz |
|---|
| 614 |
"; |
|---|
| 615 |
|
|---|
| 616 |
// Same as str1 but put crazy quoting stuff in middle and use backslash escape |
|---|
| 617 |
// for soem quotes. |
|---|
| 618 |
auto str2 = |
|---|
| 619 |
"a\tc\tb |
|---|
| 620 |
1\t\"foo \t stuff \n \\\" bar \"\"\"\t2.5 |
|---|
| 621 |
2\tbar\t5 |
|---|
| 622 |
3\tbaz\t7.5 |
|---|
| 623 |
"; |
|---|
| 624 |
|
|---|
| 625 |
alias typeof(csvFile(str1, '\t')) Foo; |
|---|
| 626 |
auto res1 = array( |
|---|
| 627 |
csvStructRange!S(csvFile(str1, '\t'), ["a", "c", "b"]) |
|---|
| 628 |
); |
|---|
| 629 |
|
|---|
| 630 |
assert(res1[0] == S(1, "foo \t stuff \n \" bar \"", 2.5)); |
|---|
| 631 |
assert(res1[1] == S(2, "bar", 5)); |
|---|
| 632 |
assert(res1[2] == S(3, "baz", 7.5)); |
|---|
| 633 |
|
|---|
| 634 |
auto res2 = array( |
|---|
| 635 |
csvStructRange!S(csvFile(str2, '\t'), ["a", "c", "b"]) |
|---|
| 636 |
); |
|---|
| 637 |
|
|---|
| 638 |
assert(res1 == res2); |
|---|
| 639 |
} |
|---|
| 640 |
|
|---|
| 641 |
void main(){} |
|---|