| 1 |
Ddoc |
|---|
| 2 |
|
|---|
| 3 |
$(COMMUNITY D Strings vs C++ Strings, |
|---|
| 4 |
|
|---|
| 5 |
|
|---|
| 6 |
Why have strings built-in to the core language of D rather than entirely in |
|---|
| 7 |
a library as in C++ Strings? What's the point? Where's the improvement? |
|---|
| 8 |
|
|---|
| 9 |
<h4>Concatenation Operator</h4> |
|---|
| 10 |
|
|---|
| 11 |
$(P C++ Strings are stuck with overloading existing operators. The |
|---|
| 12 |
obvious choice for concatenation is += and +. |
|---|
| 13 |
But someone just looking at the code will see + and think "addition". |
|---|
| 14 |
He'll have to look up the types (and types are frequently buried |
|---|
| 15 |
behind multiple typedefs) to see that it's a string type, and |
|---|
| 16 |
it's not adding strings but concatenating them. |
|---|
| 17 |
) |
|---|
| 18 |
$(P Additionally, if one has an array of floats, is $(SINGLEQUOTE +) overloaded to |
|---|
| 19 |
be the same as a vector addition, or an array concatenation? |
|---|
| 20 |
) |
|---|
| 21 |
$(P In D, these problems are avoided by introducing a new binary |
|---|
| 22 |
operator ~ as the concatenation operator. It works with |
|---|
| 23 |
arrays (of which strings are a subset). ~= is the corresponding |
|---|
| 24 |
append operator. ~ on arrays of floats would concatenate them, |
|---|
| 25 |
+ would imply a vector add. Adding a new operator allows |
|---|
| 26 |
for orthogonality and consistency in the treatment of arrays. |
|---|
| 27 |
(In D, strings are simply arrays of characters, not a special |
|---|
| 28 |
type.) |
|---|
| 29 |
) |
|---|
| 30 |
|
|---|
| 31 |
<h4>Interoperability With C String Syntax</h4> |
|---|
| 32 |
|
|---|
| 33 |
$(P Overloading of operators only really works if one of the operands |
|---|
| 34 |
is overloadable. So the C++ string class cannot consistently |
|---|
| 35 |
handle arbitrary expressions containing strings. Consider: |
|---|
| 36 |
) |
|---|
| 37 |
|
|---|
| 38 |
$(CCODE |
|---|
| 39 |
const char abc[5] = "world"; |
|---|
| 40 |
string str = "hello" + abc; |
|---|
| 41 |
) |
|---|
| 42 |
|
|---|
| 43 |
$(P That isn't going to work. But it does work when the core language |
|---|
| 44 |
knows about strings: |
|---|
| 45 |
) |
|---|
| 46 |
|
|---|
| 47 |
$(CCODE |
|---|
| 48 |
const char[5] abc = "world"; |
|---|
| 49 |
char[] str = "hello" ~ abc; |
|---|
| 50 |
) |
|---|
| 51 |
|
|---|
| 52 |
<h4>Consistency With C String Syntax</h4> |
|---|
| 53 |
|
|---|
| 54 |
$(P |
|---|
| 55 |
There are three ways to find the length of a string in C++: |
|---|
| 56 |
) |
|---|
| 57 |
|
|---|
| 58 |
$(CCODE |
|---|
| 59 |
const char abc[] = "world"; : sizeof(abc)/sizeof(abc[0])-1 |
|---|
| 60 |
: strlen(abc) |
|---|
| 61 |
string str; : str.length() |
|---|
| 62 |
) |
|---|
| 63 |
|
|---|
| 64 |
$(P |
|---|
| 65 |
That kind of inconsistency makes it hard to write generic templates. |
|---|
| 66 |
Consider D: |
|---|
| 67 |
) |
|---|
| 68 |
|
|---|
| 69 |
----------------------- |
|---|
| 70 |
char[5] abc = "world"; : abc.length |
|---|
| 71 |
char[] str : str.length |
|---|
| 72 |
----------------------- |
|---|
| 73 |
|
|---|
| 74 |
<h4>Checking For Empty Strings</h4> |
|---|
| 75 |
|
|---|
| 76 |
$(P |
|---|
| 77 |
C++ strings use a function to determine if a string is empty: |
|---|
| 78 |
) |
|---|
| 79 |
|
|---|
| 80 |
$(CCODE |
|---|
| 81 |
string str; |
|---|
| 82 |
if (str.empty()) |
|---|
| 83 |
// string is empty |
|---|
| 84 |
) |
|---|
| 85 |
|
|---|
| 86 |
$(P |
|---|
| 87 |
In D, an empty string has zero length: |
|---|
| 88 |
) |
|---|
| 89 |
|
|---|
| 90 |
----------------------- |
|---|
| 91 |
char[] str; |
|---|
| 92 |
if (!str.length) |
|---|
| 93 |
// string is empty |
|---|
| 94 |
----------------------- |
|---|
| 95 |
|
|---|
| 96 |
|
|---|
| 97 |
<h4>Resizing Existing String</h4> |
|---|
| 98 |
|
|---|
| 99 |
$(P |
|---|
| 100 |
C++ handles this with the resize() member function: |
|---|
| 101 |
) |
|---|
| 102 |
|
|---|
| 103 |
$(CCODE |
|---|
| 104 |
string str; |
|---|
| 105 |
str.resize(newsize); |
|---|
| 106 |
) |
|---|
| 107 |
|
|---|
| 108 |
$(P |
|---|
| 109 |
D takes advantage of knowing that str is an array, and |
|---|
| 110 |
so resizing it is just changing the length property: |
|---|
| 111 |
) |
|---|
| 112 |
|
|---|
| 113 |
----------------------- |
|---|
| 114 |
char[] str; |
|---|
| 115 |
str.length = newsize; |
|---|
| 116 |
----------------------- |
|---|
| 117 |
|
|---|
| 118 |
<h4>Slicing a String</h4> |
|---|
| 119 |
|
|---|
| 120 |
$(P |
|---|
| 121 |
C++ slices an existing string using a special constructor: |
|---|
| 122 |
) |
|---|
| 123 |
|
|---|
| 124 |
$(CCODE |
|---|
| 125 |
string s1 = "hello world"; |
|---|
| 126 |
string s2(s1, 6, 5); // s2 is "world" |
|---|
| 127 |
) |
|---|
| 128 |
|
|---|
| 129 |
$(P |
|---|
| 130 |
D has the array slice syntax, not possible with C++: |
|---|
| 131 |
) |
|---|
| 132 |
|
|---|
| 133 |
----------------------- |
|---|
| 134 |
string s1 = "hello world"; |
|---|
| 135 |
string s2 = s1[6 .. 11]; // s2 is "world" |
|---|
| 136 |
----------------------- |
|---|
| 137 |
|
|---|
| 138 |
$(P |
|---|
| 139 |
Slicing, of course, works with any array in D, not just strings. |
|---|
| 140 |
) |
|---|
| 141 |
|
|---|
| 142 |
<h4>Copying a String</h4> |
|---|
| 143 |
|
|---|
| 144 |
$(P |
|---|
| 145 |
C++ copies strings with the replace function: |
|---|
| 146 |
) |
|---|
| 147 |
|
|---|
| 148 |
$(CCODE |
|---|
| 149 |
string s1 = "hello world"; |
|---|
| 150 |
string s2 = "goodbye "; |
|---|
| 151 |
s2.replace(8, 5, s1, 6, 5); // s2 is "goodbye world" |
|---|
| 152 |
) |
|---|
| 153 |
|
|---|
| 154 |
$(P |
|---|
| 155 |
D uses the slice syntax as an lvalue: |
|---|
| 156 |
) |
|---|
| 157 |
|
|---|
| 158 |
----------------------- |
|---|
| 159 |
char[] s1 = "hello world".dup; |
|---|
| 160 |
char[] s2 = "goodbye ".dup; |
|---|
| 161 |
s2[8..13] = s1[6..11]; // s2 is "goodbye world" |
|---|
| 162 |
----------------------- |
|---|
| 163 |
|
|---|
| 164 |
$(P The $(CODE .dup) is needed because string literals are |
|---|
| 165 |
read-only in D; $(CODE .dup) creates a copy |
|---|
| 166 |
that is writable. |
|---|
| 167 |
) |
|---|
| 168 |
|
|---|
| 169 |
|
|---|
| 170 |
<h4>Conversions to C Strings</h4> |
|---|
| 171 |
|
|---|
| 172 |
$(P |
|---|
| 173 |
This is needed for compatibility with C APIs. In C++, this |
|---|
| 174 |
uses the c_str() member function: |
|---|
| 175 |
) |
|---|
| 176 |
|
|---|
| 177 |
$(CCODE |
|---|
| 178 |
void foo(const char *); |
|---|
| 179 |
string s1; |
|---|
| 180 |
foo(s1.c_str()); |
|---|
| 181 |
) |
|---|
| 182 |
|
|---|
| 183 |
$(P |
|---|
| 184 |
In D, strings can be converted to char* using the .ptr property: |
|---|
| 185 |
) |
|---|
| 186 |
|
|---|
| 187 |
----------------------- |
|---|
| 188 |
void foo(char*); |
|---|
| 189 |
char[] s1; |
|---|
| 190 |
foo(s1.ptr); |
|---|
| 191 |
----------------------- |
|---|
| 192 |
$(P although for this to work where $(TT foo) expects a 0 terminated |
|---|
| 193 |
string, $(TT s1) must have a terminating 0. Alternatively, the |
|---|
| 194 |
function $(TT std.string.toStringz) will ensure it:) |
|---|
| 195 |
|
|---|
| 196 |
----------------------- |
|---|
| 197 |
void foo(char*); |
|---|
| 198 |
char[] s1; |
|---|
| 199 |
foo(std.string.$(B toStringz)(s1)); |
|---|
| 200 |
----------------------- |
|---|
| 201 |
|
|---|
| 202 |
|
|---|
| 203 |
<h4>Array Bounds Checking</h4> |
|---|
| 204 |
|
|---|
| 205 |
$(P |
|---|
| 206 |
In C++, string array bounds checking for [] is not done. |
|---|
| 207 |
In D, array bounds checking is on by default and it can be turned off |
|---|
| 208 |
with a compiler switch after the program is debugged. |
|---|
| 209 |
) |
|---|
| 210 |
|
|---|
| 211 |
<h4>String Switch Statements</h4> |
|---|
| 212 |
|
|---|
| 213 |
$(P |
|---|
| 214 |
String switch statements are not possible in C++, nor is there any way to add them |
|---|
| 215 |
by adding more to the library. In D, they take the obvious |
|---|
| 216 |
syntactical forms: |
|---|
| 217 |
) |
|---|
| 218 |
|
|---|
| 219 |
----------------------- |
|---|
| 220 |
switch (str) |
|---|
| 221 |
{ |
|---|
| 222 |
case "hello": |
|---|
| 223 |
case "world": |
|---|
| 224 |
... |
|---|
| 225 |
} |
|---|
| 226 |
----------------------- |
|---|
| 227 |
|
|---|
| 228 |
$(P |
|---|
| 229 |
where str can be any of literal "string"s, fixed string arrays |
|---|
| 230 |
like char[10], or dynamic strings like char[]. A quality implementation |
|---|
| 231 |
can, of course, explore many strategies of efficiently implementing |
|---|
| 232 |
this based on the contents of the case strings. |
|---|
| 233 |
) |
|---|
| 234 |
|
|---|
| 235 |
<h4>Filling a String</h4> |
|---|
| 236 |
|
|---|
| 237 |
$(P |
|---|
| 238 |
In C++, this is done with the replace() member function: |
|---|
| 239 |
) |
|---|
| 240 |
|
|---|
| 241 |
$(CCODE |
|---|
| 242 |
string str = "hello"; |
|---|
| 243 |
str.replace(1,2,2,'?'); // str is "h??lo" |
|---|
| 244 |
) |
|---|
| 245 |
|
|---|
| 246 |
$(P |
|---|
| 247 |
In D, use the array slicing syntax in the natural manner: |
|---|
| 248 |
) |
|---|
| 249 |
|
|---|
| 250 |
----------------------- |
|---|
| 251 |
char[5] str = "hello"; |
|---|
| 252 |
str[1..3] = '?'; // str is "h??lo" |
|---|
| 253 |
----------------------- |
|---|
| 254 |
|
|---|
| 255 |
<h4>Value vs Reference</h4> |
|---|
| 256 |
|
|---|
| 257 |
$(P |
|---|
| 258 |
C++ strings, as implemented by STLport, are by value and are |
|---|
| 259 |
0-terminated. [The latter is an implementation choice, but |
|---|
| 260 |
STLport seems to be the most popular implementation.] |
|---|
| 261 |
This, coupled with no garbage collection, has |
|---|
| 262 |
some consequences. First of all, any string created must make |
|---|
| 263 |
its own copy of the string data. The $(SINGLEQUOTE owner) of the string |
|---|
| 264 |
data must be kept track of, because when the owner is deleted |
|---|
| 265 |
all references become invalid. If one tries to avoid the |
|---|
| 266 |
dangling reference problem by treating strings as value types, |
|---|
| 267 |
there will be a lot of overhead of memory allocation, |
|---|
| 268 |
data copying, and memory deallocation. Next, the 0-termination |
|---|
| 269 |
implies that strings cannot refer to other strings. String |
|---|
| 270 |
data in the data segment, stack, etc., cannot |
|---|
| 271 |
be referred to. |
|---|
| 272 |
) |
|---|
| 273 |
|
|---|
| 274 |
$(P |
|---|
| 275 |
D strings are reference types, and the memory is garbage collected. |
|---|
| 276 |
This means that only references need to be copied, not the |
|---|
| 277 |
string data. D strings can refer to data in the static data |
|---|
| 278 |
segment, data on the stack, data inside other strings, objects, |
|---|
| 279 |
file buffers, etc. There's no need to keep track of the $(SINGLEQUOTE owner) |
|---|
| 280 |
of the string data. |
|---|
| 281 |
) |
|---|
| 282 |
|
|---|
| 283 |
$(P |
|---|
| 284 |
The obvious question is if multiple D strings refer to the same |
|---|
| 285 |
string data, what happens if the data is modified? All the |
|---|
| 286 |
references will now point to the modified data. This can have |
|---|
| 287 |
its own consequences, which can be avoided if the copy-on-write |
|---|
| 288 |
convention is followed. All copy-on-write is is that if |
|---|
| 289 |
a string is written to, an actual copy of the string data is made |
|---|
| 290 |
first. |
|---|
| 291 |
) |
|---|
| 292 |
|
|---|
| 293 |
$(P |
|---|
| 294 |
The result of D strings being reference only and garbage collected |
|---|
| 295 |
is that code that does a lot of string manipulating, such as |
|---|
| 296 |
an LZW compressor, can be a lot more efficient in terms of both |
|---|
| 297 |
memory consumption and speed. |
|---|
| 298 |
) |
|---|
| 299 |
|
|---|
| 300 |
<h2>Benchmark</h2> |
|---|
| 301 |
|
|---|
| 302 |
$(P |
|---|
| 303 |
Let's take a look at a small utility, wordcount, that counts up |
|---|
| 304 |
the frequency of each word in a text file. In D, it looks like this: |
|---|
| 305 |
) |
|---|
| 306 |
|
|---|
| 307 |
----------------------- |
|---|
| 308 |
import std.file; |
|---|
| 309 |
import std.stdio; |
|---|
| 310 |
|
|---|
| 311 |
int main (char[][] args) |
|---|
| 312 |
{ |
|---|
| 313 |
int w_total; |
|---|
| 314 |
int l_total; |
|---|
| 315 |
int c_total; |
|---|
| 316 |
int[char[]] dictionary; |
|---|
| 317 |
|
|---|
| 318 |
writefln(" lines words bytes file"); |
|---|
| 319 |
for (int i = 1; i < args.length; ++i) |
|---|
| 320 |
{ |
|---|
| 321 |
char[] input; |
|---|
| 322 |
int w_cnt, l_cnt, c_cnt; |
|---|
| 323 |
int inword; |
|---|
| 324 |
int wstart; |
|---|
| 325 |
|
|---|
| 326 |
input = cast(char[])std.file.read(args[i]); |
|---|
| 327 |
|
|---|
| 328 |
for (int j = 0; j < input.length; j++) |
|---|
| 329 |
{ char c; |
|---|
| 330 |
|
|---|
| 331 |
c = input[j]; |
|---|
| 332 |
if (c == '\n') |
|---|
| 333 |
++l_cnt; |
|---|
| 334 |
if (c >= '0' && c <= '9') |
|---|
| 335 |
{ |
|---|
| 336 |
} |
|---|
| 337 |
else if (c >= 'a' && c <= 'z' || |
|---|
| 338 |
c >= 'A' && c <= 'Z') |
|---|
| 339 |
{ |
|---|
| 340 |
if (!inword) |
|---|
| 341 |
{ |
|---|
| 342 |
wstart = j; |
|---|
| 343 |
inword = 1; |
|---|
| 344 |
++w_cnt; |
|---|
| 345 |
} |
|---|
| 346 |
} |
|---|
| 347 |
else if (inword) |
|---|
| 348 |
{ char[] word = input[wstart .. j]; |
|---|
| 349 |
|
|---|
| 350 |
dictionary[word]++; |
|---|
| 351 |
inword = 0; |
|---|
| 352 |
} |
|---|
| 353 |
++c_cnt; |
|---|
| 354 |
} |
|---|
| 355 |
if (inword) |
|---|
| 356 |
{ char[] w = input[wstart .. input.length]; |
|---|
| 357 |
dictionary[w]++; |
|---|
| 358 |
} |
|---|
| 359 |
writefln("%8s%8s%8s %s", l_cnt, w_cnt, c_cnt, args[i]); |
|---|
| 360 |
l_total += l_cnt; |
|---|
| 361 |
w_total += w_cnt; |
|---|
| 362 |
c_total += c_cnt; |
|---|
| 363 |
} |
|---|
| 364 |
|
|---|
| 365 |
if (args.length > 2) |
|---|
| 366 |
{ |
|---|
| 367 |
writefln("--------------------------------------%8s%8s%8s total", |
|---|
| 368 |
l_total, w_total, c_total); |
|---|
| 369 |
} |
|---|
| 370 |
|
|---|
| 371 |
writefln("--------------------------------------"); |
|---|
| 372 |
|
|---|
| 373 |
foreach (char[] word1; dictionary.keys.sort) |
|---|
| 374 |
{ |
|---|
| 375 |
writefln("%3d %s", dictionary[word1], word1); |
|---|
| 376 |
} |
|---|
| 377 |
return 0; |
|---|
| 378 |
} |
|---|
| 379 |
----------------------- |
|---|
| 380 |
|
|---|
| 381 |
$(P (An $(LINK2 wc.html, alternate implementation) that |
|---|
| 382 |
uses buffered file I/O to handle larger files.)) |
|---|
| 383 |
|
|---|
| 384 |
$(P |
|---|
| 385 |
Two people have written C++ implementations using the C++ standard |
|---|
| 386 |
template library, |
|---|
| 387 |
<a href="http://groups.google.com/groups?q=g:thl953709878d&dq=&hl=en&lr=&ie=UTF-8&oe=UTF-8&selm=bjacrl%244un%2401%241%40news.t-online.com">wccpp1</a> |
|---|
| 388 |
and |
|---|
| 389 |
$(LINK2 #wccpp2, wccpp2). |
|---|
| 390 |
The input file |
|---|
| 391 |
$(LINK2 http://www.gutenberg.org/dirs/etext91/alice30.txt, alice30.txt) |
|---|
| 392 |
is the text of "Alice in Wonderland." |
|---|
| 393 |
The D compiler, |
|---|
| 394 |
<a HREF="http://ftp.digitalmars.com/dmd.zip" title="download D compiler">dmd</a>, |
|---|
| 395 |
and the C++ compiler, |
|---|
| 396 |
<a HREF="http://ftp.digitalmars.com/dmc.zip" title="download dmc.zip">dmc</a>, |
|---|
| 397 |
share the same |
|---|
| 398 |
optimizer and code generator, which provides a more apples to |
|---|
| 399 |
apples comparison of the efficiency of the semantics of the languages |
|---|
| 400 |
rather than the optimization and code generator sophistication. |
|---|
| 401 |
Tests were run on a Windows XP machine. dmc uses STLport for the template |
|---|
| 402 |
implementation. |
|---|
| 403 |
) |
|---|
| 404 |
|
|---|
| 405 |
$(TABLE1 |
|---|
| 406 |
$(TR |
|---|
| 407 |
$(TH Program) |
|---|
| 408 |
$(TH Compile) |
|---|
| 409 |
$(TH Compile Time) |
|---|
| 410 |
$(TH Run) |
|---|
| 411 |
$(TH Run Time) |
|---|
| 412 |
) |
|---|
| 413 |
$(TR |
|---|
| 414 |
$(TD D wc) |
|---|
| 415 |
$(TD dmd wc -O -release) |
|---|
| 416 |
$(TD 0.0719) |
|---|
| 417 |
$(TD wc alice30.txt >log) |
|---|
| 418 |
$(TD 0.0326) |
|---|
| 419 |
) |
|---|
| 420 |
$(TR |
|---|
| 421 |
$(TD C++ wccpp1) |
|---|
| 422 |
$(TD dmc wccpp1 -o -I\dm\stlport\stlport) |
|---|
| 423 |
$(TD 2.1917) |
|---|
| 424 |
$(TD wccpp1 alice30.txt >log) |
|---|
| 425 |
$(TD 0.0944) |
|---|
| 426 |
) |
|---|
| 427 |
$(TR |
|---|
| 428 |
$(TD C++ wccpp2) |
|---|
| 429 |
$(TD dmc wccpp2 -o -I\dm\stlport\stlport) |
|---|
| 430 |
$(TD 2.0463) |
|---|
| 431 |
$(TD wccpp2 alice30.txt >log) |
|---|
| 432 |
$(TD 0.1012) |
|---|
| 433 |
) |
|---|
| 434 |
) |
|---|
| 435 |
|
|---|
| 436 |
$(P |
|---|
| 437 |
The following tests were run on Linux, again comparing a D compiler |
|---|
| 438 |
($(LINK2 http://home.earthlink.net/~dvdfrdmn/d, gdc)) |
|---|
| 439 |
and a C++ compiler ($(B g++)) that share a common optimizer and |
|---|
| 440 |
code generator. The system is Pentium III 800MHz running RedHat Linux 8.0 |
|---|
| 441 |
and gcc 3.4.2. |
|---|
| 442 |
The Digital Mars D compiler for Linux ($(B dmd)) |
|---|
| 443 |
is included for comparison. |
|---|
| 444 |
) |
|---|
| 445 |
|
|---|
| 446 |
|
|---|
| 447 |
$(TABLE1 |
|---|
| 448 |
$(TR |
|---|
| 449 |
$(TH Program) |
|---|
| 450 |
$(TH Compile) |
|---|
| 451 |
$(TH Compile Time) |
|---|
| 452 |
$(TH Run) |
|---|
| 453 |
$(TH Run Time) |
|---|
| 454 |
) |
|---|
| 455 |
$(TR |
|---|
| 456 |
$(TD D wc) |
|---|
| 457 |
$(TD gdc -O2 -frelease -o wc wc.d) |
|---|
| 458 |
$(TD 0.326) |
|---|
| 459 |
$(TD wc alice30.txt > /dev/null) |
|---|
| 460 |
$(TD 0.041) |
|---|
| 461 |
) |
|---|
| 462 |
$(TR |
|---|
| 463 |
$(TD D wc) |
|---|
| 464 |
$(TD dmd wc -O -release) |
|---|
| 465 |
$(TD 0.235) |
|---|
| 466 |
$(TD wc alice30.txt > /dev/null) |
|---|
| 467 |
$(TD 0.041) |
|---|
| 468 |
) |
|---|
| 469 |
$(TR |
|---|
| 470 |
$(TD C++ wccpp1) |
|---|
| 471 |
$(TD g++ -O2 -o wccpp1 wccpp1.cc) |
|---|
| 472 |
$(TD 2.874) |
|---|
| 473 |
$(TD wccpp1 alice30.txt > /dev/null) |
|---|
| 474 |
$(TD 0.086) |
|---|
| 475 |
) |
|---|
| 476 |
$(TR |
|---|
| 477 |
$(TD C++ wccpp2) |
|---|
| 478 |
$(TD g++ -O2 -o wccpp2 wccpp2.cc) |
|---|
| 479 |
$(TD 2.886) |
|---|
| 480 |
$(TD wccpp2 alice30.txt > /dev/null) |
|---|
| 481 |
$(TD 0.095) |
|---|
| 482 |
) |
|---|
| 483 |
) |
|---|
| 484 |
|
|---|
| 485 |
$(P |
|---|
| 486 |
These tests compare gdc with g++ on a PowerMac G5 2x2.0GHz |
|---|
| 487 |
running MacOS X 10.3.5 and gcc 3.4.2. (Timings are a little |
|---|
| 488 |
less accurate.) |
|---|
| 489 |
) |
|---|
| 490 |
|
|---|
| 491 |
$(TABLE1 |
|---|
| 492 |
$(TR |
|---|
| 493 |
$(TH Program) |
|---|
| 494 |
$(TH Compile) |
|---|
| 495 |
$(TH Compile Time) |
|---|
| 496 |
$(TH Run) |
|---|
| 497 |
$(TH Run Time) |
|---|
| 498 |
) |
|---|
| 499 |
$(TR |
|---|
| 500 |
$(TD D wc) |
|---|
| 501 |
$(TD gdc -O2 -frelease -o wc wc.d) |
|---|
| 502 |
$(TD 0.28) |
|---|
| 503 |
$(TD wc alice30.txt > /dev/null) |
|---|
| 504 |
$(TD 0.03) |
|---|
| 505 |
) |
|---|
| 506 |
$(TR |
|---|
| 507 |
$(TD C++ wccpp1) |
|---|
| 508 |
$(TD g++ -O2 -o wccpp1 wccpp1.cc) |
|---|
| 509 |
$(TD 1.90) |
|---|
| 510 |
$(TD wccpp1 alice30.txt > /dev/null) |
|---|
| 511 |
$(TD 0.07) |
|---|
| 512 |
) |
|---|
| 513 |
$(TR |
|---|
| 514 |
$(TD C++ wccpp2) |
|---|
| 515 |
$(TD g++ -O2 -o wccpp2 wccpp2.cc) |
|---|
| 516 |
$(TD 1.88) |
|---|
| 517 |
$(TD wccpp2 alice30.txt > /dev/null) |
|---|
| 518 |
$(TD 0.08) |
|---|
| 519 |
) |
|---|
| 520 |
) |
|---|
| 521 |
<hr> |
|---|
| 522 |
<h4><a name="wccpp2">wccpp2 by Allan Odgaard</a></h4> |
|---|
| 523 |
|
|---|
| 524 |
$(CCODE |
|---|
| 525 |
#include <algorithm> |
|---|
| 526 |
#include <cstdio> |
|---|
| 527 |
#include <fstream> |
|---|
| 528 |
#include <iterator> |
|---|
| 529 |
#include <map> |
|---|
| 530 |
#include <vector> |
|---|
| 531 |
|
|---|
| 532 |
bool isWordStartChar (char c) { return isalpha(c); } |
|---|
| 533 |
bool isWordEndChar (char c) { return !isalnum(c); } |
|---|
| 534 |
|
|---|
| 535 |
int main (int argc, char const* argv[]) |
|---|
| 536 |
{ |
|---|
| 537 |
using namespace std; |
|---|
| 538 |
printf("Lines Words Bytes File:\n"); |
|---|
| 539 |
|
|---|
| 540 |
map<string, int> dict; |
|---|
| 541 |
int tLines = 0, tWords = 0, tBytes = 0; |
|---|
| 542 |
for(int i = 1; i < argc; i++) |
|---|
| 543 |
{ |
|---|
| 544 |
ifstream file(argv[i]); |
|---|
| 545 |
istreambuf_iterator<char> from(file.rdbuf()), to; |
|---|
| 546 |
vector<char> v(from, to); |
|---|
| 547 |
vector<char>::iterator first = v.begin(), last = v.end(), bow, eow; |
|---|
| 548 |
|
|---|
| 549 |
int numLines = count(first, last, '\n'); |
|---|
| 550 |
int numWords = 0; |
|---|
| 551 |
int numBytes = last - first; |
|---|
| 552 |
|
|---|
| 553 |
for(eow = first; eow != last; ) |
|---|
| 554 |
{ |
|---|
| 555 |
bow = find_if(eow, last, isWordStartChar); |
|---|
| 556 |
eow = find_if(bow, last, isWordEndChar); |
|---|
| 557 |
if(bow != eow) |
|---|
| 558 |
++dict[string(bow, eow)], ++numWords; |
|---|
| 559 |
} |
|---|
| 560 |
|
|---|
| 561 |
printf("%5d %5d %5d %s\n", numLines, numWords, numBytes, argv[i]); |
|---|
| 562 |
|
|---|
| 563 |
tLines += numLines; |
|---|
| 564 |
tWords += numWords; |
|---|
| 565 |
tBytes += numBytes; |
|---|
| 566 |
} |
|---|
| 567 |
|
|---|
| 568 |
if(argc > 2) |
|---|
| 569 |
printf("-----------------------\n%5d %5d %5d\n", tLines, tWords, tBytes); |
|---|
| 570 |
printf("-----------------------\n\n"); |
|---|
| 571 |
|
|---|
| 572 |
for(map<string, int>::const_iterator it = dict.begin(); it != dict.end(); ++it) |
|---|
| 573 |
printf("%5d %s\n", it->second, it->first.c_str()); |
|---|
| 574 |
|
|---|
| 575 |
return 0; |
|---|
| 576 |
} |
|---|
| 577 |
) |
|---|
| 578 |
|
|---|
| 579 |
) |
|---|
| 580 |
|
|---|
| 581 |
Macros: |
|---|
| 582 |
TITLE=D Strings vs C++ Strings |
|---|
| 583 |
WIKI=CPPstrings |
|---|