root/trunk/docsrc/ctod.dd

Revision 1257, 37.4 kB (checked in by walter, 2 years ago)

update

  • Property svn:eol-style set to native
Line 
1 Ddoc
2
3 $(COMMUNITY Programming in D for C Programmers,
4
5 $(BLOCKQUOTE
6 Et tu, D? Then fall, C! -- William Nerdspeare
7 )
8
9 <img src="c1.gif" border=0 align=right alt="ouch!">
10
11 $(P Every experienced C programmer accumulates a series of idioms and techniques
12 which become second nature. Sometimes, when learning a new language, those
13 idioms can be so comfortable it's hard to see how to do the equivalent in the
14 new language. So here's a collection of common C techniques, and how to do the
15 corresponding task in D.
16 )
17
18 $(P Since C does not have object-oriented features, there's a separate section
19 for object-oriented issues:
20 <a href="cpptod.html">Programming in D for C++ Programmers</a>.
21 )
22
23 $(P The C preprocessor is covered in
24 $(LINK2 pretod.html, The C Preprocessor vs D).
25 )
26
27 $(UL
28     $(LI $(LINK2 #sizeof, Getting the Size of a Type))
29     $(LI $(LINK2 #maxmin, Get the max and min values of a type))
30     $(LI $(LINK2 #types, Primitive Types))
31     $(LI $(LINK2 #floating, Special Floating Point Values))
32     $(LI $(LINK2 #modulus, Remainder after division of floating point numbers))
33     $(LI $(LINK2 #nans, Dealing with NANs in floating point compares))
34     $(LI $(LINK2 #assert, Asserts))
35     $(LI $(LINK2 #arrayinit, Initializing all elements of an array))
36     $(LI $(LINK2 #arrayloop, Looping through an array))
37     $(LI $(LINK2 #arraycreate, Creating an array of variable size))
38     $(LI $(LINK2 #strcat, String Concatenation))
39     $(LI $(LINK2 #printf, Formatted printing))
40     $(LI $(LINK2 #forwardfunc, Forward referencing functions))
41     $(LI $(LINK2 #funcvoid, Functions that have no arguments))
42     $(LI $(LINK2 #labeledbreak, Labeled break and continue statements))
43     $(LI $(LINK2 #goto, Goto Statements))
44     $(LI $(LINK2 #tagspace, Struct tag name space))
45     $(LI $(LINK2 #stringlookup, Looking up strings))
46     $(LI $(LINK2 #align, Setting struct member alignment))
47     $(LI $(LINK2 #anonymous, Anonymous Structs and Unions))
48     $(LI $(LINK2 #declaring, Declaring struct types and variables))
49     $(LI $(LINK2 #fieldoffset, Getting the offset of a struct member))
50     $(LI $(LINK2 #unioninit, Union initializations))
51     $(LI $(LINK2 #structinit, Struct initializations))
52     $(LI $(LINK2 #arrayinit2, Array initializations))
53     $(LI $(LINK2 #stringlit, Escaped String Literals))
54     $(LI $(LINK2 #ascii, Ascii vs Wide Characters))
55     $(LI $(LINK2 #arrayenum, Arrays that parallel an enum))
56     $(LI $(LINK2 #typedefs, Creating a new type with typedef))
57     $(LI $(LINK2 #structcmp, Comparing structs))
58     $(LI $(LINK2 #stringcmp, Comparing strings))
59     $(LI $(LINK2 #sort, Sorting arrays))
60     $(LI $(LINK2 #volatile, Volatile memory access))
61     $(LI $(LINK2 #strings, String literals))
62     $(LI $(LINK2 #traversal, Data Structure Traversal))
63     $(LI $(LINK2 #ushr, Unsigned Right Shift))
64     $(LI $(LINK2 #closures, Dynamic Closures))
65     $(LI $(LINK2 #variadic, Variadic Function Parameters))
66 )
67
68 <hr><!-- -------------------------------------------- -->
69 <h3><a name="sizeof">Getting the Size of a Type</a></h3>
70
71 <h4>The C Way</h4>
72
73 $(CCODE
74 sizeof(int)
75 sizeof(char *)
76 sizeof(double)
77 sizeof(struct Foo)
78 )
79
80 <h4>The D Way</h4>
81
82 <P>Use the sizeof property:</P>
83
84 ----------------------------
85 int.sizeof
86 (char *).sizeof
87 double.sizeof
88 Foo.sizeof
89 ----------------------------
90
91 <hr><!-- ============================================ -->
92 <h3><a name="maxmin">Get the max and min values of a type</a></h3>
93
94 <h4>The C Way</h4>
95
96 $(CCODE
97 #include &lt;limits.h&gt;
98 #include &lt;float.h&gt;
99
100 CHAR_MAX
101 CHAR_MIN
102 ULONG_MAX
103 DBL_MIN
104 )
105
106 <h4>The D Way</h4>
107
108 ----------------------------
109 char.max
110 char.min
111 ulong.max
112 double.min
113 ----------------------------
114
115 <hr><!-- ============================================ -->
116 <h3><a name="types">Primitive Types</a></h3>
117
118 <h4>C to D types</h4>
119
120 $(CCODE
121 bool               =&gt;        bit
122 char               =&gt;        char
123 signed char        =&gt;        byte
124 unsigned char      =&gt;        ubyte
125 short              =&gt;        short
126 unsigned short     =&gt;        ushort
127 wchar_t            =&gt;        wchar
128 int                =&gt;        int
129 unsigned           =&gt;        uint
130 long               =&gt;        int
131 unsigned long      =&gt;        uint
132 long long          =&gt;        long
133 unsigned long long =&gt;        ulong
134 float              =&gt;        float
135 double             =&gt;        double
136 long double        =&gt;        real
137 _Imaginary long double =&gt;    ireal
138 _Complex long double   =&gt;    creal
139 )
140 <p>
141        Although char is an unsigned 8 bit type, and
142        wchar is an unsigned 16 bit type, they have their own separate types
143        in order to aid overloading and type safety.
144 <p>
145        Ints and unsigneds in C are of varying size; not so in D.
146
147 <hr><!-- ============================================ -->
148 <h3><a name="floating">Special Floating Point Values</a></h3>
149
150 <h4>The C Way</h4>
151
152 $(CCODE
153 #include &lt;math.h&gt;
154
155 NAN
156 INFINITY
157
158 #include &lt;float.h&gt;
159
160 DBL_DIG
161 DBL_EPSILON
162 DBL_MANT_DIG
163 DBL_MAX_10_EXP
164 DBL_MAX_EXP
165 DBL_MIN_10_EXP
166 DBL_MIN_EXP
167 )
168
169 <h4>The D Way</h4>
170
171 ----------------------------
172 double.nan
173 double.infinity
174 double.dig
175 double.epsilon
176 double.mant_dig
177 double.max_10_exp
178 double.max_exp
179 double.min_10_exp
180 double.min_exp
181 ----------------------------
182
183 <hr><!-- ============================================ -->
184 <h3><a name="modulus">Remainder after division of floating point numbers</a></h3>
185
186 <h4>The C Way</h4>
187
188 $(CCODE
189 #include &lt;math.h&gt;
190
191 float f = fmodf(x,y);
192 double d = fmod(x,y);
193 long double r = fmodl(x,y);
194 )
195
196 <h4>The D Way</h4>
197
198 D supports the remainder ('%') operator on floating point operands:
199
200 ----------------------------
201 float f = x % y;
202 double d = x % y;
203 real r = x % y;
204 ----------------------------
205
206 <hr><!-- ============================================ -->
207 <h3><a name="nans">Dealing with NANs in floating point compares</a></h3>
208
209 <h4>The C Way</h4>
210
211        C doesn't define what happens if an operand to a compare
212        is NAN, and few C compilers check for it (the Digital Mars
213        C compiler is an exception, DM's compilers do check for NAN operands).
214
215 $(CCODE
216 #include &lt;math.h&gt;
217
218 if (isnan(x) || isnan(y))
219    result = FALSE;
220 else
221    result = (x &lt; y);
222 )
223
224 <h4>The D Way</h4>
225
226        D offers a full complement of comparisons and operators
227        that work with NAN arguments.
228
229 ----------------------------
230 result = (x < y);        // false if x or y is nan
231 ----------------------------
232
233 <hr><!-- ============================================ -->
234 <h3><a name="assert">Asserts are a necessary part of any good defensive coding strategy</a></h3>
235
236 <h4>The C Way</h4>
237 <p>
238 C doesn't directly support assert, but does support __FILE__
239 and __LINE__ from which an assert macro can be built. In fact,
240 there appears to be practically no other use for __FILE__ and __LINE__.
241
242 $(CCODE
243 #include &lt;assert.h&gt;
244
245 assert(e == 0);
246 )
247
248 <h4>The D Way</h4>
249
250 D simply builds assert into the language:
251
252 ----------------------------
253 assert(e == 0);
254 ----------------------------
255
256 <hr><!-- ============================================ -->
257 <h3><a name="arrayinit">Initializing all elements of an array</a></h3>
258
259 <h4>The C Way</h4>
260
261 $(CCODE
262 #define ARRAY_LENGTH        17
263 int array[ARRAY_LENGTH];
264 for (i = 0; i &lt; ARRAY_LENGTH; i++)
265    array[i] = value;
266 )
267
268 <h4>The D Way</h4>
269
270 ----------------------------
271 int array[17];
272 array[] = value;
273 ----------------------------
274
275 <hr><!-- ============================================ -->
276 <h3><a name="arrayloop">Looping through an array</a></h3>
277
278 <h4>The C Way</h4>
279 <p>
280        The array length is defined separately, or a clumsy
281        sizeof() expression is used to get the length.
282
283 $(CCODE
284 #define ARRAY_LENGTH        17
285 int array[ARRAY_LENGTH];
286 for (i = 0; i &lt; ARRAY_LENGTH; i++)
287    func(array[i]);
288 )
289
290 or:
291
292 $(CCODE
293 int array[17];
294 for (i = 0; i &lt; sizeof(array) / sizeof(array[0]); i++)
295    func(array[i]);
296 )
297
298 <h4>The D Way</h4>
299
300 The length of an array is accessible through the property "length".
301
302 ----------------------------
303 int array[17];
304 for (i = 0; i < array.length; i++)
305    func(array[i]);
306 ----------------------------
307
308 or even better:
309
310 ----------------------------
311 int array[17];
312 foreach (int value; array)
313    func(value);
314 ----------------------------
315
316
317 <hr><!-- ============================================ -->
318 <h3><a name="arraycreate">Creating an array of variable size</a></h3>
319
320 <h4>The C Way</h4>
321
322        C cannot do this with arrays. It is necessary to create a separate
323        variable for the length, and then explicitly manage the size of
324        the array:
325
326 $(CCODE
327 #include &lt;stdlib.h&gt;
328
329 int array_length;
330 int *array;
331 int *newarray;
332
333 newarray = (int *)
334    realloc(array, (array_length + 1) * sizeof(int));
335 if (!newarray)
336    error("out of memory");
337 array = newarray;
338 array[array_length++] = x;
339 )
340
341 <h4>The D Way</h4>
342
343        D supports dynamic arrays, which can be easily resized. D supports
344        all the requisite memory management.
345
346 ----------------------------
347 int[] array;
348
349 array.length = array.length + 1;
350 array[array.length - 1] = x;
351 ----------------------------
352
353 <hr><!-- ============================================ -->
354 <h3><a name="strcat">String Concatenation</a></h3>
355
356 <h4>The C Way</h4>
357
358        There are several difficulties to be resolved, like
359        when can storage be freed, dealing with null pointers,
360        finding the length of the strings, and memory allocation:
361
362 $(CCODE
363 #include &lt;string.h&gt;
364
365 char *s1;
366 char *s2;
367 char *s;
368
369 // Concatenate s1 and s2, and put result in s
370 free(s);
371 s = (char *)malloc((s1 ? strlen(s1) : 0) +
372           (s2 ? strlen(s2) : 0) + 1);
373 if (!s)
374    error("out of memory");
375 if (s1)
376    strcpy(s, s1);
377 else
378    *s = 0;
379 if (s2)
380    strcpy(s + strlen(s), s2);
381
382 // Append "hello" to s
383 char hello[] = "hello";
384 char *news;
385 size_t lens = s ? strlen(s) : 0;
386 news = (char *)
387    realloc(s, (lens + sizeof(hello) + 1) * sizeof(char));
388 if (!news)
389    error("out of memory");
390 s = news;
391 memcpy(s + lens, hello, sizeof(hello));
392 )
393
394 <h4>The D Way</h4>
395
396        D overloads the operators ~ and ~= for char and wchar arrays to mean
397        concatenate and append, respectively:
398
399 ----------------------------
400 char[] s1;
401 char[] s2;
402 char[] s;
403
404 s = s1 ~ s2;
405 s ~= "hello";
406 ----------------------------
407
408 <hr><!-- ============================================ -->
409 <h3><a name="printf">Formatted printing</a></h3>
410
411 <h4>The C Way</h4>
412
413        printf() is the general purpose formatted print routine:
414
415 $(CCODE
416 #include &lt;stdio.h&gt;
417
418 printf("Calling all cars %d times!\n", ntimes);
419 )
420
421 <h4>The D Way</h4>
422
423        What can we say? printf() rules:
424
425 ----------------------------
426 printf("Calling all cars %d times!\n", ntimes);
427 ----------------------------
428
429     writefln() improves on printf() by being type-aware and type-safe:
430
431 -----------------------
432 import std.stdio;
433
434 writefln("Calling all cars %s times!", ntimes);
435 -----------------------
436
437 <hr><!-- ============================================ -->
438 <h3><a name="forwardfunc">Forward referencing functions</a></h3>
439
440 <h4>The C Way</h4>
441
442        Functions cannot be forward referenced. Hence, to call a function
443        not yet encountered in the source file, it is necessary to insert
444        a function declaration lexically preceding the call.
445
446 $(CCODE
447 void forwardfunc();
448
449 void myfunc()
450 {   
451    forwardfunc();
452 }
453
454 void forwardfunc()
455 {   
456    ...
457 }
458 )
459
460 <h4>The D Way</h4>
461
462     The program is looked at as a whole, and so not only is it not
463     necessary to code forward declarations, it is not even allowed!
464     D avoids the tedium and errors associated with writing forward
465     referenced function declarations twice.
466     Functions can be defined in any order.
467
468 ----------------------------
469 void myfunc()
470 {   
471    forwardfunc();
472 }
473
474 void forwardfunc()
475 {   
476    ...
477 }
478 ----------------------------
479
480 <hr><!-- ============================================ -->
481 <h3><a name="funcvoid">Functions that have no arguments</a></h3>
482
483 <h4>The C Way</h4>
484
485 $(CCODE
486 void function(void);
487 )
488
489 <h4>The D Way</h4>
490
491        D is a strongly typed language, so there is no need to explicitly
492        say a function takes no arguments, just don't declare it as having
493        arguments.
494
495 ----------------------------
496 void function()
497 {
498    ...
499 }
500 ----------------------------
501
502 <hr><!-- ============================================ -->
503 <h3><a name="labeledbreak">Labeled break and continue statements</a></h3>
504
505 <h4>The C Way</h4>
506
507        Break and continue statements only apply to the innermost nested loop or
508        switch, so a multilevel break must use a goto:
509
510 $(CCODE
511     for (i = 0; i &lt; 10; i++)
512     {   
513        for (j = 0; j &lt; 10; j++)
514        {   
515        if (j == 3)
516            goto Louter;
517        if (j == 4)
518            goto L2;
519        }
520      L2:
521        ;
522     }
523 Louter:
524     ;
525 )
526
527 <h4>The D Way</h4>
528
529        Break and continue statements can be followed by a label. The label
530        is the label for an enclosing loop or switch, and the break applies
531        to that loop.
532
533 ----------------------------
534 Louter:
535    for (i = 0; i < 10; i++)
536    {   
537        for (j = 0; j < 10; j++)
538        {   
539        if (j == 3)
540            break Louter;
541        if (j == 4)
542            continue Louter;
543        }
544    }
545    // break Louter goes here
546 ----------------------------
547
548 <hr><!-- ============================================ -->
549 <h3><a name="goto">Goto Statements</a></h3>
550
551 <h4>The C Way</h4>
552
553        The much maligned goto statement is a staple for professional C coders.
554        It's
555        necessary to make up for sometimes inadequate control flow statements.
556
557 <h4>The D Way</h4>
558
559        Many C-way goto statements can be eliminated with the D feature of
560        labeled
561        break and continue statements. But D is a practical language for
562        practical
563        programmers who know when the rules need to be broken. So of course D
564        supports goto statements.
565
566 <hr><!-- ============================================ -->
567 <h3><a name="tagspace">Struct tag name space</a></h3>
568
569 <h4>The C Way</h4>
570
571        It's annoying to have to put the struct keyword every time a type is specified,
572        so a common idiom is to use:
573
574 $(CCODE
575 typedef struct ABC { ... } ABC;
576 )
577
578 <h4>The D Way</h4>
579
580        Struct tag names are not in a separate name space, they are in the same name
581        space as ordinary names. Hence:
582
583 ----------------------------
584 struct ABC { ... }
585 ----------------------------
586
587 <hr><!-- ============================================ -->
588 <h3><a name="stringlookup">Looking up strings</a></h3>
589
590 <h4>The C Way</h4>
591
592        Given a string, compare the string against a list of possible
593        values and take action based on which one it is. A typical use
594        for this might be command line argument processing.
595
596 $(CCODE
597 #include &lt;string.h&gt;
598 void dostring(char *s)
599 {   
600    enum Strings { Hello, Goodbye, Maybe, Max };
601    static char *table[] = { "hello", "goodbye", "maybe" };
602    int i;
603
604    for (i = 0; i &lt; Max; i++)
605    {   
606        if (strcmp(s, table[i]) == 0)
607        break;
608    }
609    switch (i)
610    {   
611        case Hello:   ...
612        case Goodbye: ...
613        case Maybe:   ...
614        default:      ...
615    }
616 }
617 )
618
619        The problem with this is trying to maintain 3 parallel data
620        structures, the enum, the table, and the switch cases. If there
621        are a lot of values, the connection between the 3 may not be so
622        obvious when doing maintenance, and so the situation is ripe for
623        bugs.
624
625        Additionally, if the number of values becomes large, a binary or
626        hash lookup will yield a considerable performance increase over
627        a simple linear search. But coding these can be time consuming,
628        and they need to be debugged. Typically, such work just never
629        gets done.
630
631 <h4>The D Way</h4>
632
633        D extends the concept of switch statements to be able to handle
634        strings as well as numbers. Then, the way to code the string
635        lookup becomes straightforward:
636
637 ----------------------------
638 void dostring(char[] s)
639 {   
640    switch (s)
641    {   
642        case "hello":   ...
643        case "goodbye": ...
644        case "maybe":   ...
645        default:        ...
646    }
647 }
648 ----------------------------
649
650        Adding new cases becomes easy. The compiler can be relied on
651        to generate a fast lookup scheme for it, eliminating the bugs
652        and time required in hand-coding one.
653
654 <hr><!-- ============================================ -->
655 <h3><a name="align">Setting struct member alignment</a></h3>
656
657 <h4>The C Way</h4>
658
659        It's done through a command line switch which affects the entire
660        program, and woe results if any modules or libraries didn't get
661        recompiled. To address this, $(TT #pragma)s are used:
662
663 $(CCODE
664 #pragma pack(1)
665 struct ABC
666 {   
667    ...
668 };
669 #pragma pack()
670 )
671
672        But $(TT #pragma)s are nonportable both in theory and in practice from
673        compiler to compiler.
674
675 <h4>The D Way</h4>
676
677     $(P D has a syntax for setting the alignment that is common
678     to all D compilers. The actual alignment done is compatible
679     with the companion C compiler's alignment, for ABI compatibility.
680     To match a particular layout across architectures, use
681     $(TT align(1)) and manually specify it.
682     )
683
684 ----------------------------
685 struct ABC
686 {   
687    int z;               // z is aligned to the default
688
689  align (1) int x;       // x is byte aligned
690  align (4)
691  {   
692    ...                  // declarations in {} are dword aligned
693  }
694  align (2):             // switch to word alignment from here on
695
696    int y;               // y is word aligned
697 }
698 ----------------------------
699
700 <hr><!-- ============================================ -->
701 <h3><a name="anonymous">Anonymous Structs and Unions</a></h3>
702
703 Sometimes, it's nice to control the layout of a struct with nested structs and unions.
704
705 <h4>The C Way</h4>
706
707        C doesn't allow anonymous structs or unions, which means that dummy tag names
708        and dummy members are necessary:
709
710 $(CCODE
711 struct Foo
712 {
713    int i;
714    union Bar
715    {
716       struct Abc { int x; long y; } _abc;
717       char *p;
718    } _bar;
719 };
720
721 #define x _bar._abc.x
722 #define y _bar._abc.y
723 #define p _bar.p
724
725 struct Foo f;
726
727 f.i;
728 f.x;
729 f.y;
730 f.p;
731 )
732
733        Not only is it clumsy, but using macros means a symbolic debugger won't understand
734        what is being done, and the macros have global scope instead of struct scope.
735
736 <h4>The D Way</h4>
737
738        Anonymous structs and unions are used to control the layout in a
739        more natural manner:
740
741 ----------------------------
742 struct Foo
743 {
744    int i;
745    union
746    {
747       struct { int x; long y; }
748       char* p;
749    }
750 }
751
752 Foo f;
753
754 f.i;
755 f.x;
756 f.y;
757 f.p;
758 ----------------------------
759
760 <hr><!-- ============================================ -->
761 <h3><a name="declaring">Declaring struct types and variables</a></h3>
762
763 <h4>The C Way</h4>
764
765     $(P Is to do it in one statement ending with a semicolon:)
766
767 $(CCODE
768 struct Foo { int x; int y; } foo;
769 )
770
771     $(P Or to separate the two:)
772
773 $(CCODE
774 struct Foo { int x; int y; };   // note terminating ;
775 struct Foo foo;
776 )
777
778 <h4>The D Way</h4>
779
780     $(P Struct definitions and declarations can't be done in the same
781     statement:
782     )
783
784 ----------------------------
785 struct Foo { int x; int y; }    // note there is no terminating ;
786 Foo foo;
787 ----------------------------
788
789     $(P which means that the terminating ; can be dispensed with,
790     eliminating the confusing difference between struct {} and function
791     block {} in how semicolons are used.
792     )
793
794 <hr><!-- ============================================ -->
795 <h3><a name="fieldoffset">Getting the offset of a struct member</a></h3>
796
797 <h4>The C Way</h4>
798
799        Naturally, another macro is used:
800
801 $(CCODE
802 #include &lt;stddef.h&gt;
803 struct Foo { int x; int y; };
804
805 off = offsetof(struct Foo, y);
806 )
807
808 <h4>The D Way</h4>
809
810        An offset is just another property:
811
812 ----------------------------
813 struct Foo { int x; int y; }
814
815 off = Foo.y.offsetof;
816 ----------------------------
817
818 <hr><!-- ============================================ -->
819 <h3><a name="unioninit">Union Initializations</a></h3>
820
821 <h4>The C Way</h4>
822
823        Unions are initialized using the "first member" rule:
824
825 $(CCODE
826 union U { int a; long b; };
827 union U x = { 5 };                // initialize member 'a' to 5
828 )
829
830        Adding union members or rearranging them can have disastrous consequences
831        for any initializers.
832
833 <h4>The D Way</h4>
834
835        In D, which member is being initialized is mentioned explicitly:
836
837 ----------------------------
838 union U { int a; long b; }
839 U x = { a:5 };
840 ----------------------------
841
842        avoiding the confusion and maintenance problems.
843
844 <hr><!-- ============================================ -->
845 <h3><a name="structinit">Struct Initializations</a></h3>
846
847 <h4>The C Way</h4>
848
849        Members are initialized by their position within the { }s:
850
851 $(CCODE
852 struct S { int a; int b; };
853 struct S x = { 5, 3 };
854 )
855
856        This isn't much of a problem with small structs, but when there
857        are numerous members, it becomes tedious to get the initializers
858        carefully lined up with the field declarations. Then, if members are
859        added or rearranged, all the initializations have to be found and
860        modified appropriately. This is a minefield for bugs.
861
862 <h4>The D Way</h4>
863
864        Member initialization can be done explicitly:
865
866 ----------------------------
867 struct S { int a; int b; }
868 S x = { b:3, a:5 };
869 ----------------------------
870
871        The meaning is clear, and there no longer is a positional dependence.
872
873 <hr><!-- ============================================ -->
874 <h3><a name="arrayinit2">Array Initializations</a></h3>
875
876 <h4>The C Way</h4>
877
878        C initializes arrays by positional dependence:
879 $(CCODE
880 int a[3] = { 3,2,2 };
881 )
882        Nested arrays may or may not have the { }:
883 $(CCODE
884 int b[3][2] = { 2,3, {6,5}, 3,4 };
885 )
886
887 <h4>The D Way</h4>
888
889        D does it by positional dependence too, but an index can be used as well.
890        The following all produce the same result:
891
892 ----------------------------
893 int[3] a = [ 3, 2, 0 ];
894 int[3] a = [ 3, 2 ];            // unsupplied initializers are 0, just like in C
895 int[3] a = [ 2:0, 0:3, 1:2 ];
896 int[3] a = [ 2:0, 0:3, 2 ];     // if not supplied, the index is the
897                 // previous one plus one.
898 ----------------------------
899        This can be handy if the array will be indexed by an enum, and the order of
900        enums may be changed or added to:
901
902 ----------------------------
903 enum color { black, red, green }
904 int[3] c = [ black:3, green:2, red:5 ];
905 ----------------------------
906        Nested array initializations must be explicit:
907 ----------------------------
908 int[2][3] b = [ [2,3], [6,5], [3,4] ];
909
910 int[2][3] b = [[2,6,3],[3,5,4]];            // error
911 ----------------------------
912
913 <hr><!-- ============================================ -->
914 <h3><a name="stringlit">Escaped String Literals</a></h3>
915
916 <h4>The C Way</h4>
917
918        C has problems with the DOS file system because a \ is an escape in a string. To specify the file c:\root\file.c:
919 $(CCODE
920 char file[] = "c:\\root\\file.c";
921 )
922 This gets even more unpleasant with regular expressions.
923 Consider the escape sequence to match a quoted string:
924 $(CCODE
925 /"[^\\]*(\\.[^\\]*)*"/
926 )
927 <P>In C, this horror is expressed as:
928 $(CCODE
929 char quoteString[] = "\"[^\\\\]*(\\\\.[^\\\\]*)*\"";
930 )
931 <h4>The D Way</h4>
932
933     Within strings, it is WYSIWYG (what you see is what you get).
934     Escapes are in separate strings. So:
935
936 ----------------------------
937 char[] file = `c:\root\file.c`;
938 char[] quoteString = \"  r"[^\\]*(\\.[^\\]*)*"  \";
939 ----------------------------
940
941        The famous hello world string becomes:
942 ----------------------------
943 char[] hello = "hello world" \n;
944 ----------------------------
945
946 <hr><!-- ============================================ -->
947 <h3><a name="ascii">Ascii vs Wide Characters</a></h3>
948
949 <P>Modern programming requires that wchar strings be supported in an easy way, for internationalization of the programs.
950
951 <h4>The C Way</h4>
952
953        C uses the wchar_t and the L prefix on strings:
954 $(CCODE
955 #include &lt;wchar.h&gt;
956 char foo_ascii[] = "hello";
957 wchar_t foo_wchar[] = L"hello";
958 )
959 Things get worse if code is written to be both ascii and wchar compatible.
960 A macro is used to switch strings from ascii to wchar:
961 $(CCODE
962 #include &lt;tchar.h&gt;
963 tchar string[] = TEXT("hello");
964 )
965 <h4>The D Way</h4>
966
967 The type of a string is determined by semantic analysis, so there is no need to wrap strings in a macro call:
968 -----------------------------
969 char[] foo_ascii = "hello";        // string is taken to be ascii
970 wchar[] foo_wchar = "hello";       // string is taken to be wchar
971 -----------------------------
972
973 <hr><!-- ============================================ -->
974 <h3><a name="arrayenum">Arrays that parallel an enum</a></h3>
975
976 <h4>The C Way</h4>
977
978        Consider:
979 $(CCODE
980 enum COLORS { red, blue, green, max };
981 char *cstring[max] = {"red", "blue", "green" };
982 )
983        This is fairly easy to get right because the number of entries is small. But suppose it gets to be fairly large. Then it can get difficult to maintain correctly when new entries are added.
984
985 <h4>The D Way</h4>
986 -----------------------------
987 enum COLORS { red, blue, green }
988
989 char[][COLORS.max + 1] cstring =
990 [
991     COLORS.red   : "red",
992     COLORS.blue  : "blue",
993     COLORS.green : "green",
994 ];
995 -----------------------------
996
997 Not perfect, but better.
998
999 <hr><!-- ============================================ -->
1000 <h3><a name="typedefs">Creating a new type with typedef</a></h3>
1001
1002 <h4>The C Way</h4>
1003
1004     Typedefs in C are weak, that is, they really do not introduce
1005     a new type. The compiler doesn't distinguish between a typedef
1006     and its underlying type.
1007
1008 $(CCODE
1009 typedef void *Handle;
1010 void foo(void *);
1011 void bar(Handle);
1012
1013 Handle h;
1014 foo(h);         // coding bug not caught
1015 bar(h);         // ok
1016 )
1017
1018     The C solution is to create a dummy struct whose sole
1019     purpose is to get type checking and overloading on the new type.
1020
1021 $(CCODE
1022 struct Handle__ { void *value; };
1023 typedef struct Handle__ *Handle;
1024 void foo(void *);
1025 void bar(Handle);
1026
1027 Handle h;
1028 foo(h);         // syntax error
1029 bar(h);         // ok
1030 )
1031
1032     Having a default value for the type involves defining a macro,
1033     a naming convention, and then pedantically following that convention:
1034
1035 $(CCODE
1036 #define HANDLE_INIT ((Handle)-1)
1037
1038 Handle h = HANDLE_INIT;
1039 h = func();
1040 if (h != HANDLE_INIT)
1041     ...
1042 )
1043
1044     For the struct solution, things get even more complex:
1045
1046 $(CCODE
1047 struct Handle__ HANDLE_INIT;
1048
1049 void init_handle()  // call this function upon startup
1050 {
1051     HANDLE_INIT.value = (void *)-1;
1052 }
1053
1054 Handle h = HANDLE_INIT;
1055 h = func();
1056 if (memcmp(&h,&HANDLE_INIT,sizeof(Handle)) != 0)
1057     ...
1058 )
1059
1060     There are 4 names to remember: $(TT Handle, HANDLE_INIT,
1061     struct Handle__, value).
1062
1063 <h4>The D Way</h4>
1064
1065     No need for idiomatic constructions like the above. Just write:
1066
1067 -----------------------------
1068 typedef void* Handle;
1069 void foo(void*);
1070 void bar(Handle);
1071
1072 Handle h;
1073 foo(h);
1074 bar(h);
1075 -----------------------------
1076
1077     To handle a default value, add an initializer to the typedef,
1078     and refer to it with the $(TT .init) property:
1079
1080 -----------------------------
1081 typedef void* Handle = cast(void*)(-1);
1082 Handle h;
1083 h = func();
1084 if (h != Handle.init)
1085     ...
1086 -----------------------------
1087
1088     There's only one name to remember: $(TT Handle).
1089
1090 <hr><!-- ============================================ -->
1091 <h3><a name="structcmp">Comparing structs</a></h3>
1092
1093 <h4>The C Way</h4>
1094
1095     While C defines struct assignment in a simple, convenient manner:
1096
1097 $(CCODE
1098 struct A x, y;
1099 ...
1100 x = y;
1101 )
1102
1103     it does not for struct comparisons. Hence, to compare two struct
1104     instances for equality:
1105
1106 $(CCODE
1107 #include &lt;string.h&gt;
1108
1109 struct A x, y;
1110 ...
1111 if (memcmp(&x, &y, sizeof(struct A)) == 0)
1112     ...
1113 )
1114
1115     Note the obtuseness of this, coupled with the lack of any kind
1116     of help from the language with type checking.
1117     <p>
1118
1119     There's a nasty bug lurking in the memcmp().
1120     The layout of a struct, due to alignment, can have 'holes' in it.
1121     C does not guarantee those holes are assigned any values, and so
1122     two different struct instances can have the same value for each member,
1123     but compare different because the holes contain different garbage.
1124
1125 <h4>The D Way</h4>
1126
1127     D does it the obvious, straightforward way:
1128
1129 -----------------------------
1130 A x, y;
1131 ...
1132 if (x == y)
1133     ...
1134 -----------------------------
1135
1136
1137 <hr><!-- ============================================ -->
1138 <h3><a name="stringcmp">Comparing strings</a></h3>
1139
1140 <h4>The C Way</h4>
1141
1142     The library function strcmp() is used:
1143 $(CCODE
1144 char string[] = "hello";
1145
1146 if (strcmp(string, "betty") == 0)   // do strings match?
1147     ...
1148 )
1149
1150     C uses 0-terminated strings, so the C way has an inherent
1151     inefficiency in constantly scanning for the terminating 0.
1152
1153 <h4>The D Way</h4>
1154
1155     Why not use the == operator?
1156
1157 -----------------------------
1158 char[] string = "hello";
1159
1160 if (string == "betty")
1161     ...
1162 -----------------------------
1163
1164     D strings have the length stored separately from the string.
1165     Thus, the implementation of string compares can be much faster
1166     than in C (the difference being equivalent to the difference
1167     in speed between the C memcmp() and strcmp()).
1168     <p>
1169
1170     D supports comparison operators on strings, too:
1171
1172 -----------------------------
1173 char[] string = "hello";
1174
1175 if (string < "betty")
1176     ...
1177 -----------------------------
1178
1179     which is useful for sorting/searching.
1180
1181 <hr><!-- ============================================ -->
1182 <h3><a name="sort">Sorting arrays</a></h3>
1183
1184 <h4>The C Way</h4>
1185
1186     Although many C programmers tend to reimplement bubble sorts
1187     over and over, the right way to sort in C is to use qsort():
1188
1189 $(CCODE
1190 int compare(const void *p1, const void *p2)
1191 {
1192     type *t1 = (type *)p1;
1193     type *t2 = (type *)p2;
1194
1195     return *t1 - *t2;
1196 }
1197
1198 type array[10];
1199 ...
1200 qsort(array, sizeof(array)/sizeof(array[0]),
1201     sizeof(array[0]), compare);
1202 )
1203
1204     A compare() must be written for each type, and much careful
1205     typo-prone code needs to be written to make it work.
1206
1207
1208 <h4>The D Way</h4>
1209
1210     Sorting couldn't be easier:
1211
1212 -----------------------------
1213 type[] array;
1214 ...
1215 array.sort;      // sort array in-place
1216 -----------------------------
1217
1218 <hr><!-- ============================================ -->
1219 <h3><a name="volatile">Volatile memory access</a></h3>
1220
1221 <h4>The C Way</h4>
1222
1223     To access volatile memory, such as shared memory
1224     or memory mapped I/O, a pointer to volatile is created:
1225 $(CCODE
1226 volatile int *p = address;
1227
1228 i = *p;
1229 )
1230
1231 <h4>The D Way</h4>
1232
1233     D has volatile as a statement type, not as a type modifier:
1234
1235 -----------------------------
1236 int* p = address;
1237
1238 volatile { i = *p; }
1239 -----------------------------
1240
1241 <hr><!-- ============================================ -->
1242 <h3><a name="strings">String literals</a></h3>
1243
1244 <h4>The C Way</h4>
1245
1246     String literals in C cannot span multiple lines, so to have
1247     a block of text it is necessary to use \ line splicing:
1248
1249 $(CCODE
1250 "This text spans\n\
1251 multiple\n\
1252 lines\n"
1253 )
1254
1255     If there is a lot of text, this can wind up being tedious.
1256
1257 <h4>The D Way</h4>
1258
1259     String literals can span multiple lines, as in:
1260
1261 -----------------------------
1262 "This text spans
1263 multiple
1264 lines
1265 "
1266 -----------------------------
1267
1268     So blocks of text can just be cut and pasted into the D
1269     source.
1270
1271 <hr><!-- ============================================ -->
1272 <h3><a name="traversal">Data Structure Traversal</a></h3>
1273
1274 <h4>The C Way</h4>
1275
1276     Consider a function to traverse a recursive data structure.
1277     In this example, there's a simple symbol table of strings.
1278     The data structure is an array of binary trees.
1279     The code needs to do an exhaustive search of it to find
1280     a particular string in it, and determine if it is a unique
1281     instance.
1282     <p>
1283
1284     To make this work, a helper function $(TT membersearchx)
1285     is needed to recursively
1286     walk the trees. The helper function needs to read and write
1287     some context outside of the trees, so a custom $(TT struct Paramblock)
1288     is created and a pointer to it is used to maximize efficiency.
1289
1290 $(CCODE
1291 struct Symbol
1292 {
1293    char *id;
1294    struct Symbol *left;
1295    struct Symbol *right;
1296 };
1297
1298 struct Paramblock
1299 {
1300    char *id;
1301    struct Symbol *sm;
1302 };
1303
1304 static void membersearchx(struct Paramblock *p, struct Symbol *s)
1305 {
1306    while (s)
1307    {
1308       if (strcmp(p->id,s->id) == 0)
1309       {
1310          if (p->sm)
1311             error("ambiguous member %s\n",p->id);
1312          p->sm = s;
1313       }
1314
1315       if (s->left)
1316          membersearchx(p,s->left);
1317       s = s->right;
1318    }
1319 }
1320
1321 struct Symbol *symbol_membersearch(struct Symbol *table[], int tablemax, char *id)
1322 {
1323    struct Paramblock pb;
1324    int i;
1325
1326    pb.id = id;
1327    pb.sm = NULL;
1328    for (i = 0; i < tablemax; i++)
1329    {
1330       membersearchx(&pb, table[i]);
1331    }
1332    return pb.sm;
1333 }
1334 )
1335
1336 <h4>The D Way</h4>
1337
1338     This is the same algorithm in D, and it shrinks dramatically.
1339     Since nested functions have access to the lexically enclosing
1340     function's variables, there's no need for a Paramblock or
1341     to deal with its bookkeeping details. The nested helper function
1342     is contained wholly within the function that needs it,
1343     improving locality and maintainability.
1344     <p>
1345
1346     The performance of the two versions is indistinguishable.
1347
1348 -----------------------------
1349 class Symbol
1350 {   char[] id;
1351     Symbol left;
1352     Symbol right;
1353 }
1354
1355 Symbol symbol_membersearch(Symbol[] table, char[] id)
1356 {   Symbol sm;
1357
1358     void membersearchx(Symbol s)
1359     {
1360     while (s)
1361     {
1362         if (id == s.id)
1363         {
1364         if (sm)
1365             error("ambiguous member %s\n", id);
1366         sm = s;
1367         }
1368
1369         if (s.left)
1370         membersearchx(s.left);
1371         s = s.right;
1372     }
1373     }
1374
1375     for (int i = 0; i < table.length; i++)
1376     {
1377     membersearchx(table[i]);
1378     }
1379     return sm;
1380 }
1381 -----------------------------
1382
1383 <hr><!-- ============================================ -->
1384 <h3><a name="ushr">Unsigned Right Shift</a></h3>
1385
1386 <h4>The C Way</h4>
1387
1388     The right shift operators &gt;&gt; and &gt;&gt;= are signed
1389     shifts if the left operand is a signed integral type, and
1390     are unsigned right shifts if the left operand is an unsigned
1391     integral type. To produce an unsigned right shift on an int,
1392     a cast is necessary:
1393
1394 $(CCODE
1395 int i, j;
1396 ...
1397 j = (unsigned)i >> 3;
1398 )
1399
1400     If $(TT i) is an $(TT int), this works fine. But if $(TT i) is
1401     of a type created with typedef,
1402
1403 $(CCODE
1404 myint i, j;
1405 ...
1406 j = (unsigned)i >> 3;
1407 )
1408
1409     and $(TT myint) happens to be a $(TT long int), then the cast to
1410     unsigned
1411     will silently throw away the most significant bits, corrupting
1412     the answer.
1413
1414 <h4>The D Way</h4>
1415
1416     D has the right shift operators &gt;&gt; and &gt;&gt;= which
1417     behave as they do in C. But D also has explicitly unsigned
1418     right shift operators &gt;&gt;&gt; and &gt;&gt;&gt;= which will
1419     do an unsigned right shift regardless of the sign of the left
1420     operand. Hence,
1421
1422 -----------------------------
1423 myint i, j;
1424 ...
1425 j = i >>> 3;
1426 -----------------------------
1427
1428     avoids the unsafe cast and will work as expected with any integral
1429     type.
1430
1431 <hr><!-- ============================================ -->
1432 <h3><a name="closures">Dynamic Closures</a></h3>
1433
1434 <h4>The C Way</h4>
1435
1436     Consider a reusable container type. In order to be reusable,
1437     it must support a way to apply arbitrary code to each element
1438     of the container. This is done by creating an $(I apply) function
1439     that accepts a function pointer to which is passed each
1440     element of the container contents.
1441     <p>
1442
1443     A generic context pointer is also needed, represented here by
1444     $(TT void *p). The example here is of a trivial container
1445     class that holds an array of ints, and a user of that container
1446     that computes the maximum of those ints.
1447
1448 $(CCODE
1449 void apply(void *p, int *array, int dim, void (*fp)(void *, int))
1450 {
1451     for (int i = 0; i < dim; i++)
1452     fp(p, array[i]);
1453 }
1454
1455 struct Collection
1456 {
1457     int array[10];
1458 };
1459
1460 void comp_max(void *p, int i)
1461 {
1462     int *pmax = (int *)p;
1463
1464     if (i > *pmax)
1465     *pmax = i;
1466 }
1467
1468 void func(struct Collection *c)
1469 {
1470     int max = INT_MIN;
1471
1472     apply(&amp;max, c->array, sizeof(c->array)/sizeof(c->array[0]), comp_max);
1473 }
1474 )
1475
1476     $(P While this works, it isn't very flexible.)
1477
1478 <h4>The D Way</h4>
1479
1480     The D version makes use of $(I delegates) to transmit
1481     context information for the $(I apply) function,
1482     and $(I nested functions) both to capture context
1483     information and to improve locality.
1484
1485 ----------------------------
1486 class Collection
1487 {
1488     int[10] array;
1489
1490     void apply(void delegate(int) fp)
1491     {
1492     for (int i = 0; i < array.length; i++)
1493         fp(array[i]);
1494     }
1495 }
1496
1497 void func(Collection c)
1498 {
1499     int max = int.min;
1500
1501     void comp_max(int i)
1502     {
1503     if (i > max)
1504         max = i;
1505     }
1506
1507     c.apply(comp_max);
1508 }
1509 -----------------------------
1510
1511     Pointers are eliminated, as well as casting and generic
1512     pointers. The D version is fully type safe.
1513     An alternate method in D makes use of $(I function literals):
1514
1515 -----------------------------
1516 void func(Collection c)
1517 {
1518     int max = int.min;
1519
1520     c.apply(delegate(int i) { if (i > max) max = i; } );
1521 }
1522 -----------------------------
1523
1524     eliminating the need to create irrelevant function names.
1525
1526 <hr><!-- ============================================ -->
1527 <h3><a name="variadic">Variadic Function Parameters</a></h3>
1528
1529     The task is to write a function that takes a varying
1530     number of arguments, such as a function that sums
1531     its arguments.
1532
1533 <h4>The C Way</h4>
1534
1535 $(CCODE
1536 #include &lt;stdio.h&gt;
1537 #include &lt;stdarg.h&gt;
1538
1539 int $(B sum)(int dim, ...)
1540 {   int i;
1541     int s = 0;
1542     va_list ap;
1543
1544     va_start(ap, dim);
1545     for (i = 0; i &lt; dim; i++)
1546     s += va_arg(ap, int);
1547     va_end(ap);
1548     return s;
1549 }
1550
1551 int main()
1552 {
1553     int i;
1554
1555     i = $(B sum)(3, 8,7,6);
1556     printf("sum = %d\n", i);
1557
1558     return 0;
1559 }
1560 )
1561
1562     There are two problems with this. The first is that the
1563     $(TT sum) function needs to know how many arguments were
1564     supplied. It has to be explicitly written, and it can get
1565     out of sync with respect to the actual number of arguments
1566     written.
1567     The second is that there's no way to check that the
1568     types of the arguments provided really were ints, and not
1569     doubles, strings, structs, etc.
1570
1571 <h4>The D Way</h4>
1572
1573     The ... following an array parameter declaration means that
1574     the trailing arguments are collected together to form
1575     an array. The arguments are type checked against the array
1576     type, and the number of arguments becomes a property
1577     of the array:
1578
1579 -----------------------------
1580 import std.stdio;
1581
1582 int $(B sum)(int[] values ...)
1583 {
1584     int s = 0;
1585
1586     foreach (int x; values)
1587     s += x;
1588     return s;
1589 }
1590
1591 int main()
1592 {
1593     int i;
1594
1595     i = $(B sum)(8,7,6);
1596     writefln("sum = %d", i);
1597
1598     return 0;
1599 }
1600 -----------------------------
1601 )
1602
1603 Macros:
1604     TITLE=Programming in D for C Programmers
1605     WIKI=ctod
Note: See TracBrowser for help on using the browser.