root/trunk/docsrc/pretod.dd

Revision 2040, 14.6 kB (checked in by walter, 2 years ago)

typography

  • Property svn:eol-style set to native
Line 
1 Ddoc
2
3 $(COMMUNITY The C Preprocessor Versus D,
4
5     $(P Back when C was invented, compiler technology was primitive.
6     Installing a text
7     macro preprocessor onto the front end was a straightforward
8     and easy way to add many
9     powerful features. The increasing size and complexity of programs
10     have illustrated
11     that these features come with many inherent problems.
12     D doesn't have a preprocessor, but
13     D provides a more scalable means to solve the same problems.
14     )
15
16 $(UL
17     $(LI <a href="#headerfiles">Header Files</a>)
18     $(LI <a href="#pragmaonce">#pragma once</a>)
19     $(LI <a href="#pragmapack">#pragma pack</a>)
20     $(LI <a href="#macros">Macros</a>)
21     $(LI <a href="#conditionalcompilation">Conditional Compilation</a>)
22     $(LI <a href="#codefactoring">Code Factoring</a>)
23     $(LI <a href="#staticassert">#error and Static Asserts</a>)
24     $(LI <a href="#mixins">Template Mixins</a>)
25 )
26
27 <hr><!-- -------------------------------------------- -->
28 $(SECTION3 <a name="headerfiles">Header Files</a>,
29
30 $(CWAY
31
32     $(P C and C++ rely heavily on textual inclusion of header files.
33     This frequently results in the compiler having to recompile tens of thousands
34     of lines of code over and over again for every source file, an obvious
35     source of slow compile times. What header files are normally used for is
36     more appropriately done by doing a symbolic, rather than textual, insertion.
37     This is done with the import statement. Symbolic inclusion means the compiler
38     just loads an already compiled symbol table. The macro "wrappers" used to
39     prevent multiple #inclusion, funky #pragma once syntax, and incomprehensible
40     fragile syntax for precompiled headers are simply unnecessary and irrelevant to
41     D.
42     )
43
44 $(CCODE
45 #include &lt;stdio.h&gt;
46 )
47 )
48
49 $(DWAY
50
51     $(P D uses symbolic imports:)
52
53 ---------
54 import std.c.stdio;
55 ---------
56 )
57 )
58
59 <hr><!-- -------------------------------------------- -->
60 <h3><a name="pragmaonce">#pragma once</a></h3>
61
62 $(CWAY
63
64     $(P C header files frequently need to be protected against
65     being #include'd multiple times.
66     To do it, a header file will contain the line:
67     )
68
69 $(CCODE
70 #pragma once
71 )
72
73     $(P or the more portable:)
74
75 $(CCODE
76 #ifndef __STDIO_INCLUDE
77 #define __STDIO_INCLUDE
78 ... header file contents
79 #endif
80 )
81 )
82
83 $(DWAY
84     $(P Completely unnecessary since D does a symbolic include of import
85     files; they only get imported once no matter how many times
86     the import declaration appears.
87     )
88 )
89
90 <hr><!-- -------------------------------------------- -->
91 <h3><a name="pragmapack">#pragma pack</a></h3>
92
93 $(CWAY
94     $(P This is used in C to adjust the alignment for structs.)
95 )
96
97 $(DWAY
98     $(P For D classes, there is no need to adjust the alignment (in fact, the
99     compiler is free to rearrange the data fields to get the optimum layout,
100     much as the compiler will rearrange local variables on the stack frame).
101     For D structs that get mapped onto externally defined data structures,
102     there is a need, and it is handled with:
103     )
104
105 ---------
106 struct Foo
107 {
108     align (4):  // use 4 byte alignment
109     ...
110 }
111 ---------
112 )
113
114 <hr><!-- -------------------------------------------- -->
115 <h3><a name="macros">Macros</a></h3>
116
117     $(P Preprocessor macros add powerful features and flexibility to C. But
118     they have a downside:
119     )
120
121 $(UL
122     $(LI Macros have no concept of scope; they are valid from the point of definition
123     to the end of the source. They cut a swath across .h files, nested code, etc. When
124     #include'ing tens of thousands of lines of macro definitions, it becomes
125     problematical to avoid inadvertent macro expansions.
126     )
127
128     $(LI Macros are unknown to the debugger. Trying to debug a program with
129     symbolic data is undermined by the debugger only knowing about macro
130     expansions, not the macros themselves.
131     )
132
133     $(LI Macros make it impossible to tokenize source code, as an earlier macro change
134     can arbitrarily redo tokens.
135     )
136
137     $(LI The purely textual basis of macros leads to arbitrary and inconsistent usage,
138     making code using macros error prone. (Some attempt to resolve this was
139     introduced with templates in C++.)
140     )
141
142     $(LI Macros are still used to make up for deficits in the language's expressive
143     capability, such as for "wrappers" around header files.
144     )
145 )
146
147
148     $(P Here's an enumeration of the common uses for macros, and the
149     corresponding feature in D:
150     )
151
152 $(OL
153     $(LI Defining literal constants:
154
155     $(CWAY
156
157 $(CCODE
158 #define VALUE   5
159 )
160     )
161
162     $(DWAY
163
164 ---------
165 const int VALUE = 5;
166 ---------
167     )
168     )
169
170     $(LI Creating a list of values or flags:
171
172     $(CWAY
173
174 $(CCODE
175 int flags;
176 #define FLAG_X  0x1
177 #define FLAG_Y  0x2
178 #define FLAG_Z  0x4
179 ...
180 flags |= FLAG_X;
181 )
182     )
183
184     $(DWAY
185
186 ---------
187 enum FLAGS { X = 0x1, Y = 0x2, Z = 0x4 };
188 FLAGS flags;
189 ...
190 flags |= FLAGS.X;
191 ---------
192     )
193     )
194
195     $(LI Distinguishing between ascii chars and wchar chars:
196
197     $(CWAY
198
199 $(CCODE
200 #if UNICODE
201     #define dchar   wchar_t
202     #define TEXT(s) L##s
203 #else
204     #define dchar   char
205     #define TEXT(s) s
206 #endif
207
208 ...
209 dchar h[] = TEXT("hello");
210 )
211     )
212
213     $(DWAY
214
215 ---------
216 dchar[] h = "hello";
217 ---------
218
219
220     No helper macro or function is needed; the D compiler will do the conversion of the
221     string constant at compile time.
222     <p>
223     )
224     )
225
226     $(LI Supporting legacy compilers:
227
228     $(CWAY
229
230 $(CCODE
231 #if PROTOTYPES
232 #define P(p)    p
233 #else
234 #define P(p)    ()
235 #endif
236 int func P((int x, int y));
237 )
238     )
239
240     $(DWAY
241     Making the D compiler open source will largely
242     avoid the problem of syntactical backwards compatibility.
243     )
244     )
245
246     $(LI Type aliasing:
247
248     $(CWAY
249
250 $(CCODE
251 #define INT     int
252 )
253     )
254
255     $(DWAY
256
257 ---------
258 alias int INT;
259 ---------
260     )
261     )
262
263     $(LI Using one header file for both declaration and definition:
264
265     $(CWAY
266
267 $(CCODE
268 #define EXTERN extern
269 #include "declarations.h"
270 #undef EXTERN
271 #define EXTERN
272 #include "declarations.h"
273 )
274
275     In declarations.h:
276
277 $(CCODE
278 EXTERN int foo;
279 )
280     )
281
282     $(DWAY
283
284     The declaration and the definition are the same, so there is no need
285     to muck with the storage class to generate both a declaration and a definition
286     from the same source.
287     )
288     )
289
290     $(LI Lightweight inline functions:
291
292     $(CWAY
293
294 $(CCODE
295 #define X(i)    ((i) = (i) / 3)
296 )
297     )
298
299     $(DWAY
300
301 ---------
302 int X(ref int i) { return i = i / 3; }
303 ---------
304
305     The compiler optimizer will inline it; no efficiency is lost.
306     )
307     )
308
309     $(LI Assert function file and line number information:
310
311     $(CWAY
312
313 $(CCODE
314 #define assert(e)   ((e) || _assert(__LINE__, __FILE__))
315 )
316     )
317
318     $(DWAY
319
320     assert() is a built-in expression primitive. Giving the compiler
321     such knowledge of assert() also enables the optimizer to know about things
322     like the _assert() function never returns.
323     )
324     )
325
326     $(LI Setting function calling conventions:
327
328     $(CWAY
329
330 $(CCODE
331 #ifndef _CRTAPI1
332 #define _CRTAPI1 __cdecl
333 #endif
334 #ifndef _CRTAPI2
335 #define _CRTAPI2 __cdecl
336 #endif
337
338 int _CRTAPI2 func();
339 )
340     )
341
342     $(DWAY
343
344     Calling conventions can be specified in blocks, so there's no
345     need to change it for every function:
346
347 ---------
348 extern (Windows)
349 {
350     int onefunc();
351     int anotherfunc();
352 }
353 ---------
354     )
355     )
356
357     $(LI Hiding __near or __far pointer weirdness:
358
359     $(CWAY
360
361 $(CCODE
362 #define LPSTR   char FAR *
363 )
364     )
365
366     $(DWAY
367
368     D doesn't support 16 bit code, mixed pointer sizes, or different
369     kinds of pointers, and so the problem is just
370     irrelevant.
371     )
372     )
373
374     $(LI Simple generic programming:
375
376     $(CWAY
377
378     Selecting which function to use based on text substitution:
379
380 $(CCODE
381 #ifdef UNICODE
382 int getValueW(wchar_t *p);
383 #define getValue getValueW
384 #else
385 int getValueA(char *p);
386 #define getValue getValueA
387 #endif
388 )
389     )
390
391     $(DWAY
392
393     D enables declarations of symbols that are $(I aliases) of
394     other symbols:
395
396 ---------
397 version (UNICODE)
398 {
399     int getValueW(wchar[] p);
400     alias getValueW getValue;
401 }
402 else
403 {
404     int getValueA(char[] p);
405     alias getValueA getValue;
406 }
407 ---------
408     )
409     )
410
411 )
412
413 <hr><!-- -------------------------------------------- -->
414 <h3><a name="conditionalcompilation">Conditional Compilation</a></h3>
415
416
417 $(CWAY
418
419     $(P Conditional compilation is a powerful feature of the C preprocessor,
420     but it has its downside:)
421
422     $(UL
423     $(LI The preprocessor has no concept of scope. #if/#endif can be
424     interleaved with code in a completely unstructured and disorganized
425     fashion, making things difficult to follow.
426     )
427
428     $(LI Conditional compilation triggers off of macros - macros that
429     can conflict with identifiers used in the program.
430     )
431
432     $(LI #if expressions are evaluated in subtly different ways than
433     C expressions are.
434     )
435
436     $(LI The preprocessor language is fundamentally different in concept
437     than C; for example, whitespace and line terminators mean things to
438     the preprocessor that they do not in C.
439     )
440     )
441 )
442
443 $(DWAY
444
445     $(P D supports conditional compilation:)
446
447     $(OL
448     $(LI Separating version specific functionality into separate modules.
449     )
450
451     $(LI The debug statement for enabling/disabling debug harnesses,
452     extra printing, etc.
453     )
454
455     $(LI The version statement for dealing with multiple versions
456     of the program generated from a single set of sources.
457     )
458
459     $(LI The if (0) statement.
460     )
461
462     $(LI The /+ +/ nesting comment can be used to comment out blocks
463     of code.
464     )
465     )
466 )
467
468 <hr><!-- -------------------------------------------- -->
469 <h3><a name="codefactoring">Code Factoring</a></h3>
470
471 $(CWAY
472
473     $(P It's common in a function to have a repetitive sequence
474     of code to be executed in multiple places. Performance
475     considerations preclude factoring it out into a separate
476     function, so it is implemented as a macro. For example,
477     consider this fragment from a byte code interpreter:
478     )
479
480 $(CCODE
481 unsigned char *ip;  // byte code instruction pointer
482 int *stack;
483 int spi;        // stack pointer
484 ...
485 #define pop()       (stack[--spi])
486 #define push(i)     (stack[spi++] = (i))
487 while (1)
488 {
489     switch (*ip++)
490     {
491     case ADD:
492         op1 = pop();
493         op2 = pop();
494         result = op1 + op2;
495         push(result);
496         break;
497
498     case SUB:
499     ...
500     }
501 }
502 )
503
504     $(P This suffers from numerous problems:
505     )
506
507     $(OL
508     $(LI The macros must evaluate to expressions and cannot declare
509     any variables. Consider the difficulty of extending them to
510     check for stack overflow/underflow.
511     )
512     $(LI The macros exist outside of the semantic symbol table, so
513     remain in scope even outside of the function they are declared in.
514     )
515     $(LI Parameters to macros are passed textually, not by value,
516     meaning that the macro implementation needs to be careful to not
517     use the parameter more than once, and must protect it with ().
518     )
519     $(LI Macros are invisible to the debugger, which sees only the
520     expanded expressions.
521     )
522     )
523 )
524
525 $(DWAY
526
527     $(P D neatly addresses this with nested functions:)
528
529 ---------
530 ubyte* ip;      // byte code instruction pointer
531 int[] stack;        // operand stack
532 int spi;        // stack pointer
533 ...
534
535 int pop()        { return stack[--spi]; }
536 void push(int i) { stack[spi++] = i; }
537
538 while (1)
539 {
540     switch (*ip++)
541     {
542     case ADD:
543         op1 = pop();
544         op2 = pop();
545         push(op1 + op2);
546         break;
547
548     case SUB:
549     ...
550     }
551 }
552 ---------
553
554     $(P The problems addressed are:)
555
556     $(OL
557     $(LI The nested functions have available the full expressive
558     power of D functions. The array accesses already are bounds
559     checked (adjustable by compile time switch).
560     )
561     $(LI Nested function names are scoped just like any other name.
562     )
563     $(LI Parameters are passed by value, so there is no need to worry about
564     side effects in the parameter expressions.
565     )
566     $(LI Nested functions are visible to the debugger.
567     )
568     )
569
570     $(P Additionally, nested functions can be inlined by the implementation
571     resulting in the same high performance that the C macro version
572     exhibits.
573     )
574 )
575
576 <hr><!-- -------------------------------------------- -->
577 <h3><a name="staticassert">#error and Static Asserts</a></h3>
578
579     $(P Static asserts are user defined checks made at compile time;
580     if the check fails the compiler issues an error and the compile fails.
581     )
582
583 $(CWAY
584
585     $(P The first way is to use the $(TT #error) preprocessing directive:
586     )
587
588 $(CCODE
589 #if FOO || BAR
590     ... code to compile ...
591 #else
592 #error "there must be either FOO or BAR"
593 #endif
594 )
595
596     $(P This has the limitations inherent in preprocessor expressions
597     (i.e. integer constant expressions only, no casts, no $(TT sizeof),
598     no symbolic constants, etc.).
599     )
600
601     $(P These problems can be circumvented to some extent by defining a
602     $(TT static_assert) macro (thanks to M. Wilson):
603     )
604
605 $(CCODE
606 #define static_assert(_x) do { typedef int ai[(_x) ? 1 : 0]; } while(0)
607 )
608
609     $(P and using it like:)
610
611 $(CCODE
612 void foo(T t)
613 {
614     static_assert(sizeof(T) &lt; 4);
615     ...
616 }
617 )
618
619     $(P This works by causing a compile time semantic error if the condition
620     evaluates
621     to false. The limitations of this technique are a sometimes very
622     confusing error message from the compiler, along with an inability
623     to use a $(TT static_assert) outside of a function body.
624     )
625 )
626
627 $(DWAY
628
629     $(P D has the <a href="version.html#staticassert">static assert</a>,
630     which can be used anywhere a declaration
631     or a statement can be used. For example:
632     )
633
634 ---------
635 version (FOO)
636 {
637     class Bar
638     {
639     const int x = 5;
640     static assert(Bar.x == 5 || Bar.x == 6);
641
642     void foo(T t)
643     {
644         static assert(T.sizeof < 4);
645         ...
646     }
647     }
648 }
649 else version (BAR)
650 {
651     ...
652 }
653 else
654 {
655     static assert(0);   // unsupported version
656 }
657 ---------
658 )
659
660 <hr><!-- -------------------------------------------- -->
661 <h3><a name="mixins">Template Mixins</a></h3>
662
663     $(P D $(LINK2 template-mixin.html, template mixins)
664     superficially look just
665     like using C's preprocessor to insert blocks of code and
666     parse them in the scope of where they are instantiated.
667     But the advantages of mixins over macros are:
668     )
669
670     $(OL
671     $(LI Mixins substitute in parsed declaration trees that pass muster with
672     the language syntax, macros substitute in arbitrary preprocessor tokens
673     that have no organization.
674     )
675
676     $(LI Mixins are in the same language. Macros are a separate and
677     distinct language layered on top of C++, with its own expression rules,
678     its own types, its distinct symbol table, its own scoping rules, etc.
679     )
680
681     $(LI Mixins are selected based on partial specialization rules, macros
682     have no overloading.
683     )
684
685     $(LI Mixins create a scope, macros do not.
686     )
687
688     $(LI Mixins are compatible with syntax parsing tools, macros are not.
689     )
690
691     $(LI Mixin semantic information and symbol tables are passed through to
692     the debugger, macros are lost in translation.
693     )
694
695     $(LI Mixins have override conflict resolution rules, macros just
696     collide.
697     )
698
699     $(LI Mixins automatically create unique identifiers as required using a
700     standard algorithm, macros have to do it manually with kludgy token
701     pasting.
702     )
703
704     $(LI Mixin value arguments with side effects are evaluated once, macro
705     value arguments get evaluated each time they are used in the expansion
706     (leading to weird bugs).
707     )
708
709     $(LI Mixin argument replacements don't need to be $(SINGLEQUOTE protected) with
710     parentheses to avoid operator precedence regrouping.
711     )
712
713     $(LI Mixins can be typed as normal D code of arbitrary length, multiline
714     macros have to be backslash line-spliced, can't use // to end of line
715     comments, etc.
716     )
717
718     $(LI Mixins can define other mixins. Macros cannot create other macros.
719     )
720
721     )
722
723 )
724
725 Macros:
726     TITLE=The C Preprocessor vs D
727     WIKI=PreToD
728     CWAY=$(SECTION4 The C Preprocessor Way, $0)
729     DWAY=$(SECTION4 The D Way, $0)
Note: See TracBrowser for help on using the browser.