root/trunk/docsrc/arrays.dd

Revision 1810, 33.7 kB (checked in by schveiguy, 2 years ago)

bugs 4551 and 4590 array documentation
also added anchors for all the headers

  • Property svn:eol-style set to native
Line 
1 Ddoc
2
3 $(SPEC_S Arrays,
4
5     $(P There are four kinds of arrays:)
6
7     $(TABLE2 Kinds of Arrays,
8     $(TR $(TH Syntax)   $(TH Description))
9     $(TR $(TD $(I type)*)   $(TD $(LINK2 #pointers, Pointers to data)))
10
11     $(TR $(TD $(I type)[$(I integer)])  $(TD $(LINK2 #static-arrays, Static arrays)))
12
13     $(TR $(TD $(I type)[])  $(TD $(LINK2 #dynamic-arrays, Dynamic arrays)))
14
15     $(TR $(TD $(I type)[$(I type)]) $(TD $(LINK2 hash-map.html, Associative arrays)))
16     )
17
18 <h3>$(LNAME2 pointers, Pointers)</h3>
19
20 ---------
21 int* p;
22 ---------
23
24     $(P These are simple pointers to data, analogous to C pointers.
25     Pointers are provided for interfacing with C and for
26     specialized systems work.
27     There
28     is no length associated with a pointer, and so there is no way for the
29     compiler or runtime to do bounds checking, etc., on it.
30     Most conventional uses for pointers can be replaced with
31     dynamic arrays, $(TT out) and $(TT ref) parameters,
32     and reference types.
33     )
34
35 <h3>$(LNAME2 static-arrays, Static Arrays)</h3>
36
37 ---------
38 int[3] s;
39 ---------
40
41     $(P These are analogous to C arrays. Static arrays are distinguished
42     by having a length fixed at compile time.
43     )
44
45     $(P The total size of a static array cannot exceed 16MB.
46     A dynamic array should be used instead for such large arrays.
47     )
48
49     $(P A static array with a dimension of 0 is allowed, but no
50     space is allocated for it. It's useful as the last member
51     of a variable length struct, or as the degenerate case of
52     a template expansion.
53     )
54
55 $(V1
56     $(P Static arrays are value types, but as in C
57     static arrays are passed to functions by reference
58     and cannot be returned from functions.
59     )
60 )
61 $(V2
62     $(P Static arrays are value types. Unlike in C and D version 1,
63     static arrays are passed to functions by value.
64     Static arrays can also be returned by functions.
65     )
66 )
67
68 <h3>$(LNAME2 dynamic-arrays, Dynamic Arrays)</h3>
69
70 ---------
71 int[] a;
72 ---------
73
74     $(P Dynamic arrays consist of a length and a pointer to the array data.
75     Multiple dynamic arrays can share all or parts of the array data.
76     )
77
78 <h2>Array Declarations</h2>
79
80     $(P There are two ways to declare arrays, prefix and postfix.
81     The prefix form is the preferred method, especially for
82     non-trivial types.
83     )
84
85 <h4>Prefix Array Declarations</h4>
86
87     $(P Prefix declarations appear before the identifier being
88     declared and read right to left, so:
89     )
90
91 ---------
92 int[] a;    // dynamic array of ints
93 int[4][3] b;    // array of 3 arrays of 4 ints each
94 int[][5] c; // array of 5 dynamic arrays of ints.
95 int*[]*[3] d;   // array of 3 pointers to dynamic arrays of pointers to ints
96 int[]* e;   // pointer to dynamic array of ints
97 ---------
98
99
100 <h4>Postfix Array Declarations</h4>
101
102     $(P Postfix declarations appear after the identifier being
103     declared and read left to right.
104     Each group lists equivalent declarations:
105     )
106
107 ---------
108 // dynamic array of ints
109 int[] a;
110 int a[];
111
112 // array of 3 arrays of 4 ints each
113 int[4][3] b;
114 int[4] b[3];
115 int b[3][4];
116
117 // array of 5 dynamic arrays of ints.
118 int[][5] c;
119 int[] c[5];
120 int c[5][];
121
122 // array of 3 pointers to dynamic arrays of pointers to ints
123 int*[]*[3] d;
124 int*[]* d[3];
125 int* (*d[3])[];
126
127 // pointer to dynamic array of ints
128 int[]* e;
129 int (*e)[];
130 ---------
131
132     $(P $(B Rationale:) The postfix form matches the way arrays are
133     declared in C and C++, and supporting this form provides an
134     easy migration path for programmers used to it.
135     )
136
137 <h2>$(LNAME2 usage, Usage)</h2>
138
139     $(P There are two broad kinds of operations to do on an array -
140     affecting
141     the handle to the array,
142     and affecting the contents of the array.
143     C only has
144     operators to affect the handle. In D, both are accessible.
145     )
146
147     $(P The handle to an array is specified by naming the array, as
148     in p, s or a:
149     )
150
151 ---------
152 int* p;
153 int[3] s;
154 int[] a;
155
156 int* q;
157 int[3] t;
158 int[] b;
159
160 p = q;      // p points to the same thing q does.
161 p = s.ptr;  // p points to the first element of the array s.
162 p = a.ptr;  // p points to the first element of the array a.
163
164 s = ...;    // error, since s is a compiled in static
165         // reference to an array.
166
167 a = p;      // error, since the length of the array pointed
168         // to by p is unknown
169 a = s;      // a is initialized to point to the s array
170 a = b;      // a points to the same array as b does
171 ---------
172
173 <h2>$(LNAME2 slicing, Slicing)</h2>
174
175     $(P $(I Slicing) an array means to specify a subarray of it.
176     An array slice does not copy the data; it is only another
177     reference to it.
178     For example:
179     )
180
181 ---------
182 int[10] a;  // declare array of 10 ints
183 int[] b;
184
185 b = a[1..3];    // a[1..3] is a 2 element array consisting of
186         // a[1] and a[2]
187 foo(b[1]);  // equivalent to foo(0)
188 a[2] = 3;
189 foo(b[1]);  // equivalent to foo(3)
190 ---------
191
192     $(P The [] is shorthand for a slice of the entire array.
193     For example, the assignments to b:
194     )
195
196 ---------
197 int[10] a;
198 int[] b;
199
200 b = a;
201 b = a[];
202 b = a[0 .. a.length];
203 ---------
204
205     $(P are all semantically equivalent.
206     )
207
208     $(P Slicing
209     is not only handy for referring to parts of other arrays,
210     but for converting pointers into bounds-checked arrays:
211     )
212
213 ---------
214 int* p;
215 int[] b = p[0..8];
216 ---------
217
218 <h2>$(LNAME2 array-copying, Array Copying)</h2>
219
220     $(P When the slice operator appears as the lvalue of an assignment
221     expression, it means that the contents of the array are the
222     target of the assignment rather than a reference to the array.
223     Array copying happens when the lvalue is a slice, and the rvalue
224     is an array of or pointer to the same type.
225     )
226
227 ---------
228 int[3] s;
229 int[3] t;
230
231 s[] = t;        // the 3 elements of t[3] are copied into s[3]
232 s[] = t[];      // the 3 elements of t[3] are copied into s[3]
233 s[1..2] = t[0..1];  // same as s[1] = t[0]
234 s[0..2] = t[1..3];  // same as s[0] = t[1], s[1] = t[2]
235 s[0..4] = t[0..4];  // error, only 3 elements in s
236 s[0..2] = t;        // error, different lengths for lvalue and rvalue
237 ---------
238
239     $(P Overlapping copies are an error:)
240
241 ---------
242 s[0..2] = s[1..3];  // error, overlapping copy
243 s[1..3] = s[0..2];  // error, overlapping copy
244 ---------
245
246     $(P Disallowing overlapping makes it possible for more aggressive
247     parallel code optimizations than possible with the serial
248     semantics of C.
249     )
250
251 <h2>$(LNAME2 array-setting, Array Setting)</h2>
252
253     $(P If a slice operator appears as the lvalue of an assignment
254     expression, and the type of the rvalue is the same as the element
255     type of the lvalue, then the lvalue's array contents
256     are set to the rvalue.
257     )
258
259 ---------
260 int[3] s;
261 int* p;
262
263 s[] = 3;        // same as s[0] = 3, s[1] = 3, s[2] = 3
264 p[0..2] = 3;        // same as p[0] = 3, p[1] = 3
265 ---------
266
267 <h2>$(LNAME2 array-concatenation, Array Concatenation)</h2>
268
269     $(P The binary operator ~ is the $(I cat) operator. It is used
270     to concatenate arrays:
271     )
272
273 ---------
274 int[] a;
275 int[] b;
276 int[] c;
277
278 a = b ~ c;  // Create an array from the concatenation of the
279         // b and c arrays
280 ---------
281
282     $(P Many languages overload the + operator to mean concatenation.
283     This leads to confusion; for example, does:
284     )
285
286 ---------
287 "10" + 3 + 4
288 ---------
289
290         $(P produce the number 17, the string "1034" or the string "107" as the
291         result? It isn't obvious, and the language designers wind up carefully
292         writing rules to disambiguate it - rules that get incorrectly
293         implemented, overlooked, forgotten, and ignored. It's much better to
294         have + mean addition, and a separate operator to be array
295         concatenation.
296     )
297
298     $(P Similarly, the ~= operator means append, as in:
299     )
300
301 ---------
302 a ~= b;     // a becomes the concatenation of a and b
303 ---------
304
305     $(P Concatenation always creates a copy of its operands, even
306     if one of the operands is a 0 length array, so:
307     )
308
309 ---------
310 a = b;          // a refers to b
311 a = b ~ c[0..0];    // a refers to a copy of b
312 ---------
313
314         $(P Appending does not always create a copy, see $(LINK2 #resize,
315         setting dynamic array length) for details.
316     )
317
318 <h2>$(LNAME2 array-operations, Array Operations)</h2>
319
320     $(P Many array operations, also known as vector operations,
321     can be expressed at a high level rather than as a loop.
322     For example, the loop:
323     )
324
325 ---
326 T[] a, b;
327 ...
328 for (size_t i = 0; i < a.length; i++)
329     a[i] = b[i] + 4;
330 ---
331
332     $(P assigns to the elements of $(CODE a) the elements of $(CODE b)
333     with $(CODE 4) added to each. This can also be expressed in
334     vector notation as:
335     )
336
337 ---
338 T[] a, b;
339 ...
340 a[] = b[] + 4;
341 ---
342
343     $(P A vector operation is indicated by the slice operator appearing
344     as the lvalue of an =, +=, -=, *=, /=, %=, ^=, &amp;= or |= operator.
345     The rvalue can be an expression consisting either of an array
346     slice of the same length and type as the lvalue or an expression
347     of the element type of the lvalue, in any combination.
348     The operators supported for vector operations are the binary
349     operators +, -, *, /, %, ^, &amp; and |, and the unary operators
350     - and ~.
351     )
352
353     $(P The lvalue slice and any rvalue slices must not overlap.
354     The vector assignment operators are evaluated right to left,
355     and the other binary operators are evaluated left to right.
356     All operands are evaluated exactly once, even if the array slice
357     has zero elements in it.
358     )
359
360     $(P The order in which the array elements are computed
361     is implementation defined, and may even occur in parallel.
362     An application must not depend on this order.
363     )
364
365     $(P Implementation note: many of the more common vector
366     operations are expected to take advantage of any vector
367     math instructions available on the target computer.
368     )
369
370 <h2>$(LNAME2 pointer-arithmetic, Pointer Arithmetic)</h2>
371
372 ---------
373 int[3] abc;         // static array of 3 ints
374 int[] def = [ 1, 2, 3 ];    // dynamic array of 3 ints
375
376 void dibb(int* array)
377 {
378     array[2];       // means same thing as *(array + 2)
379     *(array + 2);       // get 3rd element
380 }
381
382 void diss(int[] array)
383 {
384     array[2];       // ok
385     *(array + 2);       // error, array is not a pointer
386 }
387
388 void ditt(int[3] array)
389 {
390     array[2];       // ok
391     *(array + 2);       // error, array is not a pointer
392 }
393 ---------
394
395 <h2>$(LNAME2 rectangular-arrays, Rectangular Arrays)</h2>
396
397     $(P Experienced FORTRAN numerics programmers know that multidimensional
398     "rectangular" arrays for things like matrix operations are much faster than trying to
399     access them via pointers to pointers resulting from "array of pointers to array" semantics.
400     For example, the D syntax:
401     )
402
403 ---------
404 double[][] matrix;
405 ---------
406
407     $(P declares matrix as an array of pointers to arrays. (Dynamic arrays are implemented as
408     pointers to the array data.) Since the arrays can have varying sizes (being dynamically
409     sized), this is sometimes called "jagged" arrays. Even worse for optimizing the code, the
410     array rows can sometimes point to each other! Fortunately, D static arrays, while using
411     the same syntax, are implemented as a fixed rectangular layout:
412     )
413
414 ---------
415 double[3][3] matrix;
416 ---------
417
418     $(P declares a rectangular matrix with 3 rows and 3 columns, all contiguously in memory. In
419     other languages, this would be called a multidimensional array and be declared as:
420     )
421 ---------
422 double matrix[3,3];
423 ---------
424
425 <h2>$(LNAME2 array-length, Array Length)</h2>
426
427 $(V1
428     $(P Within the [ ] of a static or a dynamic array,
429     the variable $(B length)
430     is implicitly declared and set to the length of the array.
431     The symbol $(B $) can also be so used.
432     )
433 )
434 $(V2
435     $(P Within the [ ] of a static or a dynamic array,
436     the symbol $(B $)
437     represents the length of the array.
438     )
439 )
440
441 ---------
442 int[4] foo;
443 int[]  bar = foo;
444 int*   p = &foo[0];
445
446 // These expressions are equivalent:
447 bar[]
448 bar[0 .. 4]
449 $(V1 bar[0 .. $(B length)]
450 )bar[0 .. $(B $)]
451 bar[0 .. bar.length]
452
453 $(V1 p[0 .. length]     // 'length' is not defined, since p is not an array
454 bar[0]+length       // 'length' is not defined, out of scope of [ ]
455
456 bar[$(B length)-1]  // retrieves last element of the array
457 )
458 $(V2 p[0 .. $(DOLLAR)]      // '$' is not defined, since p is not an array
459 bar[0]+$(DOLLAR)        // '$' is not defined, out of scope of [ ]
460
461 bar[$(B $(DOLLAR))-1]   // retrieves last element of the array
462 )
463 ---------
464
465 <h2>$(LNAME2 array-properties, Array Properties)</h2>
466
467     $(P Static array properties are:)
468
469     $(TABLE2 Static Array Properties,
470     $(TR $(TH Property) $(TH Description))
471
472     $(TR
473     $(TD $(B .init))
474 $(V1    $(TD returns the default initializer for the element type.)
475 )
476 $(V2    $(TD Returns an array literal with each element of the literal being the $(B .init)
477     property of the array element type.
478 )
479     )
480     )
481
482     $(TR
483     $(TD $(B .sizeof))
484     $(TD Returns the array length multiplied by the number of
485     bytes per array element.
486     )
487     )
488
489     $(TR
490     $(TD $(B .length))
491     $(TD Returns the number of elements in the array.
492     This is a fixed quantity for static arrays.
493     It is of type $(B size_t).
494     )
495     )
496
497     $(TR
498     $(TD $(B .ptr))
499     $(TD Returns a pointer to the first element of the array.
500     )
501     )
502
503     $(TR
504     $(TD $(B .dup))
505     $(TD Create a dynamic array of the same size
506     and copy the contents of the array into it.
507     )
508     )
509
510     $(TR
511     $(TD $(B .idup))
512     $(TD Create a dynamic array of the same size
513     and copy the contents of the array into it.
514     The copy is typed as being immutable.
515     $(I D 2.0 only)
516     )
517     )
518
519     $(TR
520     $(TD $(B .reverse))
521     $(TD Reverses in place the order of the elements in the array.
522     Returns the array.
523     )
524     )
525
526     $(TR
527     $(TD $(B .sort))
528     $(TD Sorts in place the order of the elements in the array.
529     Returns the array.
530     )
531     )
532
533     )
534
535     $(P Dynamic array properties are:)
536
537     $(TABLE2 Dynamic Array Properties,
538     $(TR $(TH Property) $(TH Description))
539
540     $(TR
541     $(TD $(B .init))
542     $(TD Returns null.)
543     )
544
545     $(TR
546     $(TD $(B .sizeof))
547     $(TD Returns the size of the dynamic array reference,
548     which is 8 on 32 bit machines.
549     )
550     )
551
552     $(TR
553     $(TD $(B .length))
554     $(TD Get/set number of elements in the array.
555     It is of type $(B size_t).
556     )
557     )
558
559     $(TR
560     $(TD $(B .ptr))
561     $(TD Returns a pointer to the first element of the array.
562     )
563     )
564
565     $(TR
566     $(TD $(B .dup))
567     $(TD Create a dynamic array of the same size
568     and copy the contents of the array into it.
569     )
570     )
571
572     $(TR
573     $(TD $(B .idup))
574     $(TD Create a dynamic array of the same size
575     and copy the contents of the array into it.
576     The copy is typed as being immutable.
577     $(I D 2.0 only)
578     )
579     )
580
581     $(TR
582     $(TD $(B .reverse))
583     $(TD Reverses in place the order of the elements in the array.
584     Returns the array.
585     )
586     )
587
588     $(TR
589     $(TD $(B .sort))
590     $(TD Sorts in place the order of the elements in the array.
591     Returns the array.
592     )
593     )
594
595     )
596
597     $(P For the $(B .sort) property to work on arrays of class
598     objects, the class definition must define the function:
599     $(TT int opCmp(Object)). This is used to determine the
600     ordering of the class objects. Note that the parameter
601     is of type $(TT Object), not the type of the class.)
602
603     $(P For the $(B .sort) property to work on arrays of
604     structs or unions, the struct or union definition must
605     define the function:
606     $(TT int opCmp(S)) or
607     $(TT int opCmp(S*)).
608     The type $(TT S) is the type of the struct or union.
609     This function will determine the sort ordering.
610     )
611
612     $(P Examples:)
613
614 ---------
615 int* p;
616 int[3] s;
617 int[] a;
618
619 p.length;   // error, length not known for pointer
620 s.length;   // compile time constant 3
621 a.length;   // runtime value
622
623 p.dup;      // error, length not known
624 s.dup;      // creates an array of 3 elements, copies
625         // elements s into it
626 a.dup;      // creates an array of a.length elements, copies
627         // elements of a into it
628 ---------
629
630 <h3>$(LNAME2 resize, Setting Dynamic Array Length)</h3>
631
632     $(P The $(B $(TT .length)) property of a dynamic array can be set
633     as the lvalue of an = operator:
634     )
635
636 ---------
637 array.length = 7;
638 ---------
639
640     $(P This causes the array to be reallocated in place, and the existing
641     contents copied over to the new array. If the new array length is
642         shorter, the array is not reallocated, and no data is copied.  It is
643         equivalent to slicing the array:
644
645 ---------
646 array = array[0..7];
647 ---------
648
649         If the new array length is longer, the remainder is filled out with the
650         default initializer.
651     )
652
653     $(P To maximize efficiency, the runtime always tries to resize the
654     array in place to avoid extra copying.
655         $(V1 It will always do a copy if the new size is larger and the array
656         was not allocated via the new operator or a previous resize operation.)
657         $(V2 It will always do a copy if the new size is larger and the array
658         was not allocated via the new operator or resizing in place would
659         overwrite valid data in the array.)
660     )
661
662 $(V1    $(P This means that if there is an array slice immediately following the
663     array being resized, the resized array could overlap the slice; i.e.:
664     )
665
666 ---------
667 char[] a = new char[20];
668 char[] b = a[0..10];
669 char[] c = a[10..20];
670
671 b.length = 15;  // always resized in place because it is sliced
672         // from a[] which has enough memory for 15 chars
673 b[11] = 'x';    // a[11] and c[1] are also affected
674
675 a.length = 1;
676 a.length = 20;  // no net change to memory layout
677
678 c.length = 12;  // always does a copy because c[] is not at the
679         // start of a gc allocation block
680 c[5] = 'y'; // does not affect contents of a[] or b[]
681
682 a.length = 25;  // may or may not do a copy
683 a[3] = 'z'; // may or may not affect b[3] which still overlaps
684         // the old a[3]
685 ---------
686 )
687
688 $(V2
689         $(P For example:)
690
691 ---------
692 char[] a = new char[20];
693 char[] b = a[0..10];
694 char[] c = a[10..20];
695 char[] d = a;
696
697 b.length = 15;    // always reallocates because extending in place would
698                   // overwrite other data in a.
699 b[11] = 'x';      // a[11] and c[1] are not affected
700
701 d.length = 1;
702 d.length = 20;    // also reallocates, because doing this will overwrite a and
703                   // c
704
705 c.length = 12;    // may reallocate in place if space allows, because nothing
706                   // was allocated after c.
707 c[5] = 'y';       // may affect contents of a, but not b or d because those
708                   // were reallocated.
709
710 a.length = 25;    // This always reallocates because if c extended in place,
711                   // then extending a would overwrite c.  If c didn't
712                   // reallocate in place, it means there was not enough space,
713                   // which will still be true for a.
714 a[15] = 'z';      // does not affect c, because either a or c has reallocated.
715 ---------
716 )
717
718     $(P To guarantee copying behavior, use the .dup property to ensure
719         a unique array that can be resized. $(V2 Also, one may use the Phobos
720         $(TT .capacity) property to determine how many elements can be appended
721         to the array without reallocating.)
722     )
723
724         $(P These issues also apply to appending arrays with the ~= operator.
725         Concatenation using the ~ operator is not affected since it always
726         reallocates.
727     )
728
729     $(P Resizing a dynamic array is a relatively expensive operation.
730     So, while the following method of filling an array:
731     )
732
733 ---------
734 int[] array;
735 while (1)
736 {   c = getinput();
737     if (!c)
738        break;
739     array.length = array.length + 1;
740     array[array.length - 1] = c;
741 }
742 ---------
743
744     $(P will work, it will be inefficient. A more practical
745     approach would be to minimize the number of resizes:
746     )
747
748 ---------
749 int[] array;
750 array.length = 100;        // guess
751 for (i = 0; 1; i++)
752 {   c = getinput();
753      if (!c)
754     break;
755      if (i == array.length)
756     array.length = array.length * 2;
757      array[i] = c;
758 }
759 array.length = i;
760 ---------
761
762     $(P Picking a good initial guess is an art, but you usually can
763     pick a value covering 99% of the cases.
764     For example, when gathering user
765     input from the console - it's unlikely to be longer than 80.
766     )
767
768         $(V2 $(P Also, you may wish to utilize the Phobos $(TT reserve)
769         function to pre-allocate array data to use with the append operator.))
770
771 <h3>$(LNAME2 func-as-property, Functions as Array Properties)</h3>
772
773     $(P If the first parameter to a function is an array, the
774     function can be called as if it were a property of the array:
775     )
776
777 ---
778 int[] array;
779 void foo(int[] a, int x);
780
781 foo(array, 3);
782 array.foo(3);   // means the same thing
783 ---
784
785 <h2>$(LNAME2 bounds, Array Bounds Checking)</h2>
786
787     $(P It is an error to index an array with an index that is less than
788     0 or greater than or equal to the array length. If an index is
789         out of bounds, $(V1 an ArrayBoundsError)$(V2 a RangeError) exception is
790         raised if detected at runtime, and an error if detected at compile
791         time.  A program may not rely on array bounds checking happening; for
792         example, the following program is incorrect:
793     )
794
795 ---------
796 try
797 {
798     for (i = 0; ; i++)
799     {
800     array[i] = 5;
801     }
802 }
803 catch ($(V1 ArrayBoundsError)$(V2 RangeError))
804 {
805     // terminate loop
806 }
807 ---------
808
809     $(P The loop is correctly written:)
810
811 ---------
812 for (i = 0; i < array.length; i++)
813 {
814     array[i] = 5;
815 }
816 ---------
817
818     $(P $(B Implementation Note:) Compilers should attempt to detect
819     array bounds errors at compile time, for example:
820     )
821
822 ---------
823 int[3] foo;
824 int x = foo[3];     // error, out of bounds
825 ---------
826
827     $(P Insertion of array bounds checking code at runtime should be
828     turned on and off
829     with a compile time switch.
830     )
831
832 <h2>$(LNAME2 array-initialization, Array Initialization)</h2>
833
834 <h3>$(LNAME2 default-initialization, Default Initialization)</h3>
835
836     $(UL
837     $(LI Pointers are initialized to $(B null).)
838     $(LI Static array contents are initialized to the default
839     initializer for the array element type.)
840     $(LI Dynamic arrays are initialized to having 0 elements.)
841     $(LI Associative arrays are initialized to having 0 elements.)
842     )
843
844 <h3>$(LNAME2 void-initialization, Void Initialization)</h3>
845
846     $(P Void initialization happens when the $(I Initializer) for
847     an array is $(B void). What it means is that no initialization
848     is done, i.e. the contents of the array will be undefined.
849     This is most useful as an efficiency optimization.
850     Void initializations are an advanced technique and should only be used
851     when profiling indicates that it matters.
852     )
853
854 <h3>$(LNAME2 static-init-static, Static Initialization of Static Arrays)</h3>
855
856     $(P Static initializations are supplied by a list of array
857     element values enclosed in [ ]. The values can be optionally
858     preceded by an index and a :.
859     If an index is not supplied, it is set to the previous index
860     plus 1, or 0 if it is the first value.
861     )
862
863 ---------
864 int[3] a = [ 1:2, 3 ];      // a[0] = 0, a[1] = 2, a[2] = 3
865 ---------
866
867     $(P This is most handy when the array indices are given by enums:)
868
869 ---------
870 enum Color { red, blue, green };
871
872 int value[Color.max + 1] = [ Color.blue:6, Color.green:2, Color.red:5 ];
873 ---------
874
875     $(P These arrays are static when they appear in global scope.
876     Otherwise, they need to be marked with $(B const) or $(B static)
877     storage classes to make them static arrays.)
878
879
880 <h2>$(LNAME2 special-array, Special Array Types)</h2>
881
882 <h3>$(LNAME2 strings, Strings)</h3>
883
884     $(P A string is
885     an array of characters. String literals are just
886     an easy way to write character arrays.
887     String literals are immutable (read only).
888     )
889
890 $(V1
891 ---------
892 char[] str;
893 char[] str1 = "abc";
894 str1[0] = 'b';       // error, "abc" is read only, may crash
895 ---------
896
897     $(P The name $(CODE string) is aliased to $(CODE char[]),
898     so the above declarations could be equivalently written as:
899     )
900
901 ---------
902 string str;
903 string str1 = "abc";
904 ---------
905 )
906 $(V2
907 ---------
908 char[] str1 = "abc";                // error, "abc" is not mutable
909 char[] str2 = "abc".dup;            // ok, make mutable copy
910 immutable(char)[] str3 = "abc";     // ok
911 immutable(char)[] str4 = str1;      // error, str1 is not immutable
912 immutable(char)[] str5 = str1.idup; // ok, make immutable copy
913 ---------
914
915     $(P The name $(CODE string) is aliased to $(CODE immutable(char)[]),
916     so the above declarations could be equivalently written as:
917     )
918 ---------
919 char[] str1 = "abc";       // error, "abc" is not mutable
920 char[] str2 = "abc".dup;   // ok, make mutable copy
921 string str3 = "abc";       // ok
922 string str4 = str1;        // error, str1 is not immutable
923 string str5 = str1.idup;   // ok, make immutable copy
924 ---------
925 )
926     $(P $(CODE char[]) strings are in UTF-8 format.
927     $(CODE wchar[]) strings are in UTF-16 format.
928     $(CODE dchar[]) strings are in UTF-32 format.
929     )
930
931     $(P Strings can be copied, compared, concatenated, and appended:)
932
933 ---------
934 str1 = str2;
935 if (str1 < str3) ...
936 func(str3 ~ str4);
937 str4 ~= str1;
938 ---------
939
940     $(P with the obvious semantics. Any generated temporaries get cleaned up
941     by the garbage collector (or by using $(CODE alloca())).
942     Not only that, this works with any
943     array, not just a special String array.
944     )
945
946     $(P A pointer to a char can be generated:
947     )
948
949 ---------
950 char* p = &str[3];  // pointer to 4th element
951 char* p = str;      // pointer to 1st element
952 ---------
953
954     $(P Since strings, however, are not 0 terminated in D,
955     when transferring a pointer
956     to a string to C, add a terminating 0:
957     )
958
959 ---------
960 str ~= "\0";
961 ---------
962
963     $(P or use the function $(TT std.string.toStringz).)
964
965     $(P The type of a string is determined by the semantic phase of
966     compilation. The type is
967     one of: char[], wchar[], dchar[], and is determined by
968     implicit conversion rules.
969     If there are two equally applicable implicit conversions,
970     the result is an error. To
971     disambiguate these cases, a cast or a postfix of $(B c),
972     $(B w) or $(B d) can be used:
973     )
974
975 ---------
976 $(V1
977 cast(wchar [])"abc" // this is an array of wchar characters
978 "abc"w          // so is this
979 )
980 $(V2
981 cast(immutable(wchar) [])"abc"  // this is an array of wchar characters
982 "abc"w                  // so is this
983 )
984 ---------
985
986     $(P String literals that do not have a postfix character and that
987     have not been cast can be implicitly converted between char[],
988     wchar[], and dchar[] as necessary.
989     )
990
991 ---------
992 char c;
993 wchar w;
994 dchar d;
995
996 c = 'b';        // c is assigned the character 'b'
997 w = 'b';        // w is assigned the wchar character 'b'
998 w = 'bc';       // error - only one wchar character at a time
999 w = "b"[0];     // w is assigned the wchar character 'b'
1000 w = "\r"[0];        // w is assigned the carriage return wchar character
1001 d = 'd';        // d is assigned the character 'd'
1002 ---------
1003
1004 <h4>$(LNAME2 printf, C's printf() and Strings)</h4>
1005
1006     $(P $(B printf()) is a C function and is not part of D. $(B printf())
1007     will print C strings, which are 0 terminated. There are two ways
1008     to use $(B printf()) with D strings. The first is to add a
1009     terminating 0, and cast the result to a char*:
1010     )
1011
1012 ---------
1013 str ~= "\0";
1014 printf("the string is '%s'\n", cast(char*)str);
1015 ---------
1016
1017     $(P or:)
1018
1019 ---------
1020 import std.string;
1021 printf("the string is '%s'\n", std.string.toStringz(str));
1022 ---------
1023
1024     $(P String literals already have a 0 appended to them, so
1025     can be used directly:)
1026
1027 -----------
1028 printf("the string is '%s'\n", cast(char*)"string literal");
1029 -----------
1030
1031     $(P So, why does the first string literal to printf not need
1032     the cast? The first parameter is prototyped as a char*, and
1033     a string literal can be implicitly cast to a char*.
1034     The rest of the arguments to printf, however, are variadic
1035     (specified by ...),
1036     and a string literal is passed as a (length,pointer) combination
1037     to variadic parameters.)
1038
1039     $(P The second way is to use the precision specifier. Pass the
1040     length first, as an int, followed by the pointer to the characters:)
1041
1042 ---------
1043 printf("the string is '%.*s'\n", cast(int)str.length, str.ptr);
1044 ---------
1045
1046     $(P The best way is to use std.stdio.writefln, which can handle
1047     D strings:)
1048
1049 ---------
1050 import std.stdio;
1051 writefln("the string is '%s'", str);
1052 ---------
1053
1054 <h3>$(LNAME2 implicit-conversions, Implicit Conversions)</h3>
1055
1056     $(P A pointer $(TT $(I T)*) can be implicitly converted to
1057     one of the following:)
1058
1059     $(UL
1060     $(LI $(TT void*))
1061     )
1062
1063     $(P A static array $(TT $(I T)[$(I dim)]) can be implicitly
1064     converted to
1065     one of the following:
1066     )
1067
1068     $(UL
1069     $(LI $(TT $(I T)[]))
1070     $(LI $(TT $(I U)[]))
1071     $(LI $(TT void[]))
1072     )
1073
1074     $(P A dynamic array $(TT $(I T)[]) can be implicitly converted to
1075     one of the following:
1076     )
1077
1078     $(UL
1079     $(LI $(TT $(I U)[]))
1080     $(LI $(TT void[]))
1081     )
1082
1083     $(P Where $(I U) is a base class of $(I T).)
1084
1085 <hr>
1086 <h1>$(LNAME2 associative, Associative Arrays)</h1>
1087
1088     $(P Associative arrays have an index that is not necessarily an integer,
1089     and can be sparsely populated. The index for an associative array
1090     is called the $(I key), and its type is called the $(I KeyType).
1091     )
1092
1093     $(P Associative arrays are declared by placing the $(I KeyType)
1094     within the [] of an array declaration:
1095     )
1096
1097 ---------
1098 int[char[]] b;      // associative array b of ints that are
1099             // indexed by an array of characters.
1100             // The $(I KeyType) is char[]
1101 b["hello"] = 3;     // set value associated with key "hello" to 3
1102 func(b["hello"]);   // pass 3 as parameter to func()
1103 ---------
1104
1105     $(P Particular keys in an associative array can be removed with the
1106     remove function:
1107     )
1108
1109 ---------
1110 b.$(B remove)("hello");
1111 ---------
1112
1113     $(P The $(I InExpression) yields a pointer to the value
1114     if the key is in the associative array, or $(B null) if not:
1115     )
1116
1117 ---------
1118 int* p;
1119 p = ("hello" $(B in) b);
1120 if (p != $(B null))
1121     ...
1122 ---------
1123
1124     $(P $(I KeyType)s cannot be functions or voids.
1125     )
1126
1127 <h3>$(LNAME2 classes-as-keys, Using Classes as the KeyType)</h3>
1128
1129     $(P Classes can be used as the $(I KeyType). For this to work,
1130     the class definition must override the following member functions
1131     of class $(TT Object):)
1132
1133     $(UL
1134     $(LI $(TT hash_t toHash()))
1135 $(V1    $(LI $(TT int opEquals(Object))))
1136 $(V2    $(LI $(TT bool opEquals(Object))))
1137     $(LI $(TT int opCmp(Object)))
1138     )
1139
1140     $(P $(TT hash_t) is an alias to an integral type.)
1141
1142     $(P Note that the parameter to $(TT opCmp) and $(TT opEquals) is
1143     of type
1144     $(TT Object), not the type of the class in which it is defined.)
1145
1146     $(P For example:)
1147
1148 ---
1149 class Foo                               // example KeyType, ordered by a and then by b
1150 {
1151     int a, b;
1152     // Objects with the same a and b always hash alike.
1153     hash_t $(B toHash)() { return a + b; }
1154
1155 $(V1      int $(B opEquals)(Object o))$(V2      bool $(B opEquals)(Object o))
1156     {   Foo foo = cast(Foo) o;          // null if o is not a Foo
1157         return foo && a == foo.a && b == foo.b;
1158     }
1159
1160     int $(B opCmp)(Object o)
1161     {   Foo foo = cast(Foo) o;          // null if o is not a Foo
1162         if (!foo)
1163             return -1;
1164         if (a != foo.a)                 // compare, do not subtract: a - foo.a can overflow
1165             return a < foo.a ? -1 : 1;
1166         return b < foo.b ? -1 : (b > foo.b ? 1 : 0);
1167     }
1168 }
1169 ---
1170
1171     $(P The implementation may use either $(TT opEquals) or $(TT opCmp) or
1172     both. Care should be taken that the results of
1173     $(TT opEquals) and $(TT opCmp) are consistent with each other, both
1174     when the class objects are equal and when they are not.)
1175
1176 <h3>$(LNAME2 structs-as-keys, Using Structs or Unions as the KeyType)</h3>
1177
1178     $(P If the $(I KeyType) is a struct or union type,
1179     a default mechanism is used
1180     to compute the hash and comparisons of it based on the binary
1181     data within the struct value. A custom mechanism can be used
1182     by providing the following functions as struct members:
1183     )
1184
1185 ---------
1186 $(V2 const) hash_t $(B toHash)();
1187 $(V1 int $(B opEquals)($(I KeyType)* s);)$(V2 const bool $(B opEquals)(ref const $(I KeyType) s);)
1188 $(V1 int $(B opCmp)($(I KeyType)* s);)$(V2 const int $(B opCmp)(ref const $(I KeyType) s);)
1189 ---------
1190
1191     $(P For example:)
1192
1193 ---------
1194 import std.string;
1195
1196 struct MyString                         // string wrapper with custom hashing and comparison
1197 {
1198     string str;
1199     // opEquals and opCmp both go through std.string.cmp, so they always agree.
1200 $(V1      hash_t $(B toHash)()
1201     {   hash_t hash;
1202         foreach (char c; str)           // fold every char of str into the hash
1203             hash = (hash * 9) + c;
1204         return hash;
1205     }
1206
1207     int $(B opEquals)(MyString* s)      // int, matching the V1 signature int opEquals(KeyType* s)
1208     {
1209         return std.string.cmp(this.str, s.str) == 0;
1210     }
1211
1212     int $(B opCmp)(MyString* s)
1213     {
1214         return std.string.cmp(this.str, s.str);
1215     })
1216 $(V2      const hash_t $(B toHash)()
1217     {   hash_t hash;
1218         foreach (char c; str)           // fold every char of str into the hash
1219             hash = (hash * 9) + c;
1220         return hash;
1221     }
1222
1223     const bool $(B opEquals)(ref const MyString s)
1224     {
1225         return std.string.cmp(this.str, s.str) == 0;
1226     }
1227
1228     const int $(B opCmp)(ref const MyString s)
1229     {
1230         return std.string.cmp(this.str, s.str);
1231     })
1232 }
1233 ---------
1234
1235
1236     $(P The implementation may use either $(TT opEquals) or $(TT opCmp) or
1237     both. Care should be taken that the results of
1238     $(TT opEquals) and $(TT opCmp) are consistent with each other, both
1239     when the struct/union objects are equal and when they are not.)
1240
1241 <h3>$(LNAME2 aa-properties, Properties)</h3>
1242
1243     $(P Properties for associative arrays are:)
1244
1245     $(TABLE2 Associative Array Properties,
1246     $(TR $(TH Property) $(TH Description))
1247
1248     $(TR
1249     $(TD $(B .sizeof))
1250     $(TD Returns the size of the reference to the associative
1251     array; it is typically 8.
1252     )
1253     )
1254
1255     $(TR
1256     $(TD $(B .length))
1257     $(TD Returns the number of values in the associative array.
1258     Unlike for dynamic arrays, it is read-only.
1259     )
1260     )
1261
1262     $(TR
1263     $(TD $(B .keys))
1264     $(TD Returns a dynamic array, the elements of which are the keys in
1265     the associative array.
1266     )
1267     )
1268
1269     $(TR
1270     $(TD $(B .values))
1271     $(TD Returns a dynamic array, the elements of which are the values in
1272     the associative array.
1273     )
1274     )
1275
1276     $(TR
1277     $(TD $(B .rehash))
1278     $(TD Reorganizes the associative array in place so that lookups
1279     are more efficient. rehash is effective when, for example,
1280     the program is done loading up a symbol table and now needs
1281     fast lookups in it.
1282     Returns a reference to the reorganized array.
1283     )
1284     )
1285
1286 $(V2
1287     $(TR
1288     $(TD $(B .byKey()))
1289     $(TD Returns a delegate suitable for use as an $(I Aggregate) to
1290     a $(LINK2 statement.html#ForeachStatement, $(I ForeachStatement))
1291     which will iterate over the keys
1292     of the associative array.
1293     )
1294     )
1295
1296     $(TR
1297     $(TD $(B .byValue()))
1298     $(TD Returns a delegate suitable for use as an $(I Aggregate) to
1299     a $(LINK2 statement.html#ForeachStatement, $(I ForeachStatement))
1300     which will iterate over the values
1301     of the associative array.
1302     )
1303     )
1304
1305     $(TR
1306     $(TD $(B .get(Key key, lazy Value defaultValue)))
1307     $(TD Looks up $(I key); if it exists, returns the corresponding $(I value);
1308     otherwise evaluates and returns $(I defaultValue).
1309     )
1310     )
1311 )
1312     )
1313
1314 <hr>
1315 <h3>$(LNAME2 aa-example, Associative Array Example: word count)</h3>
1316
1317 ---------
1318 import std.file;         // D file I/O
1319 import std.stdio;
1320
1321 int main (string[] args)                // wc-style counts per file, then a sorted word tally
1322 {
1323     int word_total;
1324     int line_total;
1325     int char_total;
1326     int[char[]] dictionary;             // word -> number of occurrences
1327
1328     writefln("   lines   words   bytes file");
1329     for (size_t i = 1; i < args.length; ++i)   // program arguments
1330     {
1331         char[] input;            // input buffer
1332         int w_cnt, l_cnt, c_cnt; // word, line, char counts
1333         int inword;              // nonzero while scanning inside a word
1334         size_t wstart;           // index in input where the current word began
1335
1336         // read file into input[]
1337         input = cast(char[])std.file.read(args[i]);
1338
1339         foreach (j, char c; input)
1340         {
1341             if (c == '\n')
1342                 ++l_cnt;
1343             if (c >= '0' && c <= '9')
1344             {   // a digit neither starts nor ends a word
1345             }
1346             else if (c >= 'a' && c <= 'z' ||
1347                      c >= 'A' && c <= 'Z')
1348             {
1349                 if (!inword)
1350                 {
1351                     wstart = j;
1352                     inword = 1;
1353                     ++w_cnt;
1354                 }
1355             }
1356             else if (inword)
1357             {
1358                 char[] word = input[wstart .. j];
1359                 dictionary[word]++;        // increment count for word
1360                 inword = 0;
1361             }
1362             ++c_cnt;
1363         }
1364         if (inword)              // input ended in the middle of a word
1365         {
1366             char[] word = input[wstart .. input.length];
1367             dictionary[word]++;
1368         }
1369         writefln("%8d%8d%8d %s", l_cnt, w_cnt, c_cnt, args[i]);
1370         line_total += l_cnt;
1371         word_total += w_cnt;
1372         char_total += c_cnt;
1373     }
1374
1375     if (args.length > 2)         // more than one file: print grand totals
1376     {   // writefln, so the rule printed next starts on its own line
1377         writefln("-------------------------------------\n%8d%8d%8d total",
1378             line_total, word_total, char_total);
1379     }
1380
1381     writefln("-------------------------------------");
1382     foreach (word; dictionary.keys.sort)
1383     {
1384         writefln("%3d %s", dictionary[word], word);
1385     }
1386     return 0;
1387 }
1388 ---------
1389
1390 )
1391
1392 Macros:
1393     TITLE=Arrays
1394     WIKI=Arrays
1395     DOLLAR=$
1396     FOO=
Note: See TracBrowser for help on using the browser.