Download Reference Manual
The Developer's Library for D
About Wiki Forums Source Search Contact

Ticket #568: iso8601.d

File iso8601.d, 28.9 kB (added by Deewiant, 1 year ago)
Line 
1 import tango.util.time.Date;
2
/// Parses an ISO 8601 date (calendar, week or ordinal form) from the start of src.
///
/// Returns the number of chars used to compose a valid date: 0 if no date can be composed.
/// Every field written to date is either correct (e.g. 1 <= month <= 12) or zero.
/// expanded is the number of additional year digits to expect on top of the usual four.

size_t iso8601Date(T)(T[] src, inout Date date, size_t expanded = 0) {
    // The worker advances a cursor through src; only the returned count matters here.
    T* cursor = src.ptr;

    // The worker reports whether '-' separators were seen; this entry point discards that.
    ubyte ignoredSeparators = void;

    return doIso8601Date(cursor, src, date, expanded, ignoredSeparators);
}
11
/// Worker behind iso8601Date() and iso8601(): parses a date beginning at p.
///
/// Params:
///   p          = read cursor into src; advanced past whatever gets consumed
///   src        = the complete input, used to measure how much has been eaten
///   date       = receives year, month and day; fields found to be invalid are zeroed
///   expanded   = number of extra year digits, forwarded to parseYear
///   separators = set to true when a '-' follows the year; otherwise keeps the
///                zero an out parameter starts with
///
/// Returns: the number of chars (counted from src.ptr) that form the valid part
/// of the date, or 0 if not even a year could be parsed.
private size_t doIso8601Date(T)(inout T* p, T[] src, inout Date date, size_t expanded, out ubyte separators)
out {
    assert (!date.month || (              date.month >= 1 && date.month <= 12));
    assert (!date.day   || (date.month && date.day   >= 1 && date.day   <= daysPerMonth(date.month, date.year)));
} body {

    size_t eaten() { return p - src.ptr; }
    bool done(T[] s) { return .done(eaten(), src.length, *p, s); }

    if (!parseYear(p, expanded, date.year))
        return (date.year = 0);

    auto onlyYear = eaten();

    // /([+-]Y{expanded})?(YYYY|YY)/
    if (done("-0123W"))
        return onlyYear;

    if (accept(p, '-'))
        separators = true;

    if (accept(p, 'W')) {
        // (year)-Www-D

        T* p2 = p;

        int i = parseInt(p, 3u);

        if (i) {
            if (p - p2 == 2) {

                // (year)-Www
                if (done("-")) {
                    if (getMonthAndDayFromWeek(date, i))
                        return eaten();

                // (year)-Www-D
                } else if (demand(p, '-')) {
                    // Make sure a day digit actually exists before reading it:
                    // input such as "2007-W01-" ends right after the separator.
                    if (eaten() < src.length && getMonthAndDayFromWeek(date, i, *p++ - '0'))
                        return eaten();
                }

            } else if (p - p2 == 3) {
                // (year)WwwD: the last of the three digits is the weekday
                if (getMonthAndDayFromWeek(date, i / 10, i % 10))
                    return eaten();
            }
        }

        return onlyYear;
    }

    // next up, MM or MM[-]DD or DDD

    T* p2 = p;

    int i = parseInt(p);
    if (!i)
        return onlyYear;

    // the number of digits read decides which representation this is
    switch (p - p2) {
        case 2:
            date.month = i;

            if (!(date.month >= 1 && date.month <= 12)) {
                date.month = 0;
                return onlyYear;
            }

            auto onlyMonth = eaten();

            // (year)-MM
            if (done("-"))
                return onlyMonth;

            // (year)-MM-DD
            if (!(
                demand(p, '-') &&
                (date.day = parseInt(p, 2u)) != 0 && date.day <= daysPerMonth(date.month, date.year)
            )) {
                date.day = 0;
                return onlyMonth;
            }

            break;

        case 4:
            // e.g. 20010203, i = 203 now

            date.month = i / 100;
            date.day   = i % 100;

            // (year)MMDD
            // The day must be at least 1: a "00" day is not a date, and the
            // separated MM-DD form above rejects it as well.
            if (!(
                date.month >= 1 && date.month <= 12 &&
                date.day   >= 1 && date.day   <= daysPerMonth(date.month, date.year)
            )) {
                date.month = date.day = 0;
                return onlyYear;
            }

            break;

        case 3:
            // (year)-DDD
            // i is the ordinal of the day within the year

            bool leap = isLeapYear(date.year);

            if (i > 365 + leap)
                return onlyYear;

            // number of days in a common year that precede the first of each month
            static int[12] daysBefore = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334];

            // Walk back from December to the first month that starts before day i.
            // The leap day only shifts the months after February; February's own
            // day numbers are unaffected by it.
            for (int month = 12; month >= 1; --month) {
                int preceding = daysBefore[month - 1];
                if (leap && month > 2)
                    ++preceding;

                if (i > preceding) {
                    date.month = month;
                    date.day   = i - preceding;
                    break;
                }
            }

            break;

        default: break;
    }

    return eaten();
}
177
/// Parses an ISO 8601 time of day from the start of src; a leading 'T' and ':' separators are optional.
///
/// Returns the number of chars used to compose a valid time: 0 if no time can be composed.
/// Fields in date will be zero if incorrect: since 00:00:00,000 is a valid time, the return value must be checked to be sure of the result.
/// date.sec may be 60 if the hours and minutes are 23 and 59, as leap seconds are occasionally added to UTC time.
/// date.hour may be 0 or 24: the latter marks the end of a day, the former the beginning.

size_t iso8601Time(T)(T[] src, inout Date date) {
    // The worker advances a cursor through src; only the returned count matters here.
    T* cursor = src.ptr;

    // Whether both hours and minutes were valid is only of interest to iso8601().
    bool ignoredBothValid = void;

    return doIso8601Time(cursor, src, date, WHATEVER, ignoredBothValid);
}
188
// Separator policy handed to doIso8601Time:
// with NO a ':' between time components is an error, with YES it is mandatory,
// and with WHATEVER it is accepted but not required.
private enum : ubyte {
    NO       = 0,
    YES      = 1,
    WHATEVER
}
190
/// Worker behind iso8601Time() and iso8601(): parses a time of day beginning at p.
///
/// Params:
///   p          = read cursor into src; advanced past whatever gets consumed
///   src        = the complete input, used to measure how much has been eaten
///   date       = receives hour, min, sec and ms; a component found to be invalid is zeroed
///   separators = NO, YES or WHATEVER: whether ':' between components is forbidden,
///                required, or optional (WHATEVER also permits a leading 'T')
///   bothValid  = set to true only when parsing stopped cleanly at hh:mm or later;
///                used only to get iso8601() to catch errors correctly
///
/// Returns: the number of chars (counted from src.ptr) that form the valid part
/// of the time, or 0 if not even the hour could be parsed.
private size_t doIso8601Time(T)(inout T* p, T[] src, inout Date date, ubyte separators, out bool bothValid)
out {
    // yes, I could just write >= 0, but this emphasizes the difference between == 0 and != 0
    assert (!date.hour || (date.hour > 0 && date.hour <=  24));
    assert (!date.min  || (date.min  > 0 && date.min  <=  59));
    assert (!date.sec  || (date.sec  > 0 && date.sec  <=  60));
    assert (!date.ms   || (date.ms   > 0 && date.ms   <= 999));
} body {

    size_t eaten() { return p - src.ptr; }
    bool done(T[] s) { return .done(eaten(), src.length, *p, s); }

    // Consumes a ':' if present and reports whether that agrees with the separator policy.
    bool checkColon() {
        if (separators == WHATEVER)
            accept(p, ':');

        else if (accept(p, ':') != separators)
            return false;

        return true;
    }

    byte getTimeZone() { return .getTimeZone(p, date, separators, &done); }

    // TODO/BUG: need to convert from local time if got T
    // however, Tango provides nothing like Phobos's std.date.getLocalTZA
    // (which doesn't look like it should work on Windows, it should use tzi.bias only, and GetTimeZoneInformationForYear)
    // (and which uses too complicated code for Posix, tzset should be enough)
    // and I'm not interested in delving into system-specific code right now
    // remember also that 1 BC is the year zero in ISO 8601... 2 BC is -1, etc
    if (separators == WHATEVER)
        accept(p, 'T');

    if (parseInt(p, 2u, date.hour) != 2 || date.hour > 24)
        return (date.hour = 0);

    auto onlyHour = eaten();

    // hh
    // NOTE(review): 'Z' is missing from this set and from the hh:mm one below,
    // although the hh:mm:ss set contains it - confirm whether "hhZ" / "hh:mmZ"
    // are meant to leave the designator unconsumed.
    if (done("+,-.012345:"))
        return onlyHour;

    switch (getDecimal(p, date, HOUR)) {
        case NOTFOUND: break;
        case    FOUND:
            auto onlyDecimal = eaten();
            if (getTimeZone() == BAD)
                return onlyDecimal;

            // /hh,h+/
            return eaten();

        case BAD: return onlyHour;
        default: assert (false);
    }

    switch (getTimeZone()) {
        case NOTFOUND: break;
        case    FOUND: return eaten();
        case BAD:      return onlyHour;
        default: assert (false);
    }

    if (
        !checkColon() ||

        parseInt(p, 2u, date.min) != 2 || date.min > 59 ||

        // hour 24 is only for 24:00:00
        (date.hour == 24 && date.min != 0)
    ) {
        date.min = 0;
        return onlyHour;
    }

    auto onlyMinute = eaten();

    // hh:mm
    if (done("+,-.0123456:")) {
        bothValid = true;
        return onlyMinute;
    }

    switch (getDecimal(p, date, MINUTE)) {
        case NOTFOUND: break;
        case    FOUND:
            auto onlyDecimal = eaten();
            if (getTimeZone() == BAD)
                return onlyDecimal;

            // /hh:mm,m+/
            bothValid = true;
            return eaten();

        case BAD: return onlyMinute;
        default: assert (false);
    }

    switch (getTimeZone()) {
        case NOTFOUND: break;
        case    FOUND: bothValid = true; return eaten();
        case BAD:      return onlyMinute;
        default: assert (false);
    }

    if (
        !checkColon() ||

        parseInt(p, 2u, date.sec) != 2 || date.sec > 60 ||

        // hour 24 is only for 24:00:00
        (date.hour == 24 && date.sec  != 0) ||

        // a leap second exists only as 23:59:60, so reject second 60 unless
        // the hour is 23 AND the minute is 59
        (date.sec  == 60 && (date.hour != 23 || date.min != 59))
    ) {
        date.sec = 0;
        return onlyMinute;
    }

    auto onlySecond = eaten();

    // hh:mm:ss
    if (done("+,-.Z")) {
        bothValid = true;
        return onlySecond;
    }

    switch (getDecimal(p, date, SECOND)) {
        case NOTFOUND: break;
        case    FOUND:
            auto onlyDecimal = eaten();
            if (getTimeZone() == BAD)
                return onlyDecimal;

            // /hh:mm:ss,s+/
            bothValid = true;
            return eaten();

        case BAD: return onlySecond;
        default: assert (false);
    }

    if (getTimeZone() == BAD)
        return onlySecond;
    else {
        bothValid = true;
        return eaten(); // hh:mm:ss with timezone
    }
}
339
// Combined date and time, "<date>T<time>".
// Stricter than a date simply followed by a time:
//  the date may be neither expanded nor reduced (year, month and day must all be present)
//  separators are used either everywhere or nowhere

/// This function is very strict: either a complete date and time can be extracted, or nothing can.
/// Returns the number of chars consumed, or zero on failure.
/// If this function returns zero, the fields of date are undefined.

size_t iso8601(T)(T[] src, inout Date date) {
    T* cursor = src.ptr;
    ubyte dateSeparators;      // filled in by the date parser, then imposed on the time parser
    bool  complete = false;    // becomes true once the time parser saw valid hours and minutes

    if (!doIso8601Date(cursor, src, date, 0u, dateSeparators))
        return 0;

    // a reduced date (missing month or day) is not acceptable here
    if (!(date.year && date.month && date.day))
        return 0;

    // by mutual agreement this T may be omitted
    // but this is just a convenience method for date+time anyway
    if (!demand(cursor, 'T'))
        return 0;

    if (!doIso8601Time(cursor, src, date, dateSeparators, complete))
        return 0;

    if (!complete)
        return 0;

    return cursor - src.ptr;
}
368
369 /+ +++++++++++++++++++++++++++++++++++++++ +\
370
371    Privates used by date
372
373 \+ +++++++++++++++++++++++++++++++++++++++ +/
374
// Parses a year matching /([+-]Y{expanded})?(YYYY|YY)/ at p.
// A two-digit (century) form is scaled by 100; a leading '-' negates the result.
// Returns false if the digits found fit neither form.
private bool parseYear(T)(inout T* p, size_t expanded, out int year) {

    // an optional sign comes first: '-' is remembered, '+' is simply skipped
    bool negative = accept(p, '-');
    if (!negative)
        accept(p, '+');

    T* start = p;

    if (!parseInt(p, expanded + 4u, year))
        return false;

    // it's Y{expanded}YY, Y{expanded}YYYY, or unacceptable
    auto digits = p - start - expanded;

    if (digits == 2u)
        year *= 100;
    else if (digits != 4u)
        return false;

    if (negative)
        year = -year;

    return true;
}
406
// Finds the month and day that correspond to an ISO week date.
//
// date.year is read as the week-numbering year and is used for the leap year
// calculations; date.month and date.day receive the result. Because the first
// and last weeks of a year can spill over into the neighbouring calendar years,
// date.year is decremented or incremented when that happens.
//
// week is the week number (1..53), day the weekday (1 = Monday .. 7 = Sunday).
// Returns false, leaving date untouched, if week or day are out of range or if
// week 53 does not exist in date.year.
//
// based on the VBA function at http://www.probabilityof.com/ISO8601.shtml
private bool getMonthAndDayFromWeek(inout Date date, int week, int day = 1) {
    if (week < 1 || week > 53 || day < 1 || day > 7)
        return false;

    bool leap = isLeapYear(date.year);

    // only years starting with Thursday and
    // leap years starting with Wednesday have 53 weeks

    if (week == 53) {
        int startingDay = dayOfWeek(date.year, 1, 1, leap);

        if (!(startingDay == 4 || (leap && startingDay == 3)))
            return false;
    }

    // days since year-01-04, which always lies in week 1
    int delta = 7*(week - 1) - dayOfWeek(date.year, 1, 4, leap) + day;

    // the same position expressed as a day-of-year ordinal (January 1st is 1);
    // values below 1 or above the year's length belong to a neighbouring year
    int ordinal    = delta + 4;
    int yearLength = 365 + leap;

    if (ordinal < 1) {
        // week 1 cannot start more than a few days before January 1st
        if (delta < -7)
            assert (false);

        --date.year;
        date.month = 12;
        date.day   = ordinal + 31;

    } else if (ordinal > yearLength) {
        // nor can the last week reach far into the following year
        if (delta > 365 + leap)
            assert (false);

        ++date.year;
        date.month = 1;
        date.day   = ordinal - yearLength;

    } else {
        // number of days in a common year that precede the first of each month
        static int[12] daysBefore = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334];

        // Walk back from December to the first month that starts before the
        // ordinal. The leap day only shifts the months after February.
        for (int month = 12; month >= 1; --month) {
            int preceding = daysBefore[month - 1];
            if (leap && month > 2)
                ++preceding;

            if (ordinal > preceding) {
                date.month = month;
                date.day   = ordinal - preceding;
                break;
            }
        }
    }

    return true;
}
497
// Gregorian leap year rule: every fourth year, except century years,
// unless the century year is divisible by 400.
private bool isLeapYear(int year) {
    if (year % 4 != 0)
        return false;

    if (year % 100 != 0)
        return true;

    return year % 400 == 0;
}
501
// Day of the week by Babwani's Congruence.
// leap must tell whether year is a leap year; it selects the month codes used
// for January and February.
// Returns 1 for Monday through 7 for Sunday.
private int dayOfWeek(int year, int month, int day, bool leap)
in {
    assert (month  >= 1 && month  <= 12);
    assert (day    >= 1 && day    <= 31);

    // BUG: only works for 1900-3-1 to 2100-2-28
    assert (year >= 1901 && year <= 2099, "iso8601 :: Can't calculate day of week outside the years 1900-2099");

} out(result) {
    assert (result >= 1 && result <= 7);

} body {
    // per-month offset; January and February use different codes in leap years
    int monthTerm;
    if (leap && month <= 2)
        monthTerm = [6,2][month-1];
    else
        monthTerm = [0,3,3,6,1,4,6,2,5,0,3,5][month-1];

    int yearTerm    = 5*(year % 100) / 4;
    int centuryTerm = 2*((year / 100) % 4);

    int d = (yearTerm - centuryTerm + monthTerm + day) % 7;

    // defaults to Saturday=0, Friday=6: convert to Monday=1, Sunday=7
    if (d <= 1)
        return d + 6;

    return d - 1;
}
527
528 /+ +++++++++++++++++++++++++++++++++++++++ +\
529
530    Privates used by time
531
532 \+ +++++++++++++++++++++++++++++++++++++++ +/
533
// Tells getDecimal which time component the decimal fraction belongs to.
private enum : ubyte {
    HOUR,
    MINUTE,
    SECOND
}
// Three-way outcome of the optional-component parsers (getDecimal, getTimeZone):
// the component was malformed, was parsed, or was not there at all.
private enum : byte {
    BAD,
    FOUND,
    NOTFOUND
}
536
// Parses an optional decimal fraction (',' or '.' followed by digits) at p and
// converts it into the smaller time fields of date.
//
// which names the component the fraction belongs to:
//   HOUR   -> fills date.min and date.sec
//   MINUTE -> fills date.sec and date.ms
//   SECOND -> fills date.ms
//
// Returns NOTFOUND if there is no decimal sign at p, BAD if the sign is not
// followed by an acceptable run of digits, FOUND otherwise.
private byte getDecimal(T)(inout T* p, inout Date date, ubyte which) {
    if (accept(p, ',') || accept(p, '.')) {

        int i;
        size_t iLen = parseInt(p, i);

        if (
            iLen == 0 ||

            // if i is 0, must have at least 3 digits
            // ... or at least that's what I think the standard means
            // when it says "[i]f the magnitude of the number is less
            // than unity, the decimal sign shall be preceded by two
            // zeros"...
            // surely that should read "followed" and not "preceded"

            (i == 0 && iLen < 3)
        )
            return BAD;

        // 10 to the power of (iLen - 1)
        // the fraction itself is i / (10 * pow), so 60 * fraction == 6 * i / pow
        int pow = 1;
        while (--iLen)
            pow *= 10;

        switch (which) {
            case HOUR:
                // whole minutes in the fraction of an hour
                date.min = 6 * i / pow;

                // what is left over is counted in units of 1/pow minutes;
                // scale it to seconds instead of storing the raw remainder
                date.sec = (6 * i % pow) * 60 / pow;
                break;
            case MINUTE:
                date.sec = 6    * i / pow;
                date.ms  = 6000 * i / pow % 1000;
                break;
            case SECOND:
                date.ms = 100 * i / pow;
                break;

            default: assert (false);
        }

        return FOUND;
    }

    return NOTFOUND;
}
585
586 // the Date is always UTC, so this just adds the offset to the date fields
587 // another option would be to add time zone fields to Date and have this fill them
588
589 private byte <