1 Add support for using the system timezone database, rather
2 than embedding a copy. Discussed upstream, but the change was not wanted there.
5 r10: make timezone name matching case insensitive
6 r9: fix another compile error without --with-system-tzdata configured (Michael Heimpold)
7 r8: fix compile error without --with-system-tzdata configured
8 r7: improve check for valid timezone id to exclude directories
9 r6: fix fd leak in r5, fix country code/BC flag use in
10 timezone_identifiers_list() using system db,
11 fix use of PECL timezonedb to override system db,
12 r5: reverts addition of "System/Localtime" fake tzname.
13 updated for 5.3.0, parses zone.tab to pick up mapping between
14 timezone name, country code and long/lat coords
15 r4: added "System/Localtime" tzname which uses /etc/localtime
16 r3: fix a crash if /usr/share/zoneinfo doesn't exist (Raphael Geissert)
17 r2: add filesystem trawl to set up name alias index
20 --- a/ext/date/lib/parse_tz.c
21 +++ b/ext/date/lib/parse_tz.c
26 +#ifdef HAVE_SYSTEM_TZDATA
27 +#include <sys/mman.h>
28 +#include <sys/stat.h>
33 +#include "php_scandir.h"
44 +#ifndef HAVE_SYSTEM_TZDATA
45 #include "timezonedb.h"
50 #if (defined(__APPLE__) || defined(__APPLE_CC__)) && (defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__))
51 # if defined(__LITTLE_ENDIAN__)
54 static void read_preamble(const unsigned char **tzf, timelib_tzinfo *tz)
59 + if (memcmp(tzf, "TZif", 4) == 0) {
68 tz->bc = (**tzf == '\1');
74 -static int seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
75 +#ifdef HAVE_SYSTEM_TZDATA
77 +#ifdef HAVE_SYSTEM_TZDATA_PREFIX
78 +#define ZONEINFO_PREFIX HAVE_SYSTEM_TZDATA_PREFIX
80 +#define ZONEINFO_PREFIX "/usr/share/zoneinfo"
83 +/* System timezone database pointer. */
84 +static const timelib_tzdb *timezonedb_system;
86 +/* Hash table entry for the cache of the zone.tab mapping table. */
87 +struct location_info {
89 + double latitude, longitude;
92 + struct location_info *next;
95 +/* Cache of zone.tab. */
96 +static struct location_info **system_location_table;
98 +/* Size of the zone.tab hash table; a random-ish prime big enough to
99 + * prevent too many collisions. */
100 +#define LOCINFO_HASH_SIZE (1021)
102 +/* Compute a case insensitive hash of str */
103 +static uint32_t tz_hash(const char *str)
105 + const unsigned char *p = (const unsigned char *)str;
106 + uint32_t hash = 5381;
109 + while ((c = tolower(*p++)) != '\0') {
110 + hash = (hash << 5) ^ hash ^ c;
113 + return hash % LOCINFO_HASH_SIZE;
116 +/* Parse an ISO-6709 coordinate as used in zone.tab. Returns end of the
117 + * parsed string on success, or NULL on parse error. On success,
118 + * writes the parsed number to *result. */
119 +static char *parse_iso6709(char *p, double *result)
127 + else if (*p == '-')
133 + for (pend = p; *pend >= '0' && *pend <= '9'; pend++)
136 + /* Annoying encoding used by zone.tab has no decimal point, so use
137 + * the length to determine the format:
145 + if (len < 4 || len > 7) {
150 + v = (p[0] - '0') * 10.0 + (p[1] - '0');
152 + if (len == 5 || len == 7)
153 + v = v * 10.0 + (*p++ - '0');
155 + v += (10.0 * (p[0] - '0')
156 + + p[1] - '0') / 60.0;
160 + v += (10.0 * (p[0] - '0')
161 + + p[1] - '0') / 3600.0;
165 + /* Round to five decimal places, not because it's a good idea,
166 + * but because the builtin data uses rounded data, so match
168 + *result = round(v * sign * 100000.0) / 100000.0;
173 +/* This function parses the zone.tab file to build up the mapping of
174 + * timezone to country code and geographic location, and returns a
175 + * hash table. The hash table is indexed by the function:
177 + * tz_hash(timezone-name)
179 +static struct location_info **create_location_table(void)
181 + struct location_info **li, *i;
182 + char zone_tab[PATH_MAX];
186 + strncpy(zone_tab, ZONEINFO_PREFIX "/zone.tab", sizeof zone_tab);
188 + fp = fopen(zone_tab, "r");
193 + li = calloc(LOCINFO_HASH_SIZE, sizeof *li);
195 + while (fgets(line, sizeof line, fp)) {
196 + char *p = line, *code, *name, *comment;
198 + double latitude, longitude;
200 + while (isspace(*p))
203 + if (*p == '#' || *p == '\0' || *p == '\n')
206 + if (!isalpha(p[0]) || !isalpha(p[1]) || p[2] != '\t')
214 + /* coords => [+-][D]DDMM[SS][+-][D]DDMM[SS] */
215 + p = parse_iso6709(p, &latitude);
219 + p = parse_iso6709(p, &longitude);
224 + if (!p || *p != '\t') {
228 + /* name = string */
230 + while (*p != '\t' && *p && *p != '\n')
235 + /* comment = string */
237 + while (*p != '\t' && *p && *p != '\n')
240 + if (*p == '\n' || *p == '\t')
243 + hash = tz_hash(name);
244 + i = malloc(sizeof *i);
245 + memcpy(i->code, code, 2);
246 + strncpy(i->name, name, sizeof i->name);
247 + i->comment = strdup(comment);
248 + i->longitude = longitude;
249 + i->latitude = latitude;
250 + i->next = li[hash];
252 + /* printf("%s [%u, %f, %f]\n", name, hash, latitude, longitude); */
260 +/* Return location info from hash table, using given timezone name.
261 + * Returns NULL if the name could not be found. */
262 +const struct location_info *find_zone_info(struct location_info **li,
265 + uint32_t hash = tz_hash(name);
266 + const struct location_info *l;
272 + for (l = li[hash]; l; l = l->next) {
273 + if (strcasecmp(l->name, name) == 0)
280 +/* Filter out some non-tzdata files and the posix/right databases, if
282 +static int index_filter(const struct dirent *ent)
284 + return strcmp(ent->d_name, ".") != 0
285 + && strcmp(ent->d_name, "..") != 0
286 + && strcmp(ent->d_name, "posix") != 0
287 + && strcmp(ent->d_name, "posixrules") != 0
288 + && strcmp(ent->d_name, "right") != 0
289 + && strstr(ent->d_name, ".tab") == NULL;
292 +static int sysdbcmp(const void *first, const void *second)
294 + const timelib_tzdb_index_entry *alpha = first, *beta = second;
296 + return strcmp(alpha->id, beta->id);
300 +/* Create the zone identifier index by trawling the filesystem. */
301 +static void create_zone_index(timelib_tzdb *db)
303 + size_t dirstack_size, dirstack_top;
304 + size_t index_size, index_next;
305 + timelib_tzdb_index_entry *db_index;
308 + /* LIFO stack to hold directory entries to scan; each slot is a
309 + * directory name relative to the zoneinfo prefix. */
310 + dirstack_size = 32;
311 + dirstack = malloc(dirstack_size * sizeof *dirstack);
313 + dirstack[0] = strdup("");
317 + db_index = malloc(index_size * sizeof *db_index);
321 + struct dirent **ents;
322 + char name[PATH_MAX], *top;
325 + /* Pop the top stack entry, and iterate through its contents. */
326 + top = dirstack[--dirstack_top];
327 + snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s", top);
329 + count = php_scandir(name, &ents, index_filter, php_alphasort);
331 + while (count > 0) {
333 + const char *leaf = ents[count - 1]->d_name;
335 + snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s/%s",
338 + if (strlen(name) && stat(name, &st) == 0) {
339 + /* Name, relative to the zoneinfo prefix. */
340 + const char *root = top;
342 + if (root[0] == '/') root++;
344 + snprintf(name, sizeof name, "%s%s%s", root,
345 + *root ? "/": "", leaf);
347 + if (S_ISDIR(st.st_mode)) {
348 + if (dirstack_top == dirstack_size) {
349 + dirstack_size *= 2;
350 + dirstack = realloc(dirstack,
351 + dirstack_size * sizeof *dirstack);
353 + dirstack[dirstack_top++] = strdup(name);
356 + if (index_next == index_size) {
358 + db_index = realloc(db_index,
359 + index_size * sizeof *db_index);
362 + db_index[index_next++].id = strdup(name);
366 + free(ents[--count]);
369 + if (count != -1) free(ents);
371 + } while (dirstack_top);
373 + qsort(db_index, index_next, sizeof *db_index, sysdbcmp);
375 + db->index = db_index;
376 + db->index_size = index_next;
381 +#define FAKE_HEADER "1234\0??\1??"
382 +#define FAKE_UTC_POS (7 - 4)
384 +/* Create a fake data segment for database 'sysdb'. */
385 +static void fake_data_segment(timelib_tzdb *sysdb,
386 + struct location_info **info)
391 + data = malloc(3 * sysdb->index_size + 7);
393 + p = mempcpy(data, FAKE_HEADER, sizeof(FAKE_HEADER) - 1);
395 + for (n = 0; n < sysdb->index_size; n++) {
396 + const struct location_info *li;
397 + timelib_tzdb_index_entry *ent;
399 + ent = (timelib_tzdb_index_entry *)&sysdb->index[n];
401 + /* Lookup the timezone name in the hash table. */
402 + if (strcmp(ent->id, "UTC") == 0) {
403 + ent->pos = FAKE_UTC_POS;
407 + li = find_zone_info(info, ent->id);
409 + /* If found, append the BC byte and the
410 + * country code; set the position for this
411 + * section of timezone data. */
412 + ent->pos = (p - data) - 4;
414 + *p++ = li->code[0];
415 + *p++ = li->code[1];
418 + /* If not found, the timezone data can
419 + * point at the header. */
424 + sysdb->data = (unsigned char *)data;
427 +/* Returns true if the passed-in stat structure describes a
428 + * probably-valid timezone file. */
429 +static int is_valid_tzfile(const struct stat *st)
431 + return S_ISREG(st->st_mode) && st->st_size > 20;
434 +/* Return the mmap()ed tzfile if found, else NULL. On success, the
435 + * length of the mapped data is placed in *length. */
436 +static char *map_tzfile(const char *timezone, size_t *length)
438 + char fname[PATH_MAX];
443 + if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) {
447 + if (system_location_table) {
448 + const struct location_info *li;
449 + if ((li = find_zone_info(system_location_table, timezone)) != NULL) {
450 + /* Use the stored name to avoid case issue */
451 + timezone = li->name;
454 + snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", timezone);
456 + fd = open(fname, O_RDONLY);
459 + } else if (fstat(fd, &st) != 0 || !is_valid_tzfile(&st)) {
464 + *length = st.st_size;
465 + p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
468 + return p != MAP_FAILED ? p : NULL;
473 +static int inmem_seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
475 int left = 0, right = tzdb->index_size - 1;
476 #ifdef HAVE_SETLOCALE
477 @@ -295,36 +713,135 @@
481 +static int seek_to_tz_position(const unsigned char **tzf, char *timezone,
482 + char **map, size_t *maplen,
483 + const timelib_tzdb *tzdb)
485 +#ifdef HAVE_SYSTEM_TZDATA
486 + if (tzdb == timezonedb_system) {
489 + orig = map_tzfile(timezone, maplen);
490 + if (orig == NULL) {
494 + (*tzf) = (unsigned char *)orig ;
502 + return inmem_seek_to_tz_position(tzf, timezone, tzdb);
506 const timelib_tzdb *timelib_builtin_db(void)
508 +#ifdef HAVE_SYSTEM_TZDATA
509 + if (timezonedb_system == NULL) {
510 + timelib_tzdb *tmp = malloc(sizeof *tmp);
512 + tmp->version = "0.system";
514 + create_zone_index(tmp);
515 + system_location_table = create_location_table();
516 + fake_data_segment(tmp, system_location_table);
517 + timezonedb_system = tmp;
521 + return timezonedb_system;
523 return &timezonedb_builtin;
527 const timelib_tzdb_index_entry *timelib_timezone_builtin_identifiers_list(int *count)
529 +#ifdef HAVE_SYSTEM_TZDATA
530 + *count = timezonedb_system->index_size;
531 + return timezonedb_system->index;
533 *count = sizeof(timezonedb_idx_builtin) / sizeof(*timezonedb_idx_builtin);
534 return timezonedb_idx_builtin;
538 int timelib_timezone_id_is_valid(char *timezone, const timelib_tzdb *tzdb)
540 const unsigned char *tzf;
541 - return (seek_to_tz_position(&tzf, timezone, tzdb));
543 +#ifdef HAVE_SYSTEM_TZDATA
544 + if (tzdb == timezonedb_system) {
545 + char fname[PATH_MAX];
548 + if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) {
552 + if (system_location_table) {
553 + if (find_zone_info(system_location_table, timezone) != NULL) {
554 + /* found in cache */
559 + snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", timezone);
561 + return stat(fname, &st) == 0 && is_valid_tzfile(&st);
565 + return (inmem_seek_to_tz_position(&tzf, timezone, tzdb));
568 timelib_tzinfo *timelib_parse_tzfile(char *timezone, const timelib_tzdb *tzdb)
570 const unsigned char *tzf;
571 + char *memmap = NULL;
575 - if (seek_to_tz_position(&tzf, timezone, tzdb)) {
576 + if (seek_to_tz_position(&tzf, timezone, &memmap, &maplen, tzdb)) {
577 tmp = timelib_tzinfo_ctor(timezone);
579 read_preamble(&tzf, tmp);
580 read_header(&tzf, tmp);
581 read_transistions(&tzf, tmp);
582 read_types(&tzf, tmp);
583 - read_location(&tzf, tmp);
585 +#ifdef HAVE_SYSTEM_TZDATA
587 + const struct location_info *li;
589 + /* TZif-style - grok the location info from the system database,
592 + if ((li = find_zone_info(system_location_table, timezone)) != NULL) {
593 + tmp->location.comments = strdup(li->comment);
594 + strncpy(tmp->location.country_code, li->code, 2);
595 + tmp->location.longitude = li->longitude;
596 + tmp->location.latitude = li->latitude;
600 + strcpy(tmp->location.country_code, "??");
602 + tmp->location.comments = strdup("");
605 + /* Now done with the mmap segment - discard it. */
606 + munmap(memmap, maplen);
610 + /* PHP-style - use the embedded info. */
611 + read_location(&tzf, tmp);
616 --- a/ext/date/lib/timelib.m4
617 +++ b/ext/date/lib/timelib.m4
618 @@ -78,3 +78,17 @@ stdlib.h
620 dnl Check for strtoll, atoll
621 AC_CHECK_FUNCS(strtoll atoll strftime)
623 +PHP_ARG_WITH(system-tzdata, for use of system timezone data,
624 +[ --with-system-tzdata[=DIR] to specify use of system timezone data],
627 +if test "$PHP_SYSTEM_TZDATA" != "no"; then
628 + AC_DEFINE(HAVE_SYSTEM_TZDATA, 1, [Define if system timezone data is used])
630 + if test "$PHP_SYSTEM_TZDATA" != "yes"; then
631 + AC_DEFINE_UNQUOTED(HAVE_SYSTEM_TZDATA_PREFIX, "$PHP_SYSTEM_TZDATA",
632 + [Define for location of system timezone data])