// baltzo_zoneinfobinaryreader.h -*-C++-*- #ifndef INCLUDED_BALTZO_ZONEINFOBINARYREADER #define INCLUDED_BALTZO_ZONEINFOBINARYREADER #include <bsls_ident.h> BSLS_IDENT("$Id: $") //@PURPOSE: Provide utilities for reading the Zoneinfo binary data format. // //@CLASSES: // baltzo::ZoneinfoBinaryReader: utilities for reading Zoneinfo binary data // //@SEE_ALSO: baltzo_zoneinfo // //@DESCRIPTION: This component provides a 'struct', // 'baltzo::ZoneinfoBinaryReader', containing utilities to read the binary data // format of the Zoneinfo database from a 'bsl::istream' and populate a // 'baltzo::Zoneinfo' object. The Zoneinfo database (also known as the Olson // database or TZ database) records information about the world's time zones. // It includes information such as the historic changes to the rules of local // times, the time of transitions to daylight saving time, and corrections for // leap seconds. // ///Zoneinfo (TZ Database) Files ///---------------------------- // The Zoneinfo binary data is generated from a set of text-based rule files. A // rule file may be compiled into multiple binary files, each describing one // time zone. Information about the Zoneinfo database can be found online at // 'http://www.iana.org/time-zones', including the time zone rules for the // supported time zones, and source code for the 'zic' compiler (for compiling // those rules into the binary representation read by utility functions // provided by this component). // ///Zoneinfo Binary File Format ///--------------------------- // A binary byte stream conforming to the Zoneinfo binary data format begins // with a 44-byte header that describes the file. The structure of the header // is shown below: //.. 
// Byte | Name | Description // ---------+-------------------+---------------------------------------------- // 0 - 3 | 'T''Z''i''f' | magic characters to identify the file // 4 | version | '\0', '2', or '3' to identify the version // 5 - 19 | reserved | unused // 20 - 23 | numIsGmt | number of UTC/local-time indicators // 24 - 27 | numIsStd | number of standard/local-time indicators // 28 - 31 | numLeaps | number of leap seconds corrections // 32 - 35 | numTransitions | number of transition times // 36 - 39 | numLocalTimeTypes | number of local-time types // 40 - 43 | numAbbrevChar | length of time-zone abbreviation string // ---------+-------------------+---------------------------------------------- //.. // NOTE: All the numbers are 4-byte signed integers stored in big-endian format // (higher order byte first). // // The binary file has the following layout: //.. // File Segment Number of bytes // +--------------------------------+ --------------- // | HEADER | 44 // +--------------------------------+ // | TRANSITION TIMES | 4 * header.numTransitions // +--------------------------------+ // | TRANSITION-TIME INDEXES | 1 * header.numTransitions // +--------------------------------+ // | LOCAL-TIME TYPES | (4+1+1) * header.numLocalTimeTypes // +--------------------------------+ // | TIME-ZONE ABBREVIATION STRINGS | 1 * header.numAbbrevChar // +--------------------------------+ // | LEAP SECONDS | (4+4) * header.numLeaps // +--------------------------------+ // | IS_GMT(s) | 1 * header.numIsGmt // +--------------------------------+ // | IS_STD(s) | 1 * header.numIsStd // +--------------------------------+ //.. //: o HEADER -- The header is 44 bytes that describe the file. //: //: o TRANSITION TIMES -- 'numTransitions' number of 4-byte big-endian signed //: integers representing the transition times in POSIX time format (number //: of seconds elapsed since midnight UTC, January 1, 1970) sorted in //: ascending order. 
Each transition time represents a time at which the //: rule for computing the local time in that time zone changes. (NOTE: See //: "Version '2'" section for additional information.) //: //: o TRANSITION-TIME INDEXES -- 'numTransitions' number of unsigned bytes. //: The byte at a given offset N stores the index of the local-time type //: entry (in the array of local-time types appearing next in the file) that //: corresponds to the transition at index N in the preceding sequence of //: transitions. //: //: o LOCAL-TIME TYPES -- An array of 'numLocalTimeTypes' local-time types, //: where each entry consists of: //: o gmtOffset -- 4-byte big-endian signed integer indicating the number of //: seconds added to UTC to compute the local time in the time zone //: o isDst -- 1-byte boolean indicating whether the local-time type is //: considered daylight saving time //: o abbreviationIndex -- 1 unsigned byte indicating the index of the first //: byte of the null-terminated abbreviation string of this local-time type //: in the array of bytes holding abbreviation strings later in the file. //: //: o TIME-ZONE ABBREVIATION STRINGS -- 'numAbbrevChar' number of bytes //: containing the abbreviation strings of the local-time types of this time //: zone. Each abbreviation string ends with the '\0' character. //: //: o LEAP SECONDS -- 'numLeaps' number of *pairs* of 4-byte signed big-endian //: integers (totaling 8 bytes). The first value of the pair is the POSIX //: time at which the leap seconds occur. The second value of the pair gives //: the number of leap seconds to be applied to times after the time defined //: by the first element of the pair. The pairs are stored in ascending //: order by time. (NOTE: See "Version '2'" section for additional //: information.) 
//: //: o IS_GMT(s) -- 'numIsGmt' number of 1-byte booleans indicating whether the //: transition time for the local-time type with the corresponding index in //: the preceding array of local-time types was specified in UTC time or //: local time in the original rule file (from which the binary file was //: presumably compiled) (see next section for details). //: //: o IS_STD(s) -- 'numIsStd' number of 1-byte booleans indicating whether the //: transition time for the local-time type with the corresponding index in //: the preceding array of local-time types was specified in standard time or //: local time in the original rule file (from which the binary file was //: presumably compiled). Standard time refers to the time without //: adjustment for daylight-saving time whether daylight saving is in effect //: or not (see next section for details). // ///'isGmt' and 'isStd' ///- - - - - - - - - - // The 'isGmt' and 'isStd' boolean flags are not used by this component. // // They indicate properties of the original rule file that were compiled into // this binary representation. Each 'isGmt' boolean flag indicates whether a // transition time was originally specified as a UTC time or as a local time in // the *rule* *file*. Each 'isStd' boolean flag indicates whether a transition time was // originally specified as a standard time or local time. Note that all the // transitions in the *binary* Zoneinfo representation are UTC times regardless // of whether the original representation in the rule file was in local time. // Also note that the values of 'numIsGmt' and 'numIsStd' should be equal to // 'numLocalTimeTypes', but for backward compatibility reasons, they may be set // to zero. // ///Leap Corrections /// - - - - - - - - // Leap corrections are currently not supported by this component. Attempts to // read a stream containing leap corrections will result in an error. // // There is a slight difference between the mean length of a day and 86400 // (24 * 60 * 60) seconds. 
Leap corrections are adjustments to the UTC time to // account for this difference. In general, the time in a computer is updated // through the network to account for leap seconds. Thus, it is not necessary // for this component to consider leap corrections for accurate time // calculations. // ///Version '2' ///- - - - - - // Version '2' format of the Zoneinfo binary data can be divided into two // parts. The first part contains the header and data described above. The // second part contains a header and data in the same format with the exception // that eight bytes are used for transition time and leap correction time. The // second header and data are followed by a formatted string used for handling // time after the last transition time. // // The version '2' format uses 8 bytes to store date-time values because the // 4-byte values used in the version '\0' format provide a limited date range. // The standard IANA data contains many transitions that are outside the range // of representable values of a 4-byte offset (these transitions are ignored by // the version '\0' format, which leads to inaccurate information for dates far // in the past or future). // // The version '2' format adds an optional POSIX TZ environment // string, holding a text description of the local time transitions of the // timezone, which can be used to compute transitions beyond the range // represented in the binary compiled time zone data (see // ftp://ftp.iana.org/tz/code/tzfile.h). It is similar to (but not necessarily // identical to) the POSIX time zone description used for the 'TZ' environment // variable. The string is found between two newline '\n' characters immediately // following the data. If two consecutive newline characters are found, no // string has been specified. // // This component will always load version '2' data if it is present in the // supplied binary data. 
// ///Version '3' ///- - - - - - // Version '3' format of the Zoneinfo binary data is identical to the version // '2' data described above. The version number of some files was incremented // because the rules for the allowable range of values for the POSIX TZ // environment string (found at the end of the data) were changed (see // ftp://ftp.iana.org/tz/code/tzfile.h for more information). // ///Additional Information /// - - - - - - - - - - - // Additional documentation for the Zoneinfo file format can be found at // various places on the internet: // //: o http://www.iana.org/time-zones -- Central links for Zoneinfo time-zone DB //: information. Up-to-date documentation on the file specification can be //: found inside the code distribution, in a file named 'tzfile.5.txt'. This //: file contains the most recent man-page text. //: //: o https://github.com/eggert/tz -- Unofficial GitHub repository for unstable //: changes not yet published into an official release on the IANA website. //: This repository is published and maintained by the official IANA //: maintainer, Paul Eggert. // ///Usage ///----- // In this section, we illustrate how this component can be used. // ///Example 1: Reading Zoneinfo Binary Data ///- - - - - - - - - - - - - - - - - - - - // The following demonstrates how to read a byte stream in the Zoneinfo binary // data format into a 'baltzo::Zoneinfo' object. We start by creating Zoneinfo // data in memory for "Asia/Bangkok", which was chosen due to its small size. // Note that this data was generated by the 'zic' compiler, which is publicly // obtainable as part of the standard Zoneinfo distribution (see // 'http://www.iana.org/time-zones'): //.. 
// const char ASIA_BANGKOK_DATA[] = { // 0x54, 0x5a, 0x69, 0x66, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, // 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, // 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x08, 0xa2, 0x6a, 0x67, 0xc4, // 0x01, 0x00, 0x00, 0x5e, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x62, 0x70, 0x00, // 0x04, 0x42, 0x4d, 0x54, 0x00, 0x49, 0x43, 0x54, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x54, 0x5a, 0x69, 0x66, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x03, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x0c, 0xff, 0xff, 0xff, // 0xff, 0x56, 0xb6, 0x85, 0xc4, 0xff, 0xff, 0xff, 0xff, 0xa2, 0x6a, 0x67, // 0xc4, 0x01, 0x02, 0x00, 0x00, 0x5e, 0x3c, 0x00, 0x00, 0x00, 0x00, 0x5e, // 0x3c, 0x00, 0x04, 0x00, 0x00, 0x62, 0x70, 0x00, 0x08, 0x4c, 0x4d, 0x54, // 0x00, 0x42, 0x4d, 0x54, 0x00, 0x49, 0x43, 0x54, 0x00, 0x00, 0x00, 0x00, // 0x00, 0x00, 0x00, 0x0a, 0x49, 0x43, 0x54, 0x2d, 0x37, 0x0a // }; //.. // Then, we load this data into a stream buffer. //.. // bdlsb::FixedMemInStreamBuf inStreamBuf(ASIA_BANGKOK_DATA, // sizeof(ASIA_BANGKOK_DATA)); // bsl::istream inputStream(&inStreamBuf); //.. // Now, we read the 'inputStream' using 'baltzo::ZoneinfoBinaryReader::read'. //.. // // baltzo::Zoneinfo timeZone; // if (0 != baltzo::ZoneinfoBinaryReader::read(&timeZone, // inputStream)) { // bsl::cerr << "baltzo::ZoneinfoBinaryReader::read failed" // << bsl::endl; // return 1; //RETURN // } //.. // Finally, we write a description of the loaded Zoneinfo to the console. //.. // timeZone.print(bsl::cout, 1, 3); //.. // The output of the preceding statement should look like: //.. 
// [ // identifier = "" // transitions = [ // [ // time = 01JAN0001_00:00:00.000 // descriptor = [ // utcOffsetInSeconds = 24124 // dstInEffectFlag = false // description = "LMT" // ] // ] // [ // time = 31DEC1879_17:17:56.000 // descriptor = [ // utcOffsetInSeconds = 24124 // dstInEffectFlag = false // description = "BMT" // ] // ] // [ // time = 31MAR1920_17:17:56.000 // descriptor = [ // utcOffsetInSeconds = 25200 // dstInEffectFlag = false // description = "ICT" // ] // ] // ] // ] //.. #include <balscm_version.h> #include <bsl_iosfwd.h> namespace BloombergLP { namespace baltzo { class Zoneinfo; class ZoneinfoBinaryHeader; // =========================== // struct ZoneinfoBinaryReader // =========================== struct ZoneinfoBinaryReader { // This 'struct' provides a namespace for utility functions that read // Zoneinfo time zone data from a binary input stream. It declares only // 'static' methods: the two 'read' overloads, each of which consumes a // stream of bytes in the Zoneinfo binary data format and populates a // 'Zoneinfo' object. // CLASS METHODS static int read(Zoneinfo *zoneinfoResult, bsl::istream& stream); static int read(Zoneinfo *zoneinfoResult, ZoneinfoBinaryHeader *headerResult, bsl::istream& stream); // Read time zone information from the specified 'stream', and load the // description into the specified 'zoneinfoResult'. Optionally specify // a 'headerResult' that, on success, will be populated with a summary // of the 'stream' contents. Return 0 on success, and a non-zero value // if 'stream' does not provide a sequence of bytes consistent with the // Zoneinfo binary format. If an error occurs during the operation, // the value of 'zoneinfoResult' is unspecified. Note that, as // described in the component-level documentation, reading a 'stream' // that contains leap corrections results in an error, and version '2' // data is always loaded when it is present in 'stream'. }; } // close package namespace } // close enterprise namespace #endif // ---------------------------------------------------------------------------- // Copyright 2015 Bloomberg Finance L.P. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
// You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // ----------------------------- END-OF-FILE ----------------------------------