GeographicLib  1.21
Utility.hpp
Go to the documentation of this file.
00001 /**
00002  * \file Utility.hpp
00003  * \brief Header for GeographicLib::Utility class
00004  *
00005  * Copyright (c) Charles Karney (2011, 2012) <charles@karney.com> and licensed
00006  * under the MIT/X11 License.  For more information, see
00007  * http://geographiclib.sourceforge.net/
00008  **********************************************************************/
00009 
00010 #if !defined(GEOGRAPHICLIB_UTILITY_HPP)
00011 #define GEOGRAPHICLIB_UTILITY_HPP \
00012   "$Id: 92c92fcb8ea92116fed01909c2611934b708e4cd $"
00013 
00014 #include <GeographicLib/Constants.hpp>
00015 #include <iomanip>
00016 #include <vector>
00017 #include <string>
00018 #include <sstream>
00019 #include <algorithm>
00020 #include <cctype>
00021 
00022 namespace GeographicLib {
00023 
00024   /**
00025    * \brief Some utility routines for %GeographicLib
00026    *
00027    * Example of use:
00028    * \include example-Utility.cpp
00029    **********************************************************************/
00030   class GEOGRAPHIC_EXPORT Utility {
00031   private:
00032     static bool gregorian(int y, int m, int d) {
00033       // The original cut over to the Gregorian calendar in Pope Gregory XIII's
00034       // time had 1582-10-04 followed by 1582-10-15. Here we implement the
00035       // switch over used by the English-speaking world where 1752-09-02 was
00036       // followed by 1752-09-14. We also assume that the year always begins
00037       // with January 1, whereas in reality it often was reckoned to begin in
00038       // March.
00039       return 100 * (100 * y + m) + d >= 17520914; // or 15821004
00040     }
00041     static bool gregorian(int s) {
00042       return s >= 639799;       // 1752-09-14
00043     }
00044   public:
00045 
00046     /**
00047      * Convert a date to the day numbering sequentially starting with
00048      * 0001-01-01 as day 1.
00049      *
00050      * @param[in] y the year (must be positive).
00051      * @param[in] m the month, Jan = 1, etc. (must be positive).  Default = 1.
00052      * @param[in] d the day of the month (must be positive).  Default = 1.
00053      * @return the sequential day number.
00054      **********************************************************************/
00055     static int day(int y, int m = 1, int d = 1) throw() {
00056       // Convert from date to sequential day and vice versa
00057       //
00058       // Here is some code to convert a date to sequential day and vice
00059       // versa. The sequential day is numbered so that January 1, 1 AD is day 1
00060       // (a Saturday). So this is offset from the "Julian" day which starts the
00061       // numbering with 4713 BC.
00062       //
00063       // This is inspired by a talk by John Conway at the John von Neumann
00064       // National Supercomputer Center when he described his Doomsday algorithm
00065       // for figuring the day of the week. The code avoids explicitly doing ifs
00066       // (except for the decision of whether to use the Julian or Gregorian
00067       // calendar). Instead the equivalent result is achieved using integer
00068       // arithmetic. I got this idea from the routine for the day of the week
00069       // in MACLisp (I believe that that routine was written by Guy Steele).
00070       //
00071       // There are three issues to take care of
00072       //
00073       // 1. the rules for leap years,
00074       // 2. the inconvenient placement of leap days at the end of February,
00075       // 3. the irregular pattern of month lengths.
00076       //
00077       // We deal with these as follows:
00078       //
00079       // 1. Leap years are given by simple rules which are straightforward to
00080       // accommodate.
00081       //
00082       // 2. We simplify the calculations by moving January and February to the
00083       // previous year. Here we internally number the months March–December,
00084       // January, February as 0–9, 10, 11.
00085       //
00086       // 3. The pattern of month lengths from March through January is regular
00087       // with a 5-month period—31, 30, 31, 30, 31, 31, 30, 31, 30, 31, 31. The
00088       // 5-month period is 153 days long. Since February is now at the end of
00089       // the year, we don't need to include its length in this part of the
00090       // calculation.
00091       bool greg = gregorian(y, m, d);
00092       y += (m + 9) / 12 - 1; // Move Jan and Feb to previous year,
00093       m = (m + 9) % 12;      // making March month 0.
00094       return
00095         (1461 * y) / 4 // Julian years converted to days.  Julian year is 365 +
00096                        // 1/4 = 1461/4 days.
00097         // Gregorian leap year corrections.  The 2 offset with respect to the
00098         // Julian calendar synchronizes the vernal equinox with that at the time
00099         // of the Council of Nicea (325 AD).
00100         + (greg ? (y / 100) / 4 - (y / 100) + 2 : 0)
00101         + (153 * m + 2) / 5     // The zero-based start of the m'th month
00102         + d - 1                 // The zero-based day
00103         - 305; // The number of days between March 1 and December 31.
00104                // This makes 0001-01-01 day 1
00105     }
00106 
00107     /**
00108      * Convert a date to the day numbering sequentially starting with
00109      * 0001-01-01 as day 1.
00110      *
00111      * @param[in] y the year (must be positive).
00112      * @param[in] m the month, Jan = 1, etc. (must be positive).  Default = 1.
00113      * @param[in] d the day of the month (must be positive).  Default = 1.
00114      * @param[in] check whether to check the date.
00115      * @return the sequential day number.
00116      *
00117      * If \e check is true and the date is invalid an exception is thrown.
00118      **********************************************************************/
00119     static int day(int y, int m, int d, bool check) {
00120       int s = day(y, m, d);
00121       if (!check)
00122         return s;
00123       int y1, m1, d1;
00124       date(s, y1, m1, d1);
00125       if (!(s > 0 && y == y1 && m == m1 && d == d1))
00126         throw GeographicErr("Invalid date " +
00127                             str(y) + "-" + str(m) + "-" + str(d)
00128                             + (s > 0 ? "; use " +
00129                                str(y1) + "-" + str(m1) + "-" + str(d1) :
00130                                " before 0001-01-01"));
00131       return s;
00132     }
00133 
00134     /**
00135      * Given a day (counting from 0001-01-01 as day 1), return the date.
00136      *
00137      * @param[in] s the sequential day number (must be positive)
00138      * @param[out] y the year.
00139      * @param[out] m the month, Jan = 1, etc.
00140      * @param[out] d the day of the month.
00141      **********************************************************************/
00142     static void date(int s, int& y, int& m, int& d) throw() {
00143       int c = 0;
00144       bool greg = gregorian(s);
00145       s += 305;                 // s = 0 on March 1, 1BC
00146       if (greg) {
00147         s -= 2;                 // The 2 day Gregorian offset
00148         // Determine century with the Gregorian rules for leap years.  The
00149         // Gregorian year is 365 + 1/4 - 1/100 + 1/400 = 146097/400 days.
00150         c = (4 * s + 3) / 146097;
00151         s -= (c * 146097) / 4;  // s = 0 at beginning of century
00152       }
00153       y = (4 * s + 3) / 1461;   // Determine the year using Julian rules.
00154       s -= (1461 * y) / 4;      // s = 0 at start of year, i.e., March 1
00155       y += c * 100;             // Assemble full year
00156       m = (5 * s + 2) / 153;    // Determine the month
00157       s -= (153 * m + 2) / 5;   // s = 0 at beginning of month
00158       d = s + 1;                // Determine day of month
00159       y += (m + 2) / 12;        // Move Jan and Feb back to original year
00160       m = (m + 2) % 12 + 1;     // Renumber the months so January = 1
00161     }
00162 
00163     /**
00164      * Given a date as a string in the format yyyy, yyyy-mm, or yyyy-mm-dd,
00165      * return the numeric values for the year, month, and day.  No checking is
00166      * done on these values.
00167      *
00168      * @param[in] s the date in string format.
00169      * @param[out] y the year.
00170      * @param[out] m the month, Jan = 1, etc.
00171      * @param[out] d the day of the month.
00172      **********************************************************************/
00173     static void date(const std::string& s, int& y, int& m, int& d) {
00174       int y1, m1 = 1, d1 = 1;
00175       const char* digits = "0123456789";
00176       std::string::size_type p1 = s.find_first_not_of(digits);
00177       if (p1 == std::string::npos)
00178         y1 = num<int>(s);
00179       else if (s[p1] != '-')
00180         throw GeographicErr("Delimiter not hyphen in date " + s);
00181       else if (p1 == 0)
00182         throw GeographicErr("Empty year field in date " + s);
00183       else {
00184         y1 = num<int>(s.substr(0, p1));
00185         if (++p1 == s.size())
00186           throw GeographicErr("Empty month field in date " + s);
00187         std::string::size_type p2 = s.find_first_not_of(digits, p1);
00188         if (p2 == std::string::npos)
00189           m1 = num<int>(s.substr(p1));
00190         else if (s[p2] != '-')
00191           throw GeographicErr("Delimiter not hyphen in date " + s);
00192         else if (p2 == p1)
00193           throw GeographicErr("Empty month field in date " + s);
00194         else {
00195           m1 = num<int>(s.substr(p1, p2 - p1));
00196           if (++p2 == s.size())
00197             throw GeographicErr("Empty day field in date " + s);
00198           d1 = num<int>(s.substr(p2));
00199         }
00200       }
00201       y = y1; m = m1; d = d1;
00202     }
00203 
00204     /**
00205      * Given the date, return the day of the week.
00206      *
00207      * @param[in] y the year (must be positive).
00208      * @param[in] m the month, Jan = 1, etc. (must be positive).
00209      * @param[in] d the day of the month (must be positive).
00210      * @return the day of the week with Sunday, Monday - Saturday = 0, 1 - 6.
00211      **********************************************************************/
00212     static int dow(int y, int m, int d) throw() { return dow(day(y, m, d)); }
00213 
00214     /**
00215      * Given the sequential day, return the day of the week.
00216      *
00217      * @param[in] s the sequential day (must be positive).
00218      * @return the day of the week with Sunday, Monday - Saturday = 0, 1 - 6.
00219      **********************************************************************/
00220     static int dow(int s) throw() {
00221       return (s + 5) % 7;  // The 5 offset makes day 1 (0001-01-01) a Saturday.
00222     }
00223 
00224     /**
00225      * Convert a string representing a date to a fractional year.
00226      *
00227      * @tparam T the type of the argument.
00228      * @param[in] s the string to be converted.
00229      * @return the fractional year.
00230      *
00231      * The string is first read as an ordinary number (e.g., 2010 or 2012.5);
00232      * if this is successful, the value is returned.  Otherwise the string
00233      * should be of the form yyyy-mm or yyyy-mm-dd and this is converted to a
00234      * number with 2010-01-01 giving 2010.0 and 2012-07-03 giving 2012.5.
00235      **********************************************************************/
00236     template<typename T> static T fractionalyear(const std::string& s) {
00237       try {
00238         return num<T>(s);
00239       }
00240       catch (const std::exception&) {
00241       }
00242       int y, m, d;
00243       date(s, y, m, d);
00244       int t = day(y, m, d, true);
00245       return T(y) + T(t - day(y)) / T(day(y + 1) - day(y));
00246     }
00247 
00248     /**
00249      * Convert an object of type T to a string.
00250      *
00251      * @tparam T the type of the argument.
00252      * @param[in] x the value to be converted.
00253      * @param[in] p the precision used (default -1).
00254      * @return the string representation.
00255      *
00256      * If \e p >= 0, then fixed format is used with \e p digits of precision
00257      * after the decimal point.  With \e p < 0, the format is not changed.
00258      **********************************************************************/
00259     template<typename T> static std::string str(T x, int p = -1) {
00260       if (!std::numeric_limits<T>::is_integer && !Math::isfinite<T>(x))
00261         return x < 0 ? std::string("-inf") :
00262           (x > 0 ? std::string("inf") : std::string("nan"));
00263       std::ostringstream s;
00264       if (p >= 0) s << std::fixed << std::setprecision(p);
00265       s << x; return s.str();
00266     }
00267 
00268     /**
00269      * Convert a string to an object of type T.
00270      *
00271      * @tparam T the type of the return value.
00272      * @param[in] s the string to be converted.
00273      * @return object of type T
00274      **********************************************************************/
00275     template<typename T> static T num(const std::string& s) {
00276       T x;
00277       std::string errmsg;
00278       do {                     // Executed once (provides the ability to break)
00279         std::istringstream is(s);
00280         if (!(is >> x)) {
00281           errmsg = "Cannot decode " + s;
00282           break;
00283         }
00284         int pos = int(is.tellg()); // Returns -1 at end of string?
00285         if (!(pos < 0 || pos == int(s.size()))) {
00286           errmsg = "Extra text " + s.substr(pos) + " at end of " + s;
00287           break;
00288         }
00289         return x;
00290       } while (false);
00291       x = std::numeric_limits<T>::is_integer ? 0 : nummatch<T>(s);
00292       if (x == 0)
00293         throw GeographicErr(errmsg);
00294       return x;
00295     }
00296 
00297     /**
00298      * Match "nan" and "inf" (and variants thereof) in a string.
00299      *
00300      * @tparam T the type of the return value.
00301      * @param[in] s the string to be matched.
00302      * @return appropriate special value (+/-inf, nan) or 0 if none is found.
00303      **********************************************************************/
00304     template<typename T> static T nummatch(const std::string& s) {
00305       if (s.length() < 3)
00306         return 0;
00307       std::string t;
00308       t.resize(s.length());
00309       std::transform(s.begin(), s.end(), t.begin(), (int(*)(int))std::toupper);
00310       for (size_t i = s.length(); i--;)
00311         t[i] = std::toupper(s[i]);
00312       int sign = t[0] == '-' ? -1 : 1;
00313       std::string::size_type p0 = t[0] == '-' || t[0] == '+' ? 1 : 0;
00314       std::string::size_type p1 = t.find_last_not_of('0');
00315       if (p1 == std::string::npos || p1 + 1 < p0 + 3)
00316         return 0;
00317       // Strip off sign and trailing 0s
00318       t = t.substr(p0, p1 + 1 - p0);  // Length at least 3
00319       if (t == "NAN" || t == "1.#QNAN" || t == "1.#SNAN" || t == "1.#IND" ||
00320           t == "1.#R")
00321         return Math::NaN<T>();
00322       else if (t == "INF" || t == "1.#INF")
00323         return sign * Math::infinity<T>();
00324       return 0;
00325     }
00326 
00327     /**
00328      * Read a simple fraction, e.g., 3/4, from a string to an object of type T.
00329      *
00330      * @tparam T the type of the return value.
00331      * @param[in] s the string to be converted.
00332      * @return object of type T
00333      **********************************************************************/
00334     template<typename T> static T fract(const std::string& s) {
00335       std::string::size_type delim = s.find('/');
00336       return
00337         !(delim != std::string::npos && delim >= 1 && delim + 2 <= s.size()) ?
00338         num<T>(s) :
00339         // delim in [1, size() - 2]
00340         num<T>(s.substr(0, delim)) / num<T>(s.substr(delim + 1));
00341     }
00342 
00343     /**
00344      * Look up a character in a string.
00345      *
00346      * @param[in] s the string to be searched.
00347      * @param[in] c the character to look for.
00348      * @return the index of the first occurrence of the character in the
00349      *   string or -1 if the character is not present.
00350      *
00351      * \e c is converted to upper case before searching \e s.  Therefore, it
00352      * is intended that \e s should not contain any lower case letters.
00353      **********************************************************************/
00354     static int lookup(const std::string& s, char c) throw() {
00355       std::string::size_type r = s.find(toupper(c));
00356       return r == std::string::npos ? -1 : int(r);
00357     }
00358 
00359     /**
00360      * Read data of type ExtT from a binary stream to an array of type IntT.
00361      * The data in the file is in (bigendp ? big : little)-endian format.
00362      *
00363      * @tparam ExtT the type of the objects in the binary stream (external).
00364      * @tparam IntT the type of the objects in the array (internal).
00365      * @tparam bigendp true if the external storage format is big-endian.
00366      * @param[in] str the input stream containing the data of type ExtT
00367      *   (external).
00368      * @param[out] array the output array of type IntT (internal).
00369      * @param[in] num the size of the array.
00370      **********************************************************************/
00371     template<typename ExtT, typename IntT, bool bigendp>
00372       static inline void readarray(std::istream& str,
00373                                    IntT array[], size_t num) {
00374       if (sizeof(IntT) == sizeof(ExtT) &&
00375           std::numeric_limits<IntT>::is_integer ==
00376           std::numeric_limits<ExtT>::is_integer) {
00377         // Data is compatible (aside from the issue of endian-ness).
00378         str.read(reinterpret_cast<char *>(array), num * sizeof(ExtT));
00379         if (!str.good())
00380           throw GeographicErr("Failure reading data");
00381         if (bigendp != Math::bigendian) { // endian mismatch -> swap bytes
00382           for (size_t i = num; i--;)
00383             array[i] = Math::swab<IntT>(array[i]);
00384         }
00385       } else {
00386         const int bufsize = 1024; // read this many values at a time
00387         ExtT buffer[bufsize];     // temporary buffer
00388         int k = int(num);         // data values left to read
00389         int i = 0;                // index into output array
00390         while (k) {
00391           int n = (std::min)(k, bufsize);
00392           str.read(reinterpret_cast<char *>(buffer), n * sizeof(ExtT));
00393           if (!str.good())
00394             throw GeographicErr("Failure reading data");
00395           for (int j = 0; j < n; ++j)
00396             // fix endian-ness and cast to IntT
00397             array[i++] = IntT(bigendp == Math::bigendian ? buffer[j] :
00398                               Math::swab<ExtT>(buffer[j]));
00399           k -= n;
00400         }
00401       }
00402       return;
00403     }
00404 
00405     /**
00406      * Read data of type ExtT from a binary stream to a vector array of type
00407      * IntT.  The data in the file is in (bigendp ? big : little)-endian
00408      * format.
00409      *
00410      * @tparam ExtT the type of the objects in the binary stream (external).
00411      * @tparam IntT the type of the objects in the array (internal).
00412      * @tparam bigendp true if the external storage format is big-endian.
00413      * @param[in] str the input stream containing the data of type ExtT
00414      *   (external).
00415      * @param[out] array the output vector of type IntT (internal).
00416      **********************************************************************/
00417     template<typename ExtT, typename IntT, bool bigendp>
00418       static inline void readarray(std::istream& str,
00419                                    std::vector<IntT>& array) {
00420       readarray<ExtT, IntT, bigendp>(str, &array[0], array.size());
00421     }
00422 
00423     /**
00424      * Write data in an array of type IntT as type ExtT to a binary stream.
00425      * The data in the file is in (bigendp ? big : little)-endian format.
00426      *
00427      * @tparam ExtT the type of the objects in the binary stream (external).
00428      * @tparam IntT the type of the objects in the array (internal).
00429      * @tparam bigendp true if the external storage format is big-endian.
00430      * @param[out] str the output stream for the data of type ExtT (external).
00431      * @param[in] array the input array of type IntT (internal).
00432      * @param[in] num the size of the array.
00433      **********************************************************************/
00434     template<typename ExtT, typename IntT, bool bigendp>
00435       static inline void writearray(std::ostream& str,
00436                                    const IntT array[], size_t num) {
00437       if (sizeof(IntT) == sizeof(ExtT) &&
00438           std::numeric_limits<IntT>::is_integer ==
00439           std::numeric_limits<ExtT>::is_integer &&
00440           bigendp == Math::bigendian) {
00441         // Data is compatible (including endian-ness).
00442         str.write(reinterpret_cast<const char *>(array), num * sizeof(ExtT));
00443         if (!str.good())
00444           throw GeographicErr("Failure writing data");
00445       } else {
00446         const int bufsize = 1024; // write this many values at a time
00447         ExtT buffer[bufsize];     // temporary buffer
00448         int k = int(num);         // data values left to write
00449         int i = 0;                // index into output array
00450         while (k) {
00451           int n = (std::min)(k, bufsize);
00452           for (int j = 0; j < n; ++j)
00453             // cast to ExtT and fix endian-ness
00454             buffer[j] = bigendp == Math::bigendian ? ExtT(array[i++]) :
00455               Math::swab<ExtT>(ExtT(array[i++]));
00456           str.write(reinterpret_cast<const char *>(buffer), n * sizeof(ExtT));
00457           if (!str.good())
00458             throw GeographicErr("Failure writing data");
00459           k -= n;
00460         }
00461       }
00462       return;
00463     }
00464 
00465     /**
00466      * Write data in an array of type IntT as type ExtT to a binary stream.
00467      * The data in the file is in (bigendp ? big : little)-endian format.
00468      *
00469      * @tparam ExtT the type of the objects in the binary stream (external).
00470      * @tparam IntT the type of the objects in the array (internal).
00471      * @tparam bigendp true if the external storage format is big-endian.
00472      * @param[out] str the output stream for the data of type ExtT (external).
00473      * @param[in] array the input vector of type IntT (internal).
00474      **********************************************************************/
00475     template<typename ExtT, typename IntT, bool bigendp>
00476       static inline void writearray(std::ostream& str,
00477                                    std::vector<IntT>& array) {
00478       writearray<ExtT, IntT, bigendp>(str, &array[0], array.size());
00479     }
00480 
00481     /**
00482      * Parse a KEY VALUE line.
00483      *
00484      * @param[in] line the input line.
00485      * @param[out] key the key.
00486      * @param[out] val the value.
00487      * @return whether a key was found.
00488      *
00489      * A # character and everything after it are discarded.  If the result is
00490      * just white space, the routine returns false (and \e key and \e val are
00491      * not set).  Otherwise the first token is taken to be the key and the rest
00492      * of the line (trimmed of leading and trailing white space) is the value.
00493      **********************************************************************/
00494     static bool ParseLine(const std::string& line,
00495                           std::string& key, std::string& val);
00496 
00497   };
00498 
00499 } // namespace GeographicLib
00500 
00501 #endif  // GEOGRAPHICLIB_UTILITY_HPP