Support INF and NAN in locale-independent dmlc::strtof

dmlc · Mar 3, 2019 · 8afbca6 · 8afbca6
1 parent ea0f2c3
commit 8afbca6
Show file tree

Hide file tree

Showing 2 changed files with 87 additions and 8 deletions.
diff --git a/include/dmlc/strtonum.h b/include/dmlc/strtonum.h
@@ -47,6 +47,19 @@ inline bool isdigit(char c) {
   return (c >= '0' && c <= '9');
 }
 
+/*!
+ * \brief Inline implementation of isalpha(). Tests whether the given character
+ *        is an ASCII letter, i.e. in the range 'a'-'z' or 'A'-'Z'
+ * \param c Character to test
+ * \return true if c is an ASCII letter, false otherwise
+ */
+inline bool isalpha(char c) {
+  static_assert(  // compile-time guard: the range checks assume ASCII ordering
+    static_cast<int>('A') == 65 && static_cast<int>('Z' - 'A') == 25,
+    "Only system with ASCII character set is supported");
+  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
+}
+
 /*!
  * \brief Tests whether the given character is a valid letter in the string
  *        representation of a floating-point value, i.e. decimal digits,
@@ -69,7 +82,7 @@ const int kStrtofMaxDigits = 19;
 
 /*!
  * \brief Common implementation for dmlc::strtof() and dmlc::strtod()
- * TODO: the current version does not support INF, NAN, and hex number
+ * TODO: the current version does not support hex number
  * \param nptr Beginning of the string that's to be converted into a
  *             floating-point number
  * \param endptr After the conversion, this pointer will be set to point one
@@ -123,6 +136,43 @@ inline FloatType ParseFloat(const char* nptr, char** endptr) {
     ++p;
   }
 
+  // Handle INF and NAN; like std::strtod(), consume the longest valid prefix
+  {
+    int i = 0;
+    // case-insensitive match for INF and INFINITY (| 32 folds ASCII case)
+    while (i < 8 && static_cast<char>((*p) | 32) == "infinity"[i]) {
+      ++i; ++p;
+    }
+    if (i >= 3) {
+      // a partial "infinity" (e.g. "infin") counts as "inf": un-read the rest
+      if (i != 8) p -= (i - 3);
+      if (endptr) *endptr = (char*)p;  // NOLINT(*)
+      return sign ?  std::numeric_limits<FloatType>::infinity()
+                  : -std::numeric_limits<FloatType>::infinity();
+    }
+    p -= i;  // not INF: rewind before trying NAN
+
+    // case-insensitive match for NAN
+    i = 0;
+    while (i < 3 && static_cast<char>((*p) | 32) == "nan"[i]) {
+      ++i; ++p;
+    }
+    if (i == 3) {
+      // Got NAN; accept NAN(char_sequence) only if the parenthesis is closed,
+      // otherwise stop right after "nan" rather than failing a CHECK
+      if (*p == '(') {
+        int n = 1;  // offset of the first character after '('
+        while (isdigit(p[n]) || isalpha(p[n]) || p[n] == '_') ++n;
+        if (p[n] == ')') p += n + 1;
+      }
+      static_assert(std::numeric_limits<FloatType>::has_quiet_NaN,
+        "Only system with quiet NaN is supported");
+      if (endptr) *endptr = (char*)p;  // NOLINT(*)
+      return std::numeric_limits<FloatType>::quiet_NaN();
+    }
+    p -= i;  // not NAN: rewind and fall through to numeric parsing
+  }
+
   // Get digits before decimal point or exponent, if any.
   uint64_t predec;  // to store digits before decimal point
   for (predec = 0; isdigit(*p); ++p) {
@@ -206,7 +256,7 @@ inline FloatType ParseFloat(const char* nptr, char** endptr) {
  * \brief A faster implementation of strtof(). See documentation of
  *        std::strtof() for more information. Note that this function does not
  *        check for overflow. Use strtof_check_range() to check for overflow.
- * TODO: the current version does not support INF, NAN, and hex number
+ * TODO: the current version does not support hex number
  * TODO: the current version does not handle long decimals: you may only have
  *       up to 19 digits after the decimal point, and you cannot have too many
  *       digits before the decimal point either.
@@ -224,7 +274,7 @@ inline float strtof(const char* nptr, char** endptr) {
  *        std::strtof() for more information. This function will check for
  *        overflow. If the converted value is outside the range for the float
  *        type, errno is set to ERANGE and HUGE_VALF is returned.
- * TODO: the current version does not support INF, NAN, and hex number
+ * TODO: the current version does not support hex number
  * TODO: the current version does not handle long decimals: you may only have
  *       up to 19 digits after the decimal point, and you cannot have too many
  *       digits before the decimal point either.
@@ -241,7 +291,7 @@ inline float strtof_check_range(const char* nptr, char** endptr) {
  * \brief A faster implementation of strtod(). See documentation of
  *        std::strtod() for more information. Note that this function does not
  *        check for overflow. Use strtod_check_range() to check for overflow.
- * TODO: the current version does not support INF, NAN, and hex number
+ * TODO: the current version does not support hex number
  * TODO: the current version does not handle long decimals: you may only have
  *       up to 19 digits after the decimal point, and you cannot have too many
  *       digits before the decimal point either.
@@ -259,7 +309,7 @@ inline double strtod(const char* nptr, char** endptr) {
  *        std::strtod() for more information. This function will check for
  *        overflow. If the converted value is outside the range for the double
  *        type, errno is set to ERANGE and HUGE_VAL is returned.
- * TODO: the current version does not support INF, NAN, and hex number
+ * TODO: the current version does not support hex number
  * TODO: the current version does not handle long decimals: you may only have
  *       up to 19 digits after the decimal point, and you cannot have too many
  *       digits before the decimal point either.
@@ -388,7 +438,7 @@ inline long atol(const char* p) {  // NOLINT(*)
 /*!
  * \brief A faster implementation of atof(). Unlike std::atof(), this function
  *        returns float type. Note that this function does not check for overflow.
- * TODO: the current version does not support INF, NAN, and hex number
+ * TODO: the current version does not support hex number
  * TODO: the current version does not handle long decimals: you may only have
  *       up to 19 digits after the decimal point, and you cannot have too many
  *       digits before the decimal point either.
@@ -403,7 +453,7 @@ inline float atof(const char* nptr) {
  * \brief A faster implementation of stof(). See documentation of std::stof()
  *        for more information. This function will test for overflow and
  *        invalid arguments.
- * TODO: the current version does not support INF, NAN, and hex number
+ * TODO: the current version does not support hex number
  * TODO: the current version does not handle long decimals: you may only have
  *       up to 19 digits after the decimal point, and you cannot have too many
  *       digits before the decimal point either.
@@ -433,7 +483,7 @@ inline float stof(const std::string& value, size_t* pos = nullptr) {
  * \brief A faster implementation of stod(). See documentation of std::stod()
  *        for more information. This function will test for overflow and
  *        invalid arguments.
- * TODO: the current version does not support INF, NAN, and hex number
+ * TODO: the current version does not support hex number
  * TODO: the current version does not handle long decimals: you may only have
  *       up to 19 digits after the decimal point, and you cannot have too many
  *       digits before the decimal point either.

diff --git a/test/unittest/unittest_param.cc b/test/unittest/unittest_param.cc
@@ -1,5 +1,8 @@
 #include <gtest/gtest.h>
 #include <dmlc/parameter.h>
+#include <vector>
+#include <string>
+#include <cmath>
 
 struct LearningParam : public dmlc::Parameter<LearningParam> {
   float float_param;
@@ -121,4 +124,30 @@ TEST(Parameter, parsing_float) {
   ASSERT_THROW(param.Init(kwargs), dmlc::ParamError);
   kwargs["double_param"] = "1.2e-2 foo";
   ASSERT_THROW(param.Init(kwargs), dmlc::ParamError);
+
+  // INF and NAN
+  kwargs = std::map<std::string, std::string>();
+  errno = 0;  // clear errno, to clear previous range error
+  for (const char* s : {
+      "inf", "+inf", "-inf", "INF", "+INF", "-INF", "infinity", "+infinity",
+      "-infinity", "INFINITY", "+INFINITY", "-INFINITY"}) {
+    kwargs["float_param"] = s;
+    ASSERT_NO_THROW(param.Init(kwargs));
+    ASSERT_TRUE(std::isinf(param.float_param));
+    kwargs["double_param"] = s;
+    ASSERT_NO_THROW(param.Init(kwargs));
+    ASSERT_TRUE(std::isinf(param.double_param));
+  }
+  for (const char* s : {
+      "nan", "NAN", "nan(foobar)", "NAN(FooBar)", "NaN", "NaN(foo_bar_12)",
+      "+nan", "+NAN", "+nan(foobar)", "+NAN(FooBar)", "+NaN", "+NaN(foo_bar_12)",
+      "-nan", "-NAN", "-nan(foobar)", "-NAN(FooBar)", "-NaN",
+      "-NaN(foo_bar_12)"}) {
+    kwargs["float_param"] = s;
+    ASSERT_NO_THROW(param.Init(kwargs));
+    ASSERT_TRUE(std::isnan(param.float_param));
+    kwargs["double_param"] = s;
+    ASSERT_NO_THROW(param.Init(kwargs));
+    ASSERT_TRUE(std::isnan(param.double_param));
+  }
 }