util: introduce escape() and use it in Component::toUri()

Change-Id: Ie476ca0fc909fb275631c60195fb1aa5640cff10
Refs: #4484
diff --git a/src/name-component.cpp b/src/name-component.cpp
index 136fb64..fb89f65 100644
--- a/src/name-component.cpp
+++ b/src/name-component.cpp
@@ -135,46 +135,16 @@
     printHex(result, value(), value_size(), false);
   }
   else {
-    const uint8_t* value = this->value();
-    size_t valueSize = value_size();
-
-    bool gotNonDot = false;
-    for (size_t i = 0; i < valueSize; ++i) {
-      if (value[i] != 0x2e) {
-        gotNonDot = true;
-        break;
-      }
-    }
-    if (!gotNonDot) {
+    bool hasNonDot = std::any_of(value_begin(), value_end(),
+                                 [] (uint8_t x) { return x != '.'; });
+    if (!hasNonDot) {
       // Special case for component of zero or more periods.  Add 3 periods.
       result << "...";
-      for (size_t i = 0; i < valueSize; ++i)
+      for (size_t i = 0; i < value_size(); ++i)
         result << '.';
     }
     else {
-      // In case we need to escape, set to upper case hex and save the previous flags.
-      auto savedFlags = result.flags(std::ios::hex | std::ios::uppercase);
-
-      for (size_t i = 0; i < valueSize; ++i) {
-        uint8_t x = value[i];
-        // Unreserved characters are not escaped.
-        if ((x >= '0' && x <= '9') ||
-            (x >= 'A' && x <= 'Z') ||
-            (x >= 'a' && x <= 'z') ||
-            x == '-' || x == '.' ||
-            x == '_' || x == '~') {
-          result << x;
-        }
-        else {
-          result << '%';
-          if (x < 16)
-            result << '0';
-          result << static_cast<int>(x);
-        }
-      }
-
-      // Restore.
-      result.flags(savedFlags);
+      escape(result, reinterpret_cast<const char*>(value()), value_size());
     }
   }
 }
diff --git a/src/util/string-helper.cpp b/src/util/string-helper.cpp
index 8be94d0..642b238 100644
--- a/src/util/string-helper.cpp
+++ b/src/util/string-helper.cpp
@@ -1,6 +1,6 @@
 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
 /*
- * Copyright (c) 2013-2017 Regents of the University of California.
+ * Copyright (c) 2013-2018 Regents of the University of California.
  *
  * This file is part of ndn-cxx library (NDN C++ library with eXperimental eXtensions).
  *
@@ -77,19 +77,6 @@
   return toHex(buffer.data(), buffer.size(), wantUpperCase);
 }
 
-int
-fromHexChar(char c)
-{
-  if (c >= '0' && c <= '9')
-    return c - '0';
-  else if (c >= 'A' && c <= 'F')
-    return c - 'A' + 0xA;
-  else if (c >= 'a' && c <= 'f')
-    return c - 'a' + 0xA;
-  else
-    return -1;
-}
-
 shared_ptr<Buffer>
 fromHex(const std::string& hexString)
 {
@@ -107,31 +94,64 @@
 }
 
 std::string
+escape(const std::string& str) // convenience overload: percent-encodes str and returns the result as a new string
+{
+  std::ostringstream os;
+  escape(os, str.data(), str.size()); // the length is passed explicitly, so every byte of str is processed
+  return os.str();
+}
+
+void
+escape(std::ostream& os, const char* str, size_t len) // percent-encode the len bytes at str, writing the result to os
+{
+  for (size_t i = 0; i < len; ++i) {
+    auto c = str[i];
+    // Unreserved characters (ASCII letters, digits, '-', '.', '_', '~') are written through unchanged.
+    if ((c >= 'a' && c <= 'z') ||
+        (c >= 'A' && c <= 'Z') ||
+        (c >= '0' && c <= '9') ||
+        c == '-' || c == '.' ||
+        c == '_' || c == '~') {
+      os << c;
+    }
+    else {
+      os << '%'; // every other byte becomes '%' followed by two hex digits
+      os << toHexChar((c & 0xf0) >> 4); // high nibble first
+      os << toHexChar(c & 0xf); // then the low nibble; toHexChar() produces uppercase digits by default
+    }
+  }
+}
+
+std::string
 unescape(const std::string& str)
 {
-  std::ostringstream result;
+  std::ostringstream os; // collects the decoded output
+  unescape(os, str.data(), str.size()); // the stream overload does the actual decoding
+  return os.str();
+}
 
-  for (size_t i = 0; i < str.size(); ++i) {
-    if (str[i] == '%' && i + 2 < str.size()) {
+void
+unescape(std::ostream& os, const char* str, size_t len) // decode the "%XX" escapes in the len bytes at str, writing the result to os
+{
+  for (size_t i = 0; i < len; ++i) {
+    if (str[i] == '%' && i + 2 < len) { // only treat '%' as an escape when two more characters follow it
       int hi = fromHexChar(str[i + 1]);
       int lo = fromHexChar(str[i + 2]);
 
       if (hi < 0 || lo < 0)
         // Invalid hex characters, so just keep the escaped string.
-        result << str[i] << str[i + 1] << str[i + 2];
+        os << str[i] << str[i + 1] << str[i + 2];
       else
-        result << static_cast<char>((hi << 4) | lo);
+        os << static_cast<char>((hi << 4) | lo); // merge the two 4-bit values into a single byte
 
       // Skip ahead past the escaped value.
       i += 2;
     }
     else {
       // Just copy through.
-      result << str[i];
+      os << str[i];
     }
   }
-
-  return result.str();
 }
 
 } // namespace ndn
diff --git a/src/util/string-helper.hpp b/src/util/string-helper.hpp
index 403c25e..a34fdb7 100644
--- a/src/util/string-helper.hpp
+++ b/src/util/string-helper.hpp
@@ -1,6 +1,6 @@
 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
-/**
- * Copyright (c) 2013-2017 Regents of the University of California.
+/*
+ * Copyright (c) 2013-2018 Regents of the University of California.
  *
  * This file is part of ndn-cxx library (NDN C++ library with eXperimental eXtensions).
  *
@@ -142,12 +142,6 @@
 toHex(const Buffer& buffer, bool wantUpperCase = true);
 
 /**
- * @brief Convert the hex character to an integer from 0 to 15, or -1 if not a hex character
- */
-int
-fromHexChar(char c);
-
-/**
  * @brief Convert the hex string to buffer
  * @param hexString sequence of pairs of hex numbers (lower and upper case can be mixed)
  *        without any whitespace separators (e.g., "48656C6C6F2C20776F726C6421")
@@ -157,6 +151,51 @@
 fromHex(const std::string& hexString);
 
 /**
+ * @brief Convert (the least significant nibble of) @p n to the corresponding hex character
+ */
+constexpr char
+toHexChar(unsigned int n, bool wantUpperCase = true) noexcept
+{
+  return wantUpperCase ? // select the digit table matching the requested letter case
+         "0123456789ABCDEF"[n & 0xf] : // n & 0xf keeps only the low 4 bits, so the index is always 0..15
+         "0123456789abcdef"[n & 0xf];
+}
+
+/**
+ * @brief Convert the hex character @p c to an integer in [0, 15], or -1 if it's not a hex character
+ */
+constexpr int
+fromHexChar(char c) noexcept
+{
+  return (c >= '0' && c <= '9') ? int(c - '0') : // decimal digits map to 0-9
+         (c >= 'A' && c <= 'F') ? int(c - 'A' + 10) : // uppercase A-F map to 10-15
+         (c >= 'a' && c <= 'f') ? int(c - 'a' + 10) : // lowercase a-f map to 10-15
+         -1; // anything else is not a hex digit
+}
+
+/**
+ * @brief Percent-encode a string
+ * @see RFC 3986 section 2
+ *
+ * This function will encode all characters that are not one of the following:
+ * ALPHA ("a" to "z" and "A" to "Z") / DIGIT (0 to 9) / "-" / "." / "_" / "~"
+ *
+ * The hex encoding uses the numbers 0-9 and the uppercase letters A-F.
+ *
+ * Examples:
+ *
+ * @code
+ * escape("hello world") == "hello%20world"
+ * escape("100%") == "100%25"
+ * @endcode
+ */
+std::string
+escape(const std::string& str);
+
+void
+escape(std::ostream& os, const char* str, size_t len); // stream variant: percent-encodes the len bytes at str and writes the result to os
+
+/**
  * @brief Decode a percent-encoded string
  * @see RFC 3986 section 2
  *
@@ -172,6 +211,9 @@
 std::string
 unescape(const std::string& str);
 
+void
+unescape(std::ostream& os, const char* str, size_t len); // stream variant: decodes the len bytes at str and writes the result to os
+
 } // namespace ndn
 
 #endif // NDN_UTIL_STRING_HELPER_HPP
diff --git a/tests/unit-tests/util/indented-stream.t.cpp b/tests/unit-tests/util/indented-stream.t.cpp
index bc033ff..e450c4a 100644
--- a/tests/unit-tests/util/indented-stream.t.cpp
+++ b/tests/unit-tests/util/indented-stream.t.cpp
@@ -1,6 +1,6 @@
 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
-/**
- * Copyright (c) 2013-2016 Regents of the University of California.
+/*
+ * Copyright (c) 2013-2018 Regents of the University of California.
  *
  * This file is part of ndn-cxx library (NDN C++ library with eXperimental eXtensions).
  *
@@ -22,7 +22,6 @@
 #include "util/indented-stream.hpp"
 
 #include "boost-test.hpp"
-#include <boost/test/output_test_stream.hpp>
 
 namespace ndn {
 namespace util {
diff --git a/tests/unit-tests/util/logging.t.cpp b/tests/unit-tests/util/logging.t.cpp
index cced73b..693f844 100644
--- a/tests/unit-tests/util/logging.t.cpp
+++ b/tests/unit-tests/util/logging.t.cpp
@@ -1,6 +1,6 @@
 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
 /*
- * Copyright (c) 2013-2017 Regents of the University of California.
+ * Copyright (c) 2013-2018 Regents of the University of California.
  *
  * This file is part of ndn-cxx library (NDN C++ library with eXperimental eXtensions).
  *
@@ -24,7 +24,6 @@
 #include "../unit-test-time-fixture.hpp"
 
 #include "boost-test.hpp"
-#include <boost/test/output_test_stream.hpp>
 
 namespace ndn {
 namespace util {
diff --git a/tests/unit-tests/util/string-helper.t.cpp b/tests/unit-tests/util/string-helper.t.cpp
index 64144f8..0456a35 100644
--- a/tests/unit-tests/util/string-helper.t.cpp
+++ b/tests/unit-tests/util/string-helper.t.cpp
@@ -1,6 +1,6 @@
 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
-/**
- * Copyright (c) 2013-2017 Regents of the University of California.
+/*
+ * Copyright (c) 2013-2018 Regents of the University of California.
  *
  * This file is part of ndn-cxx library (NDN C++ library with eXperimental eXtensions).
  *
@@ -24,16 +24,21 @@
 
 #include "boost-test.hpp"
 
+#include <cctype>
+#include <cstring>
+
 namespace ndn {
 namespace util {
 namespace test {
 
+using boost::test_tools::output_test_stream;
+
 BOOST_AUTO_TEST_SUITE(Util)
 BOOST_AUTO_TEST_SUITE(TestStringHelper)
 
 BOOST_AUTO_TEST_CASE(PrintHex)
 {
-  boost::test_tools::output_test_stream os;
+  output_test_stream os;
 
   printHex(os, 0);
   BOOST_CHECK(os.is_equal("0x0"));
@@ -57,7 +62,7 @@
 BOOST_AUTO_TEST_CASE(AsHex)
 {
   using ndn::AsHex;
-  boost::test_tools::output_test_stream os;
+  output_test_stream os;
 
   os << AsHex{0};
   BOOST_CHECK(os.is_equal("0x0"));
@@ -86,6 +91,38 @@
   BOOST_CHECK_EQUAL(toHex(Buffer{}), "");
 }
 
+BOOST_AUTO_TEST_CASE(FromHex)
+{
+  BOOST_CHECK(*fromHex("") == Buffer{}); // empty input yields an empty buffer
+  BOOST_CHECK(*fromHex("48656c6c6f2c20776f726c6421") ==
+              (std::vector<uint8_t>{0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2c, 0x20,
+                                    0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21}));
+  BOOST_CHECK(*fromHex("012a3Bc4defAB5CdEF") == // upper- and lowercase digits mixed in one input
+              (std::vector<uint8_t>{0x01, 0x2a, 0x3b, 0xc4, 0xde,
+                                    0xfa, 0xb5, 0xcd, 0xef}));
+
+  BOOST_CHECK_THROW(fromHex("1"), StringHelperError); // a single digit is not a complete pair
+  BOOST_CHECK_THROW(fromHex("zz"), StringHelperError); // 'z' is not a hex digit
+  BOOST_CHECK_THROW(fromHex("00az"), StringHelperError); // valid pair followed by a pair containing 'z'
+  BOOST_CHECK_THROW(fromHex("1234z"), StringHelperError); // odd number of characters, the last one being 'z'
+}
+
+BOOST_AUTO_TEST_CASE(ToHexChar)
+{
+  static const std::vector<std::pair<unsigned int, char>> hexMap{ // nibble value -> expected uppercase hex digit
+    {0, '0'}, {1, '1'},  {2, '2'},  {3, '3'},  {4, '4'},  {5, '5'},  {6, '6'},  {7, '7'},
+    {8, '8'}, {9, '9'}, {10, 'A'}, {11, 'B'}, {12, 'C'}, {13, 'D'}, {14, 'E'}, {15, 'F'}
+  };
+
+  for (const auto& i : hexMap) {
+    BOOST_CHECK_EQUAL(toHexChar(i.first), i.second);
+    BOOST_CHECK_EQUAL(toHexChar(i.first + 16), i.second); // adding a multiple of 16 changes only the upper bits, so the digit must not change
+    BOOST_CHECK_EQUAL(toHexChar(i.first + 32), i.second);
+    BOOST_CHECK_EQUAL(toHexChar(i.first + 240), i.second);
+    BOOST_CHECK_EQUAL(toHexChar(i.first, false), std::tolower(static_cast<unsigned char>(i.second))); // lowercase variant
+  }
+}
+
 BOOST_AUTO_TEST_CASE(FromHexChar)
 {
   // for (int ch = 0; ch <= std::numeric_limits<uint8_t>::max(); ++ch) {
@@ -134,37 +171,33 @@
   }
 }
 
-BOOST_AUTO_TEST_CASE(FromHex)
+BOOST_AUTO_TEST_CASE(Escape)
 {
-  BOOST_CHECK(*fromHex("") == Buffer{});
-  BOOST_CHECK(*fromHex("48656c6c6f2c20776f726c6421") ==
-              (std::vector<uint8_t>{0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2c, 0x20,
-                                    0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21}));
-  BOOST_CHECK(*fromHex("012a3Bc4defAB5CdEF") ==
-              (std::vector<uint8_t>{0x01, 0x2a, 0x3b, 0xc4, 0xde,
-                                    0xfa, 0xb5, 0xcd, 0xef}));
+  BOOST_CHECK_EQUAL(escape(""), "");
+  BOOST_CHECK_EQUAL(escape("foo42"), "foo42");
+  BOOST_CHECK_EQUAL(escape("foo%bar"), "foo%25bar"); // a literal '%' must itself be escaped
+  BOOST_CHECK_EQUAL(escape("lower UPPER"), "lower%20UPPER");
+  BOOST_CHECK_EQUAL(escape("-._~"), "-._~"); // the four unreserved punctuation characters stay as-is
+  BOOST_CHECK_EQUAL(escape(":/?#[]@"), "%3A%2F%3F%23%5B%5D%40"); // none of these is unreserved, so each one is encoded
 
-  BOOST_CHECK_THROW(fromHex("1"), StringHelperError);
-  BOOST_CHECK_THROW(fromHex("zz"), StringHelperError);
-  BOOST_CHECK_THROW(fromHex("00az"), StringHelperError);
-  BOOST_CHECK_THROW(fromHex("1234z"), StringHelperError);
+  output_test_stream os;
+  const char str[] = "\x01\x2a\x3b\xc4\xde\xfa\xb5\xcd\xef";
+  escape(os, str, std::strlen(str)); // stream overload; str has no embedded NUL, so strlen() covers all of it
+  BOOST_CHECK(os.is_equal("%01%2A%3B%C4%DE%FA%B5%CD%EF"));
 }
 
 BOOST_AUTO_TEST_CASE(Unescape)
 {
-  std::string test1 = "Hello%01, world!%AA  ";
-  std::string test2 = "Invalid escape %ZZ (not a hex value)";
-  std::string test3 = "Invalid escape %a (should be two hex symbols)";
-  std::string test4 = "Invalid escape %a";
+  BOOST_CHECK_EQUAL(unescape(""), "");
+  BOOST_CHECK_EQUAL(unescape("Hello%01, world!%AA  "), "Hello\x01, world!\xAA  ");
+  BOOST_CHECK_EQUAL(unescape("Bad %ZZ (not a hex value)"), "Bad %ZZ (not a hex value)"); // malformed escapes are expected to pass through unchanged
+  BOOST_CHECK_EQUAL(unescape("Bad %a (should be two hex chars)"), "Bad %a (should be two hex chars)");
+  BOOST_CHECK_EQUAL(unescape("Bad %a"), "Bad %a"); // '%' too close to the end of the input to form a full escape
 
-  BOOST_CHECK_EQUAL(unescape(test1), "Hello\x01, world!\xAA  ");
-
-  BOOST_CHECK_EQUAL(unescape(test2), "Invalid escape %ZZ (not a hex value)");
-  BOOST_CHECK_EQUAL(unescape(test3), "Invalid escape %a (should be two hex symbols)");
-  BOOST_CHECK_EQUAL(unescape(test4), "Invalid escape %a");
-
-  BOOST_CHECK_EQUAL(unescape("%01%2a%3B%c4%de%fA%B5%Cd%EF"),
-                    "\x01\x2a\x3b\xc4\xde\xfa\xb5\xcd\xef");
+  output_test_stream os;
+  const char str[] = "%01%2a%3B%c4%de%fA%B5%Cd%EF"; // hex digits in both upper and lower case
+  unescape(os, str, std::strlen(str));
+  BOOST_CHECK(os.is_equal("\x01\x2a\x3b\xc4\xde\xfa\xb5\xcd\xef"));
 }
 
 BOOST_AUTO_TEST_SUITE_END() // TestStringHelper