util: introduce escape() and use it in Component::toUri()

Change-Id: Ie476ca0fc909fb275631c60195fb1aa5640cff10
Refs: #4484
diff --git a/src/name-component.cpp b/src/name-component.cpp
index 136fb64..fb89f65 100644
--- a/src/name-component.cpp
+++ b/src/name-component.cpp
@@ -135,46 +135,16 @@
     printHex(result, value(), value_size(), false);
   }
   else {
-    const uint8_t* value = this->value();
-    size_t valueSize = value_size();
-
-    bool gotNonDot = false;
-    for (size_t i = 0; i < valueSize; ++i) {
-      if (value[i] != 0x2e) {
-        gotNonDot = true;
-        break;
-      }
-    }
-    if (!gotNonDot) {
+    bool hasNonDot = std::any_of(value_begin(), value_end(),
+                                 [] (uint8_t x) { return x != '.'; });
+    if (!hasNonDot) {
       // Special case for component of zero or more periods.  Add 3 periods.
       result << "...";
-      for (size_t i = 0; i < valueSize; ++i)
+      for (size_t i = 0; i < value_size(); ++i)
         result << '.';
     }
     else {
-      // In case we need to escape, set to upper case hex and save the previous flags.
-      auto savedFlags = result.flags(std::ios::hex | std::ios::uppercase);
-
-      for (size_t i = 0; i < valueSize; ++i) {
-        uint8_t x = value[i];
-        // Unreserved characters are not escaped.
-        if ((x >= '0' && x <= '9') ||
-            (x >= 'A' && x <= 'Z') ||
-            (x >= 'a' && x <= 'z') ||
-            x == '-' || x == '.' ||
-            x == '_' || x == '~') {
-          result << x;
-        }
-        else {
-          result << '%';
-          if (x < 16)
-            result << '0';
-          result << static_cast<int>(x);
-        }
-      }
-
-      // Restore.
-      result.flags(savedFlags);
+      escape(result, reinterpret_cast<const char*>(value()), value_size());
     }
   }
 }
diff --git a/src/util/string-helper.cpp b/src/util/string-helper.cpp
index 8be94d0..642b238 100644
--- a/src/util/string-helper.cpp
+++ b/src/util/string-helper.cpp
@@ -1,6 +1,6 @@
 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
 /*
- * Copyright (c) 2013-2017 Regents of the University of California.
+ * Copyright (c) 2013-2018 Regents of the University of California.
  *
  * This file is part of ndn-cxx library (NDN C++ library with eXperimental eXtensions).
  *
@@ -77,19 +77,6 @@
   return toHex(buffer.data(), buffer.size(), wantUpperCase);
 }
 
-int
-fromHexChar(char c)
-{
-  if (c >= '0' && c <= '9')
-    return c - '0';
-  else if (c >= 'A' && c <= 'F')
-    return c - 'A' + 0xA;
-  else if (c >= 'a' && c <= 'f')
-    return c - 'a' + 0xA;
-  else
-    return -1;
-}
-
 shared_ptr<Buffer>
 fromHex(const std::string& hexString)
 {
@@ -107,31 +94,64 @@
 }
 
 std::string
+escape(const std::string& str)
+{
+  std::ostringstream os;
+  escape(os, str.data(), str.size());
+  return os.str();
+}
+
+void
+escape(std::ostream& os, const char* str, size_t len)
+{
+  for (size_t i = 0; i < len; ++i) {
+    auto c = str[i];
+    // Unreserved characters don't need to be escaped.
+    if ((c >= 'a' && c <= 'z') ||
+        (c >= 'A' && c <= 'Z') ||
+        (c >= '0' && c <= '9') ||
+        c == '-' || c == '.' ||
+        c == '_' || c == '~') {
+      os << c;
+    }
+    else {
+      os << '%';
+      os << toHexChar((c & 0xf0) >> 4);
+      os << toHexChar(c & 0xf);
+    }
+  }
+}
+
+std::string
 unescape(const std::string& str)
 {
-  std::ostringstream result;
+  std::ostringstream os;
+  unescape(os, str.data(), str.size());
+  return os.str();
+}
 
-  for (size_t i = 0; i < str.size(); ++i) {
-    if (str[i] == '%' && i + 2 < str.size()) {
+void
+unescape(std::ostream& os, const char* str, size_t len)
+{
+  for (size_t i = 0; i < len; ++i) {
+    if (str[i] == '%' && i + 2 < len) {
       int hi = fromHexChar(str[i + 1]);
       int lo = fromHexChar(str[i + 2]);
 
       if (hi < 0 || lo < 0)
         // Invalid hex characters, so just keep the escaped string.
-        result << str[i] << str[i + 1] << str[i + 2];
+        os << str[i] << str[i + 1] << str[i + 2];
       else
-        result << static_cast<char>((hi << 4) | lo);
+        os << static_cast<char>((hi << 4) | lo);
 
       // Skip ahead past the escaped value.
       i += 2;
     }
     else {
       // Just copy through.
-      result << str[i];
+      os << str[i];
     }
   }
-
-  return result.str();
 }
 
 } // namespace ndn
diff --git a/src/util/string-helper.hpp b/src/util/string-helper.hpp
index 403c25e..a34fdb7 100644
--- a/src/util/string-helper.hpp
+++ b/src/util/string-helper.hpp
@@ -1,6 +1,6 @@
 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
-/**
- * Copyright (c) 2013-2017 Regents of the University of California.
+/*
+ * Copyright (c) 2013-2018 Regents of the University of California.
  *
  * This file is part of ndn-cxx library (NDN C++ library with eXperimental eXtensions).
  *
@@ -142,12 +142,6 @@
 toHex(const Buffer& buffer, bool wantUpperCase = true);
 
 /**
- * @brief Convert the hex character to an integer from 0 to 15, or -1 if not a hex character
- */
-int
-fromHexChar(char c);
-
-/**
  * @brief Convert the hex string to buffer
  * @param hexString sequence of pairs of hex numbers (lower and upper case can be mixed)
  *        without any whitespace separators (e.g., "48656C6C6F2C20776F726C6421")
@@ -157,6 +151,51 @@
 fromHex(const std::string& hexString);
 
 /**
+ * @brief Convert (the least significant nibble of) @p n to the corresponding hex character
+ */
+constexpr char
+toHexChar(unsigned int n, bool wantUpperCase = true) noexcept
+{
+  return wantUpperCase ?
+         "0123456789ABCDEF"[n & 0xf] :
+         "0123456789abcdef"[n & 0xf];
+}
+
+/**
+ * @brief Convert the hex character @p c to an integer in [0, 15], or -1 if it's not a hex character
+ */
+constexpr int
+fromHexChar(char c) noexcept
+{
+  return (c >= '0' && c <= '9') ? int(c - '0') :
+         (c >= 'A' && c <= 'F') ? int(c - 'A' + 10) :
+         (c >= 'a' && c <= 'f') ? int(c - 'a' + 10) :
+         -1;
+}
+
+/**
+ * @brief Percent-encode a string
+ * @see RFC 3986 section 2
+ *
+ * This function will encode all characters that are not one of the following:
+ * ALPHA ("a" to "z" and "A" to "Z") / DIGIT ("0" to "9") / "-" / "." / "_" / "~"
+ *
+ * Every other byte is written as "%" followed by two hex digits (0-9 and uppercase A-F).
+ *
+ * Examples:
+ *
+ * @code
+ * escape("hello world") == "hello%20world"
+ * escape("100%") == "100%25"
+ * @endcode
+ */
+std::string
+escape(const std::string& str);
+
+void
+escape(std::ostream& os, const char* str, size_t len);
+
+/**
  * @brief Decode a percent-encoded string
  * @see RFC 3986 section 2
  *
@@ -172,6 +211,9 @@
 std::string
 unescape(const std::string& str);
 
+void
+unescape(std::ostream& os, const char* str, size_t len);
+
 } // namespace ndn
 
 #endif // NDN_UTIL_STRING_HELPER_HPP