util: introduce escape() and use it in Component::toUri()
Change-Id: Ie476ca0fc909fb275631c60195fb1aa5640cff10
Refs: #4484
diff --git a/src/name-component.cpp b/src/name-component.cpp
index 136fb64..fb89f65 100644
--- a/src/name-component.cpp
+++ b/src/name-component.cpp
@@ -135,46 +135,16 @@
printHex(result, value(), value_size(), false);
}
else {
- const uint8_t* value = this->value();
- size_t valueSize = value_size();
-
- bool gotNonDot = false;
- for (size_t i = 0; i < valueSize; ++i) {
- if (value[i] != 0x2e) {
- gotNonDot = true;
- break;
- }
- }
- if (!gotNonDot) {
+ bool hasNonDot = std::any_of(value_begin(), value_end(),
+ [] (uint8_t x) { return x != '.'; });
+ if (!hasNonDot) {
// Special case for component of zero or more periods. Add 3 periods.
result << "...";
- for (size_t i = 0; i < valueSize; ++i)
+ for (size_t i = 0; i < value_size(); ++i)
result << '.';
}
else {
- // In case we need to escape, set to upper case hex and save the previous flags.
- auto savedFlags = result.flags(std::ios::hex | std::ios::uppercase);
-
- for (size_t i = 0; i < valueSize; ++i) {
- uint8_t x = value[i];
- // Unreserved characters are not escaped.
- if ((x >= '0' && x <= '9') ||
- (x >= 'A' && x <= 'Z') ||
- (x >= 'a' && x <= 'z') ||
- x == '-' || x == '.' ||
- x == '_' || x == '~') {
- result << x;
- }
- else {
- result << '%';
- if (x < 16)
- result << '0';
- result << static_cast<int>(x);
- }
- }
-
- // Restore.
- result.flags(savedFlags);
+ escape(result, reinterpret_cast<const char*>(value()), value_size());
}
}
}
diff --git a/src/util/string-helper.cpp b/src/util/string-helper.cpp
index 8be94d0..642b238 100644
--- a/src/util/string-helper.cpp
+++ b/src/util/string-helper.cpp
@@ -1,6 +1,6 @@
/* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
/*
- * Copyright (c) 2013-2017 Regents of the University of California.
+ * Copyright (c) 2013-2018 Regents of the University of California.
*
* This file is part of ndn-cxx library (NDN C++ library with eXperimental eXtensions).
*
@@ -77,19 +77,6 @@
return toHex(buffer.data(), buffer.size(), wantUpperCase);
}
-int
-fromHexChar(char c)
-{
- if (c >= '0' && c <= '9')
- return c - '0';
- else if (c >= 'A' && c <= 'F')
- return c - 'A' + 0xA;
- else if (c >= 'a' && c <= 'f')
- return c - 'a' + 0xA;
- else
- return -1;
-}
-
shared_ptr<Buffer>
fromHex(const std::string& hexString)
{
@@ -107,31 +94,64 @@
}
std::string
+escape(const std::string& str)
+{
+  // Delegate to the stream-based overload and collect what it writes.
+  std::ostringstream encoded;
+  escape(encoded, str.data(), str.size());
+  return encoded.str();
+}
+
+void
+escape(std::ostream& os, const char* str, size_t len)
+{
+  // Characters that are written as-is: ALPHA / DIGIT / "-" / "." / "_" / "~".
+  auto isUnreserved = [] (char ch) {
+    return (ch >= '0' && ch <= '9') ||
+           (ch >= 'A' && ch <= 'Z') ||
+           (ch >= 'a' && ch <= 'z') ||
+           ch == '-' || ch == '.' || ch == '_' || ch == '~';
+  };
+
+  for (const char* p = str; p != str + len; ++p) {
+    if (isUnreserved(*p)) {
+      os << *p;
+    }
+    else {
+      // Everything else becomes '%' followed by the high and low nibble as hex digits.
+      os << '%' << toHexChar((*p & 0xf0) >> 4) << toHexChar(*p & 0xf);
+    }
+  }
+}
+
+std::string
unescape(const std::string& str)
{
- std::ostringstream result;
+  // Delegate to the stream-based overload and collect what it writes.
+  std::ostringstream decoded;
+  unescape(decoded, str.data(), str.size());
+  return decoded.str();
+}
- for (size_t i = 0; i < str.size(); ++i) {
- if (str[i] == '%' && i + 2 < str.size()) {
+// Decode the percent-escapes found in the len bytes starting at str and write
+// the result to os. A '%' that is not followed by two hex digits is not treated
+// as an escape: the input characters are written to os unchanged.
+void
+unescape(std::ostream& os, const char* str, size_t len)
+{
+ for (size_t i = 0; i < len; ++i) {
+ // A '%' can only start an escape when at least two more characters follow it.
+ if (str[i] == '%' && i + 2 < len) {
int hi = fromHexChar(str[i + 1]);
int lo = fromHexChar(str[i + 2]);
if (hi < 0 || lo < 0)
// Invalid hex characters, so just keep the escaped string.
- result << str[i] << str[i + 1] << str[i + 2];
+ os << str[i] << str[i + 1] << str[i + 2];
else
- result << static_cast<char>((hi << 4) | lo);
+ os << static_cast<char>((hi << 4) | lo);
// Skip ahead past the escaped value.
i += 2;
}
else {
// Just copy through.
- result << str[i];
+ os << str[i];
}
}
-
- return result.str();
}
} // namespace ndn
diff --git a/src/util/string-helper.hpp b/src/util/string-helper.hpp
index 403c25e..a34fdb7 100644
--- a/src/util/string-helper.hpp
+++ b/src/util/string-helper.hpp
@@ -1,6 +1,6 @@
/* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
-/**
- * Copyright (c) 2013-2017 Regents of the University of California.
+/*
+ * Copyright (c) 2013-2018 Regents of the University of California.
*
* This file is part of ndn-cxx library (NDN C++ library with eXperimental eXtensions).
*
@@ -142,12 +142,6 @@
toHex(const Buffer& buffer, bool wantUpperCase = true);
/**
- * @brief Convert the hex character to an integer from 0 to 15, or -1 if not a hex character
- */
-int
-fromHexChar(char c);
-
-/**
* @brief Convert the hex string to buffer
* @param hexString sequence of pairs of hex numbers (lower and upper case can be mixed)
* without any whitespace separators (e.g., "48656C6C6F2C20776F726C6421")
@@ -157,6 +151,51 @@
fromHex(const std::string& hexString);
/**
+ * @brief Map the least significant nibble of @p n to the corresponding hex character
+ * @param n value to convert; every bit above the lowest four is ignored
+ * @param wantUpperCase if true, 10-15 map to 'A'-'F'; otherwise they map to 'a'-'f'
+ * @return the hex digit character for `n & 0xf`
+ */
+constexpr char
+toHexChar(unsigned int n, bool wantUpperCase = true) noexcept
+{
+  return (wantUpperCase ? "0123456789ABCDEF" : "0123456789abcdef")[n & 0xf];
+}
+
+/**
+ * @brief Interpret the character @p c as a single hex digit
+ * @param c the character to convert; both 'a'-'f' and 'A'-'F' are accepted
+ * @return the value of @p c as an integer in [0, 15], or -1 if it's not a hex character
+ */
+constexpr int
+fromHexChar(char c) noexcept
+{
+  return (c >= 'a' && c <= 'f') ? c - 'a' + 10 :
+         (c >= 'A' && c <= 'F') ? c - 'A' + 10 :
+         (c >= '0' && c <= '9') ? c - '0' :
+         -1;
+}
+
+/**
+ * @brief Percent-encode a string
+ * @see RFC 3986 section 2
+ *
+ * This function will encode all characters that are not one of the following:
+ * ALPHA ("a" to "z" and "A" to "Z") / DIGIT (0 to 9) / "-" / "." / "_" / "~"
+ *
+ * The hex encoding uses the numbers 0-9 and the uppercase letters A-F.
+ *
+ * Examples:
+ *
+ * @code
+ * escape("hello world") == "hello%20world"
+ * escape("100%") == "100%25"
+ * @endcode
+ */
+std::string
+escape(const std::string& str);
+
+/**
+ * @brief Percent-encode a sequence of characters and write the result to an output stream
+ * @param os the stream that receives the encoded text
+ * @param str pointer to the first character to encode
+ * @param len number of characters to encode, starting at @p str
+ *
+ * The encoding rules are the same as for escape(const std::string&).
+ */
+void
+escape(std::ostream& os, const char* str, size_t len);
+
+/**
* @brief Decode a percent-encoded string
* @see RFC 3986 section 2
*
@@ -172,6 +211,9 @@
std::string
unescape(const std::string& str);
+/**
+ * @brief Decode a percent-encoded sequence of characters and write the result to an output stream
+ * @param os the stream that receives the decoded characters
+ * @param str pointer to the first character of the percent-encoded input
+ * @param len number of characters to decode, starting at @p str
+ */
+void
+unescape(std::ostream& os, const char* str, size_t len);
+
} // namespace ndn
#endif // NDN_UTIL_STRING_HELPER_HPP
diff --git a/tests/unit-tests/util/indented-stream.t.cpp b/tests/unit-tests/util/indented-stream.t.cpp
index bc033ff..e450c4a 100644
--- a/tests/unit-tests/util/indented-stream.t.cpp
+++ b/tests/unit-tests/util/indented-stream.t.cpp
@@ -1,6 +1,6 @@
/* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
-/**
- * Copyright (c) 2013-2016 Regents of the University of California.
+/*
+ * Copyright (c) 2013-2018 Regents of the University of California.
*
* This file is part of ndn-cxx library (NDN C++ library with eXperimental eXtensions).
*
@@ -22,7 +22,6 @@
#include "util/indented-stream.hpp"
#include "boost-test.hpp"
-#include <boost/test/output_test_stream.hpp>
namespace ndn {
namespace util {
diff --git a/tests/unit-tests/util/logging.t.cpp b/tests/unit-tests/util/logging.t.cpp
index cced73b..693f844 100644
--- a/tests/unit-tests/util/logging.t.cpp
+++ b/tests/unit-tests/util/logging.t.cpp
@@ -1,6 +1,6 @@
/* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
/*
- * Copyright (c) 2013-2017 Regents of the University of California.
+ * Copyright (c) 2013-2018 Regents of the University of California.
*
* This file is part of ndn-cxx library (NDN C++ library with eXperimental eXtensions).
*
@@ -24,7 +24,6 @@
#include "../unit-test-time-fixture.hpp"
#include "boost-test.hpp"
-#include <boost/test/output_test_stream.hpp>
namespace ndn {
namespace util {
diff --git a/tests/unit-tests/util/string-helper.t.cpp b/tests/unit-tests/util/string-helper.t.cpp
index 64144f8..0456a35 100644
--- a/tests/unit-tests/util/string-helper.t.cpp
+++ b/tests/unit-tests/util/string-helper.t.cpp
@@ -1,6 +1,6 @@
/* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
-/**
- * Copyright (c) 2013-2017 Regents of the University of California.
+/*
+ * Copyright (c) 2013-2018 Regents of the University of California.
*
* This file is part of ndn-cxx library (NDN C++ library with eXperimental eXtensions).
*
@@ -24,16 +24,21 @@
#include "boost-test.hpp"
+#include <cctype>
+#include <cstring>
+
namespace ndn {
namespace util {
namespace test {
+using boost::test_tools::output_test_stream;
+
BOOST_AUTO_TEST_SUITE(Util)
BOOST_AUTO_TEST_SUITE(TestStringHelper)
BOOST_AUTO_TEST_CASE(PrintHex)
{
- boost::test_tools::output_test_stream os;
+ output_test_stream os;
printHex(os, 0);
BOOST_CHECK(os.is_equal("0x0"));
@@ -57,7 +62,7 @@
BOOST_AUTO_TEST_CASE(AsHex)
{
using ndn::AsHex;
- boost::test_tools::output_test_stream os;
+ output_test_stream os;
os << AsHex{0};
BOOST_CHECK(os.is_equal("0x0"));
@@ -86,6 +91,38 @@
BOOST_CHECK_EQUAL(toHex(Buffer{}), "");
}
+BOOST_AUTO_TEST_CASE(FromHex)
+{
+  // Well-formed input: empty string, lowercase digits, and mixed-case digits.
+  const std::vector<uint8_t> helloWorld{0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2c, 0x20,
+                                        0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21};
+  const std::vector<uint8_t> mixedCase{0x01, 0x2a, 0x3b, 0xc4, 0xde,
+                                       0xfa, 0xb5, 0xcd, 0xef};
+
+  BOOST_CHECK(*fromHex("") == Buffer{});
+  BOOST_CHECK(*fromHex("48656c6c6f2c20776f726c6421") == helloWorld);
+  BOOST_CHECK(*fromHex("012a3Bc4defAB5CdEF") == mixedCase);
+
+  // Malformed input: an odd number of characters and/or characters that are not hex digits.
+  BOOST_CHECK_THROW(fromHex("1"), StringHelperError);
+  BOOST_CHECK_THROW(fromHex("zz"), StringHelperError);
+  BOOST_CHECK_THROW(fromHex("00az"), StringHelperError);
+  BOOST_CHECK_THROW(fromHex("1234z"), StringHelperError);
+}
+
+BOOST_AUTO_TEST_CASE(ToHexChar)
+{
+  // The character at index n is the expected uppercase digit for the value n.
+  const std::string upperDigits = "0123456789ABCDEF";
+
+  for (unsigned int n = 0; n < upperDigits.size(); ++n) {
+    const char expected = upperDigits[n];
+    // Adding multiples of 16 must not change the result: only the low nibble counts.
+    BOOST_CHECK_EQUAL(toHexChar(n), expected);
+    BOOST_CHECK_EQUAL(toHexChar(n + 16), expected);
+    BOOST_CHECK_EQUAL(toHexChar(n + 32), expected);
+    BOOST_CHECK_EQUAL(toHexChar(n + 240), expected);
+    BOOST_CHECK_EQUAL(toHexChar(n, false), std::tolower(static_cast<unsigned char>(expected)));
+  }
+}
+
BOOST_AUTO_TEST_CASE(FromHexChar)
{
// for (int ch = 0; ch <= std::numeric_limits<uint8_t>::max(); ++ch) {
@@ -134,37 +171,33 @@
}
}
-BOOST_AUTO_TEST_CASE(FromHex)
+BOOST_AUTO_TEST_CASE(Escape)
{
- BOOST_CHECK(*fromHex("") == Buffer{});
- BOOST_CHECK(*fromHex("48656c6c6f2c20776f726c6421") ==
- (std::vector<uint8_t>{0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x2c, 0x20,
- 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x21}));
- BOOST_CHECK(*fromHex("012a3Bc4defAB5CdEF") ==
- (std::vector<uint8_t>{0x01, 0x2a, 0x3b, 0xc4, 0xde,
- 0xfa, 0xb5, 0xcd, 0xef}));
+ // String overload: letters, digits, '-', '.', '_' and '~' are expected to pass
+ // through unchanged, while '%', ' ' and ":/?#[]@" are expected as %XX.
+ BOOST_CHECK_EQUAL(escape(""), "");
+ BOOST_CHECK_EQUAL(escape("foo42"), "foo42");
+ BOOST_CHECK_EQUAL(escape("foo%bar"), "foo%25bar");
+ BOOST_CHECK_EQUAL(escape("lower UPPER"), "lower%20UPPER");
+ BOOST_CHECK_EQUAL(escape("-._~"), "-._~");
+ BOOST_CHECK_EQUAL(escape(":/?#[]@"), "%3A%2F%3F%23%5B%5D%40");
- BOOST_CHECK_THROW(fromHex("1"), StringHelperError);
- BOOST_CHECK_THROW(fromHex("zz"), StringHelperError);
- BOOST_CHECK_THROW(fromHex("00az"), StringHelperError);
- BOOST_CHECK_THROW(fromHex("1234z"), StringHelperError);
+ // Stream overload: the input includes bytes >= 0x80, and the expected output
+ // uses uppercase hex digits.
+ output_test_stream os;
+ const char str[] = "\x01\x2a\x3b\xc4\xde\xfa\xb5\xcd\xef";
+ escape(os, str, std::strlen(str));
+ BOOST_CHECK(os.is_equal("%01%2A%3B%C4%DE%FA%B5%CD%EF"));
}
BOOST_AUTO_TEST_CASE(Unescape)
{
- std::string test1 = "Hello%01, world!%AA ";
- std::string test2 = "Invalid escape %ZZ (not a hex value)";
- std::string test3 = "Invalid escape %a (should be two hex symbols)";
- std::string test4 = "Invalid escape %a";
+ // String overload: valid %XX escapes are expected to be decoded, while a '%'
+ // followed by non-hex or too few characters is expected to be left untouched.
+ BOOST_CHECK_EQUAL(unescape(""), "");
+ BOOST_CHECK_EQUAL(unescape("Hello%01, world!%AA "), "Hello\x01, world!\xAA ");
+ BOOST_CHECK_EQUAL(unescape("Bad %ZZ (not a hex value)"), "Bad %ZZ (not a hex value)");
+ BOOST_CHECK_EQUAL(unescape("Bad %a (should be two hex chars)"), "Bad %a (should be two hex chars)");
+ BOOST_CHECK_EQUAL(unescape("Bad %a"), "Bad %a");
- BOOST_CHECK_EQUAL(unescape(test1), "Hello\x01, world!\xAA ");
-
- BOOST_CHECK_EQUAL(unescape(test2), "Invalid escape %ZZ (not a hex value)");
- BOOST_CHECK_EQUAL(unescape(test3), "Invalid escape %a (should be two hex symbols)");
- BOOST_CHECK_EQUAL(unescape(test4), "Invalid escape %a");
-
- BOOST_CHECK_EQUAL(unescape("%01%2a%3B%c4%de%fA%B5%Cd%EF"),
- "\x01\x2a\x3b\xc4\xde\xfa\xb5\xcd\xef");
+ // Stream overload: the input mixes lowercase and uppercase hex digits.
+ output_test_stream os;
+ const char str[] = "%01%2a%3B%c4%de%fA%B5%Cd%EF";
+ unescape(os, str, std::strlen(str));
+ BOOST_CHECK(os.is_equal("\x01\x2a\x3b\xc4\xde\xfa\xb5\xcd\xef"));
}
BOOST_AUTO_TEST_SUITE_END() // TestStringHelper