name-component: recognize plain URI syntax

refs #4690

Change-Id: I6e88d88cc62a025edc30b545f1044a225c20213f
diff --git a/src/detail/name-component-types.hpp b/src/detail/name-component-types.hpp
new file mode 100644
index 0000000..8d074fd
--- /dev/null
+++ b/src/detail/name-component-types.hpp
@@ -0,0 +1,314 @@
+/* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
+/*
+ * Copyright (c) 2013-2018 Regents of the University of California.
+ *
+ * This file is part of ndn-cxx library (NDN C++ library with eXperimental eXtensions).
+ *
+ * ndn-cxx library is free software: you can redistribute it and/or modify it under the
+ * terms of the GNU Lesser General Public License as published by the Free Software
+ * Foundation, either version 3 of the License, or (at your option) any later version.
+ *
+ * ndn-cxx library is distributed in the hope that it will be useful, but WITHOUT ANY
+ * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+ * PARTICULAR PURPOSE.  See the GNU Lesser General Public License for more details.
+ *
+ * You should have received copies of the GNU General Public License and GNU Lesser
+ * General Public License along with ndn-cxx, e.g., in COPYING.md file.  If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ * See AUTHORS.md for complete list of ndn-cxx authors and contributors.
+ */
+
+#ifndef NDN_DETAIL_NAME_COMPONENT_TYPES_HPP
+#define NDN_DETAIL_NAME_COMPONENT_TYPES_HPP
+
+#include "../name-component.hpp"
+
+#include "../util/sha256.hpp"
+#include "../util/string-helper.hpp"
+
+#include <array>
+#include <unordered_map>
+
+namespace ndn {
+namespace name {
+namespace detail {
+
+/** \brief Declare rules regarding a NameComponent type.
+ *  This base class supplies the defaults used for types without dedicated rules. */
+class ComponentType : noncopyable
+{
+public:
+  using Error = Component::Error;
+
+  virtual
+  ~ComponentType() = default;
+
+  /** \brief Throw Component::Error if \p comp is invalid.
+   *  The base implementation accepts every component. */
+  virtual void
+  check(const Component& comp) const
+  {
+  }
+
+  /** \brief Calculate the successor of \p comp.
+   *
+   *  If \p comp is the maximum possible value of this component type, return true to indicate
+   *  that the successor should have a greater TLV-TYPE.
+   *  The base implementation always returns false: on overflow it lengthens the value instead. */
+  virtual std::pair<bool, Component>
+  getSuccessor(const Component& comp) const
+  {
+    return {false, getSuccessorImpl(comp).second};
+  }
+
+  /** \brief Return the minimum allowable TLV-VALUE of this component type.
+   *  The base implementation returns an empty value. */
+  virtual const std::vector<uint8_t>&
+  getMinValue() const
+  {
+    static std::vector<uint8_t> value;
+    return value;
+  }
+
+  /** \brief Return the prefix of the alternate URI representation.
+   *
+   *  NDN URI specification allows a name component type to declare an alternate URI representation
+   *  in the form of `<prefix>=<value>`, in addition to the plain `<type-number>=<escaped-value>`
+   *  syntax.
+   *
+   *  \return the `<prefix>` portion of the alternate URI representation.
+   *  \retval nullptr this component does not have an alternate URI representation.
+   */
+  virtual const char*
+  getAltUriPrefix() const
+  {
+    return nullptr;
+  }
+
+  /** \brief Parse component from alternate URI representation.
+   *  \param input the `<value>` portion of the alternate URI representation.
+   *  \throw Component::Error
+   *  \pre getAltUriPrefix() != nullptr
+   */
+  virtual Component
+  parseAltUriValue(const std::string& input) const
+  {
+    BOOST_ASSERT(false); // precondition violated: the base type declares no alternate URI prefix
+    return Component();
+  }
+
+  /** \brief Write URI representation of \p comp to \p os.
+   *
+   *  This base class implementation encodes the component in the plain
+   *  `<type-number>=<escaped-value>` syntax.
+   */
+  virtual void
+  writeUri(std::ostream& os, const Component& comp) const
+  {
+    os << comp.type() << '=';
+    writeUriEscapedValue(os, comp);
+  }
+
+protected:
+  /** \brief Calculate the successor of \p comp, extending TLV-LENGTH if value overflows.
+   *  \return whether TLV-LENGTH was extended, and the successor
+   */
+  std::pair<bool, Block>
+  getSuccessorImpl(const Component& comp) const
+  {
+    EncodingBuffer encoder(comp.size() + 9, 9);
+    // leave room for additional byte when TLV-VALUE overflows, and for TLV-LENGTH size increase
+
+    bool isOverflow = true; // acts as the carry: the "+1" enters at the last octet
+    size_t i = comp.value_size();
+    for (; isOverflow && i > 0; i--) { // walk backwards while the carry keeps propagating
+      uint8_t newValue = static_cast<uint8_t>((comp.value()[i - 1] + 1) & 0xFF);
+      encoder.prependByte(newValue);
+      isOverflow = (newValue == 0);
+    }
+    encoder.prependByteArray(comp.value(), i); // leading octets not touched by the carry
+
+    if (isOverflow) {
+      // new name component has to be extended
+      encoder.appendByte(0);
+    }
+
+    encoder.prependVarNumber(encoder.size()); // TLV-LENGTH: only value octets are encoded so far
+    encoder.prependVarNumber(comp.type());
+    return {isOverflow, encoder.block()};
+  }
+
+  /** \brief Write TLV-VALUE as `<escaped-value>` of NDN URI syntax.
+   *  A value made up solely of periods (or an empty value) is prefixed with "...". */
+  void
+  writeUriEscapedValue(std::ostream& os, const Component& comp) const
+  {
+    bool isAllPeriods = std::all_of(comp.value_begin(), comp.value_end(),
+                                    [] (uint8_t x) { return x == '.'; });
+    if (isAllPeriods) {
+      os << "...";
+    }
+    escape(os, reinterpret_cast<const char*>(comp.value()), comp.value_size());
+  }
+};
+
+/** \brief Rules regarding GenericNameComponent.
+ *
+ *  GenericNameComponent has an alternate URI representation that omits the `<type-number>` prefix.
+ *  This must be special-cased in the caller, and is not handled by this class.
+ */
+class GenericNameComponentType final : public ComponentType
+{
+public:
+  void
+  writeUri(std::ostream& os, const Component& comp) const final
+  {
+    writeUriEscapedValue(os, comp); // escaped value only, without a `<type-number>=` prefix
+  }
+};
+
+/** \brief Rules regarding a component type holding a SHA256 digest value: the TLV-VALUE is
+ *         always util::Sha256::DIGEST_SIZE octets, and the alternate URI is `<prefix>=<hex>`. */
+class Sha256ComponentType final : public ComponentType
+{
+public:
+  Sha256ComponentType(uint32_t type, const std::string& typeName, const std::string& uriPrefix)
+    : m_type(type)
+    , m_typeName(typeName)
+    , m_uriPrefix(uriPrefix)
+  {
+  }
+
+  void
+  check(const Component& comp) const final
+  {
+    if (comp.value_size() != util::Sha256::DIGEST_SIZE) {
+      BOOST_THROW_EXCEPTION(Error(m_typeName + " TLV-LENGTH must be " +
+                                  to_string(util::Sha256::DIGEST_SIZE)));
+    }
+  }
+
+  std::pair<bool, Component>
+  getSuccessor(const Component& comp) const final
+  {
+    bool isExtended = false;
+    Block successor;
+    std::tie(isExtended, successor) = getSuccessorImpl(comp);
+    if (isExtended) { // every octet wrapped around: a digest cannot grow, so report overflow
+      return {true, comp}; // second member is a placeholder; only the flag is meaningful
+    }
+    return {false, Component(successor)};
+  }
+
+  const std::vector<uint8_t>&
+  getMinValue() const final
+  {
+    static std::vector<uint8_t> value(util::Sha256::DIGEST_SIZE); // all-zero digest, passes check()
+    return value;
+  }
+
+  const char*
+  getAltUriPrefix() const final
+  {
+    return m_uriPrefix.data();
+  }
+
+  Component
+  parseAltUriValue(const std::string& input) const final
+  {
+    shared_ptr<Buffer> value;
+    try {
+      value = fromHex(input);
+    }
+    catch (const StringHelperError&) { // rethrow as Component::Error, the type callers expect
+      BOOST_THROW_EXCEPTION(Error("Cannot convert to " + m_typeName + " (invalid hex encoding)"));
+    }
+    return Component(m_type, std::move(value));
+  }
+
+  void
+  writeUri(std::ostream& os, const Component& comp) const final
+  {
+    os << m_uriPrefix << '=';
+    printHex(os, comp.value(), comp.value_size(), false);
+  }
+
+private:
+  uint32_t m_type; // TLV-TYPE given to components built by parseAltUriValue
+  std::string m_typeName; // used in error messages
+  std::string m_uriPrefix; // the `<prefix>` of the alternate URI representation
+};
+
+/** \brief Rules regarding NameComponent types.
+ *  Rules can be looked up by TLV-TYPE or by alternate URI prefix. */
+class ComponentTypeTable : noncopyable
+{
+public:
+  ComponentTypeTable();
+
+  /** \brief Retrieve ComponentType by TLV-TYPE.
+   *  A type with no registered rules gets the base ComponentType rules. */
+  const ComponentType&
+  get(uint32_t type) const
+  {
+    if (type >= m_table.size() || m_table[type] == nullptr) {
+      return m_baseType;
+    }
+    return *m_table[type];
+  }
+
+  /** \brief Retrieve ComponentType by alternate URI prefix.
+   *  \retval nullptr no registered type declares \p prefix as its alternate URI prefix. */
+  const ComponentType*
+  findByUriPrefix(const std::string& prefix) const
+  {
+    auto it = m_uriPrefixes.find(prefix);
+    if (it == m_uriPrefixes.end()) {
+      return nullptr;
+    }
+    return it->second;
+  }
+
+private:
+  void
+  set(uint32_t type, const ComponentType& ct)
+  {
+    m_table.at(type) = &ct; // stores a non-owning pointer; at() throws if type is out of range
+    if (ct.getAltUriPrefix() != nullptr) {
+      m_uriPrefixes[ct.getAltUriPrefix()] = &ct;
+    }
+  }
+
+private:
+  ComponentType m_baseType; // rules returned by get() for types without an entry
+  std::array<const ComponentType*, 32> m_table; // indexed by TLV-TYPE; nullptr = no entry
+  std::unordered_map<std::string, const ComponentType*> m_uriPrefixes; // keyed by URI prefix
+};
+
+inline ComponentTypeTable::ComponentTypeTable() // inline: header-defined, avoids ODR clashes
+{
+  m_table.fill(nullptr);
+  // Handlers below are function-local statics: the table keeps only pointers to them.
+  static GenericNameComponentType ct8;
+  set(tlv::GenericNameComponent, ct8);
+
+  static Sha256ComponentType ct1(tlv::ImplicitSha256DigestComponent,
+                                 "ImplicitSha256DigestComponent", "sha256digest");
+  set(tlv::ImplicitSha256DigestComponent, ct1);
+}
+
+/** \brief Get the global ComponentTypeTable.
+ *  The table is a function-local static; the function is inline as it is defined in a header. */
+inline const ComponentTypeTable&
+getComponentTypeTable()
+{
+  static ComponentTypeTable ctt;
+  return ctt;
+}
+
+} // namespace detail
+} // namespace name
+} // namespace ndn
+
+#endif // NDN_DETAIL_NAME_COMPONENT_TYPES_HPP
diff --git a/src/name-component.cpp b/src/name-component.cpp
index 7252d29..6ac6f31 100644
--- a/src/name-component.cpp
+++ b/src/name-component.cpp
@@ -24,12 +24,9 @@
  */
 
 #include "name-component.hpp"
+#include "detail/name-component-types.hpp"
 
-#include "encoding/block-helpers.hpp"
-#include "encoding/encoding-buffer.hpp"
-#include "util/sha256.hpp"
-#include "util/string-helper.hpp"
-
+#include <cstdlib>
 #include <cstring>
 #include <sstream>
 
@@ -43,23 +40,13 @@
 static_assert(std::is_base_of<tlv::Error, Component::Error>::value,
               "name::Component::Error must inherit from tlv::Error");
 
-static const std::string&
-getSha256DigestUriPrefix()
-{
-  static const std::string prefix{"sha256digest="};
-  return prefix;
-}
-
 void
 Component::ensureValid() const
 {
   if (type() < tlv::NameComponentMin || type() > tlv::NameComponentMax) {
     BOOST_THROW_EXCEPTION(Error("TLV-TYPE " + to_string(type()) + " is not a valid NameComponent"));
   }
-  if (type() == tlv::ImplicitSha256DigestComponent && value_size() != util::Sha256::DIGEST_SIZE) {
-    BOOST_THROW_EXCEPTION(Error("ImplicitSha256DigestComponent TLV-LENGTH must be " +
-                                to_string(util::Sha256::DIGEST_SIZE)));
-  }
+  detail::getComponentTypeTable().get(type()).check(*this);
 }
 
 Component::Component(uint32_t type)
@@ -77,11 +64,13 @@
 Component::Component(uint32_t type, ConstBufferPtr buffer)
   : Block(type, std::move(buffer))
 {
+  ensureValid();
 }
 
 Component::Component(uint32_t type, const uint8_t* value, size_t valueLen)
   : Block(makeBinaryBlock(type, value, valueLen))
 {
+  ensureValid();
 }
 
 Component::Component(const char* str)
@@ -95,68 +84,48 @@
 }
 
 static Component
-parseSha256DigestUri(std::string input)
+parseUriEscapedValue(uint32_t type, const char* input, size_t len)
 {
-  input.erase(0, getSha256DigestUriPrefix().size());
-
-  try {
-    return Component::fromImplicitSha256Digest(fromHex(input));
-  }
-  catch (const StringHelperError&) {
-    BOOST_THROW_EXCEPTION(Component::Error("Cannot convert to a ImplicitSha256DigestComponent "
-                                           "(invalid hex encoding)"));
-  }
-}
-
-Component
-Component::fromEscapedString(std::string input)
-{
-  uint32_t type = tlv::GenericNameComponent;
-  size_t equalPos = input.find('=');
-  if (equalPos != std::string::npos) {
-    if (equalPos + 1 == getSha256DigestUriPrefix().size() &&
-        input.compare(0, getSha256DigestUriPrefix().size(), getSha256DigestUriPrefix()) == 0) {
-      return parseSha256DigestUri(std::move(input));
-    }
-
-    long parsedType = std::strtol(input.data(), nullptr, 10);
-    if (parsedType < tlv::NameComponentMin || parsedType > tlv::NameComponentMax ||
-        parsedType == tlv::ImplicitSha256DigestComponent || parsedType == tlv::GenericNameComponent ||
-        to_string(parsedType).size() != equalPos) {
-      BOOST_THROW_EXCEPTION(Error("Incorrect TLV-TYPE in NameComponent URI"));
-    }
-    type = static_cast<uint32_t>(parsedType);
-    input.erase(0, equalPos + 1);
-  }
-
-  std::string value = unescape(input);
+  std::ostringstream oss;
+  unescape(oss, input, len);
+  std::string value = oss.str();
   if (value.find_first_not_of('.') == std::string::npos) { // all periods
     if (value.size() < 3) {
-      BOOST_THROW_EXCEPTION(Error("Illegal URI (name component cannot be . or ..)"));
+      BOOST_THROW_EXCEPTION(Component::Error("Illegal URI (name component cannot be . or ..)"));
     }
     return Component(type, reinterpret_cast<const uint8_t*>(value.data()), value.size() - 3);
   }
   return Component(type, reinterpret_cast<const uint8_t*>(value.data()), value.size());
 }
 
+Component
+Component::fromEscapedString(const std::string& input)
+{
+  size_t equalPos = input.find('=');
+  if (equalPos == std::string::npos) { // no '=': treat the whole input as a generic component
+    return parseUriEscapedValue(tlv::GenericNameComponent, input.data(), input.size());
+  }
+
+  long type = std::strtol(input.data(), nullptr, 10);
+  if (type >= tlv::NameComponentMin && type <= tlv::NameComponentMax &&
+      to_string(type).size() == equalPos) { // exact decimal form only ("09", "+9", " 9" fail)
+    size_t valuePos = equalPos + 1;
+    return parseUriEscapedValue(static_cast<uint32_t>(type), input.data() + valuePos,
+                                input.size() - valuePos);
+  }
+
+  auto typePrefix = input.substr(0, equalPos); // not a type number: try an alternate URI prefix
+  auto ct = detail::getComponentTypeTable().findByUriPrefix(typePrefix);
+  if (ct == nullptr) {
+    BOOST_THROW_EXCEPTION(Error("Incorrect TLV-TYPE '" + typePrefix + "' in NameComponent URI"));
+  }
+  return ct->parseAltUriValue(input.substr(equalPos + 1));
+}
+
 void
 Component::toUri(std::ostream& os) const
 {
-  if (type() == tlv::ImplicitSha256DigestComponent) {
-    os << getSha256DigestUriPrefix();
-    printHex(os, value(), value_size(), false);
-    return;
-  }
-
-  if (type() != tlv::GenericNameComponent) {
-    os << type() << '=';
-  }
-
-  if (std::all_of(value_begin(), value_end(), [] (uint8_t x) { return x == '.'; })) { // all periods
-    os << "...";
-  }
-
-  escape(os, reinterpret_cast<const char*>(value()), value_size());
+  detail::getComponentTypeTable().get(type()).writeUri(os, *this);
 }
 
 std::string
@@ -398,59 +367,20 @@
   return std::memcmp(value(), other.value(), value_size());
 }
 
-static Component
-getDigestSuccessor(const Component& comp)
-{
-  size_t totalLength = 0;
-  EncodingBuffer encoder(comp.size(), 0);
-
-  bool isOverflow = true;
-  size_t i = comp.value_size();
-  for (; isOverflow && i > 0; i--) {
-    uint8_t newValue = static_cast<uint8_t>((comp.value()[i - 1] + 1) & 0xFF);
-    totalLength += encoder.prependByte(newValue);
-    isOverflow = (newValue == 0);
-  }
-  totalLength += encoder.prependByteArray(comp.value(), i);
-
-  if (isOverflow) {
-    return Component(comp.type() + 1);
-  }
-
-  encoder.prependVarNumber(totalLength);
-  encoder.prependVarNumber(comp.type());
-  return encoder.block();
-}
-
 Component
 Component::getSuccessor() const
 {
-  if (isImplicitSha256Digest()) {
-    return getDigestSuccessor(*this);
+  bool isOverflow = false;
+  Component successor;
+  std::tie(isOverflow, successor) =
+    detail::getComponentTypeTable().get(type()).getSuccessor(*this);
+  if (!isOverflow) {
+    return successor;
   }
 
-  size_t totalLength = 0;
-  EncodingBuffer encoder(size() + 9, 9);
-  // leave room for additional byte when TLV-VALUE overflows, and for TLV-LENGTH size increase
-
-  bool isOverflow = true;
-  size_t i = value_size();
-  for (; isOverflow && i > 0; i--) {
-    uint8_t newValue = static_cast<uint8_t>((value()[i - 1] + 1) & 0xFF);
-    totalLength += encoder.prependByte(newValue);
-    isOverflow = (newValue == 0);
-  }
-  totalLength += encoder.prependByteArray(value(), i);
-
-  if (isOverflow) {
-    // new name component has to be extended
-    totalLength += encoder.appendByte(0);
-  }
-
-  encoder.prependVarNumber(totalLength);
-  encoder.prependVarNumber(type());
-
-  return encoder.block();
+  uint32_t type = this->type() + 1;
+  const std::vector<uint8_t>& value = detail::getComponentTypeTable().get(type).getMinValue();
+  return Component(type, value.data(), value.size());
 }
 
 template<encoding::Tag TAG>
diff --git a/src/name-component.hpp b/src/name-component.hpp
index 9f8de4f..9340176 100644
--- a/src/name-component.hpp
+++ b/src/name-component.hpp
@@ -212,7 +212,7 @@
    * @throw Error URI component does not represent a valid NameComponent.
    */
   static Component
-  fromEscapedString(std::string input);
+  fromEscapedString(const std::string& input);
 
   /**
    * @brief Write *this to the output stream, escaping characters according to the NDN URI Scheme
diff --git a/tests/unit-tests/name-component.t.cpp b/tests/unit-tests/name-component.t.cpp
index 0bc7978..0c9b447 100644
--- a/tests/unit-tests/name-component.t.cpp
+++ b/tests/unit-tests/name-component.t.cpp
@@ -42,10 +42,12 @@
   BOOST_CHECK_EQUAL(comp.type(), tlv::GenericNameComponent);
   BOOST_CHECK_EQUAL(comp.toUri(), "ndn-cxx");
   BOOST_CHECK_EQUAL(Component::fromEscapedString("ndn-cxx"), comp);
+  BOOST_CHECK_EQUAL(Component::fromEscapedString("8=ndn-cxx"), comp);
 
   comp.wireDecode("0800"_block);
   BOOST_CHECK_EQUAL(comp.toUri(), "...");
   BOOST_CHECK_EQUAL(Component::fromEscapedString("..."), comp);
+  BOOST_CHECK_EQUAL(Component::fromEscapedString("8=..."), comp);
   BOOST_CHECK_EQUAL(Component::fromEscapedString(".%2E."), comp);
 
   comp.wireDecode("0801 2E"_block);
@@ -60,11 +62,16 @@
   comp.wireDecode("0807 666F6F25626172"_block);
   BOOST_CHECK_EQUAL(comp.toUri(), "foo%25bar");
   BOOST_CHECK_EQUAL(Component::fromEscapedString("foo%25bar"), comp);
+  BOOST_CHECK_EQUAL(Component::fromEscapedString("8=foo%25bar"), comp);
 
   comp.wireDecode("0804 2D2E5F7E"_block);
   BOOST_CHECK_EQUAL(comp.toUri(), "-._~");
   BOOST_CHECK_EQUAL(Component::fromEscapedString("-._~"), comp);
 
+  comp.wireDecode("0803 393D41"_block);
+  BOOST_CHECK_EQUAL(comp.toUri(), "9%3DA");
+  BOOST_CHECK_EQUAL(Component::fromEscapedString("9%3DA"), comp);
+
   comp = Component(":/?#[]@");
   BOOST_CHECK_EQUAL(comp.toUri(), "%3A%2F%3F%23%5B%5D%40");
   BOOST_CHECK_EQUAL(Component::fromEscapedString("%3A%2F%3F%23%5B%5D%40"), comp);
@@ -72,7 +79,6 @@
   BOOST_CHECK_THROW(Component::fromEscapedString(""), Component::Error);
   BOOST_CHECK_THROW(Component::fromEscapedString("."), Component::Error);
   BOOST_CHECK_THROW(Component::fromEscapedString(".."), Component::Error);
-  BOOST_CHECK_THROW(Component::fromEscapedString("8=A"), Component::Error);
 }
 
 BOOST_AUTO_TEST_CASE(Digest)
@@ -80,11 +86,14 @@
   std::string uriPrefix = "sha256digest=";
   std::string hexLower = "28bad4b5275bd392dbb670c75cf0b66f13f7942b21e80f55c0e86b374753a548";
   std::string hexUpper = "28BAD4B5275BD392DBB670C75CF0B66F13F7942B21E80F55C0E86B374753A548";
+  std::string hexPct = "%28%BA%D4%B5%27%5B%D3%92%DB%B6%70%C7%5C%F0%B6%6F"
+                       "%13%F7%94%2B%21%E8%0F%55%C0%E8%6B%37%47%53%A5%48";
 
   Component comp("0120 28BAD4B5275BD392DBB670C75CF0B66F13F7942B21E80F55C0E86B374753A548"_block);
   BOOST_CHECK_EQUAL(comp.toUri(), uriPrefix + hexLower);
   BOOST_CHECK_EQUAL(Component::fromEscapedString(uriPrefix + hexLower), comp);
   BOOST_CHECK_EQUAL(Component::fromEscapedString(uriPrefix + hexUpper), comp);
+  BOOST_CHECK_EQUAL(Component::fromEscapedString("1=" + hexPct), comp);
 
   BOOST_CHECK_THROW(comp.wireDecode("0108 A791806951F25C4D"_block), Component::Error);
   BOOST_CHECK_THROW(Component::fromEscapedString(uriPrefix), Component::Error);
@@ -124,6 +133,7 @@
 
   BOOST_CHECK_THROW(Component::fromEscapedString("0=A"), Component::Error);
   BOOST_CHECK_THROW(Component::fromEscapedString("65536=A"), Component::Error);
+  BOOST_CHECK_THROW(Component::fromEscapedString("4294967296=A"), Component::Error);
   BOOST_CHECK_THROW(Component::fromEscapedString("-1=A"), Component::Error);
   BOOST_CHECK_THROW(Component::fromEscapedString("+=A"), Component::Error);
   BOOST_CHECK_THROW(Component::fromEscapedString("=A"), Component::Error);
@@ -132,6 +142,10 @@
   BOOST_CHECK_THROW(Component::fromEscapedString("09=A"), Component::Error);
   BOOST_CHECK_THROW(Component::fromEscapedString("0x2=A"), Component::Error);
   BOOST_CHECK_THROW(Component::fromEscapedString("+9=A"), Component::Error);
+  BOOST_CHECK_THROW(Component::fromEscapedString(" 9=A"), Component::Error);
+  BOOST_CHECK_THROW(Component::fromEscapedString("9 =A"), Component::Error);
+  BOOST_CHECK_THROW(Component::fromEscapedString("9.0=A"), Component::Error);
+  BOOST_CHECK_THROW(Component::fromEscapedString("9E0=A"), Component::Error);
 }
 
 BOOST_AUTO_TEST_SUITE_END() // Decode