table: refactor NameTree hashtable

refs #3687

Change-Id: I908c1d7e67da861fc86a756fc0bc69b99ee696f7
diff --git a/daemon/table/name-tree.cpp b/daemon/table/name-tree.cpp
index 09b57f7..bbe5784 100644
--- a/daemon/table/name-tree.cpp
+++ b/daemon/table/name-tree.cpp
@@ -25,7 +25,6 @@
 
 #include "name-tree.hpp"
 #include "core/logger.hpp"
-#include "core/city-hash.hpp"
 
 #include <boost/concept/assert.hpp>
 #include <boost/concept_check.hpp>
@@ -36,190 +35,30 @@
 
 NFD_LOG_INIT("NameTree");
 
-// http://en.cppreference.com/w/cpp/concept/ForwardIterator
-BOOST_CONCEPT_ASSERT((boost::ForwardIterator<NameTree::const_iterator>));
-// boost::ForwardIterator follows SGI standard http://www.sgi.com/tech/stl/ForwardIterator.html,
-// which doesn't require DefaultConstructible
-#ifdef HAVE_IS_DEFAULT_CONSTRUCTIBLE
-static_assert(std::is_default_constructible<NameTree::const_iterator>::value,
-              "NameTree::const_iterator must be default-constructible");
-#else
-BOOST_CONCEPT_ASSERT((boost::DefaultConstructible<NameTree::const_iterator>));
-#endif // HAVE_IS_DEFAULT_CONSTRUCTIBLE
-
-class Hash32
-{
-public:
-  static size_t
-  compute(const char* buffer, size_t length)
-  {
-    return static_cast<size_t>(CityHash32(buffer, length));
-  }
-};
-
-class Hash64
-{
-public:
-  static size_t
-  compute(const char* buffer, size_t length)
-  {
-    return static_cast<size_t>(CityHash64(buffer, length));
-  }
-};
-
-/// @cond NoDocumentation
-typedef boost::mpl::if_c<sizeof(size_t) >= 8, Hash64, Hash32>::type CityHash;
-/// @endcond
-
-// Interface of different hash functions
-size_t
-computeHash(const Name& prefix)
-{
-  prefix.wireEncode();  // guarantees prefix's wire buffer is not empty
-
-  size_t hashValue = 0;
-  size_t hashUpdate = 0;
-
-  for (Name::const_iterator it = prefix.begin(); it != prefix.end(); ++it) {
-    const char* wireFormat = reinterpret_cast<const char*>( it->wire() );
-    hashUpdate = CityHash::compute(wireFormat, it->size());
-    hashValue ^= hashUpdate;
-  }
-
-  return hashValue;
-}
-
-std::vector<size_t>
-computeHashSet(const Name& prefix)
-{
-  prefix.wireEncode(); // guarantees prefix's wire buffer is not empty
-
-  size_t hashValue = 0;
-  size_t hashUpdate = 0;
-
-  std::vector<size_t> hashValueSet;
-  hashValueSet.push_back(hashValue);
-
-  for (Name::const_iterator it = prefix.begin(); it != prefix.end(); ++it) {
-    const char* wireFormat = reinterpret_cast<const char*>( it->wire() );
-    hashUpdate = CityHash::compute(wireFormat, it->size());
-    hashValue ^= hashUpdate;
-    hashValueSet.push_back(hashValue);
-  }
-
-  return hashValueSet;
-}
-
 NameTree::NameTree(size_t nBuckets)
-  : m_nItems(0)
-  , m_nBuckets(nBuckets)
-  , m_minNBuckets(nBuckets)
-  , m_enlargeLoadFactor(0.5) // more than 50% buckets loaded
-  , m_enlargeFactor(2)       // double the hash table size
-  , m_shrinkLoadFactor(0.1)  // less than 10% buckets loaded
-  , m_shrinkFactor(0.5)      // reduce the number of buckets by half
+  : m_ht(HashtableOptions(nBuckets))
 {
-  m_enlargeThreshold = static_cast<size_t>(m_enlargeLoadFactor * static_cast<double>(m_nBuckets));
-  m_shrinkThreshold = static_cast<size_t>(m_shrinkLoadFactor * static_cast<double>(m_nBuckets));
-
-  // array of node pointers
-  m_buckets = new Node*[m_nBuckets];
-  // Initialize the pointer array
-  for (size_t i = 0; i < m_nBuckets; ++i) {
-    m_buckets[i] = nullptr;
-  }
 }
 
-NameTree::~NameTree()
-{
-  for (size_t i = 0; i < m_nBuckets; ++i) {
-    if (m_buckets[i] != nullptr) {
-      delete m_buckets[i];
-    }
-  }
-
-  delete[] m_buckets;
-}
-
-// insert() is a private function, and called by only lookup()
-std::pair<shared_ptr<Entry>, bool>
-NameTree::insert(const Name& prefix)
-{
-  NFD_LOG_TRACE("insert " << prefix);
-
-  size_t hashValue = computeHash(prefix);
-  size_t loc = hashValue % m_nBuckets;
-
-  NFD_LOG_TRACE("Name " << prefix << " hash value = " << hashValue << "  location = " << loc);
-
-  // Check if this Name has been stored
-  Node* node = m_buckets[loc];
-  Node* nodePrev = node;
-
-  for (node = m_buckets[loc]; node != nullptr; node = node->m_next) {
-    if (node->m_entry != nullptr) {
-      if (prefix == node->m_entry->m_prefix) {
-         return {node->m_entry, false}; // false: old entry
-      }
-    }
-    nodePrev = node;
-  }
-
-  NFD_LOG_TRACE("Did not find " << prefix << ", need to insert it to the table");
-
-  // If no bucket is empty occupied, we need to create a new node, and it is
-  // linked from nodePrev
-  node = new Node();
-  node->m_prev = nodePrev;
-
-  if (nodePrev == nullptr) {
-    m_buckets[loc] = node;
-  }
-  else{
-    nodePrev->m_next = node;
-  }
-
-  // Create a new Entry
-  auto entry = make_shared<Entry>(prefix);
-  entry->setHash(hashValue);
-  node->m_entry = entry; // link the Entry to its Node
-  entry->m_node = node; // link the node to Entry. Used in eraseEntryIfEmpty.
-
-  return {entry, true}; // true: new entry
-}
-
-// Name Prefix Lookup. Create Name Tree Entry if not found
 shared_ptr<Entry>
-NameTree::lookup(const Name& prefix)
+NameTree::lookup(const Name& name)
 {
-  NFD_LOG_TRACE("lookup " << prefix);
+  NFD_LOG_TRACE("lookup " << name);
 
-  shared_ptr<Entry> entry;
-  shared_ptr<Entry> parent;
+  HashSequence hashes = computeHashes(name);
+  const Node* node = nullptr;
+  Entry* parent = nullptr;
 
-  for (size_t i = 0; i <= prefix.size(); ++i) {
-    Name temp = prefix.getPrefix(i);
-
-    // insert() will create the entry if it does not exist.
+  for (size_t prefixLen = 0; prefixLen <= name.size(); ++prefixLen) {
     bool isNew = false;
-    std::tie(entry, isNew) = insert(temp);
+    std::tie(node, isNew) = m_ht.insert(name, prefixLen, hashes);
 
-    if (isNew) {
-      ++m_nItems; // Increase the counter
-      entry->m_parent = parent;
-
-      if (parent != nullptr) {
-        parent->m_children.push_back(entry);
-      }
+    if (isNew && parent != nullptr) {
+      node->entry->setParent(*parent);
     }
-
-    if (m_nItems > m_enlargeThreshold) {
-      resize(m_enlargeFactor * m_nBuckets);
-    }
-
-    parent = entry;
+    parent = node->entry.get();
   }
-  return entry;
+  return node->entry;
 }
 
 shared_ptr<Entry>
@@ -263,132 +102,46 @@
   return nte;
 }
 
-// return {false: this entry is not empty, true: this entry is empty and erased}
-bool
-NameTree::eraseEntryIfEmpty(shared_ptr<Entry> entry)
+size_t
+NameTree::eraseEntryIfEmpty(Entry* entry)
 {
   BOOST_ASSERT(entry != nullptr);
 
-  NFD_LOG_TRACE("eraseEntryIfEmpty " << entry->getPrefix());
-
-  // first check if this Entry can be erased
-  if (entry->isEmpty()) {
-    // update child-related info in the parent
-    shared_ptr<Entry> parent = entry->getParent();
+  size_t nErased = 0;
+  for (Entry* parent = nullptr; entry != nullptr && entry->isEmpty(); entry = parent) {
+    parent = entry->getParent();
 
     if (parent != nullptr) {
-      std::vector<shared_ptr<Entry>>& parentChildrenList = parent->getChildren();
-
-      bool isFound = false;
-      size_t size = parentChildrenList.size();
-      for (size_t i = 0; i < size; ++i) {
-        if (parentChildrenList[i] == entry) {
-          parentChildrenList[i] = parentChildrenList[size - 1];
-          parentChildrenList.pop_back();
-          isFound = true;
-          break;
-        }
-      }
-
-      BOOST_VERIFY(isFound == true);
+      entry->unsetParent();
     }
 
-    // remove this Entry and its Name Tree Node
-    Node* node = entry->m_node;
-    Node* nodePrev = node->m_prev;
-
-    // configure the previous node
-    if (nodePrev != nullptr) {
-      // link the previous node to the next node
-      nodePrev->m_next = node->m_next;
-    }
-    else {
-      m_buckets[entry->getHash() % m_nBuckets] = node->m_next;
-    }
-
-    // link the previous node with the next node (skip the erased one)
-    if (node->m_next != nullptr) {
-      node->m_next->m_prev = nodePrev;
-      node->m_next = 0;
-    }
-
-    BOOST_ASSERT(node->m_next == nullptr);
-
-    --m_nItems;
-    delete node;
-
-    if (parent != nullptr) {
-      eraseEntryIfEmpty(parent);
-    }
-
-    size_t newNBuckets = static_cast<size_t>(m_shrinkFactor * static_cast<double>(m_nBuckets));
-
-    if (newNBuckets >= m_minNBuckets && m_nItems < m_shrinkThreshold) {
-      resize(newNBuckets);
-    }
-
-    return true;
-
+    m_ht.erase(getNode(*entry));
+    ++nErased;
   }
 
-  return false;
+  if (nErased == 0) {
+    NFD_LOG_TRACE("not-erase " << entry->getName());
+  }
+  return nErased;
 }
 
-// Exact Match
 shared_ptr<Entry>
-NameTree::findExactMatch(const Name& prefix) const
+NameTree::findExactMatch(const Name& name) const
 {
-  NFD_LOG_TRACE("findExactMatch " << prefix);
-
-  size_t hashValue = computeHash(prefix);
-  size_t loc = hashValue % m_nBuckets;
-
-  NFD_LOG_TRACE("Name " << prefix << " hash value = " << hashValue << "  location = " << loc);
-
-  shared_ptr<Entry> entry;
-  Node* node = nullptr;
-
-  for (node = m_buckets[loc]; node != nullptr; node = node->m_next) {
-    entry = node->m_entry;
-    if (entry != nullptr) {
-      if (hashValue == entry->getHash() && prefix == entry->getPrefix()) {
-        return entry;
-      }
-    }
-  }
-
-  // if not found, the default value of entry (null pointer) will be returned
-  entry.reset();
-  return entry;
+  const Node* node = m_ht.find(name, name.size());
+  return node == nullptr ? nullptr : node->entry;
 }
 
 // Longest Prefix Match
 shared_ptr<Entry>
-NameTree::findLongestPrefixMatch(const Name& prefix, const EntrySelector& entrySelector) const
+NameTree::findLongestPrefixMatch(const Name& name, const EntrySelector& entrySelector) const
 {
-  NFD_LOG_TRACE("findLongestPrefixMatch " << prefix);
+  HashSequence hashes = computeHashes(name);
 
-  shared_ptr<Entry> entry;
-  std::vector<size_t> hashValueSet = computeHashSet(prefix);
-
-  size_t hashValue = 0;
-  size_t loc = 0;
-
-  for (int i = static_cast<int>(prefix.size()); i >= 0; --i) {
-    hashValue = hashValueSet[i];
-    loc = hashValue % m_nBuckets;
-
-    Node* node = nullptr;
-    for (node = m_buckets[loc]; node != nullptr; node = node->m_next) {
-      entry = node->m_entry;
-      if (entry != nullptr) {
-        // isPrefixOf() is used to avoid making a copy of the name
-        if (hashValue == entry->getHash() &&
-            entry->getPrefix().isPrefixOf(prefix) &&
-            entrySelector(*entry)) {
-          return entry;
-        }
-      }
+  for (ssize_t prefixLen = name.size(); prefixLen >= 0; --prefixLen) {
+    const Node* node = m_ht.find(name, prefixLen, hashes);
+    if (node != nullptr && entrySelector(*node->entry)) {
+      return node->entry;
     }
   }
 
@@ -396,12 +149,13 @@
 }
 
 shared_ptr<Entry>
-NameTree::findLongestPrefixMatch(shared_ptr<Entry> entry,
+NameTree::findLongestPrefixMatch(shared_ptr<Entry> entry1,
                                  const EntrySelector& entrySelector) const
 {
+  Entry* entry = entry1.get();
   while (entry != nullptr) {
     if (entrySelector(*entry)) {
-      return entry;
+      return entry->shared_from_this();
     }
     entry = entry->getParent();
   }
@@ -436,7 +190,7 @@
   // trie from the root node.
 
   shared_ptr<Entry> entry = findLongestPrefixMatch(prefix, entrySelector);
-  return {Iterator(make_shared<PrefixMatchImpl>(*this, entrySelector), entry), end()};
+  return {Iterator(make_shared<PrefixMatchImpl>(*this, entrySelector), entry.get()), end()};
 }
 
 boost::iterator_range<NameTree::const_iterator>
@@ -453,98 +207,7 @@
 {
   // the first step is to process the root node
   shared_ptr<Entry> entry = findExactMatch(prefix);
-  return {Iterator(make_shared<PartialEnumerationImpl>(*this, entrySubTreeSelector), entry), end()};
-}
-
-// Hash Table Resize
-void
-NameTree::resize(size_t newNBuckets)
-{
-  NFD_LOG_TRACE("resize");
-
-  Node** newBuckets = new Node*[newNBuckets];
-  size_t count = 0;
-
-  // referenced ccnx hashtb.c hashtb_rehash()
-  Node** pp = nullptr;
-  Node* p = nullptr;
-  Node* pre = nullptr;
-  Node* q = nullptr; // record p->m_next
-
-  for (size_t i = 0; i < newNBuckets; ++i) {
-    newBuckets[i] = nullptr;
-  }
-
-  for (size_t i = 0; i < m_nBuckets; ++i) {
-    for (p = m_buckets[i]; p != nullptr; p = q) {
-      ++count;
-      q = p->m_next;
-      BOOST_ASSERT(p->m_entry != nullptr);
-      uint32_t h = p->m_entry->m_hash;
-      uint32_t b = h % newNBuckets;
-      pre = nullptr;
-      for (pp = &newBuckets[b]; *pp != nullptr; pp = &((*pp)->m_next)) {
-        pre = *pp;
-      }
-      p->m_prev = pre;
-      p->m_next = *pp; // Actually *pp always == nullptr in this case
-      *pp = p;
-    }
-  }
-
-  BOOST_ASSERT(count == m_nItems);
-
-  Node** oldBuckets = m_buckets;
-  m_buckets = newBuckets;
-  delete[] oldBuckets;
-
-  m_nBuckets = newNBuckets;
-  m_enlargeThreshold = static_cast<size_t>(m_enlargeLoadFactor * static_cast<double>(m_nBuckets));
-  m_shrinkThreshold = static_cast<size_t>(m_shrinkLoadFactor * static_cast<double>(m_nBuckets));
-}
-
-// For debugging
-void
-NameTree::dump(std::ostream& output) const
-{
-  NFD_LOG_TRACE("dump()");
-
-  Node* node = nullptr;
-  shared_ptr<Entry> entry;
-
-  for (size_t i = 0; i < m_nBuckets; ++i) {
-    for (node = m_buckets[i]; node != nullptr; node = node->m_next) {
-      entry = node->m_entry;
-
-      // if the Entry exist, dump its information
-      if (entry != nullptr) {
-        output << "Bucket" << i << '\t' << entry->m_prefix.toUri() << '\n';
-        output << "\t\tHash " << entry->m_hash << '\n';
-
-        if (entry->m_parent != nullptr) {
-          output << "\t\tparent->" << entry->m_parent->m_prefix.toUri();
-        }
-        else {
-          output << "\t\tROOT";
-        }
-        output << '\n';
-
-        if (!entry->m_children.empty()) {
-          output << "\t\tchildren = " << entry->m_children.size() << '\n';
-
-          for (size_t j = 0; j < entry->m_children.size(); ++j) {
-            output << "\t\t\tChild " << j << " " << entry->m_children[j]->getPrefix() << '\n';
-          }
-        }
-
-      }
-
-    }
-  }
-
-  output << "Bucket count = " << m_nBuckets << '\n';
-  output << "Stored item = " << m_nItems << '\n';
-  output << "--------------------------\n";
+  return {Iterator(make_shared<PartialEnumerationImpl>(*this, entrySubTreeSelector), entry.get()), end()};
 }
 
 } // namespace name_tree