table: NameTree optimization

This commit adds platform-specific hash function selection and hash
table shrink function.

Change-Id: Iaaa0e5b86842d4e0582d3523f0a6b6836e152239
Refs: #1305
diff --git a/daemon/table/name-tree-entry.cpp b/daemon/table/name-tree-entry.cpp
index a3e4880..0e4d821 100644
--- a/daemon/table/name-tree-entry.cpp
+++ b/daemon/table/name-tree-entry.cpp
@@ -29,7 +29,9 @@
 namespace nfd {
 namespace name_tree {
 
-Node::Node() : m_prev(0), m_next(0)
+Node::Node()
+  : m_prev(0)
+  , m_next(0)
 {
 }
 
@@ -54,44 +56,6 @@
 }
 
 void
-Entry::setHash(uint32_t hash)
-{
-  m_hash = hash;
-}
-
-void
-Entry::setParent(shared_ptr<Entry> parent)
-{
-  m_parent = parent;
-}
-
-void
-Entry::setFibEntry(shared_ptr<fib::Entry> fibEntry)
-{
-  if (static_cast<bool>(fibEntry)) {
-    BOOST_ASSERT(!static_cast<bool>(fibEntry->m_nameTreeEntry));
-  }
-
-  if (static_cast<bool>(m_fibEntry)) {
-    m_fibEntry->m_nameTreeEntry.reset();
-  }
-  m_fibEntry = fibEntry;
-  if (static_cast<bool>(m_fibEntry)) {
-    m_fibEntry->m_nameTreeEntry = this->shared_from_this();
-  }
-}
-
-void
-Entry::insertPitEntry(shared_ptr<pit::Entry> pitEntry)
-{
-  BOOST_ASSERT(static_cast<bool>(pitEntry));
-  BOOST_ASSERT(!static_cast<bool>(pitEntry->m_nameTreeEntry));
-
-  m_pitEntries.push_back(pitEntry);
-  pitEntry->m_nameTreeEntry = this->shared_from_this();
-}
-
-void
 Entry::erasePitEntry(shared_ptr<pit::Entry> pitEntry)
 {
   BOOST_ASSERT(static_cast<bool>(pitEntry));
@@ -106,37 +70,5 @@
   pitEntry->m_nameTreeEntry.reset();
 }
 
-void
-Entry::setMeasurementsEntry(shared_ptr<measurements::Entry> measurementsEntry)
-{
-  if (static_cast<bool>(measurementsEntry)) {
-    BOOST_ASSERT(!static_cast<bool>(measurementsEntry->m_nameTreeEntry));
-  }
-
-  if (static_cast<bool>(m_measurementsEntry)) {
-    m_measurementsEntry->m_nameTreeEntry.reset();
-  }
-  m_measurementsEntry = measurementsEntry;
-  if (static_cast<bool>(m_measurementsEntry)) {
-    m_measurementsEntry->m_nameTreeEntry = this->shared_from_this();
-  }
-}
-
-void
-Entry::setStrategyChoiceEntry(shared_ptr<strategy_choice::Entry> strategyChoiceEntry)
-{
-  if (static_cast<bool>(strategyChoiceEntry)) {
-    BOOST_ASSERT(!static_cast<bool>(strategyChoiceEntry->m_nameTreeEntry));
-  }
-
-  if (static_cast<bool>(m_strategyChoiceEntry)) {
-    m_strategyChoiceEntry->m_nameTreeEntry.reset();
-  }
-  m_strategyChoiceEntry = strategyChoiceEntry;
-  if (static_cast<bool>(m_strategyChoiceEntry)) {
-    m_strategyChoiceEntry->m_nameTreeEntry = this->shared_from_this();
-  }
-}
-
 } // namespace name_tree
 } // namespace nfd
diff --git a/daemon/table/name-tree-entry.hpp b/daemon/table/name-tree-entry.hpp
index 0a0e05e..ab2e307 100644
--- a/daemon/table/name-tree-entry.hpp
+++ b/daemon/table/name-tree-entry.hpp
@@ -75,9 +75,9 @@
   getPrefix() const;
 
   void
-  setHash(uint32_t hash);
+  setHash(size_t hash);
 
-  uint32_t
+  size_t
   getHash() const;
 
   void
@@ -126,7 +126,10 @@
   getStrategyChoiceEntry() const;
 
 private:
-  uint32_t m_hash;
+  // Benefits of storing m_hash
+  // 1. m_hash is compared before m_prefix is compared
+  // 2. fast hash table resize support
+  size_t m_hash;
   Name m_prefix;
   shared_ptr<Entry> m_parent;     // Pointing to the parent entry.
   std::vector<shared_ptr<Entry> > m_children; // Children pointers.
@@ -137,6 +140,7 @@
 
   // get the Name Tree Node that is associated with this Name Tree Entry
   Node* m_node;
+
   // Make private members accessible by Name Tree
   friend class nfd::NameTree;
 };
@@ -147,18 +151,30 @@
   return m_prefix;
 }
 
-inline uint32_t
+inline size_t
 Entry::getHash() const
 {
   return m_hash;
 }
 
+inline void
+Entry::setHash(size_t hash)
+{
+  m_hash = hash;
+}
+
 inline shared_ptr<Entry>
 Entry::getParent() const
 {
   return m_parent;
 }
 
+inline void
+Entry::setParent(shared_ptr<Entry> parent)
+{
+  m_parent = parent;
+}
+
 inline std::vector<shared_ptr<name_tree::Entry> >&
 Entry::getChildren()
 {
@@ -186,6 +202,22 @@
   return m_fibEntry;
 }
 
+inline void
+Entry::setFibEntry(shared_ptr<fib::Entry> fibEntry)
+{
+  if (static_cast<bool>(fibEntry)) {
+    BOOST_ASSERT(!static_cast<bool>(fibEntry->m_nameTreeEntry));
+  }
+
+  if (static_cast<bool>(m_fibEntry)) {
+    m_fibEntry->m_nameTreeEntry.reset();
+  }
+  m_fibEntry = fibEntry;
+  if (static_cast<bool>(m_fibEntry)) {
+    m_fibEntry->m_nameTreeEntry = this->shared_from_this();
+  }
+}
+
 inline bool
 Entry::hasPitEntries() const
 {
@@ -198,18 +230,60 @@
   return m_pitEntries;
 }
 
+inline void
+Entry::insertPitEntry(shared_ptr<pit::Entry> pitEntry)
+{
+  BOOST_ASSERT(static_cast<bool>(pitEntry));
+  BOOST_ASSERT(!static_cast<bool>(pitEntry->m_nameTreeEntry));
+
+  m_pitEntries.push_back(pitEntry);
+  pitEntry->m_nameTreeEntry = this->shared_from_this();
+}
+
 inline shared_ptr<measurements::Entry>
 Entry::getMeasurementsEntry() const
 {
   return m_measurementsEntry;
 }
 
+inline void
+Entry::setMeasurementsEntry(shared_ptr<measurements::Entry> measurementsEntry)
+{
+  if (static_cast<bool>(measurementsEntry)) {
+    BOOST_ASSERT(!static_cast<bool>(measurementsEntry->m_nameTreeEntry));
+  }
+
+  if (static_cast<bool>(m_measurementsEntry)) {
+    m_measurementsEntry->m_nameTreeEntry.reset();
+  }
+  m_measurementsEntry = measurementsEntry;
+  if (static_cast<bool>(m_measurementsEntry)) {
+    m_measurementsEntry->m_nameTreeEntry = this->shared_from_this();
+  }
+}
+
 inline shared_ptr<strategy_choice::Entry>
 Entry::getStrategyChoiceEntry() const
 {
   return m_strategyChoiceEntry;
 }
 
+inline void
+Entry::setStrategyChoiceEntry(shared_ptr<strategy_choice::Entry> strategyChoiceEntry)
+{
+  if (static_cast<bool>(strategyChoiceEntry)) {
+    BOOST_ASSERT(!static_cast<bool>(strategyChoiceEntry->m_nameTreeEntry));
+  }
+
+  if (static_cast<bool>(m_strategyChoiceEntry)) {
+    m_strategyChoiceEntry->m_nameTreeEntry.reset();
+  }
+  m_strategyChoiceEntry = strategyChoiceEntry;
+  if (static_cast<bool>(m_strategyChoiceEntry)) {
+    m_strategyChoiceEntry->m_nameTreeEntry = this->shared_from_this();
+  }
+}
+
 } // namespace name_tree
 } // namespace nfd
 
diff --git a/daemon/table/name-tree.cpp b/daemon/table/name-tree.cpp
index 101014a..b42b240 100644
--- a/daemon/table/name-tree.cpp
+++ b/daemon/table/name-tree.cpp
@@ -26,8 +26,7 @@
 
 #include "name-tree.hpp"
 #include "core/logger.hpp"
-
-#include <boost/functional/hash.hpp>
+#include "core/city-hash.hpp"
 
 namespace nfd {
 
@@ -35,20 +34,67 @@
 
 namespace name_tree {
 
-// Interface of different hash functions
-uint32_t
-hashName(const Name& prefix)
+class Hash32
 {
-  // fixed value. Used for debugging only.
-  uint32_t ret = 0;
+public:
+  static size_t
+  compute(const char* buffer, size_t length)
+  {
+    return static_cast<size_t>(CityHash32(buffer, length));
+  }
+};
 
-  // Boost hash
-  // requires the /boost/functional/hash.hpp header file
-  // TODO: improve hash efficiency with Name type
-  boost::hash<std::string> stringHash;
-  ret = stringHash(prefix.toUri());
+class Hash64
+{
+public:
+  static size_t
+  compute(const char* buffer, size_t length)
+  {
+    return static_cast<size_t>(CityHash64(buffer, length));
+  }
+};
 
-  return ret;
+typedef boost::mpl::if_c<sizeof(size_t) >= 8, Hash64, Hash32>::type CityHash;
+
+// Interface of different hash functions
+size_t
+computeHash(const Name& prefix)
+{
+  prefix.wireEncode();  // guarantees prefix's wire buffer is not empty
+
+  size_t hashValue = 0;
+  size_t hashUpdate = 0;
+
+  for (Name::const_iterator it = prefix.begin(); it != prefix.end(); it++)
+    {
+      const char* wireFormat = reinterpret_cast<const char*>( it->wire() );
+      hashUpdate = CityHash::compute(wireFormat, it->size());
+      hashValue ^= hashUpdate;
+    }
+
+  return hashValue;
+}
+
+std::vector<size_t>
+computeHashSet(const Name& prefix)
+{
+  prefix.wireEncode();  // guarantees prefix's wire buffer is not empty
+
+  size_t hashValue = 0;
+  size_t hashUpdate = 0;
+
+  std::vector<size_t> hashValueSet;
+  hashValueSet.push_back(hashValue);
+
+  for (Name::const_iterator it = prefix.begin(); it != prefix.end(); it++)
+    {
+      const char* wireFormat = reinterpret_cast<const char*>( it->wire() );
+      hashUpdate = CityHash::compute(wireFormat, it->size());
+      hashValue ^= hashUpdate;
+      hashValueSet.push_back(hashValue);
+    }
+
+  return hashValueSet;
 }
 
 } // namespace name_tree
@@ -56,11 +102,17 @@
 NameTree::NameTree(size_t nBuckets)
   : m_nItems(0)
   , m_nBuckets(nBuckets)
-  , m_loadFactor(0.5)
-  , m_resizeFactor(2)
+  , m_minNBuckets(nBuckets)
+  , m_enlargeLoadFactor(0.5)       // more than 50% buckets loaded
+  , m_enlargeFactor(2)       // double the hash table size
+  , m_shrinkLoadFactor(0.1) // less than 10% buckets loaded
+  , m_shrinkFactor(0.5)     // reduce the number of buckets by half
   , m_endIterator(FULL_ENUMERATE_TYPE, *this, m_end)
 {
-  m_resizeThreshold = static_cast<size_t>(m_loadFactor *
+  m_enlargeThreshold = static_cast<size_t>(m_enlargeLoadFactor *
+                                          static_cast<double>(m_nBuckets));
+
+  m_shrinkThreshold = static_cast<size_t>(m_shrinkLoadFactor *
                                           static_cast<double>(m_nBuckets));
 
   // array of node pointers
@@ -74,8 +126,9 @@
 {
   for (size_t i = 0; i < m_nBuckets; i++)
     {
-      if (m_buckets[i] != 0)
+      if (m_buckets[i] != 0) {
         delete m_buckets[i];
+      }
     }
 
   delete [] m_buckets;
@@ -87,8 +140,8 @@
 {
   NFD_LOG_TRACE("insert " << prefix);
 
-  uint32_t hashValue = name_tree::hashName(prefix);
-  uint32_t loc = hashValue % m_nBuckets;
+  size_t hashValue = name_tree::computeHash(prefix);
+  size_t loc = hashValue % m_nBuckets;
 
   NFD_LOG_TRACE("Name " << prefix << " hash value = " << hashValue << "  location = " << loc);
 
@@ -152,7 +205,7 @@
 
       if (ret.second == true)
         {
-          m_nItems++; /* Increase the counter */
+          m_nItems++; // Increase the counter
           entry->m_parent = parent;
 
           if (static_cast<bool>(parent))
@@ -161,9 +214,9 @@
             }
         }
 
-      if (m_nItems > m_resizeThreshold)
+      if (m_nItems > m_enlargeThreshold)
         {
-          resize(m_resizeFactor * m_nBuckets);
+          resize(m_enlargeFactor * m_nBuckets);
         }
 
       parent = entry;
@@ -177,8 +230,8 @@
 {
   NFD_LOG_TRACE("findExactMatch " << prefix);
 
-  uint32_t hashValue = name_tree::hashName(prefix);
-  uint32_t loc = hashValue % m_nBuckets;
+  size_t hashValue = name_tree::computeHash(prefix);
+  size_t loc = hashValue % m_nBuckets;
 
   NFD_LOG_TRACE("Name " << prefix << " hash value = " << hashValue <<
                 "  location = " << loc);
@@ -204,23 +257,42 @@
 }
 
 // Longest Prefix Match
-// Return the longest matching Entry address
-// start from the full name, and then remove 1 name comp each time
 shared_ptr<name_tree::Entry>
 NameTree::findLongestPrefixMatch(const Name& prefix, const name_tree::EntrySelector& entrySelector) const
 {
   NFD_LOG_TRACE("findLongestPrefixMatch " << prefix);
 
   shared_ptr<name_tree::Entry> entry;
+  std::vector<size_t> hashValueSet = name_tree::computeHashSet(prefix);
 
-  for (int i = prefix.size(); i >= 0; i--)
+  size_t hashValue = 0;
+  size_t loc = 0;
+
+  for (int i = static_cast<int>(prefix.size()); i >= 0; i--)
     {
-      entry = findExactMatch(prefix.getPrefix(i));
-      if (static_cast<bool>(entry) && entrySelector(*entry))
-        return entry;
+      hashValue = hashValueSet[i];
+      loc = hashValue % m_nBuckets;
+
+      name_tree::Node* node = 0;
+      for (node = m_buckets[loc]; node != 0; node = node->m_next)
+        {
+          entry = node->m_entry;
+          if (static_cast<bool>(entry))
+            {
+              // isPrefixOf() is used to avoid making a copy of the name
+              if (hashValue == entry->getHash() &&
+                  entry->getPrefix().isPrefixOf(prefix) &&
+                  entrySelector(*entry))
+                {
+                  return entry;
+                }
+            } // if entry
+        } // for node
     }
 
-  return shared_ptr<name_tree::Entry>();
+  // if not found, the default value of entry (null pointer) will be returned
+  entry.reset();
+  return entry;
 }
 
 shared_ptr<name_tree::Entry>
@@ -301,6 +373,14 @@
       if (static_cast<bool>(parent))
         eraseEntryIfEmpty(parent);
 
+      size_t newNBuckets = static_cast<size_t>(m_shrinkFactor *
+                                     static_cast<double>(m_nBuckets));
+
+      if (newNBuckets >= m_minNBuckets && m_nItems < m_shrinkThreshold)
+        {
+          resize(newNBuckets);
+        }
+
       return true;
 
     } // if this entry is empty
@@ -434,10 +514,14 @@
 
   name_tree::Node** oldBuckets = m_buckets;
   m_buckets = newBuckets;
-  delete oldBuckets;
+  delete [] oldBuckets;
 
   m_nBuckets = newNBuckets;
-  m_resizeThreshold = (int)(m_loadFactor * (double)m_nBuckets);
+
+  m_enlargeThreshold = static_cast<size_t>(m_enlargeLoadFactor *
+                                              static_cast<double>(m_nBuckets));
+  m_shrinkThreshold = static_cast<size_t>(m_shrinkLoadFactor *
+                                              static_cast<double>(m_nBuckets));
 }
 
 // For debugging
diff --git a/daemon/table/name-tree.hpp b/daemon/table/name-tree.hpp
index c210a76..4048137 100644
--- a/daemon/table/name-tree.hpp
+++ b/daemon/table/name-tree.hpp
@@ -34,14 +34,17 @@
 namespace name_tree {
 
 /**
- * \brief Compute the hash value of the given name prefix.
- * \todo 1) have another parameter that specifies the number of components
- * 2) optimize the hash function to return a list of have values for all
- * the (or a certain number of, like a batch operation) prefixes. 3) move
- * the hash-related code to a separate file in /core or ndn-cpp-dev lib.
+ * \brief Compute the hash value of the given name prefix's WIRE FORMAT
  */
-uint32_t
-hashName(const Name& prefix);
+size_t
+computeHash(const Name& prefix);
+
+/**
+ * \brief Incrementally compute hash values
+ * \return Return a vector of hash values, starting from the root prefix
+ */
+std::vector<size_t>
+computeHashSet(const Name& prefix);
 
 /// a predicate to accept or reject an Entry in find operations
 typedef function<bool (const Entry& entry)> EntrySelector;
@@ -80,7 +83,7 @@
   class const_iterator;
 
   explicit
-  NameTree(size_t nBuckets);
+  NameTree(size_t nBuckets = 1024);
 
   ~NameTree();
 
@@ -254,9 +257,13 @@
 private:
   size_t                        m_nItems;  // Number of items being stored
   size_t                        m_nBuckets; // Number of hash buckets
-  double                        m_loadFactor;
-  size_t                        m_resizeThreshold;
-  int                           m_resizeFactor;
+  size_t                        m_minNBuckets; // Minimum number of hash buckets
+  double                        m_enlargeLoadFactor;
+  size_t                        m_enlargeThreshold;
+  int                           m_enlargeFactor;
+  double                        m_shrinkLoadFactor;
+  size_t                        m_shrinkThreshold;
+  double                        m_shrinkFactor;
   name_tree::Node**             m_buckets; // Name Tree Buckets in the NPHT
   shared_ptr<name_tree::Entry>  m_end;
   const_iterator                m_endIterator;