table: NameTree optimization

This commit adds platform-specific hash function selection and hash
table shrink function.

Change-Id: Iaaa0e5b86842d4e0582d3523f0a6b6836e152239
Refs: #1305
diff --git a/daemon/table/name-tree.cpp b/daemon/table/name-tree.cpp
index 101014a..b42b240 100644
--- a/daemon/table/name-tree.cpp
+++ b/daemon/table/name-tree.cpp
@@ -26,8 +26,7 @@
 
 #include "name-tree.hpp"
 #include "core/logger.hpp"
-
-#include <boost/functional/hash.hpp>
+#include "core/city-hash.hpp"
 
 namespace nfd {
 
@@ -35,20 +34,67 @@
 
 namespace name_tree {
 
-// Interface of different hash functions
-uint32_t
-hashName(const Name& prefix)
+class Hash32
 {
-  // fixed value. Used for debugging only.
-  uint32_t ret = 0;
+public:
+  static size_t
+  compute(const char* buffer, size_t length)
+  {
+    return static_cast<size_t>(CityHash32(buffer, length));
+  }
+};
 
-  // Boost hash
-  // requires the /boost/functional/hash.hpp header file
-  // TODO: improve hash efficiency with Name type
-  boost::hash<std::string> stringHash;
-  ret = stringHash(prefix.toUri());
+class Hash64
+{
+public:
+  static size_t
+  compute(const char* buffer, size_t length)
+  {
+    return static_cast<size_t>(CityHash64(buffer, length));
+  }
+};
 
-  return ret;
+typedef boost::mpl::if_c<sizeof(size_t) >= 8, Hash64, Hash32>::type CityHash;
+
+// Interface of different hash functions
+size_t
+computeHash(const Name& prefix)
+{
+  prefix.wireEncode();  // guarantees prefix's wire buffer is not empty
+
+  size_t hashValue = 0;
+  size_t hashUpdate = 0;
+
+  for (Name::const_iterator it = prefix.begin(); it != prefix.end(); it++)
+    {
+      const char* wireFormat = reinterpret_cast<const char*>( it->wire() );
+      hashUpdate = CityHash::compute(wireFormat, it->size());
+      hashValue ^= hashUpdate;
+    }
+
+  return hashValue;
+}
+
+std::vector<size_t>
+computeHashSet(const Name& prefix)
+{
+  prefix.wireEncode();  // guarantees prefix's wire buffer is not empty
+
+  size_t hashValue = 0;
+  size_t hashUpdate = 0;
+
+  std::vector<size_t> hashValueSet;
+  hashValueSet.push_back(hashValue);
+
+  for (Name::const_iterator it = prefix.begin(); it != prefix.end(); it++)
+    {
+      const char* wireFormat = reinterpret_cast<const char*>( it->wire() );
+      hashUpdate = CityHash::compute(wireFormat, it->size());
+      hashValue ^= hashUpdate;
+      hashValueSet.push_back(hashValue);
+    }
+
+  return hashValueSet;
 }
 
 } // namespace name_tree
@@ -56,11 +102,17 @@
 NameTree::NameTree(size_t nBuckets)
   : m_nItems(0)
   , m_nBuckets(nBuckets)
-  , m_loadFactor(0.5)
-  , m_resizeFactor(2)
+  , m_minNBuckets(nBuckets)
+  , m_enlargeLoadFactor(0.5)       // more than 50% buckets loaded
+  , m_enlargeFactor(2)       // double the hash table size
+  , m_shrinkLoadFactor(0.1) // less than 10% buckets loaded
+  , m_shrinkFactor(0.5)     // reduce the number of buckets by half
   , m_endIterator(FULL_ENUMERATE_TYPE, *this, m_end)
 {
-  m_resizeThreshold = static_cast<size_t>(m_loadFactor *
+  m_enlargeThreshold = static_cast<size_t>(m_enlargeLoadFactor *
+                                          static_cast<double>(m_nBuckets));
+
+  m_shrinkThreshold = static_cast<size_t>(m_shrinkLoadFactor *
                                           static_cast<double>(m_nBuckets));
 
   // array of node pointers
@@ -74,8 +126,9 @@
 {
   for (size_t i = 0; i < m_nBuckets; i++)
     {
-      if (m_buckets[i] != 0)
+      if (m_buckets[i] != 0) {
         delete m_buckets[i];
+      }
     }
 
   delete [] m_buckets;
@@ -87,8 +140,8 @@
 {
   NFD_LOG_TRACE("insert " << prefix);
 
-  uint32_t hashValue = name_tree::hashName(prefix);
-  uint32_t loc = hashValue % m_nBuckets;
+  size_t hashValue = name_tree::computeHash(prefix);
+  size_t loc = hashValue % m_nBuckets;
 
   NFD_LOG_TRACE("Name " << prefix << " hash value = " << hashValue << "  location = " << loc);
 
@@ -152,7 +205,7 @@
 
       if (ret.second == true)
         {
-          m_nItems++; /* Increase the counter */
+          m_nItems++; // Increase the counter
           entry->m_parent = parent;
 
           if (static_cast<bool>(parent))
@@ -161,9 +214,9 @@
             }
         }
 
-      if (m_nItems > m_resizeThreshold)
+      if (m_nItems > m_enlargeThreshold)
         {
-          resize(m_resizeFactor * m_nBuckets);
+          resize(m_enlargeFactor * m_nBuckets);
         }
 
       parent = entry;
@@ -177,8 +230,8 @@
 {
   NFD_LOG_TRACE("findExactMatch " << prefix);
 
-  uint32_t hashValue = name_tree::hashName(prefix);
-  uint32_t loc = hashValue % m_nBuckets;
+  size_t hashValue = name_tree::computeHash(prefix);
+  size_t loc = hashValue % m_nBuckets;
 
   NFD_LOG_TRACE("Name " << prefix << " hash value = " << hashValue <<
                 "  location = " << loc);
@@ -204,23 +257,42 @@
 }
 
 // Longest Prefix Match
-// Return the longest matching Entry address
-// start from the full name, and then remove 1 name comp each time
 shared_ptr<name_tree::Entry>
 NameTree::findLongestPrefixMatch(const Name& prefix, const name_tree::EntrySelector& entrySelector) const
 {
   NFD_LOG_TRACE("findLongestPrefixMatch " << prefix);
 
   shared_ptr<name_tree::Entry> entry;
+  std::vector<size_t> hashValueSet = name_tree::computeHashSet(prefix);
 
-  for (int i = prefix.size(); i >= 0; i--)
+  size_t hashValue = 0;
+  size_t loc = 0;
+
+  for (int i = static_cast<int>(prefix.size()); i >= 0; i--)
     {
-      entry = findExactMatch(prefix.getPrefix(i));
-      if (static_cast<bool>(entry) && entrySelector(*entry))
-        return entry;
+      hashValue = hashValueSet[i];
+      loc = hashValue % m_nBuckets;
+
+      name_tree::Node* node = 0;
+      for (node = m_buckets[loc]; node != 0; node = node->m_next)
+        {
+          entry = node->m_entry;
+          if (static_cast<bool>(entry))
+            {
+              // isPrefixOf() is used to avoid making a copy of the name
+              if (hashValue == entry->getHash() &&
+                  entry->getPrefix().isPrefixOf(prefix) &&
+                  entrySelector(*entry))
+                {
+                  return entry;
+                }
+            } // if entry
+        } // for node
     }
 
-  return shared_ptr<name_tree::Entry>();
+  // if not found, the default value of entry (null pointer) will be returned
+  entry.reset();
+  return entry;
 }
 
 shared_ptr<name_tree::Entry>
@@ -301,6 +373,14 @@
       if (static_cast<bool>(parent))
         eraseEntryIfEmpty(parent);
 
+      size_t newNBuckets = static_cast<size_t>(m_shrinkFactor *
+                                     static_cast<double>(m_nBuckets));
+
+      if (newNBuckets >= m_minNBuckets && m_nItems < m_shrinkThreshold)
+        {
+          resize(newNBuckets);
+        }
+
       return true;
 
     } // if this entry is empty
@@ -434,10 +514,14 @@
 
   name_tree::Node** oldBuckets = m_buckets;
   m_buckets = newBuckets;
-  delete oldBuckets;
+  delete [] oldBuckets;
 
   m_nBuckets = newNBuckets;
-  m_resizeThreshold = (int)(m_loadFactor * (double)m_nBuckets);
+
+  m_enlargeThreshold = static_cast<size_t>(m_enlargeLoadFactor *
+                                              static_cast<double>(m_nBuckets));
+  m_shrinkThreshold = static_cast<size_t>(m_shrinkLoadFactor *
+                                              static_cast<double>(m_nBuckets));
 }
 
 // For debugging