table: NameTree optimization
This commit adds platform-specific hash function selection and hash
table shrink function.
Change-Id: Iaaa0e5b86842d4e0582d3523f0a6b6836e152239
Refs: #1305
diff --git a/daemon/table/name-tree-entry.cpp b/daemon/table/name-tree-entry.cpp
index a3e4880..0e4d821 100644
--- a/daemon/table/name-tree-entry.cpp
+++ b/daemon/table/name-tree-entry.cpp
@@ -29,7 +29,9 @@
namespace nfd {
namespace name_tree {
-Node::Node() : m_prev(0), m_next(0)
+Node::Node()
+ : m_prev(0)
+ , m_next(0)
{
}
@@ -54,44 +56,6 @@
}
void
-Entry::setHash(uint32_t hash)
-{
- m_hash = hash;
-}
-
-void
-Entry::setParent(shared_ptr<Entry> parent)
-{
- m_parent = parent;
-}
-
-void
-Entry::setFibEntry(shared_ptr<fib::Entry> fibEntry)
-{
- if (static_cast<bool>(fibEntry)) {
- BOOST_ASSERT(!static_cast<bool>(fibEntry->m_nameTreeEntry));
- }
-
- if (static_cast<bool>(m_fibEntry)) {
- m_fibEntry->m_nameTreeEntry.reset();
- }
- m_fibEntry = fibEntry;
- if (static_cast<bool>(m_fibEntry)) {
- m_fibEntry->m_nameTreeEntry = this->shared_from_this();
- }
-}
-
-void
-Entry::insertPitEntry(shared_ptr<pit::Entry> pitEntry)
-{
- BOOST_ASSERT(static_cast<bool>(pitEntry));
- BOOST_ASSERT(!static_cast<bool>(pitEntry->m_nameTreeEntry));
-
- m_pitEntries.push_back(pitEntry);
- pitEntry->m_nameTreeEntry = this->shared_from_this();
-}
-
-void
Entry::erasePitEntry(shared_ptr<pit::Entry> pitEntry)
{
BOOST_ASSERT(static_cast<bool>(pitEntry));
@@ -106,37 +70,5 @@
pitEntry->m_nameTreeEntry.reset();
}
-void
-Entry::setMeasurementsEntry(shared_ptr<measurements::Entry> measurementsEntry)
-{
- if (static_cast<bool>(measurementsEntry)) {
- BOOST_ASSERT(!static_cast<bool>(measurementsEntry->m_nameTreeEntry));
- }
-
- if (static_cast<bool>(m_measurementsEntry)) {
- m_measurementsEntry->m_nameTreeEntry.reset();
- }
- m_measurementsEntry = measurementsEntry;
- if (static_cast<bool>(m_measurementsEntry)) {
- m_measurementsEntry->m_nameTreeEntry = this->shared_from_this();
- }
-}
-
-void
-Entry::setStrategyChoiceEntry(shared_ptr<strategy_choice::Entry> strategyChoiceEntry)
-{
- if (static_cast<bool>(strategyChoiceEntry)) {
- BOOST_ASSERT(!static_cast<bool>(strategyChoiceEntry->m_nameTreeEntry));
- }
-
- if (static_cast<bool>(m_strategyChoiceEntry)) {
- m_strategyChoiceEntry->m_nameTreeEntry.reset();
- }
- m_strategyChoiceEntry = strategyChoiceEntry;
- if (static_cast<bool>(m_strategyChoiceEntry)) {
- m_strategyChoiceEntry->m_nameTreeEntry = this->shared_from_this();
- }
-}
-
} // namespace name_tree
} // namespace nfd
diff --git a/daemon/table/name-tree-entry.hpp b/daemon/table/name-tree-entry.hpp
index 0a0e05e..ab2e307 100644
--- a/daemon/table/name-tree-entry.hpp
+++ b/daemon/table/name-tree-entry.hpp
@@ -75,9 +75,9 @@
getPrefix() const;
void
- setHash(uint32_t hash);
+ setHash(size_t hash);
- uint32_t
+ size_t
getHash() const;
void
@@ -126,7 +126,10 @@
getStrategyChoiceEntry() const;
private:
- uint32_t m_hash;
+ // Benefits of storing m_hash
+ // 1. m_hash is compared before m_prefix is compared
+ // 2. fast hash table resize support
+ size_t m_hash;
Name m_prefix;
shared_ptr<Entry> m_parent; // Pointing to the parent entry.
std::vector<shared_ptr<Entry> > m_children; // Children pointers.
@@ -137,6 +140,7 @@
// get the Name Tree Node that is associated with this Name Tree Entry
Node* m_node;
+
// Make private members accessible by Name Tree
friend class nfd::NameTree;
};
@@ -147,18 +151,30 @@
return m_prefix;
}
-inline uint32_t
+inline size_t
Entry::getHash() const
{
return m_hash;
}
+inline void
+Entry::setHash(size_t hash)
+{
+ m_hash = hash;
+}
+
inline shared_ptr<Entry>
Entry::getParent() const
{
return m_parent;
}
+inline void
+Entry::setParent(shared_ptr<Entry> parent)
+{
+ m_parent = parent;
+}
+
inline std::vector<shared_ptr<name_tree::Entry> >&
Entry::getChildren()
{
@@ -186,6 +202,22 @@
return m_fibEntry;
}
+inline void
+Entry::setFibEntry(shared_ptr<fib::Entry> fibEntry)
+{
+ if (static_cast<bool>(fibEntry)) {
+ BOOST_ASSERT(!static_cast<bool>(fibEntry->m_nameTreeEntry));
+ }
+
+ if (static_cast<bool>(m_fibEntry)) {
+ m_fibEntry->m_nameTreeEntry.reset();
+ }
+ m_fibEntry = fibEntry;
+ if (static_cast<bool>(m_fibEntry)) {
+ m_fibEntry->m_nameTreeEntry = this->shared_from_this();
+ }
+}
+
inline bool
Entry::hasPitEntries() const
{
@@ -198,18 +230,60 @@
return m_pitEntries;
}
+inline void
+Entry::insertPitEntry(shared_ptr<pit::Entry> pitEntry)
+{
+ BOOST_ASSERT(static_cast<bool>(pitEntry));
+ BOOST_ASSERT(!static_cast<bool>(pitEntry->m_nameTreeEntry));
+
+ m_pitEntries.push_back(pitEntry);
+ pitEntry->m_nameTreeEntry = this->shared_from_this();
+}
+
inline shared_ptr<measurements::Entry>
Entry::getMeasurementsEntry() const
{
return m_measurementsEntry;
}
+inline void
+Entry::setMeasurementsEntry(shared_ptr<measurements::Entry> measurementsEntry)
+{
+ if (static_cast<bool>(measurementsEntry)) {
+ BOOST_ASSERT(!static_cast<bool>(measurementsEntry->m_nameTreeEntry));
+ }
+
+ if (static_cast<bool>(m_measurementsEntry)) {
+ m_measurementsEntry->m_nameTreeEntry.reset();
+ }
+ m_measurementsEntry = measurementsEntry;
+ if (static_cast<bool>(m_measurementsEntry)) {
+ m_measurementsEntry->m_nameTreeEntry = this->shared_from_this();
+ }
+}
+
inline shared_ptr<strategy_choice::Entry>
Entry::getStrategyChoiceEntry() const
{
return m_strategyChoiceEntry;
}
+inline void
+Entry::setStrategyChoiceEntry(shared_ptr<strategy_choice::Entry> strategyChoiceEntry)
+{
+ if (static_cast<bool>(strategyChoiceEntry)) {
+ BOOST_ASSERT(!static_cast<bool>(strategyChoiceEntry->m_nameTreeEntry));
+ }
+
+ if (static_cast<bool>(m_strategyChoiceEntry)) {
+ m_strategyChoiceEntry->m_nameTreeEntry.reset();
+ }
+ m_strategyChoiceEntry = strategyChoiceEntry;
+ if (static_cast<bool>(m_strategyChoiceEntry)) {
+ m_strategyChoiceEntry->m_nameTreeEntry = this->shared_from_this();
+ }
+}
+
} // namespace name_tree
} // namespace nfd
diff --git a/daemon/table/name-tree.cpp b/daemon/table/name-tree.cpp
index 101014a..b42b240 100644
--- a/daemon/table/name-tree.cpp
+++ b/daemon/table/name-tree.cpp
@@ -26,8 +26,7 @@
#include "name-tree.hpp"
#include "core/logger.hpp"
-
-#include <boost/functional/hash.hpp>
+#include "core/city-hash.hpp"
namespace nfd {
@@ -35,20 +34,67 @@
namespace name_tree {
-// Interface of different hash functions
-uint32_t
-hashName(const Name& prefix)
+class Hash32
{
- // fixed value. Used for debugging only.
- uint32_t ret = 0;
+public:
+ static size_t
+ compute(const char* buffer, size_t length)
+ {
+ return static_cast<size_t>(CityHash32(buffer, length));
+ }
+};
- // Boost hash
- // requires the /boost/functional/hash.hpp header file
- // TODO: improve hash efficiency with Name type
- boost::hash<std::string> stringHash;
- ret = stringHash(prefix.toUri());
+class Hash64
+{
+public:
+ static size_t
+ compute(const char* buffer, size_t length)
+ {
+ return static_cast<size_t>(CityHash64(buffer, length));
+ }
+};
- return ret;
+typedef boost::mpl::if_c<sizeof(size_t) >= 8, Hash64, Hash32>::type CityHash;
+
+// Interface of different hash functions
+size_t
+computeHash(const Name& prefix)
+{
+ prefix.wireEncode(); // guarantees prefix's wire buffer is not empty
+
+ size_t hashValue = 0;
+ size_t hashUpdate = 0;
+
+ for (Name::const_iterator it = prefix.begin(); it != prefix.end(); it++)
+ {
+ const char* wireFormat = reinterpret_cast<const char*>( it->wire() );
+ hashUpdate = CityHash::compute(wireFormat, it->size());
+ hashValue ^= hashUpdate;
+ }
+
+ return hashValue;
+}
+
+std::vector<size_t>
+computeHashSet(const Name& prefix)
+{
+ prefix.wireEncode(); // guarantees prefix's wire buffer is not empty
+
+ size_t hashValue = 0;
+ size_t hashUpdate = 0;
+
+ std::vector<size_t> hashValueSet;
+ hashValueSet.push_back(hashValue);
+
+ for (Name::const_iterator it = prefix.begin(); it != prefix.end(); it++)
+ {
+ const char* wireFormat = reinterpret_cast<const char*>( it->wire() );
+ hashUpdate = CityHash::compute(wireFormat, it->size());
+ hashValue ^= hashUpdate;
+ hashValueSet.push_back(hashValue);
+ }
+
+ return hashValueSet;
}
} // namespace name_tree
@@ -56,11 +102,17 @@
NameTree::NameTree(size_t nBuckets)
: m_nItems(0)
, m_nBuckets(nBuckets)
- , m_loadFactor(0.5)
- , m_resizeFactor(2)
+ , m_minNBuckets(nBuckets)
+ , m_enlargeLoadFactor(0.5) // more than 50% buckets loaded
+ , m_enlargeFactor(2) // double the hash table size
+ , m_shrinkLoadFactor(0.1) // less than 10% buckets loaded
+ , m_shrinkFactor(0.5) // reduce the number of buckets by half
, m_endIterator(FULL_ENUMERATE_TYPE, *this, m_end)
{
- m_resizeThreshold = static_cast<size_t>(m_loadFactor *
+ m_enlargeThreshold = static_cast<size_t>(m_enlargeLoadFactor *
+ static_cast<double>(m_nBuckets));
+
+ m_shrinkThreshold = static_cast<size_t>(m_shrinkLoadFactor *
static_cast<double>(m_nBuckets));
// array of node pointers
@@ -74,8 +126,9 @@
{
for (size_t i = 0; i < m_nBuckets; i++)
{
- if (m_buckets[i] != 0)
+ if (m_buckets[i] != 0) {
delete m_buckets[i];
+ }
}
delete [] m_buckets;
@@ -87,8 +140,8 @@
{
NFD_LOG_TRACE("insert " << prefix);
- uint32_t hashValue = name_tree::hashName(prefix);
- uint32_t loc = hashValue % m_nBuckets;
+ size_t hashValue = name_tree::computeHash(prefix);
+ size_t loc = hashValue % m_nBuckets;
NFD_LOG_TRACE("Name " << prefix << " hash value = " << hashValue << " location = " << loc);
@@ -152,7 +205,7 @@
if (ret.second == true)
{
- m_nItems++; /* Increase the counter */
+ m_nItems++; // Increase the counter
entry->m_parent = parent;
if (static_cast<bool>(parent))
@@ -161,9 +214,9 @@
}
}
- if (m_nItems > m_resizeThreshold)
+ if (m_nItems > m_enlargeThreshold)
{
- resize(m_resizeFactor * m_nBuckets);
+ resize(m_enlargeFactor * m_nBuckets);
}
parent = entry;
@@ -177,8 +230,8 @@
{
NFD_LOG_TRACE("findExactMatch " << prefix);
- uint32_t hashValue = name_tree::hashName(prefix);
- uint32_t loc = hashValue % m_nBuckets;
+ size_t hashValue = name_tree::computeHash(prefix);
+ size_t loc = hashValue % m_nBuckets;
NFD_LOG_TRACE("Name " << prefix << " hash value = " << hashValue <<
" location = " << loc);
@@ -204,23 +257,42 @@
}
// Longest Prefix Match
-// Return the longest matching Entry address
-// start from the full name, and then remove 1 name comp each time
shared_ptr<name_tree::Entry>
NameTree::findLongestPrefixMatch(const Name& prefix, const name_tree::EntrySelector& entrySelector) const
{
NFD_LOG_TRACE("findLongestPrefixMatch " << prefix);
shared_ptr<name_tree::Entry> entry;
+ std::vector<size_t> hashValueSet = name_tree::computeHashSet(prefix);
- for (int i = prefix.size(); i >= 0; i--)
+ size_t hashValue = 0;
+ size_t loc = 0;
+
+ for (int i = static_cast<int>(prefix.size()); i >= 0; i--)
{
- entry = findExactMatch(prefix.getPrefix(i));
- if (static_cast<bool>(entry) && entrySelector(*entry))
- return entry;
+ hashValue = hashValueSet[i];
+ loc = hashValue % m_nBuckets;
+
+ name_tree::Node* node = 0;
+ for (node = m_buckets[loc]; node != 0; node = node->m_next)
+ {
+ entry = node->m_entry;
+ if (static_cast<bool>(entry))
+ {
+ // isPrefixOf() is used to avoid making a copy of the name
+ if (hashValue == entry->getHash() &&
+ entry->getPrefix().isPrefixOf(prefix) &&
+ entrySelector(*entry))
+ {
+ return entry;
+ }
+ } // if entry
+ } // for node
}
- return shared_ptr<name_tree::Entry>();
+ // if not found, the default value of entry (null pointer) will be returned
+ entry.reset();
+ return entry;
}
shared_ptr<name_tree::Entry>
@@ -301,6 +373,14 @@
if (static_cast<bool>(parent))
eraseEntryIfEmpty(parent);
+ size_t newNBuckets = static_cast<size_t>(m_shrinkFactor *
+ static_cast<double>(m_nBuckets));
+
+ if (newNBuckets >= m_minNBuckets && m_nItems < m_shrinkThreshold)
+ {
+ resize(newNBuckets);
+ }
+
return true;
} // if this entry is empty
@@ -434,10 +514,14 @@
name_tree::Node** oldBuckets = m_buckets;
m_buckets = newBuckets;
- delete oldBuckets;
+ delete [] oldBuckets;
m_nBuckets = newNBuckets;
- m_resizeThreshold = (int)(m_loadFactor * (double)m_nBuckets);
+
+ m_enlargeThreshold = static_cast<size_t>(m_enlargeLoadFactor *
+ static_cast<double>(m_nBuckets));
+ m_shrinkThreshold = static_cast<size_t>(m_shrinkLoadFactor *
+ static_cast<double>(m_nBuckets));
}
// For debugging
diff --git a/daemon/table/name-tree.hpp b/daemon/table/name-tree.hpp
index c210a76..4048137 100644
--- a/daemon/table/name-tree.hpp
+++ b/daemon/table/name-tree.hpp
@@ -34,14 +34,17 @@
namespace name_tree {
/**
- * \brief Compute the hash value of the given name prefix.
- * \todo 1) have another parameter that specifies the number of components
- * 2) optimize the hash function to return a list of have values for all
- * the (or a certain number of, like a batch operation) prefixes. 3) move
- * the hash-related code to a separate file in /core or ndn-cpp-dev lib.
+ * \brief Compute the hash value of the given name prefix's WIRE FORMAT
*/
-uint32_t
-hashName(const Name& prefix);
+size_t
+computeHash(const Name& prefix);
+
+/**
+ * \brief Incrementally compute hash values
+ * \return Return a vector of hash values, starting from the root prefix
+ */
+std::vector<size_t>
+computeHashSet(const Name& prefix);
/// a predicate to accept or reject an Entry in find operations
typedef function<bool (const Entry& entry)> EntrySelector;
@@ -80,7 +83,7 @@
class const_iterator;
explicit
- NameTree(size_t nBuckets);
+ NameTree(size_t nBuckets = 1024);
~NameTree();
@@ -254,9 +257,13 @@
private:
size_t m_nItems; // Number of items being stored
size_t m_nBuckets; // Number of hash buckets
- double m_loadFactor;
- size_t m_resizeThreshold;
- int m_resizeFactor;
+ size_t m_minNBuckets; // Minimum number of hash buckets
+ double m_enlargeLoadFactor;
+ size_t m_enlargeThreshold;
+ int m_enlargeFactor;
+ double m_shrinkLoadFactor;
+ size_t m_shrinkThreshold;
+ double m_shrinkFactor;
name_tree::Node** m_buckets; // Name Tree Buckets in the NPHT
shared_ptr<name_tree::Entry> m_end;
const_iterator m_endIterator;