table: refactor NameTree hashtable

Hash computation (previously the CityHash-based computeHash and
computeHashSet helpers) and all bucket management are moved out of
name-tree.cpp into a dedicated hashtable (the m_ht member, constructed
from HashtableOptions). NameTree no longer maintains its own Node
array, resize(), insert(), or dump(); lookup(), findExactMatch(), and
findLongestPrefixMatch() delegate to the hashtable, and
eraseEntryIfEmpty() now takes a raw Entry* and returns the number of
erased entries instead of a bool.

refs #3687

Change-Id: I908c1d7e67da861fc86a756fc0bc69b99ee696f7
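---
Notes: the hunks below call computeHashes() and probe the new hashtable member
(m_ht) without showing their declarations, which live outside this diff. The
standalone sketch here only illustrates the underlying idea -- hash every
prefix of a Name in one pass, then probe a hashtable from the longest prefix
down without rehashing. It uses std::hash over plain string components and a
toy std::unordered_multimap; Name, HashSequence, and the table in this sketch
are simplifications for illustration, not the actual NFD types, and the real
code hashes the TLV wire encoding with CityHash.

#include <algorithm>
#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>

using Name = std::vector<std::string>;    // stand-in for ndn::Name
using HashSequence = std::vector<size_t>; // hashes[i] covers the first i components

// One pass over the components yields the hash of every prefix, mirroring the
// role of computeHashes() in the patch (same XOR scheme as the removed
// computeHashSet(), with std::hash in place of CityHash).
HashSequence
computeHashes(const Name& name)
{
  HashSequence hashes{0}; // hash of the empty prefix
  size_t h = 0;
  for (const auto& comp : name) {
    h ^= std::hash<std::string>{}(comp);
    hashes.push_back(h);
  }
  return hashes;
}

int
main()
{
  // toy table: hash of a stored name -> that name; collisions are resolved by
  // comparing the stored name against the probed prefix
  std::unordered_multimap<size_t, Name> table;
  for (const Name& stored : {Name{"ndn"}, Name{"ndn", "edu", "arizona"}}) {
    table.emplace(computeHashes(stored).back(), stored);
  }

  // longest-prefix match for /ndn/edu/arizona/cs/www: probe from the longest
  // prefix down, reusing the precomputed hash sequence at every step
  Name name{"ndn", "edu", "arizona", "cs", "www"};
  HashSequence hashes = computeHashes(name);
  for (size_t prefixLen = name.size() + 1; prefixLen-- > 0; ) {
    auto range = table.equal_range(hashes[prefixLen]);
    for (auto it = range.first; it != range.second; ++it) {
      const Name& stored = it->second;
      if (stored.size() == prefixLen &&
          std::equal(stored.begin(), stored.end(), name.begin())) {
        std::cout << "longest prefix match has " << prefixLen << " components\n";
        return 0;
      }
    }
  }
  std::cout << "no match\n";
  return 0;
}

Both the old computeHashSet() and the new computeHashes() follow this
compute-once, probe-many pattern; the refactor moves the bucket probing behind
the hashtable's find()/insert() overloads that accept a precomputed sequence.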
diff --git a/daemon/table/name-tree.cpp b/daemon/table/name-tree.cpp
index 09b57f7..bbe5784 100644
--- a/daemon/table/name-tree.cpp
+++ b/daemon/table/name-tree.cpp
@@ -25,7 +25,6 @@
#include "name-tree.hpp"
#include "core/logger.hpp"
-#include "core/city-hash.hpp"
#include <boost/concept/assert.hpp>
#include <boost/concept_check.hpp>
@@ -36,190 +35,30 @@
NFD_LOG_INIT("NameTree");
-// http://en.cppreference.com/w/cpp/concept/ForwardIterator
-BOOST_CONCEPT_ASSERT((boost::ForwardIterator<NameTree::const_iterator>));
-// boost::ForwardIterator follows SGI standard http://www.sgi.com/tech/stl/ForwardIterator.html,
-// which doesn't require DefaultConstructible
-#ifdef HAVE_IS_DEFAULT_CONSTRUCTIBLE
-static_assert(std::is_default_constructible<NameTree::const_iterator>::value,
- "NameTree::const_iterator must be default-constructible");
-#else
-BOOST_CONCEPT_ASSERT((boost::DefaultConstructible<NameTree::const_iterator>));
-#endif // HAVE_IS_DEFAULT_CONSTRUCTIBLE
-
-class Hash32
-{
-public:
- static size_t
- compute(const char* buffer, size_t length)
- {
- return static_cast<size_t>(CityHash32(buffer, length));
- }
-};
-
-class Hash64
-{
-public:
- static size_t
- compute(const char* buffer, size_t length)
- {
- return static_cast<size_t>(CityHash64(buffer, length));
- }
-};
-
-/// @cond NoDocumentation
-typedef boost::mpl::if_c<sizeof(size_t) >= 8, Hash64, Hash32>::type CityHash;
-/// @endcond
-
-// Interface of different hash functions
-size_t
-computeHash(const Name& prefix)
-{
- prefix.wireEncode(); // guarantees prefix's wire buffer is not empty
-
- size_t hashValue = 0;
- size_t hashUpdate = 0;
-
- for (Name::const_iterator it = prefix.begin(); it != prefix.end(); ++it) {
- const char* wireFormat = reinterpret_cast<const char*>( it->wire() );
- hashUpdate = CityHash::compute(wireFormat, it->size());
- hashValue ^= hashUpdate;
- }
-
- return hashValue;
-}
-
-std::vector<size_t>
-computeHashSet(const Name& prefix)
-{
- prefix.wireEncode(); // guarantees prefix's wire buffer is not empty
-
- size_t hashValue = 0;
- size_t hashUpdate = 0;
-
- std::vector<size_t> hashValueSet;
- hashValueSet.push_back(hashValue);
-
- for (Name::const_iterator it = prefix.begin(); it != prefix.end(); ++it) {
- const char* wireFormat = reinterpret_cast<const char*>( it->wire() );
- hashUpdate = CityHash::compute(wireFormat, it->size());
- hashValue ^= hashUpdate;
- hashValueSet.push_back(hashValue);
- }
-
- return hashValueSet;
-}
-
NameTree::NameTree(size_t nBuckets)
- : m_nItems(0)
- , m_nBuckets(nBuckets)
- , m_minNBuckets(nBuckets)
- , m_enlargeLoadFactor(0.5) // more than 50% buckets loaded
- , m_enlargeFactor(2) // double the hash table size
- , m_shrinkLoadFactor(0.1) // less than 10% buckets loaded
- , m_shrinkFactor(0.5) // reduce the number of buckets by half
+ : m_ht(HashtableOptions(nBuckets))
{
- m_enlargeThreshold = static_cast<size_t>(m_enlargeLoadFactor * static_cast<double>(m_nBuckets));
- m_shrinkThreshold = static_cast<size_t>(m_shrinkLoadFactor * static_cast<double>(m_nBuckets));
-
- // array of node pointers
- m_buckets = new Node*[m_nBuckets];
- // Initialize the pointer array
- for (size_t i = 0; i < m_nBuckets; ++i) {
- m_buckets[i] = nullptr;
- }
}
-NameTree::~NameTree()
-{
- for (size_t i = 0; i < m_nBuckets; ++i) {
- if (m_buckets[i] != nullptr) {
- delete m_buckets[i];
- }
- }
-
- delete[] m_buckets;
-}
-
-// insert() is a private function, and called by only lookup()
-std::pair<shared_ptr<Entry>, bool>
-NameTree::insert(const Name& prefix)
-{
- NFD_LOG_TRACE("insert " << prefix);
-
- size_t hashValue = computeHash(prefix);
- size_t loc = hashValue % m_nBuckets;
-
- NFD_LOG_TRACE("Name " << prefix << " hash value = " << hashValue << " location = " << loc);
-
- // Check if this Name has been stored
- Node* node = m_buckets[loc];
- Node* nodePrev = node;
-
- for (node = m_buckets[loc]; node != nullptr; node = node->m_next) {
- if (node->m_entry != nullptr) {
- if (prefix == node->m_entry->m_prefix) {
- return {node->m_entry, false}; // false: old entry
- }
- }
- nodePrev = node;
- }
-
- NFD_LOG_TRACE("Did not find " << prefix << ", need to insert it to the table");
-
- // If no bucket is empty occupied, we need to create a new node, and it is
- // linked from nodePrev
- node = new Node();
- node->m_prev = nodePrev;
-
- if (nodePrev == nullptr) {
- m_buckets[loc] = node;
- }
- else{
- nodePrev->m_next = node;
- }
-
- // Create a new Entry
- auto entry = make_shared<Entry>(prefix);
- entry->setHash(hashValue);
- node->m_entry = entry; // link the Entry to its Node
- entry->m_node = node; // link the node to Entry. Used in eraseEntryIfEmpty.
-
- return {entry, true}; // true: new entry
-}
-
-// Name Prefix Lookup. Create Name Tree Entry if not found
shared_ptr<Entry>
-NameTree::lookup(const Name& prefix)
+NameTree::lookup(const Name& name)
{
- NFD_LOG_TRACE("lookup " << prefix);
+ NFD_LOG_TRACE("lookup " << name);
- shared_ptr<Entry> entry;
- shared_ptr<Entry> parent;
+ HashSequence hashes = computeHashes(name);
+ const Node* node = nullptr;
+ Entry* parent = nullptr;
- for (size_t i = 0; i <= prefix.size(); ++i) {
- Name temp = prefix.getPrefix(i);
-
- // insert() will create the entry if it does not exist.
+ for (size_t prefixLen = 0; prefixLen <= name.size(); ++prefixLen) {
bool isNew = false;
- std::tie(entry, isNew) = insert(temp);
+ std::tie(node, isNew) = m_ht.insert(name, prefixLen, hashes);
- if (isNew) {
- ++m_nItems; // Increase the counter
- entry->m_parent = parent;
-
- if (parent != nullptr) {
- parent->m_children.push_back(entry);
- }
+ if (isNew && parent != nullptr) {
+ node->entry->setParent(*parent);
}
-
- if (m_nItems > m_enlargeThreshold) {
- resize(m_enlargeFactor * m_nBuckets);
- }
-
- parent = entry;
+ parent = node->entry.get();
}
- return entry;
+ return node->entry;
}
shared_ptr<Entry>
@@ -263,132 +102,46 @@
return nte;
}
-// return {false: this entry is not empty, true: this entry is empty and erased}
-bool
-NameTree::eraseEntryIfEmpty(shared_ptr<Entry> entry)
+size_t
+NameTree::eraseEntryIfEmpty(Entry* entry)
{
BOOST_ASSERT(entry != nullptr);
- NFD_LOG_TRACE("eraseEntryIfEmpty " << entry->getPrefix());
-
- // first check if this Entry can be erased
- if (entry->isEmpty()) {
- // update child-related info in the parent
- shared_ptr<Entry> parent = entry->getParent();
+ size_t nErased = 0;
+ for (Entry* parent = nullptr; entry != nullptr && entry->isEmpty(); entry = parent) {
+ parent = entry->getParent();
if (parent != nullptr) {
- std::vector<shared_ptr<Entry>>& parentChildrenList = parent->getChildren();
-
- bool isFound = false;
- size_t size = parentChildrenList.size();
- for (size_t i = 0; i < size; ++i) {
- if (parentChildrenList[i] == entry) {
- parentChildrenList[i] = parentChildrenList[size - 1];
- parentChildrenList.pop_back();
- isFound = true;
- break;
- }
- }
-
- BOOST_VERIFY(isFound == true);
+ entry->unsetParent();
}
- // remove this Entry and its Name Tree Node
- Node* node = entry->m_node;
- Node* nodePrev = node->m_prev;
-
- // configure the previous node
- if (nodePrev != nullptr) {
- // link the previous node to the next node
- nodePrev->m_next = node->m_next;
- }
- else {
- m_buckets[entry->getHash() % m_nBuckets] = node->m_next;
- }
-
- // link the previous node with the next node (skip the erased one)
- if (node->m_next != nullptr) {
- node->m_next->m_prev = nodePrev;
- node->m_next = 0;
- }
-
- BOOST_ASSERT(node->m_next == nullptr);
-
- --m_nItems;
- delete node;
-
- if (parent != nullptr) {
- eraseEntryIfEmpty(parent);
- }
-
- size_t newNBuckets = static_cast<size_t>(m_shrinkFactor * static_cast<double>(m_nBuckets));
-
- if (newNBuckets >= m_minNBuckets && m_nItems < m_shrinkThreshold) {
- resize(newNBuckets);
- }
-
- return true;
-
+ m_ht.erase(getNode(*entry));
+ ++nErased;
}
- return false;
+ if (nErased == 0) {
+ NFD_LOG_TRACE("not-erase " << entry->getName());
+ }
+ return nErased;
}
-// Exact Match
shared_ptr<Entry>
-NameTree::findExactMatch(const Name& prefix) const
+NameTree::findExactMatch(const Name& name) const
{
- NFD_LOG_TRACE("findExactMatch " << prefix);
-
- size_t hashValue = computeHash(prefix);
- size_t loc = hashValue % m_nBuckets;
-
- NFD_LOG_TRACE("Name " << prefix << " hash value = " << hashValue << " location = " << loc);
-
- shared_ptr<Entry> entry;
- Node* node = nullptr;
-
- for (node = m_buckets[loc]; node != nullptr; node = node->m_next) {
- entry = node->m_entry;
- if (entry != nullptr) {
- if (hashValue == entry->getHash() && prefix == entry->getPrefix()) {
- return entry;
- }
- }
- }
-
- // if not found, the default value of entry (null pointer) will be returned
- entry.reset();
- return entry;
+ const Node* node = m_ht.find(name, name.size());
+ return node == nullptr ? nullptr : node->entry;
}
// Longest Prefix Match
shared_ptr<Entry>
-NameTree::findLongestPrefixMatch(const Name& prefix, const EntrySelector& entrySelector) const
+NameTree::findLongestPrefixMatch(const Name& name, const EntrySelector& entrySelector) const
{
- NFD_LOG_TRACE("findLongestPrefixMatch " << prefix);
+ HashSequence hashes = computeHashes(name);
- shared_ptr<Entry> entry;
- std::vector<size_t> hashValueSet = computeHashSet(prefix);
-
- size_t hashValue = 0;
- size_t loc = 0;
-
- for (int i = static_cast<int>(prefix.size()); i >= 0; --i) {
- hashValue = hashValueSet[i];
- loc = hashValue % m_nBuckets;
-
- Node* node = nullptr;
- for (node = m_buckets[loc]; node != nullptr; node = node->m_next) {
- entry = node->m_entry;
- if (entry != nullptr) {
- // isPrefixOf() is used to avoid making a copy of the name
- if (hashValue == entry->getHash() &&
- entry->getPrefix().isPrefixOf(prefix) &&
- entrySelector(*entry)) {
- return entry;
- }
- }
+ for (ssize_t prefixLen = name.size(); prefixLen >= 0; --prefixLen) {
+ const Node* node = m_ht.find(name, prefixLen, hashes);
+ if (node != nullptr && entrySelector(*node->entry)) {
+ return node->entry;
}
}
@@ -396,12 +149,13 @@
}
shared_ptr<Entry>
-NameTree::findLongestPrefixMatch(shared_ptr<Entry> entry,
+NameTree::findLongestPrefixMatch(shared_ptr<Entry> entry1,
const EntrySelector& entrySelector) const
{
+ Entry* entry = entry1.get();
while (entry != nullptr) {
if (entrySelector(*entry)) {
- return entry;
+ return entry->shared_from_this();
}
entry = entry->getParent();
}
@@ -436,7 +190,7 @@
// trie from the root node.
shared_ptr<Entry> entry = findLongestPrefixMatch(prefix, entrySelector);
- return {Iterator(make_shared<PrefixMatchImpl>(*this, entrySelector), entry), end()};
+ return {Iterator(make_shared<PrefixMatchImpl>(*this, entrySelector), entry.get()), end()};
}
boost::iterator_range<NameTree::const_iterator>
@@ -453,98 +207,7 @@
{
// the first step is to process the root node
shared_ptr<Entry> entry = findExactMatch(prefix);
- return {Iterator(make_shared<PartialEnumerationImpl>(*this, entrySubTreeSelector), entry), end()};
-}
-
-// Hash Table Resize
-void
-NameTree::resize(size_t newNBuckets)
-{
- NFD_LOG_TRACE("resize");
-
- Node** newBuckets = new Node*[newNBuckets];
- size_t count = 0;
-
- // referenced ccnx hashtb.c hashtb_rehash()
- Node** pp = nullptr;
- Node* p = nullptr;
- Node* pre = nullptr;
- Node* q = nullptr; // record p->m_next
-
- for (size_t i = 0; i < newNBuckets; ++i) {
- newBuckets[i] = nullptr;
- }
-
- for (size_t i = 0; i < m_nBuckets; ++i) {
- for (p = m_buckets[i]; p != nullptr; p = q) {
- ++count;
- q = p->m_next;
- BOOST_ASSERT(p->m_entry != nullptr);
- uint32_t h = p->m_entry->m_hash;
- uint32_t b = h % newNBuckets;
- pre = nullptr;
- for (pp = &newBuckets[b]; *pp != nullptr; pp = &((*pp)->m_next)) {
- pre = *pp;
- }
- p->m_prev = pre;
- p->m_next = *pp; // Actually *pp always == nullptr in this case
- *pp = p;
- }
- }
-
- BOOST_ASSERT(count == m_nItems);
-
- Node** oldBuckets = m_buckets;
- m_buckets = newBuckets;
- delete[] oldBuckets;
-
- m_nBuckets = newNBuckets;
- m_enlargeThreshold = static_cast<size_t>(m_enlargeLoadFactor * static_cast<double>(m_nBuckets));
- m_shrinkThreshold = static_cast<size_t>(m_shrinkLoadFactor * static_cast<double>(m_nBuckets));
-}
-
-// For debugging
-void
-NameTree::dump(std::ostream& output) const
-{
- NFD_LOG_TRACE("dump()");
-
- Node* node = nullptr;
- shared_ptr<Entry> entry;
-
- for (size_t i = 0; i < m_nBuckets; ++i) {
- for (node = m_buckets[i]; node != nullptr; node = node->m_next) {
- entry = node->m_entry;
-
- // if the Entry exist, dump its information
- if (entry != nullptr) {
- output << "Bucket" << i << '\t' << entry->m_prefix.toUri() << '\n';
- output << "\t\tHash " << entry->m_hash << '\n';
-
- if (entry->m_parent != nullptr) {
- output << "\t\tparent->" << entry->m_parent->m_prefix.toUri();
- }
- else {
- output << "\t\tROOT";
- }
- output << '\n';
-
- if (!entry->m_children.empty()) {
- output << "\t\tchildren = " << entry->m_children.size() << '\n';
-
- for (size_t j = 0; j < entry->m_children.size(); ++j) {
- output << "\t\t\tChild " << j << " " << entry->m_children[j]->getPrefix() << '\n';
- }
- }
-
- }
-
- }
- }
-
- output << "Bucket count = " << m_nBuckets << '\n';
- output << "Stored item = " << m_nItems << '\n';
- output << "--------------------------\n";
+ return {Iterator(make_shared<PartialEnumerationImpl>(*this, entrySubTreeSelector), entry.get()), end()};
}
} // namespace name_tree