storage: skiplist for repo index
This version of skiplist is low efficient because of the map in it
Change-Id: Idecc9016619ca9ab5cbb17b078f71b1cc3d4b03e
refs #1737
diff --git a/src/common.hpp b/src/common.hpp
index 70d959f..1f27bb3 100644
--- a/src/common.hpp
+++ b/src/common.hpp
@@ -34,11 +34,15 @@
#include <boost/utility.hpp>
#include <boost/random/mersenne_twister.hpp>
#include <boost/random/uniform_int_distribution.hpp>
+#include <boost/random/geometric_distribution.hpp>
#include <map>
#include <string>
#include <vector>
+#include <queue>
+#include <list>
#include <algorithm>
+#include <iostream>
namespace repo {
diff --git a/src/storage/skiplist.hpp b/src/storage/skiplist.hpp
new file mode 100644
index 0000000..86602b5
--- /dev/null
+++ b/src/storage/skiplist.hpp
@@ -0,0 +1,370 @@
+/* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
+/**
+* Copyright (c) 2014, Regents of the University of California.
+*
+* This file is part of NDN repo-ng (Next generation of NDN repository).
+* See AUTHORS.md for complete list of repo-ng authors and contributors.
+*
+* repo-ng is free software: you can redistribute it and/or modify it under the terms
+* of the GNU General Public License as published by the Free Software Foundation,
+* either version 3 of the License, or (at your option) any later version.
+*
+* repo-ng is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+* PURPOSE. See the GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License along with
+* repo-ng, e.g., in COPYING.md file. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef REPO_STORAGE_SKIPLIST_HPP
+#define REPO_STORAGE_SKIPLIST_HPP
+
+#include "common.hpp"
+
+#include <boost/utility.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_int_distribution.hpp>
+
+namespace repo {
+
+class SkipList32Layers25Probabilty
+{
+public:
+ static size_t
+ getMaxLayers()
+ {
+ return 32;
+ }
+
+ static double
+ getProbability()
+ {
+ return 0.25; /* 25% */
+ }
+};
+
+template<typename T, typename Compare = std::less<T>,
+ typename Traits = SkipList32Layers25Probabilty >
+class SkipList
+{
+public:
+ //@brief layer of skiplist is std::list
+ typedef std::list<T> SkipListLayer;
+ //@brief iterator of skiplist
+ typedef typename SkipListLayer::iterator iterator;
+
+public:
+ explicit
+ SkipList();
+
+ ~SkipList();
+
+ size_t
+ size() const;
+
+public: // enumeration
+
+ iterator
+ begin() const;
+
+ iterator
+ end() const;
+
+ iterator
+ find(const T& key);
+
+ iterator
+ lower_bound(const T& key);
+
+ std::pair<iterator, bool>
+ insert(const T& key);
+
+ iterator
+ erase(iterator it);
+
+private:
+ size_t
+ pickRandomLayer() const;
+
+private:
+ //@brief store multiple layers
+ typedef std::list<shared_ptr<SkipListLayer> > SkipListHeadLayer;
+ //@brief iterate one layer of skiplist
+ typedef typename SkipListLayer::iterator layer_iterator;
+ //@brief iterate among different layers
+ typedef typename SkipListHeadLayer::iterator head_iterator;
+ typedef typename SkipListHeadLayer::reverse_iterator head_reverse_iterator;
+
+private:
+ SkipListHeadLayer m_skipList;
+ //@brief keep the locations of iterator where element is inserted in each layer
+ //key of external map is the layer of skiplist
+ //value is the internal map which maps the key T with the iterator of each layer
+ std::map<size_t, std::map<T, layer_iterator, Compare> > m_layerToEntry;
+ Compare m_compare;
+};
+
+template<typename T, typename Compare, typename Traits>
+inline
+SkipList<T, Compare, Traits>::SkipList()
+{
+ shared_ptr<SkipListLayer> zeroLayer = make_shared<SkipListLayer>();
+ m_skipList.push_back(zeroLayer);
+}
+
+template<typename T, typename Compare, typename Traits>
+inline
+SkipList<T, Compare, Traits>::~SkipList()
+{
+}
+
+template<typename T, typename Compare, typename Traits>
+inline size_t
+SkipList<T, Compare, Traits>::size() const
+{
+ return (*m_skipList.begin())->size();
+}
+
+template<typename T, typename Compare, typename Traits>
+inline typename SkipList<T, Compare, Traits>::iterator
+SkipList<T, Compare, Traits>::begin() const
+{
+ return (*m_skipList.begin())->begin();
+}
+
+template<typename T, typename Compare, typename Traits>
+inline typename SkipList<T, Compare, Traits>::iterator
+SkipList<T, Compare, Traits>::end() const
+{
+ return (*m_skipList.begin())->end();
+}
+
+template<typename T, typename Compare, typename Traits>
+inline typename SkipList<T, Compare, Traits>::iterator
+SkipList<T, Compare, Traits>::find(const T& key)
+{
+ iterator it = this->lower_bound(key);
+ if (it == this->end() || *it != key)
+ return this->end();
+ return it;
+}
+
+template<typename T, typename Compare, typename Traits>
+inline typename SkipList<T, Compare, Traits>::iterator
+SkipList<T, Compare, Traits>::lower_bound(const T& key)
+{
+ bool isIterated = false;
+ bool isIdentical = false;
+ head_reverse_iterator topLayer = m_skipList.rbegin();
+ layer_iterator head = (*topLayer)->begin();
+
+ if (!(*topLayer)->empty()) {
+ size_t layer = m_skipList.size() - 1;
+ for (head_reverse_iterator headReverseIterator = topLayer;
+ headReverseIterator != m_skipList.rend(); ++headReverseIterator) {
+ if (!isIterated)
+ head = (*headReverseIterator)->begin();
+
+ if (head != (*headReverseIterator)->end()) {
+ if (!isIterated && (!m_compare(*head, key) && !m_compare(key, *head))) {
+ if (layer > 0) {
+ layer--;
+ continue; // try lower layer
+ }
+ else {
+ isIterated = true;
+ isIdentical = true;
+ }
+ }
+ else {
+ layer_iterator layerIterator = head;
+ while (m_compare((*layerIterator) , key)) {
+ head = layerIterator;
+ isIterated = true;
+
+ ++layerIterator;
+ if (layerIterator == (*headReverseIterator)->end())
+ break;
+ }
+
+ }
+ }
+
+ if (layer > 0) {
+ //find the head in the lower layer
+ head = m_layerToEntry[layer - 1][*head];
+ }
+ else { //if we reached the first layer
+ if (isIterated) {
+ if (!isIdentical)
+ ++head;
+ //result = head;
+ return head;
+ }
+ else {
+ return head;
+ }
+ }
+
+ layer--;
+ }
+ }
+ return (*m_skipList.begin())->end();
+}
+
+template<typename T, typename Compare, typename Traits>
+inline size_t
+SkipList<T, Compare, Traits>::pickRandomLayer() const
+{
+ static boost::random::mt19937 gen;
+ static boost::random::geometric_distribution<size_t> dist(Traits::getProbability());
+ return std::min(dist(gen), Traits::getMaxLayers());
+}
+
+template<typename T, typename Compare, typename Traits>
+inline std::pair<typename SkipList<T, Compare, Traits>::iterator ,bool>
+SkipList<T, Compare, Traits>::insert(const T& key)
+{
+ bool insertInFront = false;
+ bool isIterated = false;
+ head_reverse_iterator topLayer = m_skipList.rbegin();
+ std::vector<layer_iterator> updateTable(Traits::getMaxLayers());
+ layer_iterator head = (*topLayer)->begin();
+
+ if (!(*topLayer)->empty()) {
+ size_t layer = m_skipList.size() - 1;
+ for (head_reverse_iterator headReverseIterator = topLayer;
+ headReverseIterator != m_skipList.rend(); ++headReverseIterator) {
+ if (!isIterated)
+ head = (*headReverseIterator)->begin();
+
+ updateTable[layer] = head;
+
+ if (head != (*headReverseIterator)->end()) {
+ if (!isIterated && !m_compare(*head, key)) {
+ --updateTable[layer];
+ insertInFront = true;
+ }
+ else {
+ layer_iterator layerIterator = head;
+
+ while (m_compare(*layerIterator , key)) {
+ head = layerIterator;
+ updateTable[layer] = layerIterator;
+ isIterated = true;
+
+ ++layerIterator;
+ if (layerIterator == (*headReverseIterator)->end())
+ break;
+ }
+
+ }
+ }
+
+ if (layer > 0)
+ head = m_layerToEntry[layer - 1][*head]; // move HEAD to the lower layer
+
+ layer--;
+ }
+ }
+ else {
+ updateTable[0] = (*topLayer)->begin(); //initialization
+ }
+
+ head = updateTable[0];
+ ++head;
+
+ bool isInBoundaries = (head != (*m_skipList.begin())->end());
+ bool isKeyIdentical = false;
+ if (isInBoundaries) {
+ isKeyIdentical = (!m_compare(*head, key) && !m_compare(key, *head));
+ }
+ if (isKeyIdentical) {
+ return std::make_pair(head, false);
+ }
+
+ size_t randomLayer = pickRandomLayer();
+
+ while (m_skipList.size() < randomLayer + 1) {
+ boost::shared_ptr<SkipListLayer> newLayer = make_shared<SkipListLayer>();
+ m_skipList.push_back(newLayer);
+
+ updateTable[(m_skipList.size() - 1)] = newLayer->begin();
+ }
+
+ size_t layer = 0;
+ layer_iterator result;
+
+ for (head_iterator headIterator = m_skipList.begin();
+ headIterator != m_skipList.end() && layer <= randomLayer; ++headIterator) {
+ if (updateTable[layer] == (*headIterator)->end() && !insertInFront) {
+ (*headIterator)->push_back(key);
+ layer_iterator last = (*headIterator)->end();
+ --last;
+ m_layerToEntry[layer][key] = last;
+
+ }
+ else if (updateTable[layer] == (*headIterator)->end() && insertInFront) {
+ (*headIterator)->push_front(key);
+ m_layerToEntry[layer][key] = (*headIterator)->begin();
+ }
+ else {
+ ++updateTable[layer]; // insert after
+ iterator position = (*headIterator)->insert(updateTable[layer], key);
+ m_layerToEntry[layer][key] = position; // save iterator where item was inserted
+ }
+ if (layer == 0)
+ result = m_layerToEntry[layer][key];
+ layer++;
+ }
+ return make_pair(result, true);
+}
+
+template<typename T, typename Compare, typename Traits>
+inline typename SkipList<T, Compare, Traits>::iterator
+SkipList<T, Compare, Traits>::erase(typename SkipList<T, Compare, Traits>::iterator it)
+{
+ if (it != end()) {
+ int layer = 1;
+ head_iterator headIterator = m_skipList.begin();
+ headIterator++;
+ for (; headIterator != m_skipList.end();) {
+ const std::map<T, layer_iterator, Compare>& layerIterators = m_layerToEntry[layer];
+ if(!layerIterators.empty()) {
+ typename std::map<T, layer_iterator, Compare>::const_iterator eraseIterator =
+ layerIterators.find(*it);
+ if (eraseIterator != layerIterators.end()) {
+ (*headIterator)->erase(eraseIterator->second);
+ m_layerToEntry[layer].erase(*it);
+ //remove layers that do not contain any elements (starting from the second layer)
+ if ((*headIterator)->empty()) {
+ // delete headIterator;
+ headIterator = m_skipList.erase(headIterator);
+ }
+ else {
+ ++headIterator;
+ }
+ layer++;
+ }
+ else {
+ break;
+ }
+ }
+ }
+
+ m_layerToEntry[0].erase(*it);
+ m_skipList.end();
+ typename SkipList<T, Compare, Traits>::iterator returnIterator =
+ (*m_skipList.begin())->erase(it);
+
+ return returnIterator;
+ }
+ else {
+ return end();
+ }
+}
+
+} // namespace repo
+
+#endif // REPO_STORAGE_SKIPLIST_HPP
diff --git a/tests/dataset-fixtures.hpp b/tests/dataset-fixtures.hpp
index 3adf0b4..3cc2685 100644
--- a/tests/dataset-fixtures.hpp
+++ b/tests/dataset-fixtures.hpp
@@ -50,6 +50,23 @@
typedef std::list<std::pair<ndn::Interest, ndn::shared_ptr<ndn::Data> > > InterestContainer;
InterestContainer interests;
+
+
+ struct DataSetNameCompare
+ {
+ bool operator()(const ndn::Name& a, const ndn::Name& b) const
+ {
+ return a < b;
+ }
+ };
+
+ struct DataSetDataCompare
+ {
+ bool operator()(const Data& a, const Data& b) const
+ {
+ return a.getName() < b.getName();
+ }
+ };
};
diff --git a/tests/unit/skiplist.cpp b/tests/unit/skiplist.cpp
new file mode 100644
index 0000000..34083d6
--- /dev/null
+++ b/tests/unit/skiplist.cpp
@@ -0,0 +1,159 @@
+/* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
+/**
+ * Copyright (c) 2014, Regents of the University of California.
+ *
+ * This file is part of NDN repo-ng (Next generation of NDN repository).
+ * See AUTHORS.md for complete list of repo-ng authors and contributors.
+ *
+ * repo-ng is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU General Public License as published by the Free Software Foundation,
+ * either version 3 of the License, or (at your option) any later version.
+ *
+ * repo-ng is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE. See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * repo-ng, e.g., in COPYING.md file. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "storage/skiplist.hpp"
+
+#include "../sqlite-fixture.hpp"
+#include "../dataset-fixtures.hpp"
+
+#include <boost/test/unit_test.hpp>
+#include <boost/concept_check.hpp>
+#include <iostream>
+
+namespace repo {
+namespace tests {
+
+BOOST_AUTO_TEST_SUITE(SkipList)
+
+template<class Dataset>
+class Fixture : public Dataset
+{
+};
+
+BOOST_FIXTURE_TEST_CASE_TEMPLATE(NdnNameSkipList, T, DatasetFixtures, Fixture<T>)
+{
+ repo::SkipList<ndn::Name, typename T::DataSetNameCompare> skipList;
+ //Insert
+ for (typename T::DataContainer::iterator i = this->data.begin();
+ i != this->data.end(); ++i) {
+ skipList.insert((*i)->getName());
+ }
+
+ //find and erase
+ for (typename T::DataContainer::iterator i = this->data.begin();
+ i != this->data.end(); ++i) {
+ typename repo::SkipList<ndn::Name, typename T::DataSetNameCompare>::iterator findIterator =
+ skipList.find((*i)->getName());
+ skipList.erase(findIterator);
+ }
+
+ //@todo test lower_bound
+}
+
+BOOST_AUTO_TEST_CASE(IntGtSkipList)
+{
+ typedef repo::SkipList<int, std::greater<int> > IntGtSkipList;
+ IntGtSkipList sl;
+ BOOST_CONCEPT_ASSERT((boost::BidirectionalIterator<IntGtSkipList::iterator>));
+
+ // initial state
+ BOOST_CHECK_EQUAL(sl.size(), 0);
+ BOOST_CHECK(sl.begin() == sl.end());
+ BOOST_CHECK(sl.lower_bound(10) == sl.end());
+
+ // initial contents
+ sl.insert(10);
+ sl.insert(20);
+ sl.insert(30);
+ BOOST_CHECK_EQUAL(sl.size(), 3);
+ // contents: [30,20,10]
+
+ // iterators
+ IntGtSkipList::iterator it1 = sl.begin();
+ IntGtSkipList::iterator it2 = sl.end();
+ --it2;
+ BOOST_CHECK_EQUAL(*it1, 30);
+ BOOST_CHECK_EQUAL(*it2, 10);
+ ++it1;
+ BOOST_CHECK_EQUAL(*it1, 20);
+ IntGtSkipList::iterator it3 = it1;
+ ++it1;
+ BOOST_CHECK(it2 == it1);
+ BOOST_CHECK_EQUAL(*it3, 20);
+
+ // lower_bound
+ IntGtSkipList::iterator found = sl.lower_bound(35);
+ BOOST_CHECK(found == sl.begin());
+ BOOST_CHECK_EQUAL(*found, 30);
+ found = sl.lower_bound(30);
+ BOOST_CHECK_EQUAL(*found, 30);
+ found = sl.lower_bound(25);
+ BOOST_CHECK_EQUAL(*found, 20);
+ found = sl.lower_bound(20);
+ BOOST_CHECK_EQUAL(*found, 20);
+ found = sl.lower_bound(15);
+ BOOST_CHECK_EQUAL(*found, 10);
+ found = sl.lower_bound(10);
+ BOOST_CHECK_EQUAL(*found, 10);
+ found = sl.lower_bound(5);
+ BOOST_CHECK(found == sl.end());
+
+ // insert duplicate
+ std::pair<IntGtSkipList::iterator, bool> insertRes = sl.insert(10);
+ BOOST_CHECK_EQUAL(insertRes.second, false);
+ BOOST_CHECK_EQUAL(*(insertRes.first), 10);
+ BOOST_CHECK_EQUAL(sl.size(), 3);
+ insertRes = sl.insert(20);
+ BOOST_CHECK_EQUAL(insertRes.second, false);
+ BOOST_CHECK_EQUAL(*(insertRes.first), 20);
+ BOOST_CHECK_EQUAL(sl.size(), 3);
+ insertRes = sl.insert(30);
+ BOOST_CHECK_EQUAL(insertRes.second, false);
+ BOOST_CHECK_EQUAL(*(insertRes.first), 30);
+ BOOST_CHECK_EQUAL(sl.size(), 3);
+
+ // insert non-duplicate
+ insertRes = sl.insert(5);
+ BOOST_CHECK_EQUAL(insertRes.second, true);
+ BOOST_CHECK_EQUAL(*(insertRes.first), 5);
+ BOOST_CHECK_EQUAL(sl.size(), 4);
+ insertRes = sl.insert(35);
+ BOOST_CHECK_EQUAL(insertRes.second, true);
+ BOOST_CHECK_EQUAL(*(insertRes.first), 35);
+ BOOST_CHECK_EQUAL(sl.size(), 5);
+ // contents: [35,30,20,10,5]
+
+ // erase
+ it1 = sl.erase(sl.begin());
+ // contents: [30,20,10,5]
+ BOOST_CHECK_EQUAL(*it1, 30);
+ BOOST_CHECK_EQUAL(sl.size(), 4);
+ it2 = sl.end();
+ --it2;
+ it1 = sl.erase(it2);
+ // contents: [30,20,10]
+ BOOST_CHECK(it1 == sl.end());
+ BOOST_CHECK_EQUAL(sl.size(), 3);
+ it2 = sl.lower_bound(20);
+ it1 = sl.erase(it2);
+ // contents: [30,10]
+ BOOST_CHECK_EQUAL(*it1, 10);
+ BOOST_CHECK_EQUAL(sl.size(), 2);
+ it3 = it1;
+ --it1;
+ BOOST_CHECK(it1 == sl.begin());
+ BOOST_CHECK_EQUAL(*it1, 30);
+ ++it3;
+ BOOST_CHECK(it3 == sl.end());
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
+} // namespace tests
+} // namespace repo
diff --git a/wscript b/wscript
index b97c9f6..ccd14d3 100644
--- a/wscript
+++ b/wscript
@@ -29,7 +29,7 @@
conf.env['WITH_TOOLS'] = conf.options.with_tools
- USED_BOOST_LIBS = ['system', 'iostreams', 'filesystem']
+ USED_BOOST_LIBS = ['system', 'iostreams', 'filesystem', 'random']
if conf.env['WITH_TESTS']:
USED_BOOST_LIBS += ['unit_test_framework']
conf.check_boost(lib=USED_BOOST_LIBS, mandatory=True)