storage: skiplist for repo index

This version of skiplist is low efficient because of the map in it

Change-Id: Idecc9016619ca9ab5cbb17b078f71b1cc3d4b03e
refs #1737
diff --git a/src/common.hpp b/src/common.hpp
index 70d959f..1f27bb3 100644
--- a/src/common.hpp
+++ b/src/common.hpp
@@ -34,11 +34,15 @@
 #include <boost/utility.hpp>
 #include <boost/random/mersenne_twister.hpp>
 #include <boost/random/uniform_int_distribution.hpp>
+#include <boost/random/geometric_distribution.hpp>
 
 #include <map>
 #include <string>
 #include <vector>
+#include <queue>
+#include <list>
 #include <algorithm>
+#include <iostream>
 
 namespace repo {
 
diff --git a/src/storage/skiplist.hpp b/src/storage/skiplist.hpp
new file mode 100644
index 0000000..86602b5
--- /dev/null
+++ b/src/storage/skiplist.hpp
@@ -0,0 +1,370 @@
+/* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
+/**
+* Copyright (c) 2014, Regents of the University of California.
+*
+* This file is part of NDN repo-ng (Next generation of NDN repository).
+* See AUTHORS.md for complete list of repo-ng authors and contributors.
+*
+* repo-ng is free software: you can redistribute it and/or modify it under the terms
+* of the GNU General Public License as published by the Free Software Foundation,
+* either version 3 of the License, or (at your option) any later version.
+*
+* repo-ng is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+* PURPOSE. See the GNU General Public License for more details.
+*
+* You should have received a copy of the GNU General Public License along with
+* repo-ng, e.g., in COPYING.md file. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#ifndef REPO_STORAGE_SKIPLIST_HPP
+#define REPO_STORAGE_SKIPLIST_HPP
+
+#include "common.hpp"
+
+#include <boost/utility.hpp>
+#include <boost/random/mersenne_twister.hpp>
+#include <boost/random/uniform_int_distribution.hpp>
+
+namespace repo {
+
+class SkipList32Layers25Probabilty
+{
+public:
+  static size_t
+  getMaxLayers()
+  {
+    return 32;
+  }
+
+  static double
+  getProbability()
+  {
+    return 0.25; /* 25% */
+  }
+};
+
+template<typename T, typename Compare = std::less<T>,
+         typename Traits = SkipList32Layers25Probabilty >
+class SkipList
+{
+public:
+  //@brief layer of skiplist is std::list
+  typedef std::list<T> SkipListLayer;
+  //@brief iterator of skiplist
+  typedef typename SkipListLayer::iterator iterator;
+
+public:
+  explicit
+  SkipList();
+
+  ~SkipList();
+
+  size_t
+  size() const;
+
+public: // enumeration
+
+  iterator
+  begin() const;
+
+  iterator
+  end() const;
+
+  iterator
+  find(const T& key);
+
+  iterator
+  lower_bound(const T& key);
+
+  std::pair<iterator, bool>
+  insert(const T& key);
+
+  iterator
+  erase(iterator it);
+
+private:
+  size_t
+  pickRandomLayer() const;
+
+private:
+  //@brief store multiple layers
+  typedef std::list<shared_ptr<SkipListLayer> > SkipListHeadLayer;
+  //@brief iterate one layer of skiplist
+  typedef typename SkipListLayer::iterator layer_iterator;
+  //@brief iterate among different layers
+  typedef typename SkipListHeadLayer::iterator head_iterator;
+  typedef typename SkipListHeadLayer::reverse_iterator head_reverse_iterator;
+
+private:
+  SkipListHeadLayer m_skipList;
+  //@brief keep the locations of iterator where element is inserted in each layer
+  //key of external map is the layer of skiplist
+  //value is the internal map which maps the key T with the iterator of each layer
+  std::map<size_t, std::map<T, layer_iterator, Compare> > m_layerToEntry;
+  Compare m_compare;
+};
+
+template<typename T, typename Compare, typename Traits>
+inline
+SkipList<T, Compare, Traits>::SkipList()
+{
+  shared_ptr<SkipListLayer> zeroLayer = make_shared<SkipListLayer>();
+  m_skipList.push_back(zeroLayer);
+}
+
+template<typename T, typename Compare, typename Traits>
+inline
+SkipList<T, Compare, Traits>::~SkipList()
+{
+}
+
+template<typename T, typename Compare, typename Traits>
+inline size_t
+SkipList<T, Compare, Traits>::size() const
+{
+  return (*m_skipList.begin())->size();
+}
+
+template<typename T, typename Compare, typename Traits>
+inline typename SkipList<T, Compare, Traits>::iterator
+SkipList<T, Compare, Traits>::begin() const
+{
+   return (*m_skipList.begin())->begin();
+}
+
+template<typename T, typename Compare, typename Traits>
+inline typename SkipList<T, Compare, Traits>::iterator
+SkipList<T, Compare, Traits>::end() const
+{
+  return (*m_skipList.begin())->end();
+}
+
+template<typename T, typename Compare, typename Traits>
+inline typename SkipList<T, Compare, Traits>::iterator
+SkipList<T, Compare, Traits>::find(const T& key)
+{
+  iterator it = this->lower_bound(key);
+  if (it == this->end() || *it != key)
+    return this->end();
+  return it;
+}
+
+template<typename T, typename Compare, typename Traits>
+inline typename SkipList<T, Compare, Traits>::iterator
+SkipList<T, Compare, Traits>::lower_bound(const T& key)
+{
+  bool isIterated = false;
+  bool isIdentical = false;
+  head_reverse_iterator topLayer = m_skipList.rbegin();
+  layer_iterator head = (*topLayer)->begin();
+
+  if (!(*topLayer)->empty()) {
+    size_t layer = m_skipList.size() - 1;
+    for (head_reverse_iterator headReverseIterator = topLayer;
+         headReverseIterator != m_skipList.rend(); ++headReverseIterator) {
+      if (!isIterated)
+        head = (*headReverseIterator)->begin();
+
+      if (head != (*headReverseIterator)->end()) {
+        if (!isIterated && (!m_compare(*head, key) && !m_compare(key, *head))) {
+          if (layer > 0) {
+            layer--;
+            continue; // try lower layer
+          }
+          else {
+            isIterated = true;
+            isIdentical = true;
+          }
+        }
+        else {
+          layer_iterator layerIterator = head;
+          while (m_compare((*layerIterator) , key)) {
+            head = layerIterator;
+            isIterated = true;
+
+            ++layerIterator;
+            if (layerIterator == (*headReverseIterator)->end())
+              break;
+          }
+
+        }
+      }
+
+      if (layer > 0) {
+        //find the head in the lower layer
+        head = m_layerToEntry[layer - 1][*head];
+      }
+      else {  //if we reached the first layer
+        if (isIterated) {
+          if (!isIdentical)
+            ++head;
+          //result = head;
+          return head;
+        }
+        else {
+          return head;
+        }
+      }
+
+      layer--;
+    }
+  }
+  return (*m_skipList.begin())->end();
+}
+
+template<typename T, typename Compare, typename Traits>
+inline size_t
+SkipList<T, Compare, Traits>::pickRandomLayer() const
+{
+  static boost::random::mt19937 gen;
+  static boost::random::geometric_distribution<size_t> dist(Traits::getProbability());
+  return std::min(dist(gen), Traits::getMaxLayers());
+}
+
+template<typename T, typename Compare, typename Traits>
+inline std::pair<typename SkipList<T, Compare, Traits>::iterator ,bool>
+SkipList<T, Compare, Traits>::insert(const T& key)
+{
+  bool insertInFront = false;
+  bool isIterated = false;
+  head_reverse_iterator topLayer = m_skipList.rbegin();
+  std::vector<layer_iterator> updateTable(Traits::getMaxLayers());
+  layer_iterator head = (*topLayer)->begin();
+
+  if (!(*topLayer)->empty()) {
+    size_t layer = m_skipList.size() - 1;
+    for (head_reverse_iterator headReverseIterator = topLayer;
+         headReverseIterator != m_skipList.rend(); ++headReverseIterator) {
+      if (!isIterated)
+        head = (*headReverseIterator)->begin();
+
+      updateTable[layer] = head;
+
+      if (head != (*headReverseIterator)->end()) {
+        if (!isIterated && !m_compare(*head, key)) {
+          --updateTable[layer];
+          insertInFront = true;
+        }
+        else {
+          layer_iterator layerIterator = head;
+
+          while (m_compare(*layerIterator , key)) {
+            head = layerIterator;
+            updateTable[layer] = layerIterator;
+            isIterated = true;
+
+            ++layerIterator;
+            if (layerIterator == (*headReverseIterator)->end())
+              break;
+          }
+
+        }
+      }
+
+      if (layer > 0)
+        head = m_layerToEntry[layer - 1][*head]; // move HEAD to the lower layer
+
+      layer--;
+    }
+  }
+  else {
+    updateTable[0] = (*topLayer)->begin(); //initialization
+  }
+
+  head = updateTable[0];
+  ++head;
+
+  bool isInBoundaries = (head != (*m_skipList.begin())->end());
+  bool isKeyIdentical = false;
+  if (isInBoundaries) {
+    isKeyIdentical = (!m_compare(*head, key) && !m_compare(key, *head));
+  }
+  if (isKeyIdentical) {
+    return std::make_pair(head, false);
+  }
+
+  size_t randomLayer = pickRandomLayer();
+
+  while (m_skipList.size() < randomLayer + 1) {
+     boost::shared_ptr<SkipListLayer> newLayer = make_shared<SkipListLayer>();
+    m_skipList.push_back(newLayer);
+
+    updateTable[(m_skipList.size() - 1)] = newLayer->begin();
+  }
+
+  size_t layer = 0;
+  layer_iterator result;
+
+  for (head_iterator headIterator = m_skipList.begin();
+       headIterator != m_skipList.end() && layer <= randomLayer; ++headIterator) {
+    if (updateTable[layer] == (*headIterator)->end() && !insertInFront) {
+      (*headIterator)->push_back(key);
+      layer_iterator last = (*headIterator)->end();
+      --last;
+      m_layerToEntry[layer][key] = last;
+
+    }
+    else if (updateTable[layer] == (*headIterator)->end() && insertInFront) {
+      (*headIterator)->push_front(key);
+      m_layerToEntry[layer][key] = (*headIterator)->begin();
+    }
+    else {
+      ++updateTable[layer]; // insert after
+      iterator position = (*headIterator)->insert(updateTable[layer], key);
+      m_layerToEntry[layer][key] = position;  // save iterator where item was inserted
+    }
+    if (layer == 0)
+      result = m_layerToEntry[layer][key];
+    layer++;
+  }
+  return make_pair(result, true);
+}
+
+template<typename T, typename Compare, typename Traits>
+inline typename SkipList<T, Compare, Traits>::iterator
+SkipList<T, Compare, Traits>::erase(typename SkipList<T, Compare, Traits>::iterator it)
+{
+  if (it != end()) {
+    int layer = 1;
+    head_iterator headIterator = m_skipList.begin();
+    headIterator++;
+    for (; headIterator != m_skipList.end();) {
+      const std::map<T, layer_iterator, Compare>& layerIterators = m_layerToEntry[layer];
+      if(!layerIterators.empty()) {
+        typename std::map<T, layer_iterator, Compare>::const_iterator eraseIterator =
+          layerIterators.find(*it);
+        if (eraseIterator != layerIterators.end()) {
+          (*headIterator)->erase(eraseIterator->second);
+          m_layerToEntry[layer].erase(*it);
+          //remove layers that do not contain any elements (starting from the second layer)
+          if ((*headIterator)->empty()) {
+            // delete headIterator;
+            headIterator = m_skipList.erase(headIterator);
+          }
+          else {
+            ++headIterator;
+          }
+          layer++;
+        }
+        else {
+          break;
+        }
+      }
+    }
+
+    m_layerToEntry[0].erase(*it);
+    m_skipList.end();
+    typename SkipList<T, Compare, Traits>::iterator returnIterator =
+      (*m_skipList.begin())->erase(it);
+
+    return returnIterator;
+  }
+  else {
+    return end();
+  }
+}
+
+} // namespace repo
+
+#endif // REPO_STORAGE_SKIPLIST_HPP
diff --git a/tests/dataset-fixtures.hpp b/tests/dataset-fixtures.hpp
index 3adf0b4..3cc2685 100644
--- a/tests/dataset-fixtures.hpp
+++ b/tests/dataset-fixtures.hpp
@@ -50,6 +50,23 @@
 
   typedef std::list<std::pair<ndn::Interest, ndn::shared_ptr<ndn::Data> > > InterestContainer;
   InterestContainer interests;
+
+
+  struct DataSetNameCompare
+  {
+    bool operator()(const ndn::Name& a, const ndn::Name& b) const
+    {
+      return a < b;
+    }
+  };
+
+  struct DataSetDataCompare
+  {
+    bool operator()(const Data& a, const Data& b) const
+    {
+      return a.getName() < b.getName();
+    }
+  };
 };
 
 
diff --git a/tests/unit/skiplist.cpp b/tests/unit/skiplist.cpp
new file mode 100644
index 0000000..34083d6
--- /dev/null
+++ b/tests/unit/skiplist.cpp
@@ -0,0 +1,159 @@
+/* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
+/**
+ * Copyright (c) 2014,  Regents of the University of California.
+ *
+ * This file is part of NDN repo-ng (Next generation of NDN repository).
+ * See AUTHORS.md for complete list of repo-ng authors and contributors.
+ *
+ * repo-ng is free software: you can redistribute it and/or modify it under the terms
+ * of the GNU General Public License as published by the Free Software Foundation,
+ * either version 3 of the License, or (at your option) any later version.
+ *
+ * repo-ng is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
+ * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ * PURPOSE.  See the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * repo-ng, e.g., in COPYING.md file.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "storage/skiplist.hpp"
+
+#include "../sqlite-fixture.hpp"
+#include "../dataset-fixtures.hpp"
+
+#include <boost/test/unit_test.hpp>
+#include <boost/concept_check.hpp>
+#include <iostream>
+
+namespace repo {
+namespace tests {
+
+BOOST_AUTO_TEST_SUITE(SkipList)
+
+template<class Dataset>
+class Fixture : public Dataset
+{
+};
+
+BOOST_FIXTURE_TEST_CASE_TEMPLATE(NdnNameSkipList, T, DatasetFixtures, Fixture<T>)
+{
+  repo::SkipList<ndn::Name, typename T::DataSetNameCompare> skipList;
+  //Insert
+  for (typename T::DataContainer::iterator i = this->data.begin();
+       i != this->data.end(); ++i) {
+    skipList.insert((*i)->getName());
+  }
+
+  //find and erase
+  for (typename T::DataContainer::iterator i = this->data.begin();
+       i != this->data.end(); ++i) {
+    typename repo::SkipList<ndn::Name, typename T::DataSetNameCompare>::iterator findIterator =
+      skipList.find((*i)->getName());
+    skipList.erase(findIterator);
+  }
+
+  //@todo test lower_bound
+}
+
+BOOST_AUTO_TEST_CASE(IntGtSkipList)
+{
+  typedef repo::SkipList<int, std::greater<int> > IntGtSkipList;
+  IntGtSkipList sl;
+  BOOST_CONCEPT_ASSERT((boost::BidirectionalIterator<IntGtSkipList::iterator>));
+
+  // initial state
+  BOOST_CHECK_EQUAL(sl.size(), 0);
+  BOOST_CHECK(sl.begin() == sl.end());
+  BOOST_CHECK(sl.lower_bound(10) == sl.end());
+
+  // initial contents
+  sl.insert(10);
+  sl.insert(20);
+  sl.insert(30);
+  BOOST_CHECK_EQUAL(sl.size(), 3);
+  // contents: [30,20,10]
+
+  // iterators
+  IntGtSkipList::iterator it1 = sl.begin();
+  IntGtSkipList::iterator it2 = sl.end();
+  --it2;
+  BOOST_CHECK_EQUAL(*it1, 30);
+  BOOST_CHECK_EQUAL(*it2, 10);
+  ++it1;
+  BOOST_CHECK_EQUAL(*it1, 20);
+  IntGtSkipList::iterator it3 = it1;
+  ++it1;
+  BOOST_CHECK(it2 == it1);
+  BOOST_CHECK_EQUAL(*it3, 20);
+
+  // lower_bound
+  IntGtSkipList::iterator found = sl.lower_bound(35);
+  BOOST_CHECK(found == sl.begin());
+  BOOST_CHECK_EQUAL(*found, 30);
+  found = sl.lower_bound(30);
+  BOOST_CHECK_EQUAL(*found, 30);
+  found = sl.lower_bound(25);
+  BOOST_CHECK_EQUAL(*found, 20);
+  found = sl.lower_bound(20);
+  BOOST_CHECK_EQUAL(*found, 20);
+  found = sl.lower_bound(15);
+  BOOST_CHECK_EQUAL(*found, 10);
+  found = sl.lower_bound(10);
+  BOOST_CHECK_EQUAL(*found, 10);
+  found = sl.lower_bound(5);
+  BOOST_CHECK(found == sl.end());
+
+  // insert duplicate
+  std::pair<IntGtSkipList::iterator, bool> insertRes = sl.insert(10);
+  BOOST_CHECK_EQUAL(insertRes.second, false);
+  BOOST_CHECK_EQUAL(*(insertRes.first), 10);
+  BOOST_CHECK_EQUAL(sl.size(), 3);
+  insertRes = sl.insert(20);
+  BOOST_CHECK_EQUAL(insertRes.second, false);
+  BOOST_CHECK_EQUAL(*(insertRes.first), 20);
+  BOOST_CHECK_EQUAL(sl.size(), 3);
+  insertRes = sl.insert(30);
+  BOOST_CHECK_EQUAL(insertRes.second, false);
+  BOOST_CHECK_EQUAL(*(insertRes.first), 30);
+  BOOST_CHECK_EQUAL(sl.size(), 3);
+
+  // insert non-duplicate
+  insertRes = sl.insert(5);
+  BOOST_CHECK_EQUAL(insertRes.second, true);
+  BOOST_CHECK_EQUAL(*(insertRes.first), 5);
+  BOOST_CHECK_EQUAL(sl.size(), 4);
+  insertRes = sl.insert(35);
+  BOOST_CHECK_EQUAL(insertRes.second, true);
+  BOOST_CHECK_EQUAL(*(insertRes.first), 35);
+  BOOST_CHECK_EQUAL(sl.size(), 5);
+  // contents: [35,30,20,10,5]
+
+  // erase
+  it1 = sl.erase(sl.begin());
+  // contents: [30,20,10,5]
+  BOOST_CHECK_EQUAL(*it1, 30);
+  BOOST_CHECK_EQUAL(sl.size(), 4);
+  it2 = sl.end();
+  --it2;
+  it1 = sl.erase(it2);
+  // contents: [30,20,10]
+  BOOST_CHECK(it1 == sl.end());
+  BOOST_CHECK_EQUAL(sl.size(), 3);
+  it2 = sl.lower_bound(20);
+  it1 = sl.erase(it2);
+  // contents: [30,10]
+  BOOST_CHECK_EQUAL(*it1, 10);
+  BOOST_CHECK_EQUAL(sl.size(), 2);
+  it3 = it1;
+  --it1;
+  BOOST_CHECK(it1 == sl.begin());
+  BOOST_CHECK_EQUAL(*it1, 30);
+  ++it3;
+  BOOST_CHECK(it3 == sl.end());
+}
+
+BOOST_AUTO_TEST_SUITE_END()
+
+} // namespace tests
+} // namespace repo
diff --git a/wscript b/wscript
index b97c9f6..ccd14d3 100644
--- a/wscript
+++ b/wscript
@@ -29,7 +29,7 @@
 
     conf.env['WITH_TOOLS'] = conf.options.with_tools
 
-    USED_BOOST_LIBS = ['system', 'iostreams', 'filesystem']
+    USED_BOOST_LIBS = ['system', 'iostreams', 'filesystem', 'random']
     if conf.env['WITH_TESTS']:
         USED_BOOST_LIBS += ['unit_test_framework']
     conf.check_boost(lib=USED_BOOST_LIBS, mandatory=True)