bloom-filter: code cleanup

Change-Id: I72cec068778d26c62b05060030013f148ef7c5da
diff --git a/PSync/detail/bloom-filter.cpp b/PSync/detail/bloom-filter.cpp
index f700f16..89447c0 100644
--- a/PSync/detail/bloom-filter.cpp
+++ b/PSync/detail/bloom-filter.cpp
@@ -47,22 +47,19 @@
 #include "PSync/detail/util.hpp"
 
 #include <ndn-cxx/util/exception.hpp>
-#include <ndn-cxx/util/logger.hpp>
 
 #include <algorithm>
 #include <cmath>
 #include <cstddef>
-#include <iterator>
-#include <limits>
 #include <cstdlib>
-
-// https://github.com/ArashPartow/bloom
-
-NDN_LOG_INIT(psync.BloomFilter);
+#include <limits>
 
 namespace psync {
 
-static const std::size_t bits_per_char = 0x08;
+// https://github.com/ArashPartow/bloom
+
+static const std::size_t bits_per_char = 0x08;    // 8 bits in 1 char(unsigned)
+
 static const unsigned char bit_mask[bits_per_char] = {
   0x01,  //00000001
   0x02,  //00000010
@@ -74,89 +71,154 @@
   0x80   //10000000
 };
 
-BloomParameters::BloomParameters()
-: minimum_size(1)
-, maximum_size(std::numeric_limits<unsigned int>::max())
-, minimum_number_of_hashes(1)
-, maximum_number_of_hashes(std::numeric_limits<unsigned int>::max())
-, projected_element_count(200)
-, false_positive_probability(1.0 / projected_element_count)
-, random_seed(0xA5A5A5A55A5A5A5AULL)
-{}
-
-bool
-BloomParameters::compute_optimal_parameters()
+class bloom_parameters
 {
-  if (!(*this)) {
-    return false;
-  }
+public:
 
-  double min_m = std::numeric_limits<double>::infinity();
-  double min_k = 0.0;
-  double curr_m = 0.0;
-  double k = 1.0;
+   bloom_parameters()
+   : minimum_size(1),
+     maximum_size(std::numeric_limits<unsigned int>::max()),
+     minimum_number_of_hashes(1),
+     maximum_number_of_hashes(std::numeric_limits<unsigned int>::max()),
+     projected_element_count(200),
+     false_positive_probability(1.0 / projected_element_count),
+     random_seed(0xA5A5A5A55A5A5A5AULL)
+   {}
 
-  while (k < 1000.0)
-  {
-    double numerator   = (- k * projected_element_count);
-    double denominator = std::log(1.0 - std::pow(false_positive_probability, 1.0 / k));
-    curr_m = numerator / denominator;
-    if (curr_m < min_m)
-    {
-      min_m = curr_m;
-      min_k = k;
-    }
-    k += 1.0;
-  }
+   inline bool operator!()
+   {
+      return (minimum_size > maximum_size)      ||
+             (minimum_number_of_hashes > maximum_number_of_hashes) ||
+             (minimum_number_of_hashes < 1)     ||
+             (0 == maximum_number_of_hashes)    ||
+             (0 == projected_element_count)     ||
+             (false_positive_probability < 0.0) ||
+             (std::numeric_limits<double>::infinity() == std::abs(false_positive_probability)) ||
+             (0 == random_seed)                 ||
+             (0xFFFFFFFFFFFFFFFFULL == random_seed);
+   }
 
-  optimal_parameters_t& optp = optimal_parameters;
+   // Allowable min/max size of the bloom filter in bits
+   unsigned int minimum_size;
+   unsigned int maximum_size;
 
-  optp.number_of_hashes = static_cast<unsigned int>(min_k);
-  optp.table_size = static_cast<unsigned int>(min_m);
-  optp.table_size += (((optp.table_size % bits_per_char) != 0) ? (bits_per_char - (optp.table_size % bits_per_char)) : 0);
+   // Allowable min/max number of hash functions
+   unsigned int minimum_number_of_hashes;
+   unsigned int maximum_number_of_hashes;
 
-  if (optp.number_of_hashes < minimum_number_of_hashes)
-     optp.number_of_hashes = minimum_number_of_hashes;
-  else if (optp.number_of_hashes > maximum_number_of_hashes)
-     optp.number_of_hashes = maximum_number_of_hashes;
+   // The approximate number of elements to be inserted
+   // into the bloom filter, should be within one order
+   // of magnitude. The default is 200.
+   unsigned int projected_element_count;
 
-  if (optp.table_size < minimum_size)
-     optp.table_size = minimum_size;
-  else if (optp.table_size > maximum_size)
-     optp.table_size = maximum_size;
+   // The approximate false positive probability expected
+   // from the bloom filter. The default is assumed to be
+   // the reciprocal of the projected_element_count.
+   double false_positive_probability;
 
-  return true;
-}
+   unsigned long long int random_seed;
 
-BloomFilter::BloomFilter()
- : bit_table_(0)
- , salt_count_(0)
- , table_size_(0)
- , raw_table_size_(0)
- , projected_element_count_(0)
- , inserted_element_count_(0)
- , random_seed_(0)
- , desired_false_positive_probability_(0.0)
-{}
+   struct optimal_parameters_t
+   {
+      optimal_parameters_t()
+      : number_of_hashes(0),
+        table_size(0)
+      {}
 
-BloomFilter::BloomFilter(const BloomParameters& p)
- : bit_table_(0)
- , projected_element_count_(p.projected_element_count)
- , inserted_element_count_(0)
- , random_seed_((p.random_seed * 0xA5A5A5A5) + 1)
- , desired_false_positive_probability_(p.false_positive_probability)
+      unsigned int number_of_hashes;
+      unsigned int table_size;
+   };
+
+   optimal_parameters_t optimal_parameters;
+
+   bool compute_optimal_parameters()
+   {
+      /*
+        Note:
+        The following will attempt to find the number of hash functions
+        and minimum amount of storage bits required to construct a bloom
+        filter consistent with the user defined false positive probability
+        and estimated element insertion count.
+      */
+
+      if (!(*this))
+         return false;
+
+      double min_m  = std::numeric_limits<double>::infinity();
+      double min_k  = 0.0;
+      double k      = 1.0;
+
+      while (k < 1000.0)
+      {
+         const double numerator   = (- k * projected_element_count);
+         const double denominator = std::log(1.0 - std::pow(false_positive_probability, 1.0 / k));
+
+         const double curr_m = numerator / denominator;
+
+         if (curr_m < min_m)
+         {
+            min_m = curr_m;
+            min_k = k;
+         }
+
+         k += 1.0;
+      }
+
+      optimal_parameters_t& optp = optimal_parameters;
+
+      optp.number_of_hashes = static_cast<unsigned int>(min_k);
+
+      optp.table_size = static_cast<unsigned int>(min_m);
+
+      optp.table_size += (((optp.table_size % bits_per_char) != 0) ? (bits_per_char - (optp.table_size % bits_per_char)) : 0);
+
+      if (optp.number_of_hashes < minimum_number_of_hashes)
+         optp.number_of_hashes = minimum_number_of_hashes;
+      else if (optp.number_of_hashes > maximum_number_of_hashes)
+         optp.number_of_hashes = maximum_number_of_hashes;
+
+      if (optp.table_size < minimum_size)
+         optp.table_size = minimum_size;
+      else if (optp.table_size > maximum_size)
+         optp.table_size = maximum_size;
+
+      return true;
+   }
+
+};
+
+
+BloomFilter::BloomFilter(const bloom_parameters& p)
+: projected_element_count_(p.projected_element_count),
+  inserted_element_count_(0),
+  random_seed_((p.random_seed * 0xA5A5A5A5) + 1),
+  desired_false_positive_probability_(p.false_positive_probability)
 {
   salt_count_ = p.optimal_parameters.number_of_hashes;
   table_size_ = p.optimal_parameters.table_size;
+
   generate_unique_salt();
-  raw_table_size_ = table_size_ / bits_per_char;
-  //bit_table_ = new cell_type[static_cast<std::size_t>(raw_table_size_)];
-  bit_table_.resize(static_cast<std::size_t>(raw_table_size_), 0x00);
+
+  bit_table_.resize(table_size_ / bits_per_char, static_cast<cell_type>(0x00));
+}
+
+static bloom_parameters
+makeParameters(unsigned int projected_element_count,
+               double false_positive_probability)
+{
+  bloom_parameters p;
+  p.projected_element_count = projected_element_count;
+  p.false_positive_probability = false_positive_probability;
+
+  if (!p.compute_optimal_parameters()) {
+    NDN_THROW(BloomFilter::Error("Bloom filter parameters are not correct!"));
+  }
+  return p;
 }
 
 BloomFilter::BloomFilter(unsigned int projected_element_count,
                          double false_positive_probability)
- : BloomFilter(getParameters(projected_element_count, false_positive_probability))
+  : BloomFilter(makeParameters(projected_element_count, false_positive_probability))
 {
 }
 
@@ -165,42 +227,25 @@
                          const ndn::name::Component& bfName)
   : BloomFilter(projected_element_count, false_positive_probability)
 {
-  std::vector<BloomFilter::cell_type> table(bfName.value_begin(), bfName.value_end());
-
-  if (table.size() != raw_table_size_) {
-    NDN_THROW(Error("Received BloomFilter cannot be decoded!"));
+  std::vector<cell_type> table(bfName.value_begin(), bfName.value_end());
+  if (table.size() != table_size_ / bits_per_char) {
+    NDN_THROW(Error("Bloom filter cannot be decoded!"));
   }
-  bit_table_ = table;
-}
-
-BloomParameters
-BloomFilter::getParameters(unsigned int projected_element_count,
-                           double false_positive_probability)
-{
-  BloomParameters opt;
-  opt.false_positive_probability = false_positive_probability;
-  opt.projected_element_count = projected_element_count;
-
-  if (!opt) {
-    NDN_LOG_WARN("Bloom parameters are not correct!");
-  }
-
-  opt.compute_optimal_parameters();
-  return opt;
+  bit_table_ = std::move(table);
 }
 
 void
 BloomFilter::appendToName(ndn::Name& name) const
 {
   name.appendNumber(projected_element_count_);
-  name.appendNumber((int)(desired_false_positive_probability_ * 1000));
+  name.appendNumber(static_cast<uint64_t>(desired_false_positive_probability_ * 1000));
   name.append(bit_table_.begin(), bit_table_.end());
 }
 
 void
 BloomFilter::clear()
 {
-  bit_table_.resize(static_cast<std::size_t>(raw_table_size_), 0x00);
+  std::fill(bit_table_.begin(), bit_table_.end(), static_cast<cell_type>(0x00));
   inserted_element_count_ = 0;
 }
 
@@ -208,12 +253,15 @@
 BloomFilter::insert(const std::string& key)
 {
   std::size_t bit_index = 0;
-  std::size_t bit = 0;
+  std::size_t bit       = 0;
+
   for (std::size_t i = 0; i < salt_.size(); ++i)
   {
-     compute_indices(murmurHash3(salt_[i], key), bit_index, bit);
-     bit_table_[bit_index/bits_per_char] |= bit_mask[bit];
+    compute_indices(murmurHash3(salt_[i], key), bit_index, bit);
+
+    bit_table_[bit_index / bits_per_char] |= bit_mask[bit];
   }
+
   ++inserted_element_count_;
 }
 
@@ -221,12 +269,14 @@
 BloomFilter::contains(const std::string& key) const
 {
   std::size_t bit_index = 0;
-  std::size_t bit = 0;
+  std::size_t bit       = 0;
 
   for (std::size_t i = 0; i < salt_.size(); ++i)
   {
     compute_indices(murmurHash3(salt_[i], key), bit_index, bit);
-    if ((bit_table_[bit_index/bits_per_char] & bit_mask[bit]) != bit_mask[bit]) {
+
+    if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit])
+    {
       return false;
     }
   }
@@ -234,10 +284,11 @@
   return true;
 }
 
-std::vector <BloomFilter::cell_type>
-BloomFilter::table() const
+void
+BloomFilter::compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const
 {
-  return bit_table_;
+  bit_index = hash % table_size_;
+  bit       = bit_index % bits_per_char;
 }
 
 void
@@ -250,6 +301,7 @@
     hash function with different values seems to be adequate.
   */
   const unsigned int predef_salt_count = 128;
+
   static const bloom_type predef_salt[predef_salt_count] =
                              {
                                 0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC,
@@ -291,19 +343,31 @@
     std::copy(predef_salt,
               predef_salt + salt_count_,
               std::back_inserter(salt_));
-    for (unsigned int i = 0; i < salt_.size(); ++i)
+
+    for (std::size_t i = 0; i < salt_.size(); ++i)
     {
+      /*
+         Note:
+         This is done to integrate the user defined random seed,
+         so as to allow for the generation of unique bloom filter
+         instances.
+      */
       salt_[i] = salt_[i] * salt_[(i + 3) % salt_.size()] + static_cast<bloom_type>(random_seed_);
     }
   }
   else
   {
-    std::copy(predef_salt,predef_salt + predef_salt_count,std::back_inserter(salt_));
+    std::copy(predef_salt, predef_salt + predef_salt_count, std::back_inserter(salt_));
+
     srand(static_cast<unsigned int>(random_seed_));
+
     while (salt_.size() < salt_count_)
     {
       bloom_type current_salt = static_cast<bloom_type>(rand()) * static_cast<bloom_type>(rand());
-      if (0 == current_salt) continue;
+
+      if (0 == current_salt)
+        continue;
+
       if (salt_.end() == std::find(salt_.begin(), salt_.end(), current_salt))
       {
         salt_.push_back(current_salt);
@@ -312,39 +376,4 @@
   }
 }
 
-void
-BloomFilter::compute_indices(const bloom_type& hash,
-                             std::size_t& bit_index, std::size_t& bit) const
-{
-  bit_index = hash % table_size_;
-  bit = bit_index % bits_per_char;
-}
-
-bool
-operator==(const BloomFilter& bf1, const BloomFilter& bf2)
-{
-  auto table1 = bf1.table();
-  auto table2 = bf2.table();
-
-  if (table1.size() != table2.size()) {
-    return false;
-  }
-
-  for (size_t i = 0; i < table1.size(); i++) {
-    if (table1[i] != table2[i]) {
-      return false;
-    }
-  }
-  return true;
-}
-
-std::ostream&
-operator<<(std::ostream& out, const BloomFilter& bf)
-{
-  for (const auto& element : bf.table()) {
-    out << unsigned(element);
-  }
-  return out;
-}
-
 } // namespace psync
diff --git a/PSync/detail/bloom-filter.hpp b/PSync/detail/bloom-filter.hpp
index f7e3726..ff0d213 100644
--- a/PSync/detail/bloom-filter.hpp
+++ b/PSync/detail/bloom-filter.hpp
@@ -47,63 +47,17 @@
 #define PSYNC_DETAIL_BLOOM_FILTER_HPP
 
 #include <ndn-cxx/name.hpp>
+#include <ndn-cxx/util/string-helper.hpp>
 
 #include <string>
 #include <vector>
-#include <cmath>
-#include <cstdlib>
 
 namespace psync {
 
-struct optimal_parameters_t
-{
-  optimal_parameters_t()
-  : number_of_hashes(0),
-    table_size(0)
-  {}
-
-  unsigned int number_of_hashes;
-  unsigned int table_size;
-};
-
-class BloomParameters
-{
-public:
-  BloomParameters();
-
-  bool
-  compute_optimal_parameters();
-
-  bool operator!() const
-  {
-    return (minimum_size > maximum_size)      ||
-           (minimum_number_of_hashes > maximum_number_of_hashes) ||
-           (minimum_number_of_hashes < 1)     ||
-           (0 == maximum_number_of_hashes)    ||
-           (0 == projected_element_count)     ||
-           (false_positive_probability < 0.0) ||
-           (std::numeric_limits<double>::infinity() == std::abs(false_positive_probability)) ||
-           (0 == random_seed)                 ||
-           (0xFFFFFFFFFFFFFFFFULL == random_seed);
-  }
-
-  unsigned int           minimum_size;
-  unsigned int           maximum_size;
-  unsigned int           minimum_number_of_hashes;
-  unsigned int           maximum_number_of_hashes;
-  unsigned int           projected_element_count;
-  double                 false_positive_probability;
-  unsigned long long int random_seed;
-  optimal_parameters_t   optimal_parameters;
-};
+class bloom_parameters;
 
 class BloomFilter
 {
-protected:
-  typedef uint32_t bloom_type;
-  typedef uint8_t cell_type;
-  typedef std::vector <cell_type>::iterator Iterator;
-
 public:
   class Error : public std::runtime_error
   {
@@ -111,9 +65,7 @@
     using std::runtime_error::runtime_error;
   };
 
-  BloomFilter();
-
-  explicit BloomFilter(const BloomParameters& p);
+  BloomFilter() = default;
 
   BloomFilter(unsigned int projected_element_count,
               double false_positive_probability);
@@ -122,10 +74,6 @@
               double false_positive_probability,
               const ndn::name::Component& bfName);
 
-  BloomParameters
-  getParameters(unsigned int projected_element_count,
-                double false_positive_probability);
-
   /**
    * @brief Append our bloom filter to the given name
    *
@@ -146,35 +94,53 @@
   bool
   contains(const std::string& key) const;
 
-  std::vector<cell_type>
-  table() const;
-
 private:
+  typedef uint32_t bloom_type;
+  typedef uint8_t cell_type;
+
+  explicit
+  BloomFilter(const bloom_parameters& p);
+
+  void
+  compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const;
+
   void
   generate_unique_salt();
 
-  void
-  compute_indices(const bloom_type& hash,
-                  std::size_t& bit_index, std::size_t& bit) const;
+private: // non-member operators
+  // NOTE: the following "hidden friend" operators are available via
+  //       argument-dependent lookup only and must be defined inline.
+
+  friend bool
+  operator==(const BloomFilter& lhs, const BloomFilter& rhs)
+  {
+    return lhs.bit_table_ == rhs.bit_table_;
+  }
+
+  friend bool
+  operator!=(const BloomFilter& lhs, const BloomFilter& rhs)
+  {
+    return lhs.bit_table_ != rhs.bit_table_;
+  }
+
+  friend std::ostream&
+  operator<<(std::ostream& os, const BloomFilter& bf)
+  {
+    ndn::printHex(os, bf.bit_table_.data(), bf.bit_table_.size(), false);
+    return os;
+  }
 
 private:
-  std::vector <bloom_type> salt_;
-  std::vector <cell_type>  bit_table_;
-  unsigned int             salt_count_;
-  unsigned int             table_size_; // 8 * raw_table_size;
-  unsigned int             raw_table_size_;
-  unsigned int             projected_element_count_;
-  unsigned int             inserted_element_count_;
-  unsigned long long int   random_seed_;
-  double                   desired_false_positive_probability_;
+  std::vector<bloom_type> salt_;
+  std::vector<cell_type>  bit_table_;
+  unsigned int            salt_count_ = 0;
+  unsigned int            table_size_ = 0;
+  unsigned int            projected_element_count_ = 0;
+  unsigned int            inserted_element_count_ = 0;
+  unsigned long long int  random_seed_ = 0;
+  double                  desired_false_positive_probability_ = 0.0;
 };
 
-bool
-operator==(const BloomFilter& bf1, const BloomFilter& bf2);
-
-std::ostream&
-operator<<(std::ostream& out, const BloomFilter& bf);
-
 } // namespace psync
 
 #endif // PSYNC_DETAIL_BLOOM_FILTER_HPP