iblt: consolidate peeling logic

operator- and listEntries() are combined into one function to reduce
memory copying.

refs #4911

Change-Id: I1129af6d6926df5e57cb4fce7015d55c05f64f10
diff --git a/PSync/detail/iblt.cpp b/PSync/detail/iblt.cpp
index 55f9a66..bb4ef56 100644
--- a/PSync/detail/iblt.cpp
+++ b/PSync/detail/iblt.cpp
@@ -107,15 +107,15 @@
   }
 }
 
-void
-IBLT::update(int plusOrMinus, uint32_t key)
+static inline void
+ibltUpdate(std::vector<HashTableEntry>& ht, int32_t plusOrMinus, uint32_t key)
 {
-  size_t bucketsPerHash = m_hashTable.size() / N_HASH;
+  size_t bucketsPerHash = ht.size() / N_HASH;
 
   for (size_t i = 0; i < N_HASH; i++) {
     size_t startEntry = i * bucketsPerHash;
     uint32_t h = murmurHash3(i, key);
-    HashTableEntry& entry = m_hashTable.at(startEntry + (h % bucketsPerHash));
+    HashTableEntry& entry = ht.at(startEntry + (h % bucketsPerHash));
     entry.count += plusOrMinus;
     entry.keySum ^= key;
     entry.keyCheck ^= murmurHash3(N_HASHCHECK, key);
@@ -125,63 +125,13 @@
 void
 IBLT::insert(uint32_t key)
 {
-  update(INSERT, key);
+  ibltUpdate(m_hashTable, 1, key);
 }
 
 void
 IBLT::erase(uint32_t key)
 {
-  update(ERASE, key);
-}
-
-bool
-IBLT::listEntries(std::set<uint32_t>& positive, std::set<uint32_t>& negative) const
-{
-  IBLT peeled = *this;
-
-  size_t nErased = 0;
-  do {
-    nErased = 0;
-    for (const auto& entry : peeled.m_hashTable) {
-      if (entry.isPure()) {
-        if (entry.count == 1) {
-          positive.insert(entry.keySum);
-        }
-        else {
-          negative.insert(entry.keySum);
-        }
-        peeled.update(-entry.count, entry.keySum);
-        ++nErased;
-      }
-    }
-  } while (nErased > 0);
-
-  // If any buckets for one of the hash functions is not empty,
-  // then we didn't peel them all:
-  for (const auto& entry : peeled.m_hashTable) {
-    if (!entry.isEmpty()) {
-      return false;
-    }
-  }
-
-  return true;
-}
-
-IBLT
-IBLT::operator-(const IBLT& other) const
-{
-  BOOST_ASSERT(m_hashTable.size() == other.m_hashTable.size());
-
-  IBLT result(*this);
-  for (size_t i = 0; i < m_hashTable.size(); i++) {
-    HashTableEntry& e1 = result.m_hashTable.at(i);
-    const HashTableEntry& e2 = other.m_hashTable.at(i);
-    e1.count -= e2.count;
-    e1.keySum ^= e2.keySum;
-    e1.keyCheck ^= e2.keyCheck;
-  }
-
-  return result;
+  ibltUpdate(m_hashTable, -1, key);
 }
 
 void
@@ -216,4 +166,49 @@
   return os;
 }
 
+IBLTDiff
+operator-(const IBLT& lhs, const IBLT& rhs)
+{
+  const auto& lht = lhs.getHashTable();
+  const auto& rht = rhs.getHashTable();
+  BOOST_ASSERT(lht.size() == rht.size());
+
+  std::vector<HashTableEntry> peeled(lht.size());
+  std::transform(lht.begin(), lht.end(), rht.begin(), peeled.begin(),
+    [] (const HashTableEntry& lhe, const HashTableEntry& rhe) {
+      HashTableEntry diff;
+      diff.count = lhe.count - rhe.count;
+      diff.keySum = lhe.keySum ^ rhe.keySum;
+      diff.keyCheck = lhe.keyCheck ^ rhe.keyCheck;
+      return diff;
+    }
+  );
+
+  std::set<uint32_t> positive;
+  std::set<uint32_t> negative;
+
+  size_t nErased = 0;
+  do {
+    nErased = 0;
+    for (const auto& entry : peeled) {
+      if (entry.isPure()) {
+        if (entry.count == 1) {
+          positive.insert(entry.keySum);
+        }
+        else {
+          negative.insert(entry.keySum);
+        }
+        ibltUpdate(peeled, -entry.count, entry.keySum);
+        ++nErased;
+      }
+    }
+  } while (nErased > 0);
+
+  // If any buckets for one of the hash functions is not empty,
+  // then we didn't peel them all:
+  bool canDecode = std::all_of(peeled.begin(), peeled.end(),
+                               [] (const HashTableEntry& entry) { return entry.isEmpty(); });
+  return {canDecode, std::move(positive), std::move(negative)};
+}
+
 } // namespace psync::detail
diff --git a/PSync/detail/iblt.hpp b/PSync/detail/iblt.hpp
index a7f5bf0..510614f 100644
--- a/PSync/detail/iblt.hpp
+++ b/PSync/detail/iblt.hpp
@@ -124,21 +124,6 @@
   void
   erase(uint32_t key);
 
-  /**
-   * @brief List all the entries in the IBLT
-   *
-   * This is called on a difference of two IBLTs: ownIBLT - rcvdIBLT
-   * Entries listed in positive are in ownIBLT but not in rcvdIBLT
-   * Entries listed in negative are in rcvdIBLT but not in ownIBLT
-   *
-   * @return whether decoding completed successfully
-   */
-  bool
-  listEntries(std::set<uint32_t>& positive, std::set<uint32_t>& negative) const;
-
-  IBLT
-  operator-(const IBLT& other) const;
-
   const std::vector<HashTableEntry>&
   getHashTable() const
   {
@@ -157,10 +142,6 @@
   void
   appendToName(ndn::Name& name) const;
 
-private:
-  void
-  update(int plusOrMinus, uint32_t key);
-
 private: // non-member operators
   // NOTE: the following "hidden friend" operators are available via
   //       argument-dependent lookup only and must be defined inline.
@@ -174,14 +155,34 @@
 
 private:
   std::vector<HashTableEntry> m_hashTable;
-  static constexpr int INSERT = 1;
-  static constexpr int ERASE = -1;
   CompressionScheme m_compressionScheme;
 };
 
 std::ostream&
 operator<<(std::ostream& os, const IBLT& iblt);
 
+/** @brief Represent the difference between two IBLTs, */
+struct IBLTDiff
+{
+  /** @brief Whether decoding completed successfully. */
+  bool canDecode = false;
+
+  /** @brief Entries in lhs but not rhs. */
+  std::set<uint32_t> positive;
+
+  /** @brief Entries in rhs but not lhs. */
+  std::set<uint32_t> negative;
+};
+
+/**
+ * @brief Compute the difference between two IBLTs.
+ * @param lhs own IBLT.
+ * @param rhs received IBLT. It must have same hashtable size @p lhs hashtable.
+ * @return decoding result.
+ */
+IBLTDiff
+operator-(const IBLT& lhs, const IBLT& rhs);
+
 } // namespace psync::detail
 
 #endif // PSYNC_DETAIL_IBLT_HPP
diff --git a/PSync/full-producer.cpp b/PSync/full-producer.cpp
index 8a09562..4a3401d 100644
--- a/PSync/full-producer.cpp
+++ b/PSync/full-producer.cpp
@@ -189,19 +189,15 @@
   }
 
   auto diff = m_iblt - iblt;
-
-  std::set<uint32_t> positive;
-  std::set<uint32_t> negative;
-
-  if (!diff.listEntries(positive, negative)) {
-    NDN_LOG_TRACE("Cannot decode differences, positive: " << positive.size()
-                  << " negative: " << negative.size() << " m_threshold: "
+  if (!diff.canDecode) {
+    NDN_LOG_TRACE("Cannot decode differences, positive: " << diff.positive.size()
+                  << " negative: " << diff.negative.size() << " m_threshold: "
                   << m_threshold);
 
     // Send all data if greater then threshold, else send positive below as usual
     // Or send if we can't get neither positive nor negative differences
-    if (positive.size() + negative.size() >= m_threshold ||
-        (positive.empty() && negative.empty())) {
+    if (diff.positive.size() + diff.negative.size() >= m_threshold ||
+        (diff.positive.empty() && diff.negative.empty())) {
       detail::State state;
       for (const auto& content : m_prefixes) {
         if (content.second != 0) {
@@ -225,13 +221,13 @@
   }
 
   detail::State state;
-  for (const auto& hash : positive) {
+  for (const auto& hash : diff.positive) {
     auto nameIt = m_biMap.left.find(hash);
     if (nameIt != m_biMap.left.end()) {
       ndn::Name nameWithoutSeq = nameIt->second.getPrefix(-1);
       // Don't sync up sequence number zero
       if (m_prefixes[nameWithoutSeq] != 0 &&
-          !isFutureHash(nameWithoutSeq, negative)) {
+          !isFutureHash(nameWithoutSeq, diff.negative)) {
         state.addContent(nameIt->second);
       }
     }
@@ -323,13 +319,10 @@
   for (auto it = m_pendingEntries.begin(); it != m_pendingEntries.end();) {
     const auto& entry = it->second;
     auto diff = m_iblt - entry.iblt;
-    std::set<uint32_t> positive;
-    std::set<uint32_t> negative;
-
-    if (!diff.listEntries(positive, negative)) {
+    if (!diff.canDecode) {
       NDN_LOG_TRACE("Decode failed for pending interest");
-      if (positive.size() + negative.size() >= m_threshold ||
-          (positive.empty() && negative.empty())) {
+      if (diff.positive.size() + diff.negative.size() >= m_threshold ||
+          (diff.positive.empty() && diff.negative.empty())) {
         NDN_LOG_TRACE("pos + neg > threshold or no diff can be found, erase pending interest");
         it = m_pendingEntries.erase(it);
         continue;
@@ -337,7 +330,7 @@
     }
 
     detail::State state;
-    for (const auto& hash : positive) {
+    for (const auto& hash : diff.positive) {
       auto nameIt = m_biMap.left.find(hash);
       if (nameIt != m_biMap.left.end()) {
         if (m_prefixes[nameIt->second.getPrefix(-1)] != 0) {
diff --git a/PSync/partial-producer.cpp b/PSync/partial-producer.cpp
index 21439db..7fe202a 100644
--- a/PSync/partial-producer.cpp
+++ b/PSync/partial-producer.cpp
@@ -1,6 +1,6 @@
 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
 /*
- * Copyright (c) 2014-2023,  The University of Memphis
+ * Copyright (c) 2014-2024,  The University of Memphis
  *
  * This file is part of PSync.
  * See AUTHORS.md for complete list of PSync authors and contributors.
@@ -159,19 +159,14 @@
   }
 
   // get the difference
-  auto diff = m_iblt - iblt;
-
   // non-empty positive means we have some elements that the others don't
-  std::set<uint32_t> positive;
-  std::set<uint32_t> negative;
+  auto diff = m_iblt - iblt;
 
   NDN_LOG_TRACE("Number elements in IBF: " << m_prefixes.size());
 
-  bool peel = diff.listEntries(positive, negative);
+  NDN_LOG_TRACE("diff.canDecode: " << diff.canDecode);
 
-  NDN_LOG_TRACE("Result of listEntries on the difference: " << peel);
-
-  if (!peel) {
+  if (!diff.canDecode) {
     NDN_LOG_DEBUG("Can't decode the difference, sending application Nack");
     sendApplicationNack(interestName);
     return;
@@ -179,9 +174,9 @@
 
   // generate content for Sync reply
   detail::State state;
-  NDN_LOG_TRACE("Size of positive set " << positive.size());
-  NDN_LOG_TRACE("Size of negative set " << negative.size());
-  for (const auto& hash : positive) {
+  NDN_LOG_TRACE("Size of positive set " << diff.positive.size());
+  NDN_LOG_TRACE("Size of negative set " << diff.negative.size());
+  for (const auto& hash : diff.positive) {
     auto nameIt = m_biMap.left.find(hash);
     if (nameIt != m_biMap.left.end()) {
       if (bf.contains(nameIt->second.getPrefix(-1))) {
@@ -192,9 +187,9 @@
     }
   }
 
-  NDN_LOG_TRACE("m_threshold: " << m_threshold << " Total: " << positive.size() + negative.size());
+  NDN_LOG_TRACE("m_threshold: " << m_threshold << " Total: " << diff.positive.size() + diff.negative.size());
 
-  if (positive.size() + negative.size() >= m_threshold || !state.getContent().empty()) {
+  if (diff.positive.size() + diff.negative.size() >= m_threshold || !state.getContent().empty()) {
 
     // send back data
     ndn::Name syncDataName = interestName;
@@ -221,24 +216,20 @@
     const PendingEntryInfo& entry = it->second;
 
     auto diff = m_iblt - entry.iblt;
-    std::set<uint32_t> positive;
-    std::set<uint32_t> negative;
 
-    bool peel = diff.listEntries(positive, negative);
-
-    NDN_LOG_TRACE("Result of listEntries on the difference: " << peel);
+    NDN_LOG_TRACE("diff.canDecode: " << diff.canDecode);
 
     NDN_LOG_TRACE("Number elements in IBF: " << m_prefixes.size());
-    NDN_LOG_TRACE("m_threshold: " << m_threshold << " Total: " << positive.size() + negative.size());
+    NDN_LOG_TRACE("m_threshold: " << m_threshold << " Total: " << diff.positive.size() + diff.negative.size());
 
-    if (!peel) {
+    if (!diff.canDecode) {
       NDN_LOG_TRACE("Decoding of differences with stored IBF unsuccessful, deleting pending interest");
       m_pendingEntries.erase(it++);
       continue;
     }
 
     detail::State state;
-    if (entry.bf.contains(prefix) || positive.size() + negative.size() >= m_threshold) {
+    if (entry.bf.contains(prefix) || diff.positive.size() + diff.negative.size() >= m_threshold) {
       if (entry.bf.contains(prefix)) {
         state.addContent(ndn::Name(prefix).appendNumber(m_prefixes[prefix]));
         NDN_LOG_DEBUG("sending sync content " << prefix << " " << std::to_string(m_prefixes[prefix]));
diff --git a/tests/test-iblt.cpp b/tests/test-iblt.cpp
index 7843558..d42d174 100644
--- a/tests/test-iblt.cpp
+++ b/tests/test-iblt.cpp
@@ -1,6 +1,6 @@
 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
 /*
- * Copyright (c) 2014-2023,  The University of Memphis
+ * Copyright (c) 2014-2024,  The University of Memphis
  *
  * This file is part of PSync.
  * See AUTHORS.md for complete list of PSync authors and contributors.
@@ -146,13 +146,10 @@
   uint32_t hash2 = murmurHash3(11, prefix2);
   rcvdIBF.insert(hash2);
 
-  IBLT diff = ownIBF - rcvdIBF;
-  std::set<uint32_t> positive;
-  std::set<uint32_t> negative;
-
-  BOOST_CHECK(diff.listEntries(positive, negative));
-  BOOST_CHECK(*positive.begin() == hash1);
-  BOOST_CHECK(*negative.begin() == hash2);
+  auto diff = ownIBF - rcvdIBF;
+  BOOST_CHECK(diff.canDecode);
+  BOOST_CHECK(*diff.positive.begin() == hash1);
+  BOOST_CHECK(*diff.negative.begin() == hash2);
 }
 
 BOOST_AUTO_TEST_CASE(Difference)
@@ -162,32 +159,30 @@
   IBLT ownIBF(size, CompressionScheme::DEFAULT);
   IBLT rcvdIBF = ownIBF;
 
-  IBLT diff = ownIBF - rcvdIBF;
+  auto diff = ownIBF - rcvdIBF;
+  // non-empty Positive means we have some elements that the others don't
 
-  std::set<uint32_t> positive; // non-empty Positive means we have some elements that the others don't
-  std::set<uint32_t> negative;
-
-  BOOST_CHECK(diff.listEntries(positive, negative));
-  BOOST_CHECK_EQUAL(positive.size(), 0);
-  BOOST_CHECK_EQUAL(negative.size(), 0);
+  BOOST_CHECK(diff.canDecode);
+  BOOST_CHECK_EQUAL(diff.positive.size(), 0);
+  BOOST_CHECK_EQUAL(diff.negative.size(), 0);
 
   auto prefix = Name("/test/memphis").appendNumber(1);
   uint32_t newHash = murmurHash3(11, prefix);
   ownIBF.insert(newHash);
 
   diff = ownIBF - rcvdIBF;
-  BOOST_CHECK(diff.listEntries(positive, negative));
-  BOOST_CHECK_EQUAL(positive.size(), 1);
-  BOOST_CHECK_EQUAL(negative.size(), 0);
+  BOOST_CHECK(diff.canDecode);
+  BOOST_CHECK_EQUAL(diff.positive.size(), 1);
+  BOOST_CHECK_EQUAL(diff.negative.size(), 0);
 
   prefix = Name("/test/csu").appendNumber(1);
   newHash = murmurHash3(11, prefix);
   rcvdIBF.insert(newHash);
 
   diff = ownIBF - rcvdIBF;
-  BOOST_CHECK(diff.listEntries(positive, negative));
-  BOOST_CHECK_EQUAL(positive.size(), 1);
-  BOOST_CHECK_EQUAL(negative.size(), 1);
+  BOOST_CHECK(diff.canDecode);
+  BOOST_CHECK_EQUAL(diff.positive.size(), 1);
+  BOOST_CHECK_EQUAL(diff.negative.size(), 1);
 }
 
 BOOST_AUTO_TEST_CASE(DifferenceBwOversizedIBFs)
@@ -212,17 +207,15 @@
   uint32_t newHash = murmurHash3(11, prefix);
   ownIBF.insert(newHash);
 
-  IBLT diff = ownIBF - rcvdIBF;
+  auto diff = ownIBF - rcvdIBF;
+  BOOST_CHECK(diff.canDecode);
+  BOOST_CHECK_EQUAL(diff.positive.size(), 1);
+  BOOST_CHECK_EQUAL(*diff.positive.begin(), newHash);
+  BOOST_CHECK_EQUAL(diff.negative.size(), 0);
 
-  std::set<uint32_t> positive;
-  std::set<uint32_t> negative;
-  BOOST_CHECK(diff.listEntries(positive, negative));
-  BOOST_CHECK_EQUAL(positive.size(), 1);
-  BOOST_CHECK_EQUAL(*positive.begin(), newHash);
-  BOOST_CHECK_EQUAL(negative.size(), 0);
-
-  BOOST_CHECK(!ownIBF.listEntries(positive, negative));
-  BOOST_CHECK(!rcvdIBF.listEntries(positive, negative));
+  IBLT emptyIBF(size, CompressionScheme::DEFAULT);
+  BOOST_CHECK(!(ownIBF - emptyIBF).canDecode);
+  BOOST_CHECK(!(rcvdIBF - emptyIBF).canDecode);
 }
 
 BOOST_AUTO_TEST_SUITE_END()