experiment: Add multiple failure experiment

Change-Id: Ia7e856606ec483f0bb63840a0e40399058e5b6ac
diff --git a/bin/minindn b/bin/minindn
index 9c2d240..1c92097 100755
--- a/bin/minindn
+++ b/bin/minindn
@@ -7,6 +7,7 @@
 from mininet.link import TCLink
 from mininet.conf_parser import parse_hosts, parse_links
 
+from ndn.experiments.multiple_failure_experiment import MultipleFailureExperiment
 from ndn.experiments.pingall_experiment import PingallExperiment
 from ndn.experiments.failure_experiment import FailureExperiment
 from ndn.ndn_host import NdnHost, CpuLimitedNdnHost
@@ -32,6 +33,7 @@
     pingall = False
     hr = False
     failure = False
+    isMultipleFailure = False
 
     parser = optparse.OptionParser(usage)
 
@@ -53,6 +55,9 @@
     parser.add_option("--failure", action="store_true", dest="failure",
     help="Run failure experiment, specify the number of pings using pingall")
 
+    parser.add_option("--multiple-failure", action="store_true", dest="isMultipleFailure",
+    help="Run multiple failure experiment; each node will fail and recover once")
+
     parser.add_option("--no-cli", action="store_false", dest="isCliEnabled",
         help="Run experiments and exit without showing the command line interface")
 
@@ -64,6 +69,7 @@
     hr = options.hr
     faces = options.faces
     failure = options.failure
+    isMultipleFailure = options.isMultipleFailure
     isCliEnabled = options.isCliEnabled
 
     if ctime is None:
@@ -77,7 +83,7 @@
     else:
         file = arg[0]
 
-    return file, testbed, pingall, ctime, hr, faces, failure, isCliEnabled
+    return file, testbed, pingall, ctime, hr, faces, failure, isMultipleFailure, isCliEnabled
 
 class NdnTopo(Topo):
     def __init__(self, conf_arq, **opts):
@@ -105,7 +111,7 @@
 
         info('Parse of ' + conf_arq + ' done.\n')
 
-def execute(template_file='minindn.conf', testbed=False, pingall=None, ctime=None, hr=False, faces=3, failure=False, isCliEnabled=True):
+def execute(template_file='minindn.conf', testbed=False, pingall=None, ctime=None, hr=False, faces=3, failure=False, isMultipleFailure=False, isCliEnabled=True):
     "Create a network based on template_file"
 
     home = expanduser("~")
@@ -163,7 +169,10 @@
 
     nodes = nodes[0:-1]
 
-    if failure is True:
+    if isMultipleFailure is True:
+        test = MultipleFailureExperiment(net, nodes, ctime, Nfd.STRATEGY_BEST_ROUTE_V3)
+        test.start()
+    elif failure is True:
         test = FailureExperiment(net, nodes, ctime, Nfd.STRATEGY_BEST_ROUTE_V3)
         test.start()
     elif pingall is not None:
@@ -183,7 +192,7 @@
 if __name__ == '__main__':
     hosts_conf = []
     links_conf = []
-    template, testbed, pingall, ctime, hr, faces, failure, isCliEnabled = parse_args()
+    template, testbed, pingall, ctime, hr, faces, failure, isMultipleFailure, isCliEnabled = parse_args()
 
     setLogLevel('info')
-    execute(template, testbed, pingall, ctime, hr, faces, failure, isCliEnabled)
+    execute(template, testbed, pingall, ctime, hr, faces, failure, isMultipleFailure, isCliEnabled)
diff --git a/ndn/experiments/experiment.py b/ndn/experiments/experiment.py
index 9487e22..80fba81 100644
--- a/ndn/experiments/experiment.py
+++ b/ndn/experiments/experiment.py
@@ -58,13 +58,16 @@
                 host.nfd.stop()
             sys.exit(1)
 
+    def ping(self, source, dest, nPings):
+        # "&" backgrounds ndnping so pings run in parallel; ">>" appends to the per-destination file
+        print "Scheduling ping(s) from %s to %s" % (source.name, dest.name)
+        source.cmd("ndnping -t -c "+ str(nPings) + " /ndn/edu/" + dest.name + " >> ping-data/" + dest.name + ".txt &")
+        time.sleep(0.2)  # short pause before the caller schedules the next ping
+
     def startPings(self):
         for host in self.net.hosts:
             for other in self.net.hosts:
                 # Do not ping self
                 if host.name != other.name:
-                    # Use "&" to run in background and perform parallel pings
-                    print "Scheduling ping(s) from %s to %s" % (host.name, other.name)
-                    host.cmd("ndnping -t -c "+ str(self.nPings) + " /ndn/edu/" + other.name + " > ping-data/" + other.name + ".txt &")
-                    time.sleep(0.2)
+                    self.ping(host, other, self.nPings)
 
diff --git a/ndn/experiments/multiple_failure_experiment.py b/ndn/experiments/multiple_failure_experiment.py
new file mode 100644
index 0000000..db7d536
--- /dev/null
+++ b/ndn/experiments/multiple_failure_experiment.py
@@ -0,0 +1,67 @@
+#!/usr/bin/python
+
+from ndn.experiments.experiment import Experiment
+from ndn.nlsr import Nlsr
+
+import time
+
+class MultipleFailureExperiment(Experiment):
+    """Experiment that fails and recovers every host once, one host at a time."""
+
+    def __init__(self, net, nodes, convergenceTime, strategy):
+        """Set the phase durations and size the initial ping run to cover them."""
+        self.PING_COLLECTION_TIME_BEFORE_FAILURE = 60
+
+        self.FAILURE_INTERVAL = 60
+        self.RECOVERY_INTERVAL = 60
+
+        # This is the number of pings required to make it through the full experiment
+        nInitialPings = self.PING_COLLECTION_TIME_BEFORE_FAILURE + len(net.hosts)*(self.FAILURE_INTERVAL + self.RECOVERY_INTERVAL)
+        print("Scheduling with %s initial pings" % nInitialPings)
+
+        Experiment.__init__(self, net, nodes, convergenceTime, nInitialPings, strategy)
+
+    def failNode(self, host):
+        """Take host down by stopping its NFD instance."""
+        print("Bringing %s down" % host.name)
+        host.nfd.stop()
+
+    def recoverNode(self, host):
+        """Start NFD and NLSR on host, reapply the strategy, relaunch its ping server."""
+        print("Bringing %s up" % host.name)
+        host.nfd.start()
+        host.nlsr.start()
+        host.nfd.setStrategy("/ndn/edu", self.strategy)
+        host.cmd("ndnpingserver /ndn/edu/" + str(host) + " > ping-server &")
+
+    def run(self):
+        """Start all pings, then fail and recover each host and restart its pings."""
+        self.startPings()
+        # After the pings are scheduled, collect pings for 1 minute
+        time.sleep(self.PING_COLLECTION_TIME_BEFORE_FAILURE)
+
+        nNodesRemainingToFail = len(self.net.hosts)
+
+        # Fail and recover each node
+        for host in self.net.hosts:
+            self.failNode(host)
+            # Stay in failure state for FAILURE_INTERVAL
+            time.sleep(self.FAILURE_INTERVAL)
+            self.recoverNode(host)
+
+            # This host has had its failure, so count it out before sizing the new
+            # ping run; otherwise the pings outlast the test by one fail/recover cycle
+            nNodesRemainingToFail = nNodesRemainingToFail - 1
+            nPings = self.RECOVERY_INTERVAL + nNodesRemainingToFail*(self.FAILURE_INTERVAL + self.RECOVERY_INTERVAL)
+
+            # Wait for NFD and NLSR to fully recover
+            time.sleep(1)
+            print("Scheduling with %s remaining pings" % nPings)
+
+            # Restart pings
+            for other in self.net.hosts:
+                # Do not ping self
+                if host.name != other.name:
+                    self.ping(host, other, nPings)
+
+            time.sleep(self.RECOVERY_INTERVAL)