#include "torrent-manager.hpp"

#include "file-manifest.hpp"
#include "torrent-file.hpp"

#include <boost/filesystem.hpp>
#include <boost/filesystem/fstream.hpp>

#include <ndn-cxx/data.hpp>
#include <ndn-cxx/security/key-chain.hpp>
#include <ndn-cxx/security/signing-helpers.hpp>
#include <ndn-cxx/util/io.hpp>

#include <algorithm>
#include <iostream>
#include <iterator>
#include <limits>
#include <memory>
#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
| 18 | |
| 19 | namespace fs = boost::filesystem; |
| 20 | |
| 21 | using std::string; |
| 22 | using std::vector; |
| 23 | |
| 24 | namespace { |
| 25 | // TODO(msweatt) Move this to a utility |
| 26 | template<typename T> |
| 27 | static vector<T> |
| 28 | load_directory(const string& dirPath, |
| 29 | ndn::io::IoEncoding encoding = ndn::io::IoEncoding::BASE_64) { |
| 30 | vector<T> structures; |
Mickey Sweatt | 599bfef | 2016-04-05 19:11:20 -0700 | [diff] [blame^] | 31 | std::set<string> fileNames; |
Mickey Sweatt | 527b049 | 2016-03-02 11:07:48 -0800 | [diff] [blame] | 32 | if (fs::exists(dirPath)) { |
Mickey Sweatt | 599bfef | 2016-04-05 19:11:20 -0700 | [diff] [blame^] | 33 | for(fs::recursive_directory_iterator it(dirPath); |
| 34 | it != fs::recursive_directory_iterator(); |
Mickey Sweatt | 527b049 | 2016-03-02 11:07:48 -0800 | [diff] [blame] | 35 | ++it) |
| 36 | { |
Mickey Sweatt | 599bfef | 2016-04-05 19:11:20 -0700 | [diff] [blame^] | 37 | fileNames.insert(it->path().string()); |
| 38 | } |
| 39 | for (const auto& f : fileNames) { |
| 40 | auto data_ptr = ndn::io::load<T>(f, encoding); |
Mickey Sweatt | 527b049 | 2016-03-02 11:07:48 -0800 | [diff] [blame] | 41 | if (nullptr != data_ptr) { |
| 42 | structures.push_back(*data_ptr); |
| 43 | } |
| 44 | } |
| 45 | } |
| 46 | structures.shrink_to_fit(); |
| 47 | return structures; |
| 48 | } |
| 49 | |
| 50 | } // end anonymous |
| 51 | |
| 52 | namespace ndn { |
| 53 | namespace ntorrent { |
| 54 | |
| 55 | // TODO(msweatt) Move this to a utility |
| 56 | static vector<ndn::Data> |
| 57 | packetize_file(const fs::path& filePath, |
| 58 | const ndn::Name& commonPrefix, |
| 59 | size_t dataPacketSize, |
| 60 | size_t subManifestSize, |
| 61 | size_t subManifestNum) |
| 62 | { |
| 63 | BOOST_ASSERT(0 < dataPacketSize); |
| 64 | size_t APPROX_BUFFER_SIZE = std::numeric_limits<int>::max(); // 2 * 1024 * 1024 *1024 |
| 65 | auto file_size = fs::file_size(filePath); |
| 66 | auto start_offset = subManifestNum * subManifestSize * dataPacketSize; |
| 67 | // determine the number of bytes in this submanifest |
| 68 | auto subManifestLength = subManifestSize * dataPacketSize; |
| 69 | auto remainingFileLength = file_size - start_offset; |
| 70 | subManifestLength = remainingFileLength < subManifestLength |
| 71 | ? remainingFileLength |
| 72 | : subManifestLength; |
| 73 | vector<ndn::Data> packets; |
| 74 | packets.reserve(subManifestLength/dataPacketSize + 1); |
| 75 | fs::ifstream fs(filePath, fs::ifstream::binary); |
| 76 | if (!fs) { |
| 77 | BOOST_THROW_EXCEPTION(FileManifest::Error("IO Error when opening" + filePath.string())); |
| 78 | } |
| 79 | // ensure that buffer is large enough to contain whole packets |
| 80 | // buffer size is either the entire file or the smallest number of data packets >= 2 GB |
| 81 | auto buffer_size = |
| 82 | subManifestLength < APPROX_BUFFER_SIZE ? |
| 83 | subManifestLength : |
| 84 | APPROX_BUFFER_SIZE % dataPacketSize == 0 ? |
| 85 | APPROX_BUFFER_SIZE : |
| 86 | APPROX_BUFFER_SIZE + dataPacketSize - (APPROX_BUFFER_SIZE % dataPacketSize); |
| 87 | vector<char> file_bytes; |
| 88 | file_bytes.reserve(buffer_size); |
| 89 | size_t bytes_read = 0; |
| 90 | fs.seekg(start_offset); |
| 91 | while(fs && bytes_read < subManifestLength && !fs.eof()) { |
| 92 | // read the file into the buffer |
| 93 | fs.read(&file_bytes.front(), buffer_size); |
| 94 | auto read_size = fs.gcount(); |
| 95 | if (fs.bad() || read_size < 0) { |
| 96 | BOOST_THROW_EXCEPTION(FileManifest::Error("IO Error when reading" + filePath.string())); |
| 97 | } |
| 98 | bytes_read += read_size; |
| 99 | char *curr_start = &file_bytes.front(); |
| 100 | for (size_t i = 0u; i < buffer_size; i += dataPacketSize) { |
| 101 | // Build a packet from the data |
| 102 | Name packetName = commonPrefix; |
| 103 | packetName.appendSequenceNumber(packets.size()); |
| 104 | Data d(packetName); |
| 105 | auto content_length = i + dataPacketSize > buffer_size ? buffer_size - i : dataPacketSize; |
| 106 | d.setContent(encoding::makeBinaryBlock(tlv::Content, curr_start, content_length)); |
| 107 | curr_start += content_length; |
| 108 | // append to the collection |
| 109 | packets.push_back(d); |
| 110 | } |
| 111 | file_bytes.clear(); |
| 112 | // recompute the buffer_size |
| 113 | buffer_size = |
| 114 | subManifestLength - bytes_read < APPROX_BUFFER_SIZE ? |
| 115 | subManifestLength - bytes_read : |
| 116 | APPROX_BUFFER_SIZE % dataPacketSize == 0 ? |
| 117 | APPROX_BUFFER_SIZE : |
| 118 | APPROX_BUFFER_SIZE + dataPacketSize - (APPROX_BUFFER_SIZE % dataPacketSize); |
| 119 | } |
| 120 | fs.close(); |
| 121 | packets.shrink_to_fit(); |
| 122 | ndn::security::KeyChain key_chain; |
| 123 | // sign all the packets |
| 124 | for (auto& p : packets) { |
| 125 | key_chain.sign(p, signingWithSha256()); |
| 126 | } |
| 127 | return packets; |
| 128 | } |
| 129 | |
| 130 | static vector<TorrentFile> |
| 131 | intializeTorrentSegments(const string& torrentFilePath, const Name& initialSegmentName) |
| 132 | { |
| 133 | security::KeyChain key_chain; |
| 134 | Name currSegmentFullName = initialSegmentName; |
| 135 | vector<TorrentFile> torrentSegments = load_directory<TorrentFile>(torrentFilePath); |
Mickey Sweatt | 599bfef | 2016-04-05 19:11:20 -0700 | [diff] [blame^] | 136 | |
Mickey Sweatt | 527b049 | 2016-03-02 11:07:48 -0800 | [diff] [blame] | 137 | // Starting with the initial segment name, verify the names, loading next name from torrentSegment |
| 138 | for (auto it = torrentSegments.begin(); it != torrentSegments.end(); ++it) { |
| 139 | TorrentFile& segment = *it; |
| 140 | key_chain.sign(segment, signingWithSha256()); |
| 141 | if (segment.getFullName() != currSegmentFullName) { |
| 142 | vector<TorrentFile> correctSegments(torrentSegments.begin(), it); |
| 143 | torrentSegments.swap(correctSegments); |
| 144 | break; |
| 145 | } |
| 146 | // load the next full name |
| 147 | if (nullptr == segment.getTorrentFilePtr()) { |
| 148 | break; |
| 149 | } |
| 150 | currSegmentFullName = *segment.getTorrentFilePtr(); |
| 151 | } |
| 152 | return torrentSegments; |
| 153 | } |
| 154 | |
| 155 | static vector<FileManifest> |
| 156 | intializeFileManifests(const string& manifestPath, vector<TorrentFile> torrentSegments) |
| 157 | { |
| 158 | security::KeyChain key_chain; |
| 159 | |
| 160 | vector<FileManifest> manifests = load_directory<FileManifest>(manifestPath); |
| 161 | |
| 162 | // sign the manifests |
| 163 | std::for_each(manifests.begin(), manifests.end(), |
| 164 | [&key_chain](FileManifest& m){ |
| 165 | key_chain.sign(m,signingWithSha256()); |
| 166 | }); |
| 167 | |
| 168 | // put all names of manifests from the valid torrent files into a set |
| 169 | std::set<ndn::Name> validManifestNames; |
| 170 | for (const auto& segment : torrentSegments) { |
| 171 | const auto& catalog = segment.getCatalog(); |
| 172 | validManifestNames.insert(catalog.begin(), catalog.end()); |
| 173 | } |
| 174 | |
| 175 | // put all names of file manifests from disk into a set |
| 176 | std::set<ndn::Name> loadedManifestNames; |
| 177 | std::for_each(manifests.begin(), manifests.end(), |
| 178 | [&loadedManifestNames](const FileManifest& m){ |
| 179 | loadedManifestNames.insert(m.getFullName()); |
| 180 | }); |
| 181 | |
| 182 | // the set of fileManifests that we have is simply the intersection |
| 183 | std::set<Name> output; |
| 184 | std::set_intersection(validManifestNames.begin() , validManifestNames.end(), |
| 185 | loadedManifestNames.begin(), loadedManifestNames.end(), |
| 186 | std::inserter(output, output.begin())); |
| 187 | |
| 188 | // filter out those manifests that are not in this set |
| 189 | std::remove_if(manifests.begin(), |
| 190 | manifests.end(), |
| 191 | [&output](const FileManifest& m) { |
| 192 | return (output.end() == output.find(m.name())); |
| 193 | }); |
| 194 | |
| 195 | // order the manifests in the same order they are in the torrent |
| 196 | std::vector<Name> catalogNames; |
| 197 | for (const auto& segment : torrentSegments) { |
| 198 | const auto& catalog = segment.getCatalog(); |
| 199 | catalogNames.insert(catalogNames.end(), catalog.begin(), catalog.end()); |
| 200 | } |
| 201 | size_t curr_index = 0; |
| 202 | for (auto name : catalogNames) { |
| 203 | auto it = std::find_if(manifests.begin(), manifests.end(), |
| 204 | [&name](const FileManifest& m) { |
| 205 | return m.getFullName() == name; |
| 206 | }); |
| 207 | if (it != manifests.end()) { |
| 208 | // not already in the correct position |
| 209 | if (it != manifests.begin() + curr_index) { |
| 210 | std::swap(manifests[curr_index], *it); |
| 211 | } |
| 212 | ++curr_index; |
| 213 | } |
| 214 | } |
| 215 | |
| 216 | return manifests; |
| 217 | } |
| 218 | |
| 219 | static vector<Data> |
| 220 | intializeDataPackets(const string& filePath, |
| 221 | const FileManifest manifest, |
| 222 | const TorrentFile& torrentFile) |
| 223 | { |
| 224 | vector<Data> packets; |
Mickey Sweatt | 599bfef | 2016-04-05 19:11:20 -0700 | [diff] [blame^] | 225 | auto subManifestNum = manifest.submanifest_number(); |
Mickey Sweatt | 527b049 | 2016-03-02 11:07:48 -0800 | [diff] [blame] | 226 | |
| 227 | packets = packetize_file(filePath, |
| 228 | manifest.name(), |
| 229 | manifest.data_packet_size(), |
| 230 | manifest.catalog().size(), |
| 231 | subManifestNum); |
| 232 | |
| 233 | auto catalog = manifest.catalog(); |
| 234 | |
| 235 | // Filter out invalid packet names |
| 236 | std::remove_if(packets.begin(), packets.end(), |
| 237 | [&packets, &catalog](const Data& p) { |
| 238 | return catalog.end() == std::find(catalog.begin(), |
| 239 | catalog.end(), |
| 240 | p.getFullName()); |
| 241 | }); |
| 242 | return packets; |
| 243 | } |
| 244 | |
Mickey Sweatt | afda1f1 | 2016-04-04 17:15:11 -0700 | [diff] [blame] | 245 | static std::pair<std::shared_ptr<fs::fstream>, std::vector<bool>> |
| 246 | initializeFileState(const string& dataPath, |
| 247 | const FileManifest& manifest) |
| 248 | { |
| 249 | // construct the file name |
Mickey Sweatt | 599bfef | 2016-04-05 19:11:20 -0700 | [diff] [blame^] | 250 | auto fileName = manifest.file_name(); |
Mickey Sweatt | afda1f1 | 2016-04-04 17:15:11 -0700 | [diff] [blame] | 251 | auto filePath = dataPath + fileName; |
| 252 | vector<bool> fileBitMap(manifest.catalog().size()); |
| 253 | auto fbits = fs::fstream::out | fs::fstream::binary; |
| 254 | // if file exists, use in O/W use concatenate mode |
| 255 | fbits |= fs::exists(filePath) ? fs::fstream::in : fs::fstream::ate; |
| 256 | auto s = std::make_shared<fs::fstream>(filePath, fbits); |
| 257 | if (!*s) { |
| 258 | BOOST_THROW_EXCEPTION(io::Error("Cannot open: " + dataPath)); |
| 259 | } |
| 260 | return std::make_pair(s, fileBitMap); |
| 261 | } |
| 262 | |
Mickey Sweatt | 527b049 | 2016-03-02 11:07:48 -0800 | [diff] [blame] | 263 | void TorrentManager::Initialize() |
| 264 | { |
| 265 | // .../<torrent_name>/torrent-file/<implicit_digest> |
| 266 | string dataPath = ".appdata/" + m_torrentFileName.get(-3).toUri(); |
| 267 | string manifestPath = dataPath +"/manifests"; |
| 268 | string torrentFilePath = dataPath +"/torrent_files"; |
| 269 | |
| 270 | // get the torrent file segments and manifests that we have. |
| 271 | if (!fs::exists(torrentFilePath)) { |
| 272 | return; |
| 273 | } |
| 274 | m_torrentSegments = intializeTorrentSegments(torrentFilePath, m_torrentFileName); |
| 275 | if (m_torrentSegments.empty()) { |
| 276 | return; |
| 277 | } |
| 278 | m_fileManifests = intializeFileManifests(manifestPath, m_torrentSegments); |
| 279 | auto currTorrentFile_it = m_torrentSegments.begin(); |
| 280 | for (const auto& m : m_fileManifests) { |
| 281 | // find the appropriate torrent file |
| 282 | auto currCatalog = currTorrentFile_it->getCatalog(); |
| 283 | while (currCatalog.end() == std::find(currCatalog.begin(), currCatalog.end(), m.getFullName())) |
| 284 | { |
| 285 | ++currTorrentFile_it; |
| 286 | currCatalog = currTorrentFile_it->getCatalog(); |
| 287 | } |
| 288 | // construct the file name |
Mickey Sweatt | 599bfef | 2016-04-05 19:11:20 -0700 | [diff] [blame^] | 289 | auto fileName = m.file_name(); |
Mickey Sweatt | afda1f1 | 2016-04-04 17:15:11 -0700 | [diff] [blame] | 290 | fs::path filePath = m_dataPath + fileName; |
Mickey Sweatt | 527b049 | 2016-03-02 11:07:48 -0800 | [diff] [blame] | 291 | // If there are any valid packets, add corresponding state to manager |
Mickey Sweatt | afda1f1 | 2016-04-04 17:15:11 -0700 | [diff] [blame] | 292 | if (!fs::exists(filePath)) { |
| 293 | boost::filesystem::create_directories(filePath.parent_path()); |
| 294 | continue; |
| 295 | } |
| 296 | auto packets = intializeDataPackets(filePath.string(), m, *currTorrentFile_it); |
Mickey Sweatt | 527b049 | 2016-03-02 11:07:48 -0800 | [diff] [blame] | 297 | if (!packets.empty()) { |
Mickey Sweatt | afda1f1 | 2016-04-04 17:15:11 -0700 | [diff] [blame] | 298 | m_fileStates[m.getFullName()] = initializeFileState(m_dataPath, m); |
| 299 | auto& fileBitMap = m_fileStates[m.getFullName()].second; |
Mickey Sweatt | 527b049 | 2016-03-02 11:07:48 -0800 | [diff] [blame] | 300 | auto read_it = packets.begin(); |
| 301 | size_t i = 0; |
Mickey Sweatt | afda1f1 | 2016-04-04 17:15:11 -0700 | [diff] [blame] | 302 | for (auto name : m.catalog()) { |
Mickey Sweatt | 527b049 | 2016-03-02 11:07:48 -0800 | [diff] [blame] | 303 | if (name == read_it->getFullName()) { |
| 304 | ++read_it; |
Mickey Sweatt | afda1f1 | 2016-04-04 17:15:11 -0700 | [diff] [blame] | 305 | fileBitMap[i] = true; |
Mickey Sweatt | 527b049 | 2016-03-02 11:07:48 -0800 | [diff] [blame] | 306 | } |
| 307 | ++i; |
| 308 | } |
| 309 | for (const auto& d : packets) { |
| 310 | seed(d); |
| 311 | } |
Mickey Sweatt | 527b049 | 2016-03-02 11:07:48 -0800 | [diff] [blame] | 312 | } |
| 313 | } |
| 314 | for (const auto& t : m_torrentSegments) { |
| 315 | seed(t); |
| 316 | } |
| 317 | for (const auto& m : m_fileManifests) { |
| 318 | seed(m); |
| 319 | } |
Mickey Sweatt | afda1f1 | 2016-04-04 17:15:11 -0700 | [diff] [blame] | 320 | } |
Mickey Sweatt | 527b049 | 2016-03-02 11:07:48 -0800 | [diff] [blame] | 321 | |
Mickey Sweatt | afda1f1 | 2016-04-04 17:15:11 -0700 | [diff] [blame] | 322 | bool TorrentManager::writeData(const Data& packet) |
| 323 | { |
| 324 | // find correct manifest |
| 325 | const auto& packetName = packet.getName(); |
| 326 | auto manifest_it = std::find_if(m_fileManifests.begin(), m_fileManifests.end(), |
| 327 | [&packetName](const FileManifest& m) { |
| 328 | return m.getName().isPrefixOf(packetName); |
| 329 | }); |
| 330 | if (m_fileManifests.end() == manifest_it) { |
| 331 | return false; |
| 332 | } |
| 333 | // get file state out |
| 334 | auto& fileState = m_fileStates[manifest_it->getFullName()]; |
| 335 | // if there is no open stream to the file |
| 336 | if (nullptr == fileState.first) { |
| 337 | fileState = initializeFileState(m_dataPath, *manifest_it); |
| 338 | } |
Mickey Sweatt | 599bfef | 2016-04-05 19:11:20 -0700 | [diff] [blame^] | 339 | auto packetNum = packetName.get(packetName.size() - 1).toSequenceNumber(); |
Mickey Sweatt | afda1f1 | 2016-04-04 17:15:11 -0700 | [diff] [blame] | 340 | // if we already have the packet, do not rewrite it. |
| 341 | if (fileState.second[packetNum]) { |
| 342 | return false; |
| 343 | } |
| 344 | auto packetOffset = packetNum * manifest_it->data_packet_size(); |
| 345 | // write data to disk |
| 346 | fileState.first->seekg(packetOffset); |
| 347 | try { |
| 348 | auto content = packet.getContent(); |
| 349 | std::vector<char> data(content.value_begin(), content.value_end()); |
| 350 | fileState.first->write(&data[0], data.size()); |
| 351 | } |
| 352 | catch (io::Error &e) { |
| 353 | std::cerr << e.what() << std::endl; |
| 354 | return false; |
| 355 | } |
| 356 | // update bitmap |
| 357 | fileState.second[packetNum] = true; |
| 358 | return true; |
Mickey Sweatt | 527b049 | 2016-03-02 11:07:48 -0800 | [diff] [blame] | 359 | } |
| 360 | |
Mickey Sweatt | 599bfef | 2016-04-05 19:11:20 -0700 | [diff] [blame^] | 361 | bool TorrentManager::writeTorrentSegment(const TorrentFile& segment, const std::string& path) |
| 362 | { |
| 363 | // validate that this torrent segment belongs to our torrent |
| 364 | auto torrentPrefix = m_torrentFileName.getSubName(0, m_torrentFileName.size() - 1); |
| 365 | if (!torrentPrefix.isPrefixOf(segment.getName())) { |
| 366 | return false; |
| 367 | } |
| 368 | |
| 369 | auto segmentNum = segment.getSegmentNumber(); |
| 370 | // check if we already have it |
| 371 | if (m_torrentSegments.end() != std::find(m_torrentSegments.begin(), m_torrentSegments.end(), |
| 372 | segment)) |
| 373 | { |
| 374 | return false; |
| 375 | } |
| 376 | // write to disk at path |
| 377 | if (!fs::exists(path)) { |
| 378 | fs::create_directories(path); |
| 379 | } |
| 380 | auto filename = path + to_string(segmentNum); |
| 381 | // if there is already a file on disk for this torrent segment, determine if we should override |
| 382 | if (fs::exists(filename)) { |
| 383 | auto segmentOnDisk_ptr = io::load<TorrentFile>(filename); |
| 384 | if (nullptr != segmentOnDisk_ptr && *segmentOnDisk_ptr == segment) { |
| 385 | return false; |
| 386 | } |
| 387 | } |
| 388 | io::save(segment, filename); |
| 389 | // add to collection |
| 390 | auto it = std::find_if(m_torrentSegments.begin(), m_torrentSegments.end(), |
| 391 | [segmentNum](const TorrentFile& t){ |
| 392 | return t.getSegmentNumber() > segmentNum; |
| 393 | }); |
| 394 | m_torrentSegments.insert(it, segment); |
| 395 | return true; |
| 396 | } |
| 397 | |
| 398 | bool TorrentManager::writeFileManifest(const FileManifest& manifest, const std::string& path) |
| 399 | { |
| 400 | auto subManifestNum = manifest.submanifest_number(); |
| 401 | fs::path filename = path + manifest.file_name() + "/" + to_string(subManifestNum); |
| 402 | // check if we already have it |
| 403 | if (m_fileManifests.end() != std::find(m_fileManifests.begin(), m_fileManifests.end(), |
| 404 | manifest)) |
| 405 | { |
| 406 | return false; |
| 407 | } |
| 408 | |
| 409 | // write to disk at path |
| 410 | if (!fs::exists(filename.parent_path())) { |
| 411 | boost::filesystem::create_directories(filename.parent_path()); |
| 412 | } |
| 413 | // if there is already a file on disk for this torrent segment, determine if we should override |
| 414 | if (fs::exists(filename)) { |
| 415 | auto submanifestOnDisk_ptr = io::load<FileManifest>(filename.string()); |
| 416 | if (nullptr != submanifestOnDisk_ptr && *submanifestOnDisk_ptr == manifest) { |
| 417 | return false; |
| 418 | } |
| 419 | } |
| 420 | io::save(manifest, filename.string()); |
| 421 | // add to collection |
| 422 | // add to collection |
| 423 | auto it = std::find_if(m_fileManifests.begin(), m_fileManifests.end(), |
| 424 | [&manifest](const FileManifest& m){ |
| 425 | return m.file_name() > manifest.file_name() |
| 426 | || (m.file_name() == manifest.file_name() |
| 427 | && (m.submanifest_number() > manifest.submanifest_number())); |
| 428 | }); |
| 429 | m_fileManifests.insert(it, manifest); |
| 430 | return true; |
| 431 | } |
| 432 | |
// Called by Initialize() for every data packet, torrent segment and file manifest that was
// restored from disk. Currently a stub: the body is empty, so calling it has no effect.
// NOTE(review): presumably meant to make the given packet available to other peers - confirm.
void TorrentManager::seed(const Data& data) const {
  // TODO(msweatt) IMPLEMENT ME
}
| 436 | |
| 437 | } // end ntorrent |
| 438 | } // end ndn |