Meki Cherkaoui | f441d3a | 2012-04-22 15:17:52 -0700 | [diff] [blame] | 1 | /** |
| 2 | * Simple xml 2 javascript object parser based on sax.js |
| 3 | * |
| 4 | * https://github.com/emberfeather/node-xml2object |
| 5 | */ |
| 6 | var emitter = require('events').EventEmitter; |
| 7 | var fs = require('fs'); |
| 8 | var sax = require('./sax'); |
| 9 | var util = require('util'); |
| 10 | |
/**
 * Streaming XML-to-object parser built on a strict sax stream.
 *
 * For every element whose name is listed in `elements`, an object is built
 * from the element's attributes, text and descendants and emitted once the
 * element closes. Elements outside a tracked element are ignored.
 *
 * Shape of the built objects:
 *   - an element starts out as its attributes object;
 *   - text content is trimmed and accumulated under the '$t' key;
 *   - a child holding nothing but text is collapsed to that string;
 *   - a child with no attributes, text or children is dropped;
 *   - repeated child names are gathered into an array, in document order.
 *
 * Events emitted on the instance:
 *   'object' (name, object) - a tracked element has been fully read.
 *   'error'  (error)        - rebroadcast from the sax stream; the sax parser
 *                             error is then cleared and parsing resumed. As
 *                             with any EventEmitter, this throws when no
 *                             'error' listener is registered.
 *   'end'                   - the file read stream has ended.
 *
 * @constructor
 * @param {string} xmlFile Path passed to fs.createReadStream.
 * @param {string[]} [elements] Names of the elements to emit as objects.
 */
var xml2object = function(xmlFile, elements) {
  elements = elements || [];

  this._hasStarted = false;

  var self = this;
  var hasOwn = Object.prototype.hasOwnProperty;
  var currentObject;
  var inObject = false;
  var inObjectName;
  var ancestors = [];

  this.fileStream = fs.createReadStream(xmlFile);
  this.saxStream = sax.createStream(true);

  this.saxStream.on("opentag", function (args) {
    if (!inObject) {
      // Outside a tracked element, only a tracked name is of interest.
      if (elements.indexOf(args.name) < 0) {
        return;
      }

      // Start tracking a new object. The empty wrapper acts as the parent
      // of the tracked element so it can be handled like any other child.
      inObject = true;
      inObjectName = args.name;
      currentObject = {};
    }

    var parent = currentObject;
    var node = args.attributes;

    // Own-property check: an element named e.g. "toString" must not be
    // mistaken for an existing child because of Object.prototype.
    if (!hasOwn.call(parent, args.name)) {
      parent[args.name] = node;
    } else if (Array.isArray(parent[args.name])) {
      parent[args.name].push(node);
    } else {
      // Second occurrence: promote the existing value to an array.
      parent[args.name] = [parent[args.name], node];
    }

    // Remember the parent and descend into the element just opened.
    ancestors.push(parent);
    currentObject = node;
  });

  this.saxStream.on("text", function (data) {
    if (!inObject) {
      return;
    }

    data = data.trim();

    if (!data.length) {
      return;
    }

    currentObject['$t'] = (currentObject['$t'] || "") + data;
  });

  this.saxStream.on("closetag", function (name) {
    if (!inObject) {
      return;
    }

    // The tracked element itself is closing only when the wrapper is the
    // sole entry left on the stack; a nested element that happens to share
    // the tracked name sits deeper and must be treated as a regular child.
    if (ancestors.length === 1 && inObjectName === name) {
      self.emit('object', name, currentObject);

      inObject = false;
      ancestors = [];

      return;
    }

    if (!ancestors.length) {
      // Defensive: nothing to return to.
      currentObject = {};
      return;
    }

    var ancestor = ancestors.pop();
    var keys = Object.keys(currentObject);
    var siblings = ancestor[name];

    if (keys.length === 1 && keys[0] === '$t') {
      // Convert text-only objects into just the text. Inside an array the
      // element being closed is always the last entry.
      if (Array.isArray(siblings)) {
        siblings[siblings.length - 1] = currentObject['$t'];
      } else {
        ancestor[name] = currentObject['$t'];
      }
    } else if (!keys.length) {
      // Drop empty elements. For a repeated name only the empty entry is
      // removed, never the siblings collected so far.
      if (Array.isArray(siblings)) {
        siblings.pop();

        if (siblings.length === 1) {
          // Back to a single value, as if the empty element never appeared.
          ancestor[name] = siblings[0];
        }
      } else {
        delete ancestor[name];
      }
    }

    currentObject = ancestor;
  });

  // Rebroadcast the error on this parser (not on the sax stream, which
  // would re-enter this very handler) and keep going.
  this.saxStream.on("error", function (e) {
    self.emit('error', e);

    // Clear the error and resume.
    self.saxStream._parser.error = null;
    self.saxStream._parser.resume();
  });

  // Rebroadcast the end of the file read.
  this.fileStream.on("end", function() {
    self.emit("end");
  });
};

util.inherits(xml2object, emitter);
| 136 | |
/**
 * Kick off parsing by piping the file stream into the sax stream.
 *
 * Emits 'start' on this instance right before the pipe is set up. Only the
 * first call has any effect; later calls return without doing anything.
 */
xml2object.prototype.start = function() {
  if (!this._hasStarted) {
    // Flip the flag first so the streams are only ever piped once.
    this._hasStarted = true;

    this.emit('start');

    // Start the streaming!
    this.fileStream.pipe(this.saxStream);
  }
};
| 150 | |
| 151 | |
| 152 | //TEST/////////////////////////////////////////////////////////////////////////////// |
| 153 | //var xml2object = require('xml2object'); |
| 154 | |
| 155 | // Create a new xml parser with an array of xml elements to look for |
| 156 | /*var parser = new xml2object('./src/encoding/ContentObject1.xml', [ 'ContentObject' ]); |
| 157 | |
| 158 | // Bind to the object event to work with the objects found in the XML file |
| 159 | parser.on('object', function(name, obj) { |
| 160 | console.log('Found an object: %s', name); |
| 161 | console.log(obj); |
| 162 | }); |
| 163 | |
| 164 | // Bind to the file end event to tell when the file is done being streamed |
| 165 | parser.on('end', function(name, obj) { |
| 166 | console.log('Finished parsing xml!'); |
| 167 | }); |
| 168 | |
| 169 | // Start parsing the XML |
| 170 | parser.start();*/ |